Spaces:
Sleeping
Sleeping
File size: 3,941 Bytes
e01c07b 9bf5cf4 e01c07b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
"""Pinterest-specific search engine implementation."""
import re
import time
from typing import List, Optional
from urllib.parse import urlparse

from duckduckgo_search import DDGS

from .base import BaseSearchEngine, ImageResult, SearchPlatform
class PinterestSearchEngine(BaseSearchEngine):
    """Search engine for Pinterest images.

    Runs DuckDuckGo image searches restricted to ``site:pinterest.com`` and
    keeps only results whose image URL is hosted on one of the domains listed
    in ``pinterest_domains``.
    """

    # Score assigned when the URL contains the given width segment
    # (higher resolution = higher score). Checked in insertion order.
    _SIZE_SCORES = {
        "/736x/": 1.0,
        "/564x/": 0.9,
        "/474x/": 0.8,
        "/236x/": 0.6,
    }

    # Matches a size path segment such as ``/236x/`` or ``/564x314/``.
    _SIZE_SEGMENT_RE = re.compile(r"/(\d+)x(\d*)/")

    # Pause between consecutive DuckDuckGo queries (rate limiting).
    _RATE_LIMIT_SECONDS = 2

    def __init__(self):
        """Register the engine for the Pinterest platform and its domains."""
        super().__init__(SearchPlatform.PINTEREST)
        self.pinterest_domains = {
            "pinterest.com",
            "pinimg.com",
            "i.pinimg.com",
            "media.pinimg.com",
            "s-media-cache-ak0.pinimg.com",
        }

    def search(self, query: str, max_results: int = 20) -> List[ImageResult]:
        """Search Pinterest for tattoo images.

        Two phrasings of the query are sent to DuckDuckGo image search, each
        asking for roughly half of ``max_results``. Results are filtered
        with ``is_valid_url`` and de-duplicated by image URL.

        Args:
            query: Free-text search terms; "tattoo" is added automatically.
            max_results: Upper bound on the number of results returned.
                Non-positive values yield an empty list.

        Returns:
            At most ``max_results`` image results. Failures are logged via
            ``self.logger`` and whatever was collected so far is returned.
        """
        if max_results <= 0:
            return []

        results: List[ImageResult] = []
        seen_urls = set()
        pinterest_queries = [
            f"site:pinterest.com {query} tattoo",
            f"site:pinterest.com tattoo {query}",
        ]
        # Ceiling division so odd limits (and max_results == 1) never ask a
        # query for fewer results than its share.
        per_query_limit = -(-max_results // len(pinterest_queries))

        try:
            with DDGS() as ddgs:
                for i, pinterest_query in enumerate(pinterest_queries):
                    # Skip the sleep and the extra request once we have enough.
                    if len(results) >= max_results:
                        break
                    if i > 0:
                        time.sleep(self._RATE_LIMIT_SECONDS)  # Rate limiting
                    try:
                        search_results = ddgs.images(
                            pinterest_query,
                            region="wt-wt",
                            safesearch="off",
                            size="Medium",
                            max_results=per_query_limit,
                        )
                        for result in search_results or ():
                            url = result.get("image")
                            if not url or url in seen_urls:
                                continue
                            if not self.is_valid_url(url):
                                continue
                            seen_urls.add(url)
                            results.append(self._create_image_result(url, result))
                            if len(results) >= max_results:
                                break
                    except Exception as e:
                        # Best effort: one failing query must not discard the
                        # results gathered from the other one.
                        self.logger.warning(f"Pinterest query failed: {e}")
                        continue
        except Exception as e:
            self.logger.error(f"Pinterest search failed: {e}")

        return results[:max_results]

    def is_valid_url(self, url: str) -> bool:
        """Check if URL is hosted on one of the Pinterest domains.

        The hostname is compared (case-insensitively) against
        ``pinterest_domains``; it must equal a listed domain or be a
        subdomain of one. A domain name that merely appears in the path or
        query string, or as part of a longer unrelated hostname, does not
        count.

        Args:
            url: Candidate image URL, with or without a scheme.

        Returns:
            True when the URL's host belongs to a Pinterest domain.
        """
        if not isinstance(url, str) or not url:
            return False

        candidate = url.strip().lower()
        try:
            parsed = urlparse(candidate)
            if not parsed.netloc:
                # Scheme-less input such as "i.pinimg.com/736x/a.jpg": reparse
                # as a network-path reference so the host is recognised.
                parsed = urlparse("//" + candidate.lstrip("/"))
            host = parsed.hostname
        except ValueError:
            # urlparse rejects malformed authorities (e.g. bad IPv6 brackets).
            return False

        if not host:
            return False
        return any(
            host == domain or host.endswith("." + domain)
            for domain in self.pinterest_domains
        )

    def get_quality_score(self, url: str, **kwargs) -> float:
        """Calculate Pinterest-specific quality score.

        Starts from the base class score. If the URL contains one of the
        known width segments, the score is replaced by that segment's value.
        URLs containing ``i.pinimg.com`` get an extra 0.1. The result is
        capped at 1.0.

        Args:
            url: Image URL to score.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Quality score, never greater than 1.0.
        """
        score = super().get_quality_score(url)

        # Pinterest size indicators (higher resolution = higher score)
        for pattern, bonus in self._SIZE_SCORES.items():
            if pattern in url:
                score = bonus
                break

        # Pinterest CDN reliability bonus
        if "i.pinimg.com" in url:
            score += 0.1

        return min(1.0, score)

    def _create_image_result(self, url: str, raw_result: dict) -> ImageResult:
        """Create ImageResult from raw Pinterest search result.

        Args:
            url: Image URL taken from the raw result.
            raw_result: Raw search result mapping; its "title" and "source"
                entries are copied when present.

        Returns:
            ImageResult carrying the URL, platform, quality score, any
            dimensions parsed from the URL, title and source URL.
        """
        dimensions = self._extract_dimensions(url)
        return ImageResult(
            url=url,
            platform=self.platform,
            quality_score=self.get_quality_score(url),
            width=dimensions.get("width"),
            height=dimensions.get("height"),
            title=raw_result.get("title"),
            source_url=raw_result.get("source"),
        )

    def _extract_dimensions(self, url: str) -> dict:
        """Extract image dimensions from Pinterest URL patterns.

        Args:
            url: Image URL that may contain a size segment.

        Returns:
            ``{"width": int, "height": int or None}`` when a segment such as
            ``/236x/`` or ``/564x314/`` is found (height is None when the
            segment has no digits after the "x"); otherwise an empty dict.
        """
        size_match = self._SIZE_SEGMENT_RE.search(url)
        if not size_match:
            return {}

        width = int(size_match.group(1))
        height = int(size_match.group(2)) if size_match.group(2) else None
        return {"width": width, "height": height}