"""Pinterest-specific search engine implementation.""" import re import time from typing import List, Optional from duckduckgo_search import DDGS from .base import BaseSearchEngine, ImageResult, SearchPlatform class PinterestSearchEngine(BaseSearchEngine): """Search engine for Pinterest images.""" def __init__(self): super().__init__(SearchPlatform.PINTEREST) self.pinterest_domains = { "pinterest.com", "pinimg.com", "i.pinimg.com", "media.pinimg.com", "s-media-cache-ak0.pinimg.com" } def search(self, query: str, max_results: int = 20) -> List[ImageResult]: """Search Pinterest for tattoo images.""" results = [] pinterest_queries = [ f"site:pinterest.com {query} tattoo", f"site:pinterest.com tattoo {query}", ] try: with DDGS() as ddgs: for i, pinterest_query in enumerate(pinterest_queries): if i > 0: time.sleep(2) # Rate limiting try: search_results = ddgs.images( pinterest_query, region="wt-wt", safesearch="off", size="Medium", max_results=max_results // 2 ) for result in search_results: url = result.get("image") if url and self.is_valid_url(url): image_result = self._create_image_result(url, result) results.append(image_result) if len(results) >= max_results: break except Exception as e: self.logger.warning(f"Pinterest query failed: {e}") continue except Exception as e: self.logger.error(f"Pinterest search failed: {e}") return results[:max_results] def is_valid_url(self, url: str) -> bool: """Check if URL is from Pinterest domains.""" return any(domain in url.lower() for domain in self.pinterest_domains) def get_quality_score(self, url: str, **kwargs) -> float: """Calculate Pinterest-specific quality score.""" score = super().get_quality_score(url) # Pinterest size indicators (higher resolution = higher score) size_patterns = { "/736x/": 1.0, "/564x/": 0.9, "/474x/": 0.8, "/236x/": 0.6 } for pattern, bonus in size_patterns.items(): if pattern in url: score = bonus break # Pinterest CDN reliability bonus if "i.pinimg.com" in url: score += 0.1 return min(1.0, score) def _create_image_result(self, url: str, raw_result: dict) -> ImageResult: """Create ImageResult from raw Pinterest search result.""" dimensions = self._extract_dimensions(url) return ImageResult( url=url, platform=self.platform, quality_score=self.get_quality_score(url), width=dimensions.get("width"), height=dimensions.get("height"), title=raw_result.get("title"), source_url=raw_result.get("source") ) def _extract_dimensions(self, url: str) -> dict: """Extract image dimensions from Pinterest URL patterns.""" # Pinterest URL pattern: .../236x/... or .../564x314/... size_match = re.search(r"/(\d+)x(\d*)/", url) if size_match: width = int(size_match.group(1)) height = int(size_match.group(2)) if size_match.group(2) else None return {"width": width, "height": height} return {}