File size: 3,941 Bytes
e01c07b
 
 
 
 
 
9bf5cf4
e01c07b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Pinterest-specific search engine implementation."""

import re
import time
from typing import List, Optional
from urllib.parse import urlparse

from duckduckgo_search import DDGS

from .base import BaseSearchEngine, ImageResult, SearchPlatform


class PinterestSearchEngine(BaseSearchEngine):
    """Search engine for Pinterest images.

    Runs DuckDuckGo image searches restricted to ``site:pinterest.com`` and
    keeps only the results whose image URL is hosted on one of the known
    Pinterest domains.
    """

    def __init__(self):
        super().__init__(SearchPlatform.PINTEREST)
        # Domains accepted by is_valid_url(); subdomains of these match too.
        self.pinterest_domains = {
            "pinterest.com",
            "pinimg.com",
            "i.pinimg.com",
            "media.pinimg.com",
            "s-media-cache-ak0.pinimg.com"
        }

    def search(self, query: str, max_results: int = 20) -> List[ImageResult]:
        """Search Pinterest for tattoo images.

        Two query phrasings are issued one after the other (with a pause in
        between for rate limiting). Results from both are merged, duplicate
        image URLs are dropped, and the list is capped at ``max_results``.

        Args:
            query: Free-text search terms; "tattoo" is added automatically.
            max_results: Upper bound on the number of results returned.
                Non-positive values yield an empty list.

        Returns:
            Up to ``max_results`` ImageResult objects. Failures of individual
            queries or of the whole search are logged, not raised, so the
            list may be shorter than requested or empty.
        """
        if max_results <= 0:
            return []

        results: List[ImageResult] = []
        seen_urls = set()

        pinterest_queries = [
            f"site:pinterest.com {query} tattoo",
            f"site:pinterest.com tattoo {query}",
        ]

        # Ceiling division: the two queries together can still reach an odd
        # max_results, and max_results=1 asks for one image instead of zero.
        per_query = (max_results + 1) // 2

        try:
            with DDGS() as ddgs:
                for i, pinterest_query in enumerate(pinterest_queries):
                    if i > 0:
                        time.sleep(2)  # Rate limiting

                    try:
                        search_results = ddgs.images(
                            pinterest_query,
                            region="wt-wt",
                            safesearch="off",
                            size="Medium",
                            max_results=per_query
                        )

                        for result in search_results:
                            url = result.get("image")
                            if not url or url in seen_urls:
                                # Missing URL, or already returned by an
                                # earlier (near-identical) query.
                                continue
                            if not self.is_valid_url(url):
                                continue
                            seen_urls.add(url)
                            results.append(self._create_image_result(url, result))

                        if len(results) >= max_results:
                            break

                    except Exception as e:
                        # Best effort: one failing phrasing must not discard
                        # what the other one found.
                        self.logger.warning(f"Pinterest query failed: {e}")
                        continue

        except Exception as e:
            self.logger.error(f"Pinterest search failed: {e}")

        return results[:max_results]

    def is_valid_url(self, url: str) -> bool:
        """Check if URL is from Pinterest domains.

        The decision is based on the URL's hostname, not on a substring of
        the whole URL, so a Pinterest domain that only appears in the path
        or query string of a foreign URL does not count.

        Args:
            url: Candidate URL, with or without a scheme.

        Returns:
            True when the hostname equals one of ``self.pinterest_domains``
            or is a subdomain of one; False otherwise, including for empty
            or unparsable input.
        """
        if not url:
            return False

        candidate = url.strip().lower()
        try:
            host = urlparse(candidate).hostname
            if host is None:
                # urlparse only recognises a host after "//"; retry so that
                # scheme-less URLs like "i.pinimg.com/736x/a.jpg" still work.
                host = urlparse("//" + candidate).hostname
        except ValueError:
            # Raised by urlparse for malformed netlocs (e.g. bad IPv6 brackets).
            return False

        if not host:
            return False

        return any(
            host == domain or host.endswith("." + domain)
            for domain in self.pinterest_domains
        )

    def get_quality_score(self, url: str, **kwargs) -> float:
        """Calculate Pinterest-specific quality score.

        Starts from the base class score. If the URL contains one of the
        known size path segments, that segment's score replaces the base
        score. URLs containing ``i.pinimg.com`` then gain 0.1, and the
        result is capped at 1.0.

        Args:
            url: Image URL to score.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The score, never greater than 1.0.
        """
        score = super().get_quality_score(url)

        # Pinterest size indicators (higher resolution = higher score)
        size_patterns = {
            "/736x/": 1.0,
            "/564x/": 0.9,
            "/474x/": 0.8,
            "/236x/": 0.6
        }

        for pattern, size_score in size_patterns.items():
            if pattern in url:
                # A recognised size segment overrides the base score.
                score = size_score
                break

        # Pinterest CDN reliability bonus
        if "i.pinimg.com" in url:
            score += 0.1

        return min(1.0, score)

    def _create_image_result(self, url: str, raw_result: dict) -> ImageResult:
        """Create ImageResult from raw Pinterest search result.

        Args:
            url: Image URL, already validated by the caller.
            raw_result: Raw result mapping from the search backend; its
                "title" and "source" entries are read when present.

        Returns:
            An ImageResult with the quality score and any dimensions that
            could be parsed from ``url``.
        """
        dimensions = self._extract_dimensions(url)

        return ImageResult(
            url=url,
            platform=self.platform,
            quality_score=self.get_quality_score(url),
            width=dimensions.get("width"),
            height=dimensions.get("height"),
            title=raw_result.get("title"),
            source_url=raw_result.get("source")
        )

    def _extract_dimensions(self, url: str) -> dict:
        """Extract image dimensions from Pinterest URL patterns.

        Args:
            url: Image URL to inspect.

        Returns:
            ``{"width": int, "height": int or None}`` when the URL contains
            a ``/<width>x<height>/`` or ``/<width>x/`` path segment (height
            is None when absent); an empty dict when it has no such segment.
        """
        # Pinterest URL pattern: .../236x/... or .../564x314/...
        size_match = re.search(r"/(\d+)x(\d*)/", url)
        if size_match:
            width = int(size_match.group(1))
            height = int(size_match.group(2)) if size_match.group(2) else None
            return {"width": width, "height": height}

        return {}