import os
import asyncio
import random
import re

import requests

from typing import Any, Dict, List, Optional, Union

from tools.base import Tool
from tools.utils import ToolExecutionError, logger
from core.utils.serper_rate_limited import rate_limited_serper_search

# Serper API endpoint (requests are routed through the rate-limited helper;
# kept here for reference/compatibility).
SERPER_URL = "https://google.serper.dev/search"

# Pre-compiled pattern for extracting the HTML <title> of a fetched page.
# NOTE: the original pattern r'<title>(.*?)' had no closing anchor, so the
# non-greedy group always matched the empty string and titles were lost.
_TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)


def fetch_and_search_links(links: List[str], query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """
    Fetch the content of each link and return those that contain the query
    (case-insensitive substring match).

    Args:
        links: URLs to fetch and scan.
        query: Substring to search for (case-insensitive).
        max_results: Stop after collecting this many matching pages.

    Returns:
        A list of dicts with ``title``, ``href``, and ``snippet`` (context
        around the first match). Fetch/parse failures are logged and skipped.
    """
    results: List[Dict[str, Any]] = []
    needle = query.lower()
    for url in links:
        try:
            resp = requests.get(url, timeout=10)
            if resp.status_code != 200:
                continue
            text = resp.text
            # Simple case-insensitive substring search.
            idx = text.lower().find(needle)
            if idx == -1:
                continue
            # Extract a snippet of context around the first match.
            start = max(0, idx - 60)
            end = min(len(text), idx + 200)
            snippet = text[start:end].replace('\n', ' ').replace('\r', ' ')
            # Prefer the page <title>; fall back to the URL itself.
            title = url
            m = _TITLE_RE.search(text)
            if m:
                title = m.group(1).strip()
            results.append({
                "title": title,
                "href": url,
                "snippet": snippet
            })
            if len(results) >= max_results:
                break
        except Exception as e:
            # Best-effort: a bad link should not abort the whole scan.
            logger.warning(f"Failed to fetch/search trusted link {url}: {e}", exc_info=True)
    return results


class InternetSearchTool(Tool):
    """
    Tool for searching the public web and returning top results using the
    Serper API.

    Optionally pre-scans a caller-supplied list of trusted links before
    falling back to the Serper API.
    """

    def __init__(self) -> None:
        """Initialize the tool's name, description, and argument schema."""
        super().__init__()
        self.name = "search_internet"
        self.description = "Search the public web for a query and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5}
            },
            "required": ["q"]
        }

    def openai_spec(self, legacy=False):
        """Return the OpenAI function-calling spec for this tool."""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema
        }

    async def run(self, q: str, max_results: int = 5, trusted_links: Optional[List[str]] = None) -> Union[str, Dict[str, Any]]:
        """
        Search the public web for *q* and return a markdown summary built
        from the top results.

        Args:
            q: The search query.
            max_results: Maximum number of results to include.
            trusted_links: Optional URLs to scan before querying Serper.

        Returns:
            A markdown string of results, or a "no results" message.

        Raises:
            ToolExecutionError: If the API key is missing or the search fails.
        """
        try:
            summary_parts: List[str] = []

            # 1. If trusted_links are provided, try searching them first.
            if trusted_links:
                trusted_results = fetch_and_search_links(trusted_links, q, max_results=max_results)
                for res in trusted_results:
                    summary_parts.append(f"**{res['title']}**\n{res['snippet']}\n[Read more]({res['href']})\n")

            # 2. Fallback to Serper API with rate limiting and caching.
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                raise ToolExecutionError("SERPER_API_KEY missing in env settings.")

            response_data = await rate_limited_serper_search(q, api_key, num_results=max_results)
            if response_data and "organic" in response_data:
                results = response_data.get("organic", [])[:max_results]
                for i in results:
                    summary_parts.append(f"**{i.get('title')}**\n{i.get('snippet')}\n[Read more]({i.get('link')})\n")
            else:
                logger.warning("InternetSearchTool: No valid response from rate-limited search")

            # BUG FIX: previously, trusted-link results were discarded when
            # Serper returned no valid response; now any collected results
            # are returned regardless of which source produced them.
            if summary_parts:
                return "\n".join(summary_parts)
            return "No relevant results found."
        except ToolExecutionError:
            # Don't double-wrap our own error type (the original re-wrapped
            # it in the generic handler below, garbling the message).
            raise
        except Exception as e:
            logger.error(f"InternetSearchTool failed: {e}", exc_info=True)
            raise ToolExecutionError(f"InternetSearchTool failed: {e}")