import os
import asyncio
import random
import re

import requests

from typing import Any, Dict, List, Optional, Union

from tools.base import Tool
from tools.utils import ToolExecutionError, logger
from core.utils.serper_rate_limited import rate_limited_serper_search

# Serper API endpoint (requests are routed through the rate-limited helper;
# kept here for reference/compatibility).
SERPER_URL = "https://google.serper.dev/search"

# Pre-compiled pattern for extracting the HTML <title> of a fetched page.
# NOTE: the original pattern r'<title>(.*?)' had no closing anchor, so the
# non-greedy group always matched the empty string and titles were lost.
_TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)


def fetch_and_search_links(links: List[str], query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """
    Fetch the content of each link and return those that contain the query
    (case-insensitive substring match).

    Args:
        links: URLs to fetch and scan.
        query: Substring to search for (case-insensitive).
        max_results: Stop after collecting this many matching pages.

    Returns:
        A list of dicts with ``title``, ``href``, and ``snippet`` (context
        around the first match). Fetch/parse failures are logged and skipped.
    """
    results: List[Dict[str, Any]] = []
    needle = query.lower()
    for url in links:
        try:
            resp = requests.get(url, timeout=10)
            if resp.status_code != 200:
                continue
            text = resp.text
            # Simple case-insensitive substring search.
            idx = text.lower().find(needle)
            if idx == -1:
                continue
            # Extract a snippet of context around the first match.
            start = max(0, idx - 60)
            end = min(len(text), idx + 200)
            snippet = text[start:end].replace('\n', ' ').replace('\r', ' ')
            # Prefer the page <title>; fall back to the URL itself.
            title = url
            m = _TITLE_RE.search(text)
            if m:
                title = m.group(1).strip()
            results.append({
                "title": title,
                "href": url,
                "snippet": snippet
            })
            if len(results) >= max_results:
                break
        except Exception as e:
            # Best-effort: a bad link should not abort the whole scan.
            logger.warning(f"Failed to fetch/search trusted link {url}: {e}", exc_info=True)
    return results


class InternetSearchTool(Tool):
    """
    Tool for searching the public web and returning top results using the
    Serper API.

    Optionally pre-scans a caller-supplied list of trusted links before
    falling back to the Serper API.
    """

    def __init__(self) -> None:
        """Initialize the tool's name, description, and argument schema."""
        super().__init__()
        self.name = "search_internet"
        self.description = "Search the public web for a query and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5}
            },
            "required": ["q"]
        }

    def openai_spec(self, legacy=False):
        """Return the OpenAI function-calling spec for this tool."""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema
        }

    async def run(self, q: str, max_results: int = 5, trusted_links: Optional[List[str]] = None) -> Union[str, Dict[str, Any]]:
        """
        Search the public web for *q* and return a markdown summary built
        from the top results.

        Args:
            q: The search query.
            max_results: Maximum number of results to include.
            trusted_links: Optional URLs to scan before querying Serper.

        Returns:
            A markdown string of results, or a "no results" message.

        Raises:
            ToolExecutionError: If the API key is missing or the search fails.
        """
        try:
            summary_parts: List[str] = []

            # 1. If trusted_links are provided, try searching them first.
            if trusted_links:
                trusted_results = fetch_and_search_links(trusted_links, q, max_results=max_results)
                for res in trusted_results:
                    summary_parts.append(f"**{res['title']}**\n{res['snippet']}\n[Read more]({res['href']})\n")

            # 2. Fallback to Serper API with rate limiting and caching.
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                raise ToolExecutionError("SERPER_API_KEY missing in env settings.")

            response_data = await rate_limited_serper_search(q, api_key, num_results=max_results)
            if response_data and "organic" in response_data:
                results = response_data.get("organic", [])[:max_results]
                for i in results:
                    summary_parts.append(f"**{i.get('title')}**\n{i.get('snippet')}\n[Read more]({i.get('link')})\n")
            else:
                logger.warning("InternetSearchTool: No valid response from rate-limited search")

            # BUG FIX: previously, trusted-link results were discarded when
            # Serper returned no valid response; now any collected results
            # are returned regardless of which source produced them.
            if summary_parts:
                return "\n".join(summary_parts)
            return "No relevant results found."
        except ToolExecutionError:
            # Don't double-wrap our own error type (the original re-wrapped
            # it in the generic handler below, garbling the message).
            raise
        except Exception as e:
            logger.error(f"InternetSearchTool failed: {e}", exc_info=True)
            raise ToolExecutionError(f"InternetSearchTool failed: {e}")