import asyncio
import os
import random
import re
from typing import Any, Dict, List, Optional, Union

import requests

from core.utils.serper_rate_limited import rate_limited_serper_search
from tools.base import Tool
from tools.utils import ToolExecutionError, logger
def fetch_and_search_links(links: List[str], query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """
    Fetch the content of each link and return those that contain the query
    (case-insensitive substring match).

    Args:
        links: URLs to download and scan.
        query: Substring searched for in each page body, case-insensitively.
        max_results: Stop after this many matching pages have been collected.

    Returns:
        A list of dicts with keys ``title``, ``href``, and ``snippet`` (context
        around the first match). Pages that fail to load, time out, or return
        a non-200 status are skipped; failures are logged, never raised.
    """
    # Hoisted out of the loop: compile the <title> pattern and lower the query once.
    # DOTALL because a <title> element may span multiple lines.
    title_re = re.compile(r"<title>(.*?)</title>", re.IGNORECASE | re.DOTALL)
    needle = query.lower()
    results: List[Dict[str, Any]] = []
    for url in links:
        try:
            resp = requests.get(url, timeout=10)
            if resp.status_code != 200:
                continue
            text = resp.text
            # Simple case-insensitive substring search.
            idx = text.lower().find(needle)
            if idx == -1:
                continue
            # Extract a snippet of context around the first match.
            start = max(0, idx - 60)
            end = min(len(text), idx + 200)
            snippet = text[start:end].replace("\n", " ").replace("\r", " ")
            # Prefer the page's <title> element if present; fall back to the URL.
            title = url
            m = title_re.search(text)
            if m:
                title = m.group(1).strip()
            results.append({
                "title": title,
                "href": url,
                "snippet": snippet
            })
            if len(results) >= max_results:
                break
        except Exception as e:
            logger.warning(f"Failed to fetch/search trusted link {url}: {e}", exc_info=True)
    return results
# Serper REST endpoint. NOTE(review): not referenced in this file's visible
# code -- searches go through rate_limited_serper_search; confirm before removing.
SERPER_URL = "https://google.serper.dev/search"
# NOTE(review): duplicates the typing import at the top of the file -- candidate for removal.
from typing import Any, Dict, List, Union
class InternetSearchTool(Tool):
    """
    Tool for searching the public web and returning top results using the Serper API.

    If the caller supplies ``trusted_links``, those pages are searched first and
    the Serper API is used as a fallback for anything still missing.

    NOTE: the class docstring previously sat *after* ``openai_spec`` as a bare
    string expression, so it was never picked up as ``__doc__``; it now lives here.
    """

    def __init__(self) -> None:
        """
        Initialize the InternetSearchTool with its name, description, and argument schema.
        """
        super().__init__()
        self.name = "search_internet"
        self.description = "Search the public web for a query and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5}
            },
            "required": ["q"]
        }

    def openai_spec(self, legacy=False):
        """Return the OpenAI function-calling spec (name, description, parameters)."""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema
        }

    async def run(self, q: str, max_results: int = 5, trusted_links: Optional[List[str]] = None) -> Union[str, Dict[str, Any]]:
        """
        Search the public web for a query and return a summarized answer with
        direct synthesis from the top results.

        Args:
            q: The search query.
            max_results: Maximum number of result entries to include.
            trusted_links: Optional URLs to search before falling back to Serper.

        Returns:
            A markdown-formatted summary string, or "No relevant results found.".

        Raises:
            ToolExecutionError: If the API key is missing (and no trusted-link
                results were found) or the search request fails.
        """
        try:
            summary_parts: List[str] = []
            # 1. If trusted_links are provided, try searching them first.
            if trusted_links:
                trusted_results = fetch_and_search_links(trusted_links, q, max_results=max_results)
                for res in trusted_results:
                    summary_parts.append(f"**{res['title']}**\n{res['snippet']}\n[Read more]({res['href']})\n")
                # Trusted sources alone satisfied the request -- skip Serper entirely.
                if len(summary_parts) >= max_results:
                    return "\n".join(summary_parts)
            # 2. Fallback to Serper API with rate limiting and caching.
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                if summary_parts:
                    # Don't discard trusted-link results just because Serper is unavailable.
                    logger.warning("SERPER_API_KEY missing; returning trusted-link results only.")
                    return "\n".join(summary_parts)
                raise ToolExecutionError("SERPER_API_KEY missing in env settings.")
            try:
                # Use rate-limited Serper search with automatic caching and retry logic.
                response_data = await rate_limited_serper_search(q, api_key, num_results=max_results)
                if response_data and "organic" in response_data:
                    results = response_data.get("organic", [])[:max_results]
                    for i in results:
                        summary_parts.append(f"**{i.get('title')}**\n{i.get('snippet')}\n[Read more]({i.get('link')})\n")
                    if summary_parts:
                        return "\n".join(summary_parts)
                    return "No relevant results found."
                logger.warning("InternetSearchTool: No valid response from rate-limited search")
                return "No relevant results found."
            except Exception as e:
                logger.error(f"InternetSearchTool rate-limited search failed: {e}", exc_info=True)
                raise ToolExecutionError(f"Internet search failed: {e}") from e
        except ToolExecutionError:
            # Already our error type -- re-raise as-is instead of double-wrapping
            # (previously "Internet search failed: ..." got re-wrapped into
            # "InternetSearchTool failed: ToolExecutionError(...)").
            raise
        except Exception as e:
            logger.error(f"InternetSearchTool failed: {e}", exc_info=True)
            raise ToolExecutionError(f"InternetSearchTool failed: {e}") from e