IDAgentsFreshTest / tools /pubmed_search.py
IDAgents Developer
Integrate API rate limiters into agent tools for workshop readiness
a674431
raw
history blame
4.44 kB
import asyncio
import os
from typing import Any, Dict, List, Union

import requests

from tools.base import Tool
from tools.utils import ToolExecutionError, logger
from core.utils.ncbi_rate_limited import rate_limited_pubmed_search

ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
class PubMedSearchTool(Tool):
    """
    Tool for searching PubMed for articles and returning top results using NCBI E-utilities.

    The search step (ESearch) is delegated to ``rate_limited_pubmed_search``;
    the matching article IDs are then resolved to title/author/date metadata
    with a single ESummary request.
    """

    def __init__(self) -> None:
        """
        Initialize the PubMedSearchTool with its name, description, and argument schema.
        """
        super().__init__()
        self.name = "search_pubmed"
        self.description = "Search PubMed for articles and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5},
                # The default is read from the environment once, when the
                # tool instance is constructed.
                "email": {
                    "type": "string",
                    "description": "user email for NCBI API",
                    "default": os.getenv("NCBI_EMAIL", ""),
                },
            },
            "required": ["q"],
        }

    def openai_spec(self, legacy=False):
        """
        Return the function specification dict for this tool.

        Args:
            legacy: Accepted for interface compatibility; not used here.

        Returns:
            dict with the tool's ``name``, ``description`` and ``parameters``
            (the JSON schema stored in ``self.args_schema``).
        """
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema,
        }

    def _fetch_summaries(
        self,
        idlist: List[str],
        email: str,
        api_key: Union[str, None],
    ) -> Dict[str, Any]:
        """
        Fetch ESummary records for the given PubMed IDs (blocking HTTP call).

        This is synchronous on purpose: ``run`` executes it in a worker thread
        so the event loop is not blocked while waiting on the network.

        Args:
            idlist: PubMed UIDs to look up.
            email: Contact email sent to NCBI with the request.
            api_key: NCBI API key, or a falsy value to omit it.

        Returns:
            The ``"result"`` object of the ESummary JSON response, or an empty
            dict when that key is missing.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-2xx HTTP status.
        """
        params_esummary = {
            "db": "pubmed",
            "id": ",".join(idlist),
            "retmode": "json",
            "tool": "IDweekAgent",
            "email": email,
        }
        if api_key:
            params_esummary["api_key"] = api_key
        # NOTE: unlike the search step, this request does not go through the
        # rate-limited helper.
        resp = requests.get(ESUMMARY_URL, params=params_esummary, timeout=15)
        resp.raise_for_status()
        return resp.json().get("result", {})

    async def run(
        self,
        q: str,
        max_results: int = 5,
        email: str = ""
    ) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """
        Search PubMed for articles and return the top results.

        Args:
            q (str): The search query.
            max_results (int, optional): The maximum number of results to return. Defaults to 5.
            email (str, optional): User email for NCBI API. Falls back to the
                NCBI_EMAIL environment variable, then to a built-in default.

        Returns:
            A list of article dicts with keys ``uid``, ``title``, ``authors``,
            ``pubdate``, ``source`` and ``link``. The list is empty when the
            search response is missing/invalid or matches no articles.

        Raises:
            ToolExecutionError: If any step fails; the original exception is
                attached as ``__cause__``.
        """
        try:
            # Use provided email or fall back to environment variable or default
            if not email:
                email = os.getenv("NCBI_EMAIL", "")
            if not email:
                # NOTE(review): this literal looks like a redacted/obfuscated
                # placeholder rather than a real address -- confirm the
                # intended fallback value.
                email = "[email protected]"
                logger.info("Using default email for NCBI API access")

            api_key = os.getenv("NCBI_API_KEY")

            # ESearch via the shared rate-limited helper.
            response_data = await rate_limited_pubmed_search(
                query=q,
                api_key=api_key,
                max_results=max_results
            )
            if not response_data or "esearchresult" not in response_data:
                logger.warning("PubMedSearchTool: No valid response from rate-limited search")
                return []

            idlist = response_data["esearchresult"].get("idlist", [])
            if not idlist:
                return []

            # Fetch summaries for the article IDs. The HTTP call is blocking,
            # so run it in the default executor instead of on the event loop.
            loop = asyncio.get_running_loop()
            summary = await loop.run_in_executor(
                None, self._fetch_summaries, idlist, email, api_key
            )

            results = []
            for uid in idlist:
                item = summary.get(uid, {})
                # "authors" may be absent or null; treat both as no authors.
                authors = item.get("authors") or []
                results.append({
                    "uid": uid,
                    "title": item.get("title"),
                    "authors": [a.get("name") for a in authors if isinstance(a, dict)],
                    "pubdate": item.get("pubdate"),
                    "source": item.get("source"),
                    "link": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
                })
            return results
        except Exception as e:
            logger.error(f"PubMedSearchTool failed: {e}", exc_info=True)
            # Chain the cause so the original traceback is preserved.
            raise ToolExecutionError(f"PubMedSearchTool failed: {e}") from e