Spaces:
Sleeping
Sleeping
File size: 4,444 Bytes
8120936 a674431 8120936 a674431 8120936 a674431 8120936 a674431 8120936 a674431 8120936 a674431 8120936 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import asyncio
import os
from typing import Any, Dict, List, Union

import requests

from tools.base import Tool
from tools.utils import ToolExecutionError, logger
from core.utils.ncbi_rate_limited import rate_limited_pubmed_search

ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
class PubMedSearchTool(Tool):
    """
    Tool for searching PubMed for articles and returning top results using NCBI E-utilities.

    The ID search is delegated to ``rate_limited_pubmed_search``; the returned
    article IDs are then resolved to summaries via the ESummary endpoint.
    """

    def __init__(self) -> None:
        """
        Initialize the PubMedSearchTool with its name, description, and argument schema.
        """
        super().__init__()
        self.name = "search_pubmed"
        self.description = "Search PubMed for articles and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5},
                "email": {
                    "type": "string",
                    "description": "user email for NCBI API",
                    # Evaluated once, at construction time.
                    "default": os.getenv("NCBI_EMAIL", ""),
                },
            },
            "required": ["q"],
        }

    def openai_spec(self, legacy=False):
        """
        Return the tool specification (name, description, parameter schema).

        Args:
            legacy: Accepted for interface compatibility; not used by this
                implementation.

        Returns:
            dict: Mapping with the keys "name", "description" and "parameters".
        """
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema,
        }

    @staticmethod
    def _resolve_email(email: str) -> str:
        """Pick the contact email: explicit argument, then NCBI_EMAIL, then a default."""
        if email:
            return email
        env_email = os.getenv("NCBI_EMAIL", "")
        if env_email:
            return env_email
        # NOTE(review): this literal looks like an obfuscated/placeholder
        # address rather than a real mailbox - confirm the intended default.
        logger.info("Using default email for NCBI API access")
        return "[email protected]"

    @staticmethod
    def _fetch_summaries(params: Dict[str, str]) -> Dict[str, Any]:
        """Blocking ESummary request; returns the "result" mapping of the JSON reply."""
        resp = requests.get(ESUMMARY_URL, params=params, timeout=15)
        resp.raise_for_status()
        return resp.json().get("result", {})

    @staticmethod
    def _format_article(uid: str, item: Dict[str, Any]) -> Dict[str, Any]:
        """Shape one ESummary record into the result dict returned to callers."""
        return {
            "uid": uid,
            "title": item.get("title"),
            "authors": [a.get("name") for a in item.get("authors", [])],
            "pubdate": item.get("pubdate"),
            "source": item.get("source"),
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/",
        }

    async def run(
        self,
        q: str,
        max_results: int = 5,
        email: str = ""
    ) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """
        Search PubMed for articles and return the top results.

        Args:
            q (str): The search query.
            max_results (int, optional): The maximum number of results to return. Defaults to 5.
            email (str, optional): User email for NCBI API. When empty, falls back to the
                NCBI_EMAIL environment variable and then to a built-in default.

        Returns:
            A list of article dicts with the keys "uid", "title", "authors",
            "pubdate", "source" and "link". The list is empty when the search
            yields no usable response or no IDs. (The declared Union return
            type is kept for interface compatibility; this method does not
            return an error dict.)

        Raises:
            ToolExecutionError: If any step of the search or summary fetch fails.
        """
        try:
            email = self._resolve_email(email)
            api_key = os.getenv("NCBI_API_KEY")

            response_data = await rate_limited_pubmed_search(
                query=q,
                api_key=api_key,
                max_results=max_results
            )
            if not response_data or "esearchresult" not in response_data:
                logger.warning("PubMedSearchTool: No valid response from rate-limited search")
                return []

            idlist = response_data["esearchresult"].get("idlist", [])
            if not idlist:
                return []

            # Fetch summaries for the article IDs
            params_esummary = {
                "db": "pubmed",
                "id": ",".join(idlist),
                "retmode": "json",
                "tool": "IDweekAgent",
                "email": email,
            }
            if api_key:
                params_esummary["api_key"] = api_key

            # requests is synchronous; run it in the default executor so the
            # event loop is not blocked for the duration of the HTTP call.
            loop = asyncio.get_running_loop()
            summary = await loop.run_in_executor(
                None, self._fetch_summaries, params_esummary
            )

            # Preserve the ranking order returned by the search step.
            return [
                self._format_article(uid, summary.get(uid, {}))
                for uid in idlist
            ]
        except Exception as e:
            logger.error(f"PubMedSearchTool failed: {e}", exc_info=True)
            # Chain the cause so the original traceback stays attached.
            raise ToolExecutionError(f"PubMedSearchTool failed: {e}") from e
|