Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| from tools.base import Tool | |
| from tools.utils import ToolExecutionError, logger | |
| from core.utils.ncbi_rate_limited import rate_limited_pubmed_search | |
| ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" | |
| ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi" | |
| from typing import Any, Dict, List, Union | |
class PubMedSearchTool(Tool):
    """
    Tool for searching PubMed for articles and returning top results using NCBI E-utilities.

    The article-ID search is delegated to ``rate_limited_pubmed_search``; the
    metadata for the returned IDs is then fetched from the ESummary endpoint.
    """

    def __init__(self) -> None:
        """
        Initialize the PubMedSearchTool with its name, description, and argument schema.
        """
        super().__init__()
        self.name = "search_pubmed"
        self.description = "Search PubMed for articles and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5},
                # The default is read from the environment once, when the tool
                # is instantiated, and is published as part of the schema.
                "email": {
                    "type": "string",
                    "description": "user email for NCBI API",
                    "default": os.getenv("NCBI_EMAIL", ""),
                },
            },
            "required": ["q"],
        }

    def openai_spec(self, legacy=False):
        """
        Return the function specification for this tool.

        Args:
            legacy: Accepted for interface compatibility; not used by this
                implementation.

        Returns:
            dict: ``name``, ``description`` and ``parameters`` (the argument schema).
        """
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema,
        }

    async def run(
        self,
        q: str,
        max_results: int = 5,
        email: str = ""
    ) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """
        Search PubMed for articles and return the top results.

        Args:
            q (str): The search query.
            max_results (int, optional): The maximum number of results to return. Defaults to 5.
            email (str, optional): User email sent with the ESummary request. When empty,
                the NCBI_EMAIL environment variable is used, then a built-in fallback.

        Returns:
            A list of article dicts with the keys ``uid``, ``title``, ``authors``,
            ``pubdate``, ``source`` and ``link``. The list is empty when the search
            yields no usable response or no IDs. (The ``Dict`` arm of the return
            annotation is kept for interface compatibility; this method does not
            return an error dict.)

        Raises:
            ToolExecutionError: If any step of the search or summary fetch fails.
        """
        # Function-scope imports keep this block self-contained.
        import asyncio
        import functools

        try:
            # Use provided email or fall back to environment variable or default
            if not email:
                email = os.getenv("NCBI_EMAIL", "")
            if not email:
                # NOTE(review): this literal looks like a scrape-obfuscated
                # placeholder rather than a real address - confirm the intended
                # fallback email.
                email = "[email protected]"
                logger.info("Using default email for NCBI API access")

            api_key = os.getenv("NCBI_API_KEY")

            # Delegate the ID search to the shared rate-limited helper.
            response_data = await rate_limited_pubmed_search(
                query=q,
                api_key=api_key,
                max_results=max_results
            )
            if not response_data or "esearchresult" not in response_data:
                logger.warning("PubMedSearchTool: No valid response from rate-limited search")
                return []

            idlist = response_data["esearchresult"].get("idlist", [])
            if not idlist:
                return []

            # Fetch summaries for the article IDs
            params_esummary = {
                "db": "pubmed",
                "id": ",".join(idlist),
                "retmode": "json",
                "tool": "IDweekAgent",
                "email": email
            }
            if api_key:
                params_esummary["api_key"] = api_key

            # requests is synchronous: run it in the default executor so the
            # event loop is not blocked while waiting on the network.
            loop = asyncio.get_running_loop()
            resp2 = await loop.run_in_executor(
                None,
                functools.partial(
                    requests.get, ESUMMARY_URL, params=params_esummary, timeout=15
                ),
            )
            resp2.raise_for_status()
            summary = resp2.json().get("result", {})

            results = []
            for uid in idlist:
                # IDs missing from the summary payload still yield an entry,
                # with None fields and an empty author list.
                item = summary.get(uid, {})
                results.append({
                    "uid": uid,
                    "title": item.get("title"),
                    # "or []" tolerates an explicit null authors field.
                    "authors": [a.get("name") for a in item.get("authors") or []],
                    "pubdate": item.get("pubdate"),
                    "source": item.get("source"),
                    "link": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
                })
            return results
        except Exception as e:
            logger.error(f"PubMedSearchTool failed: {e}", exc_info=True)
            # Chain the cause so the original traceback is preserved for callers.
            raise ToolExecutionError(f"PubMedSearchTool failed: {e}") from e