Spaces:

John-jero
/

IDAgentsFreshTest

Sleeping

File size: 4,444 Bytes


import asyncio
import os

import requests

from core.utils.ncbi_rate_limited import rate_limited_pubmed_search
from tools.base import Tool
from tools.utils import ToolExecutionError, logger

# NCBI E-utilities endpoints.
# ESEARCH_URL is not referenced in the visible part of this module: the search
# step goes through rate_limited_pubmed_search instead.
ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
# ESUMMARY_URL is queried to turn a list of PubMed IDs into article summaries.
ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

from typing import Any, Dict, List, Union

class PubMedSearchTool(Tool):
    """
    Tool for searching PubMed for articles and returning top results using NCBI E-utilities.

    The search step is delegated to ``rate_limited_pubmed_search``; the article
    IDs it returns are then resolved to summaries through the ESummary endpoint.
    """

    def __init__(self) -> None:
        """
        Initialize the PubMedSearchTool with its name, description, and argument schema.
        """
        super().__init__()
        self.name = "search_pubmed"
        self.description = "Search PubMed for articles and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5},
                # The default is read from the environment once, at construction time.
                "email": {
                    "type": "string",
                    "description": "user email for NCBI API",
                    "default": os.getenv("NCBI_EMAIL", ""),
                },
            },
            "required": ["q"],
        }

    def openai_spec(self, legacy=False):
        """
        Return the function specification (name, description, parameters) for this tool.

        Args:
            legacy: Accepted for interface compatibility; not used by this implementation.

        Returns:
            dict: A mapping with the keys ``name``, ``description`` and ``parameters``.
        """
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema,
        }

    def _fetch_summaries(self, idlist, email, api_key):
        """
        Fetch ESummary records for the given PubMed IDs (blocking HTTP call).

        Args:
            idlist: PubMed IDs (strings) to look up.
            email: Contact email sent to NCBI with the request.
            api_key: Optional NCBI API key; only sent when truthy.

        Returns:
            dict: The ``result`` object of the ESummary JSON response, or an
            empty dict if the response has no ``result`` key.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
        """
        params_esummary = {
            "db": "pubmed",
            "id": ",".join(idlist),
            "retmode": "json",
            "tool": "IDweekAgent",
            "email": email,
        }
        if api_key:
            params_esummary["api_key"] = api_key

        resp = requests.get(ESUMMARY_URL, params=params_esummary, timeout=15)
        resp.raise_for_status()
        return resp.json().get("result", {})

    async def run(
        self,
        q: str,
        max_results: int = 5,
        email: str = ""
    ) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """
        Search PubMed for articles and return the top results.

        Args:
            q (str): The search query.
            max_results (int, optional): The maximum number of results to return. Defaults to 5.
            email (str, optional): User email for NCBI API. Falls back to the
                NCBI_EMAIL environment variable, then to a built-in default.

        Returns:
            A list of article dicts with the keys ``uid``, ``title``, ``authors``,
            ``pubdate``, ``source`` and ``link``. The list is empty when the
            search yields no usable response or no IDs.

        Raises:
            ToolExecutionError: If the search or the summary lookup fails.
        """
        try:
            # Use provided email or fall back to environment variable or default
            if not email:
                email = os.getenv("NCBI_EMAIL", "")

            if not email:
                # NOTE(review): this literal looks like a web-obfuscated placeholder
                # rather than a real address -- confirm and replace with a valid
                # contact email before relying on it.
                email = "[email protected]"
                logger.info("Using default email for NCBI API access")

            api_key = os.getenv("NCBI_API_KEY")

            # The search itself goes through the project's rate-limited helper.
            response_data = await rate_limited_pubmed_search(
                query=q,
                api_key=api_key,
                max_results=max_results
            )

            if not response_data or "esearchresult" not in response_data:
                logger.warning("PubMedSearchTool: No valid response from rate-limited search")
                return []

            idlist = response_data["esearchresult"].get("idlist", [])
            if not idlist:
                return []

            # requests is synchronous; run it in the default executor so the
            # event loop is not blocked for the duration of the HTTP round trip.
            loop = asyncio.get_running_loop()
            summary = await loop.run_in_executor(
                None, self._fetch_summaries, idlist, email, api_key
            )

            results = []
            for uid in idlist:
                item = summary.get(uid, {})
                results.append({
                    "uid": uid,
                    "title": item.get("title"),
                    # "or []" also covers an explicit null authors field.
                    "authors": [a.get("name") for a in item.get("authors") or []],
                    "pubdate": item.get("pubdate"),
                    "source": item.get("source"),
                    "link": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
                })
            return results
        except Exception as e:
            logger.error(f"PubMedSearchTool failed: {e}", exc_info=True)
            # Chain the cause so the original traceback stays attached.
            raise ToolExecutionError(f"PubMedSearchTool failed: {e}") from e