File size: 4,444 Bytes
8120936
 
 
 
 
a674431
8120936
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a674431
 
 
 
 
 
 
 
 
 
 
 
 
8120936
 
a674431
 
8120936
 
 
 
 
a674431
8120936
 
 
a674431
8120936
 
 
a674431
8120936
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

import os
import requests
from tools.base import Tool
from tools.utils import ToolExecutionError, logger
from core.utils.ncbi_rate_limited import rate_limited_pubmed_search

# NCBI E-utilities ESearch endpoint.
# NOTE(review): not referenced in the visible part of this file (the ID search
# goes through rate_limited_pubmed_search) — confirm whether it is still needed.
ESEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
# NCBI E-utilities ESummary endpoint; queried by PubMedSearchTool to fetch
# per-article summary records for a list of PubMed IDs.
ESUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

from typing import Any, Dict, List, Union

class PubMedSearchTool(Tool):
    """
    Tool for searching PubMed for articles and returning top results using NCBI E-utilities.

    The PubMed ID lookup is delegated to ``rate_limited_pubmed_search``; summaries
    for the returned IDs are then fetched from the ESummary endpoint and reduced
    to a small dict per article (uid, title, authors, pubdate, source, link).
    """

    def __init__(self) -> None:
        """
        Initialize the PubMedSearchTool with its name, description, and argument schema.
        """
        super().__init__()
        self.name = "search_pubmed"
        self.description = "Search PubMed for articles and return top results."
        self.args_schema = {
            "type": "object",
            "properties": {
                "q": {"type": "string", "description": "search query"},
                "max_results": {"type": "integer", "default": 5},
                # The default is read from the environment once, at construction time.
                "email": {
                    "type": "string",
                    "description": "user email for NCBI API",
                    "default": os.getenv("NCBI_EMAIL", ""),
                },
            },
            "required": ["q"],
        }

    def openai_spec(self, legacy=False):
        """
        Return the function spec (name, description, parameters) for this tool.

        Args:
            legacy: Accepted for interface compatibility; not used by this
                implementation.

        Returns:
            dict: Mapping with the keys ``name``, ``description`` and ``parameters``.
        """
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_schema,
        }

    def _fetch_summaries(self, idlist, email, api_key) -> Dict[str, Any]:
        """
        Blocking helper: fetch the ESummary ``result`` mapping for the given PubMed IDs.

        Args:
            idlist: PubMed IDs (strings) to summarise.
            email: Contact email sent to NCBI with the request.
            api_key: Optional NCBI API key; only sent when truthy.

        Returns:
            Dict[str, Any]: The ``result`` object of the JSON response, or an
            empty dict when the response has no ``result`` key.

        Raises:
            requests.HTTPError: If the response status indicates an error.
        """
        params_esummary = {
            "db": "pubmed",
            "id": ",".join(idlist),
            "retmode": "json",
            "tool": "IDweekAgent",
            "email": email,
        }
        if api_key:
            params_esummary["api_key"] = api_key

        resp = requests.get(ESUMMARY_URL, params=params_esummary, timeout=15)
        resp.raise_for_status()
        return resp.json().get("result", {})

    async def run(
        self,
        q: str,
        max_results: int = 5,
        email: str = ""
    ) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """
        Search PubMed for articles and return the top results.

        Args:
            q (str): The search query.
            max_results (int, optional): The maximum number of results to return. Defaults to 5.
            email (str, optional): User email for NCBI API. Falls back to the
                NCBI_EMAIL environment variable, then to a built-in placeholder.

        Returns:
            A list of article dicts with the keys ``uid``, ``title``, ``authors``,
            ``pubdate``, ``source`` and ``link``. An empty list is returned when
            the search yields no usable response or no IDs.

        Raises:
            ToolExecutionError: If any step fails; the original exception is
                attached as ``__cause__``.
        """
        # Local import keeps this block self-contained; asyncio is stdlib.
        import asyncio

        try:
            # Use provided email or fall back to environment variable or default
            if not email:
                email = os.getenv("NCBI_EMAIL", "")

            if not email:
                # Use a default academic email for research purposes
                # NOTE(review): this literal looks like an obfuscated/redacted
                # address rather than a valid email — confirm the intended value.
                email = "[email protected]"
                logger.info("Using default email for NCBI API access")

            api_key = os.getenv("NCBI_API_KEY")

            # Use rate-limited PubMed search with automatic caching and retry logic
            response_data = await rate_limited_pubmed_search(
                query=q,
                api_key=api_key,
                max_results=max_results
            )

            if not response_data or "esearchresult" not in response_data:
                logger.warning("PubMedSearchTool: No valid response from rate-limited search")
                return []

            idlist = response_data["esearchresult"].get("idlist", [])
            if not idlist:
                return []

            # requests is blocking; run it in the default executor so the
            # event loop is not stalled for the duration of the HTTP call.
            loop = asyncio.get_running_loop()
            summary = await loop.run_in_executor(
                None, self._fetch_summaries, idlist, email, api_key
            )

            results = []
            for uid in idlist:
                item = summary.get(uid, {})
                results.append({
                    "uid": uid,
                    "title": item.get("title"),
                    # `or []` tolerates a present-but-null "authors" field.
                    "authors": [a.get("name") for a in (item.get("authors") or [])],
                    "pubdate": item.get("pubdate"),
                    "source": item.get("source"),
                    "link": f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
                })
            return results
        except Exception as e:
            logger.error(f"PubMedSearchTool failed: {e}", exc_info=True)
            # Chain the cause so callers can inspect the underlying failure.
            raise ToolExecutionError(f"PubMedSearchTool failed: {e}") from e