Upload 9 files
Browse files- .gitignore +10 -0
 - .python-version +1 -0
 - README.md +75 -7
 - app.py +186 -0
 - oaklib_utils.py +55 -0
 - openai_utils.py +69 -0
 - pyproject.toml +11 -0
 - ruff.toml +2 -0
 - uv.lock +0 -0
 
    	
        .gitignore
    ADDED
    
    | 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            # Python-generated files
         
     | 
| 2 | 
         
            +
            __pycache__/
         
     | 
| 3 | 
         
            +
            *.py[oc]
         
     | 
| 4 | 
         
            +
            build/
         
     | 
| 5 | 
         
            +
            dist/
         
     | 
| 6 | 
         
            +
            wheels/
         
     | 
| 7 | 
         
            +
            *.egg-info
         
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
            # Virtual environments
         
     | 
| 10 | 
         
            +
            .venv
         
     | 
    	
        .python-version
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            3.13
         
     | 
    	
        README.md
    CHANGED
    
    | 
         @@ -1,14 +1,82 @@ 
     | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
             
            title: BioMedNorm MCP Server
         
     | 
| 3 | 
         
            -
            emoji: 🚀
         
     | 
| 4 | 
         
            -
            colorFrom: pink
         
     | 
| 5 | 
         
            -
            colorTo: yellow
         
     | 
| 6 | 
         
             
            sdk: gradio
         
     | 
| 7 | 
         
            -
            sdk_version: 5.33. 
     | 
| 8 | 
         
             
            app_file: app.py
         
     | 
| 9 | 
         
            -
            pinned:  
     | 
| 10 | 
         
             
            license: apache-2.0
         
     | 
| 11 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 12 | 
         
             
            ---
         
     | 
| 13 | 
         | 
| 14 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
             
            title: BioMedNorm MCP Server
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 3 | 
         
             
            sdk: gradio
         
     | 
| 4 | 
         
            +
            sdk_version: 5.33.0
         
     | 
| 5 | 
         
             
            app_file: app.py
         
     | 
| 6 | 
         
            +
            pinned: true
         
     | 
| 7 | 
         
             
            license: apache-2.0
         
     | 
| 8 | 
         
            +
            python_version: 3.13.3
         
     | 
| 9 | 
         
            +
            tags:
         
     | 
| 10 | 
         
            +
            - mcp-server-track
         
     | 
| 11 | 
         
             
            ---
         
     | 
| 12 | 
         | 
| 13 | 
         
            +
            # BioMedNorm MCP Server
         
     | 
| 14 | 
         
            +
             
     | 
| 15 | 
         
            +
            An MCP server for extracting and normalizing domain-specific entities from biomedical text. We leverage OpenAI LLMs to identify entities and match them to standardized terminology.
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
            +
            ## Installation
         
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
            This project uses `uv` from Astral for dependency management. Follow these steps to set up the project:
         
     | 
| 20 | 
         
            +
             
     | 
| 21 | 
         
            +
            ### Clone the repository
         
     | 
| 22 | 
         
            +
             
     | 
| 23 | 
         
            +
            ```bash
         
     | 
| 24 | 
         
            +
            git clone https://github.com/yourusername/entity-extraction-mcp
         
     | 
| 25 | 
         
            +
            cd entity-extraction-mcp
         
     | 
| 26 | 
         
            +
            ```
         
     | 
| 27 | 
         
            +
             
     | 
| 28 | 
         
            +
            ### Set up Python environment
         
     | 
| 29 | 
         
            +
             
     | 
| 30 | 
         
            +
            The project includes a .python-version file that specifies the required Python version. Make sure you have uv installed:
         
     | 
| 31 | 
         
            +
             
     | 
| 32 | 
         
            +
            ```bash
         
     | 
| 33 | 
         
            +
            # Install uv if you don't have it already
         
     | 
| 34 | 
         
            +
            curl -LsSf https://astral.sh/uv/install.sh | sh
         
     | 
| 35 | 
         
            +
            ```
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            ### Install dependencies
         
     | 
| 38 | 
         
            +
             
     | 
| 39 | 
         
            +
            The project dependencies are defined in `pyproject.toml`. Install them with:
         
     | 
| 40 | 
         
            +
             
     | 
| 41 | 
         
            +
            ```bash
         
     | 
| 42 | 
         
            +
            uv pip install -e .
         
     | 
| 43 | 
         
            +
            ```
         
     | 
| 44 | 
         
            +
             
     | 
| 45 | 
         
            +
            ### Set up environment variables
         
     | 
| 46 | 
         
            +
             
     | 
| 47 | 
         
            +
            The project **requires** an OpenAI API key, which must be stored in a `.env` file.
         
     | 
| 48 | 
         
            +
             
     | 
| 49 | 
         
            +
            ## Running the application
         
     | 
| 50 | 
         
            +
             
     | 
| 51 | 
         
            +
            Run the application using `uv run`:
         
     | 
| 52 | 
         
            +
             
     | 
| 53 | 
         
            +
            ```bash
         
     | 
| 54 | 
         
            +
            uv run app.py
         
     | 
| 55 | 
         
            +
            ```
         
     | 
| 56 | 
         
            +
             
     | 
| 57 | 
         
            +
            This command ensures that:
         
     | 
| 58 | 
         
            +
             
     | 
| 59 | 
         
            +
            - All project dependencies are correctly installed
         
     | 
| 60 | 
         
            +
            - The environment variables from .env are loaded
         
     | 
| 61 | 
         
            +
            - The application runs in the proper environment
         
     | 
| 62 | 
         
            +
             
     | 
| 63 | 
         
            +
            After starting the server, you can access:
         
     | 
| 64 | 
         
            +
             
     | 
| 65 | 
         
            +
            - Web interface: `http://your-server:port`
         
     | 
| 66 | 
         
            +
            - MCP endpoint: `http://your-server:port/gradio_api/mcp/sse`
         
     | 
| 67 | 
         
            +
             
     | 
| 68 | 
         
            +
            ## Using the Web Interface
         
     | 
| 69 | 
         
            +
             
     | 
| 70 | 
         
            +
            - Enter text in the input area
         
     | 
| 71 | 
         
            +
            - Select the entity type (Disease, Tissue, or Cell Type)
         
     | 
| 72 | 
         
            +
            - Click "Normalize"
         
     | 
| 73 | 
         
            +
            - View the normalized entities in the results area
         
     | 
| 74 | 
         
            +
             
     | 
| 75 | 
         
            +
            ## Using as an MCP Tool
         
     | 
| 76 | 
         
            +
             
     | 
| 77 | 
         
            +
            The server exposes an MCP-compatible endpoint that can be used by AI agents. The tool accepts:
         
     | 
| 78 | 
         
            +
             
     | 
| 79 | 
         
            +
            - `paragraph`: Text to extract entities from
         
     | 
| 80 | 
         
            +
            - `target_entity`: Type of entity to extract ("Disease", "Tissue", or "Cell Type")
         
     | 
| 81 | 
         
            +
             
     | 
| 82 | 
         
            +
            and returns a list of normalized entities.
         
     | 
    	
        app.py
    ADDED
    
    | 
         @@ -0,0 +1,186 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """
         
     | 
| 2 | 
         
            +
            BioMed text normalization MCP server.
         
     | 
| 3 | 
         
            +
            """
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            # import time
         
     | 
| 6 | 
         
            +
            import asyncio
         
     | 
| 7 | 
         
            +
            from typing import Optional
         
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
            import gradio as gr
         
     | 
| 10 | 
         
            +
             
     | 
| 11 | 
         
            +
            from oaklib_utils import get_candidates
         
     | 
| 12 | 
         
            +
            from openai_utils import ask_openai
         
     | 
| 13 | 
         
            +
             
     | 
| 14 | 
         
            +
            NER_PROMPT = """
         
     | 
| 15 | 
         
            +
              You are an expert annotator of biomedical text.
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
            +
              Annotate/Identify/Extract all {entity}s in this text: {text}
         
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
              Instructions:
         
     | 
| 20 | 
         
            +
              1. If no such entity or entities are found, then **return exactly**: Not Found
         
     | 
| 21 | 
         
            +
              2. Extract only the entity. If only an abbreviation is present, expand it based on the 
         
     | 
| 22 | 
         
            +
              biomedical context in the given paragraph. For e.g., BA12 full form is Brodmann (1909) area 12.
         
     | 
| 23 | 
         
            +
              3. Do not provide any additional information or formatting.
         
     | 
| 24 | 
         
            +
             
     | 
| 25 | 
         
            +
              Do not guess or hallucinate if you are uncertain. This has high-stakes, so it's better to be safe 
         
     | 
| 26 | 
         
            +
              than sorry. This is very important, so you'd better be sure of your answer, OK?
         
     | 
| 27 | 
         
            +
            """
         
     | 
| 28 | 
         
            +
             
     | 
| 29 | 
         
            +
            RAG_PROMPT = """
         
     | 
| 30 | 
         
            +
              You are an expert normalizer of biomedical entities.
         
     | 
| 31 | 
         
            +
             
     | 
| 32 | 
         
            +
              Given the following list of candidate standard terms: {top_k_preds},
         
     | 
| 33 | 
         
            +
              find the single closest matching term for this unnormalized entity: {entity}.
         
     | 
| 34 | 
         
            +
             
     | 
| 35 | 
         
            +
              Instructions:
         
     | 
| 36 | 
         
            +
              1. **IMPORTANT:** Do **NOT** guess or hallucinate. Do **NOT** provide any term that 
         
     | 
| 37 | 
         
            +
              is not explicitly present in the list of standardized terms.
         
     | 
| 38 | 
         
            +
              2. Do not overgeneralize unless no match is available.
         
     | 
| 39 | 
         
            +
              3. Do not provide any additional information or formatting.
         
     | 
| 40 | 
         
            +
             
     | 
| 41 | 
         
            +
              This has high-stakes, so it's better to be safe than sorry. This is very important, so you'd better 
         
     | 
| 42 | 
         
            +
              be sure of your answer, OK?
         
     | 
| 43 | 
         
            +
            """
         
     | 
| 44 | 
         
            +
             
     | 
| 45 | 
         
            +
             
     | 
| 46 | 
         
            +
            async def extract_entities(paragraph: str, target_entity: str) -> Optional[list[str]]:
              """
              Ask the LLM to pull every mention of one entity type out of a paragraph.

              Args:
                paragraph (str): Text to scan for entities.
                target_entity (str): Entity type to look for (e.g., 'disease', 'tissue');
                  it is substituted into NER_PROMPT together with the paragraph.

              Returns:
                Optional[list[str]]: The value produced by ask_openai() for the "ner"
                usage, passed through unchanged. Per the annotation this is a list of
                extracted entities, or None if the model did not return a valid response.
              """
              # Fill the NER template, then hand the finished prompt to the model.
              ner_request = NER_PROMPT.format(text=paragraph, entity=target_entity)
              return await ask_openai(ner_request, usage="ner")
         
     | 
| 62 | 
         
            +
             
     | 
| 63 | 
         
            +
             
     | 
| 64 | 
         
            +
            async def normalize_entities(raw_terms: list[str]) -> list[Optional[str]]:
              """
              Normalize a list of raw terms to the most appropriate standard terms.

              For every raw term, candidate standard terms are looked up with
              get_candidates() and the LLM is asked (via RAG_PROMPT) to pick the single
              closest candidate. All terms are processed concurrently.

              This function is designed to process the output from extract_entities().

              Args:
                raw_terms (list[str]): List of unnormalized terms, typically from extract_entities().

              Returns:
                list[Optional[str]]: List of best matching standard terms in the same order as the
                input terms. An entry is None when no candidate terms were found for it, and
                may be None if the model call did not yield a result.
              """

              async def process_single_entity(raw_term: str) -> Optional[str]:
                # get_candidates is a plain (non-async) function, so run it in a worker
                # thread to avoid blocking the event loop while it searches.
                candidate_std_terms = await asyncio.to_thread(get_candidates, raw_term)

                # RAG_PROMPT forbids answers outside the candidate list; with no
                # candidates there is nothing valid to choose, so skip the model call.
                if not candidate_std_terms:
                  return None

                # Use these entity-specific candidates for the OpenAI call
                prompt = RAG_PROMPT.format(entity=raw_term, top_k_preds=candidate_std_terms)
                return await ask_openai(prompt, usage="rag")

              # Process all entities concurrently; gather() preserves input order
              tasks = [process_single_entity(entity) for entity in raw_terms]
              normalized_entities = await asyncio.gather(*tasks)

              return normalized_entities
         
     | 
| 95 | 
         
            +
             
     | 
| 96 | 
         
            +
             
     | 
| 97 | 
         
            +
            async def extract_and_normalize(
              paragraph: str, target_entity: str
            ) -> list[Optional[str]]:
              """
              Extract entities from a paragraph and normalize them in one operation.

              Args:
                paragraph: The paragraph from which to extract entities.
                target_entity: The type of entity to extract and normalize. The web
                  interface offers "Disease", "Tissue" and "Cell Type".

              Returns:
                list[Optional[str]]: One best matching standard term per extracted entity,
                in extraction order. An entry may be None if normalization failed. The
                list is empty when no entities were extracted.
              """
              extracted_entities = await extract_entities(paragraph, target_entity)

              # A single truthiness check covers both None (no valid model response)
              # and an empty list (nothing extracted).
              if not extracted_entities:
                return []

              return await normalize_entities(extracted_entities)
         
     | 
| 117 | 
         
            +
             
     | 
| 118 | 
         
            +
             
     | 
| 119 | 
         
            +
            # Create a visually appealing Gradio app
         
     | 
| 120 | 
         
            +
            with gr.Blocks(theme=gr.themes.Soft()) as demo:
              # Page header
              gr.Markdown("# Entity Extraction & Normalization")
              gr.Markdown(
                "Enter text and specify the entity type to extract and normalize entities."
              )

              # Inputs: free text on the left (wider), entity type selector on the right
              with gr.Row():
                with gr.Column(scale=3):
                  paragraph = gr.Textbox(
                    label="Text Input",
                    placeholder="Enter paragraph here...",
                    lines=5,
                    info="Enter biomedical text input for entity extraction.",
                  )
                with gr.Column(scale=1):
                  target_entity = gr.Dropdown(
                    ["Disease", "Tissue", "Cell Type"],
                    label="Entity Type",
                    value="Disease",
                    info="Select the type of entity you want to extract and normalize from the text.",
                  )

              normalize_btn = gr.Button("Normalize", variant="primary")

              # Output: the list returned by extract_and_normalize, rendered as JSON
              with gr.Row():
                with gr.Column():
                  output = gr.JSON(label="Normalized Entities")

              # Status line used as a lightweight loading indicator
              with gr.Row():
                status = gr.Markdown("")

              with gr.Accordion("Example Inputs", open=False):
                gr.Examples(
                  examples=[
                    ["The patient was diagnosed with diabetes and hypertension.", "Disease"],
                    [
                      "Samples of BA12 tissue, weighing approximately 50-100 mg each, were homogenized in nuclei extraction buffer.",
                      "Tissue",
                    ],
                    [
                      "Coupling scTCR-seq with scRNA-seq can reveal the relationship between clonotype and phenotype in T or B cell populations.",
                      "Cell Type",
                    ],
                  ],
                  inputs=[paragraph, target_entity],
                )

              # Button pipeline: show status -> run extraction/normalization -> clear status.
              # The two status-only steps are UI plumbing, so they are marked
              # show_api=False to keep them out of the published API surface.
              # NOTE(review): this is also expected to keep them out of the MCP tool
              # list, leaving extract_and_normalize as the only tool - confirm against
              # the pinned Gradio version.
              normalize_btn.click(
                lambda: "Processing...",  # Show loading immediately
                None,
                status,
                queue=False,
                show_api=False,
              ).then(
                extract_and_normalize,  # Async processing
                [paragraph, target_entity],
                output,
              ).then(
                lambda: "",  # Clear status
                None,
                status,
                show_api=False,
              )


            if __name__ == "__main__":
              # mcp_server=True serves the MCP endpoint alongside the web interface
              demo.launch(mcp_server=True)
         
     | 
    	
        oaklib_utils.py
    ADDED
    
    | 
         @@ -0,0 +1,55 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """
         
     | 
| 2 | 
         
            +
            Retrieve top k candidate standard terms for normalization using oaklib.
         
     | 
| 3 | 
         
            +
            """
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            # import argparse
         
     | 
| 6 | 
         
            +
             
     | 
| 7 | 
         
            +
            from oaklib import get_adapter
         
     | 
| 8 | 
         
            +
            from oaklib.datamodels.search import SearchConfiguration
         
     | 
| 9 | 
         
            +
             
     | 
| 10 | 
         
            +
            # Shared ontology adapter, created once when this module is imported and
            # reused by every get_candidates() call.
            # NOTE(review): the "ols:" selector presumably targets the Ontology Lookup
            # Service backend — confirm against the oaklib documentation.
            adapter = get_adapter("ols:")
         
     | 
| 11 | 
         
            +
             
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
            def get_candidates(term: str, top_k: int = 10) -> list[str]:
              """
              Get up to `top_k` candidate standard-term labels for `term` (for RAG).

              Searches the module-level `adapter` for `term` and returns the labels of
              the matching entries, in the order the adapter yields them.

              Args:
                term: Free-text term to look up.
                top_k: Maximum number of candidates to return.

              Returns:
                A list of at most `top_k` non-empty label strings. The list is empty
                when `term` is blank, when `top_k` is not positive, or when no match
                carries a label.
              """
              from itertools import islice

              # Nothing sensible to search for: skip the lookup entirely.
              if not term or not term.strip() or top_k <= 0:
                return []

              # Set config for search (limit # terms returned)
              cfg = SearchConfiguration(limit=top_k)

              # Cap the result stream here as well, in case the adapter does not honor
              # `limit` (TODO confirm for the "ols:" backend).
              results = islice(adapter.basic_search(term, config=cfg), top_k)

              # `labels` yields (id, label) pairs; drop entries without a label so the
              # declared `list[str]` never contains None or empty strings.
              return [label for _, label in adapter.labels(results) if label]
         
     | 
| 26 | 
         
            +
             
     | 
| 27 | 
         
            +
             
     | 
| 28 | 
         
            +
            # def main():
         
     | 
| 29 | 
         
            +
            #   parser = argparse.ArgumentParser(
         
     | 
| 30 | 
         
            +
            #     description="Fetch top-K candidate passages for a given term (RAG)"
         
     | 
| 31 | 
         
            +
            #   )
         
     | 
| 32 | 
         
            +
            #   parser.add_argument(
         
     | 
| 33 | 
         
            +
            #     "term", type=str, help="The query term or prompt for which to retrieve candidates"
         
     | 
| 34 | 
         
            +
            #   )
         
     | 
| 35 | 
         
            +
            #   parser.add_argument(
         
     | 
| 36 | 
         
            +
            #     "-k",
         
     | 
| 37 | 
         
            +
            #     "--top_k",
         
     | 
| 38 | 
         
            +
            #     type=int,
         
     | 
| 39 | 
         
            +
            #     default=10,
         
     | 
| 40 | 
         
            +
            #     help="Number of top candidates to return (default: 10)",
         
     | 
| 41 | 
         
            +
            #   )
         
     | 
| 42 | 
         
            +
             
     | 
| 43 | 
         
            +
            #   args = parser.parse_args()
         
     | 
| 44 | 
         
            +
             
     | 
| 45 | 
         
            +
            #   # Call your function
         
     | 
| 46 | 
         
            +
            #   candidates = get_candidates(args.term, top_k=args.top_k)
         
     | 
| 47 | 
         
            +
             
     | 
| 48 | 
         
            +
            #   print(f"\nTerm: {args.term!r}")
         
     | 
| 49 | 
         
            +
            #   print(f"Top {args.top_k} candidates:")
         
     | 
| 50 | 
         
            +
            #   for i, cand in enumerate(candidates, start=1):
         
     | 
| 51 | 
         
            +
            #     print(f"  {i:2d}. {cand}")
         
     | 
| 52 | 
         
            +
             
     | 
| 53 | 
         
            +
             
     | 
| 54 | 
         
            +
            # if __name__ == "__main__":
         
     | 
| 55 | 
         
            +
            #   main()
         
     | 
    	
        openai_utils.py
    ADDED
    
    | 
         @@ -0,0 +1,69 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """
         
     | 
| 2 | 
         
            +
            Helper functions for structured OpenAI API calls using Pydantic models.
         
     | 
| 3 | 
         
            +
            Includes NER and RAG-specific prompting logic with retry and error handling.
         
     | 
| 4 | 
         
            +
            """
         
     | 
| 5 | 
         
            +
             
     | 
| 6 | 
         
            +
            import os
         
     | 
| 7 | 
         
            +
            from typing import Literal, Optional, overload, Union
         
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
            from dotenv import load_dotenv
         
     | 
| 10 | 
         
            +
            from openai import AsyncOpenAI
         
     | 
| 11 | 
         
            +
            from pydantic import BaseModel, Field
         
     | 
| 12 | 
         
            +
            from tenacity import retry, retry_if_result, stop_after_attempt, wait_random_exponential
         
     | 
| 13 | 
         
            +
            from tqdm.auto import tqdm
         
     | 
| 14 | 
         
            +
             
     | 
| 15 | 
         
            +
            # Load variables from a local .env file into the process environment.
            load_dotenv()

            # Fail fast at import time when the key is absent or empty, instead of
            # failing later on the first API call.
            api_key = os.environ.get("OPENAI_API_KEY")
            if not api_key:
              # EnvironmentError is an alias of OSError, so this raises the same type.
              raise OSError("Missing OPENAI_API_KEY in environment.")

            # Single async client shared by every request made from this module.
            client = AsyncOpenAI(timeout=120.0, api_key=api_key)
         
     | 
| 20 | 
         
            +
             
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
            +
            class NEROutput(BaseModel):
              # Structured reply schema used when `usage` is 'ner'.
              # Documented with comments rather than a docstring on purpose: pydantic
              # would copy a class docstring into the JSON schema.
              # `answer` has no default, so it is a required field.
              answer: list[str] = Field(description="List of extracted entities")
         
     | 
| 24 | 
         
            +
             
     | 
| 25 | 
         
            +
             
     | 
| 26 | 
         
            +
            class RAGOutput(BaseModel):
              # Structured reply schema used when `usage` is not 'ner'.
              # Documented with comments rather than a docstring on purpose: pydantic
              # would copy a class docstring into the JSON schema.
              # Both fields have no default, so both are required.
              answer: str = Field(description="Closest match to input term")
              reason: str = Field(description="Why you chose the answer match to input term")
         
     | 
| 29 | 
         
            +
             
     | 
| 30 | 
         
            +
             
     | 
| 31 | 
         
            +
            def is_invalid_result(result):
              """
              Retry predicate: report whether `result` should trigger another attempt.

              Only `None` counts as invalid. Falsy values such as an empty list or an
              empty string are accepted as they are.
              """
              is_missing = result is None
              return is_missing
         
     | 
| 33 | 
         
            +
             
     | 
| 34 | 
         
            +
            @overload
            async def ask_openai(user_prompt: str, usage: Literal["ner"], model: str = ...) -> Optional[list[str]]: ...
            @overload
            async def ask_openai(user_prompt: str, usage: Literal["rag"], model: str = ...) -> Optional[str]: ...

            @retry(
              stop=stop_after_attempt(6),
              wait=wait_random_exponential(min=1, max=60),
              retry=retry_if_result(is_invalid_result),
            )
            async def ask_openai(
              user_prompt: str,
              usage: Literal['ner', 'rag'],
              model: str = "o4-mini-2025-04-16",
            ) -> Optional[Union[list[str], str]]:
              """
              Send `user_prompt` to the OpenAI API and return the parsed `answer`.

              The reply is parsed into `NEROutput` when `usage` is 'ner' and into
              `RAGOutput` for any other value. Only the `answer` field is returned, so
              the `reason` field of a `RAGOutput` reply is dropped.

              A `None` result is retried by the decorator: at most 6 attempts, waiting
              `wait_random_exponential(min=1, max=60)` between them. Exceptions are not
              retried; they are reported through `tqdm.write` and re-raised.

              NOTE(review): with tenacity's default settings, running out of attempts
              raises `tenacity.RetryError` instead of returning `None` — confirm that
              callers expect this.

              Args:
                user_prompt: Full prompt text, sent as a single user message.
                usage: 'ner' for entity extraction, 'rag' for candidate matching.
                model: Name of the OpenAI model to call.

              Returns:
                A list of strings for 'ner', a single string for 'rag', or `None`
                when the reply carries no parsed output.

              Raises:
                ValueError: If `model` is one of the models rejected below.
              """
              unsupported_models = ("chatgpt-4o-latest", "o1-mini")
              if model in unsupported_models:
                raise ValueError(f"Model {model} does not support structured outputs.")

              if usage == 'ner':
                response_format = NEROutput
              else:
                response_format = RAGOutput

              try:
                # Temperature is deliberately left unset (it was tried at 0.05 and
                # disabled in the original code).
                response = await client.responses.parse(
                  text_format=response_format,
                  input=[{"role": "user", "content": user_prompt}],
                  model=model,
                )
                parsed = response.output_parsed
                if not parsed:
                  return None
                return parsed.answer

              except Exception as e:
                tqdm.write(f"❌ Unexpected error. Error: {e}")
                raise
         
     | 
    	
        pyproject.toml
    ADDED
    
    | 
         @@ -0,0 +1,11 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            [project]
         
     | 
| 2 | 
         
            +
            name = "biomednorm-mcp-server"
         
     | 
| 3 | 
         
            +
            version = "0.1.0"
         
     | 
| 4 | 
         
            +
            description = "MCP server that extracts biomedical entities from text and normalizes them to standard ontology terms"
         
     | 
| 5 | 
         
            +
            readme = "README.md"
         
     | 
| 6 | 
         
            +
            requires-python = ">=3.13"
         
     | 
| 7 | 
         
            +
            dependencies = [
         
     | 
| 8 | 
         
            +
                "oaklib>=0.6.23",
         
     | 
| 9 | 
         
            +
                "openai>=1.84.0",
         
     | 
| 10 | 
         
            +
                "python-dotenv>=1.1.0",
         
     | 
| 11 | 
         
            +
            ]
         
     | 
    	
        ruff.toml
    ADDED
    
    | 
         @@ -0,0 +1,2 @@ 
     | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            indent-width = 2
         
     | 
| 2 | 
         
            +
            target-version = "py312"
         
     | 
    	
        uv.lock
    ADDED
    
    | 
         The diff for this file is too large to render. 
		See raw diff 
     | 
| 
         |