| """ | |
| Retrieve top k candidate standard terms for normalization using oaklib. | |
| """ | |
| from oaklib import get_adapter | |
| from oaklib.datamodels.search import SearchConfiguration | |
# Shared module-level adapter, created once at import time and reused by every
# lookup in this module. NOTE(review): the "ols:" selector presumably targets
# the Ontology Lookup Service backend of oaklib -- confirm against oaklib docs.
adapter = get_adapter("ols:")
def get_candidates(term: str, top_k: int = 10) -> list[tuple[str, str]]:
    """
    Look up candidate standard terms for ``term`` to feed into RAG.

    Runs a basic search through the module-level ``adapter`` with the search
    limit set to ``top_k``, resolves the label of every hit, and returns
    ``(URI, label)`` pairs so that downstream output stays "explainable".
    Hits whose CURIE is ``None`` are left out of the result.
    """
    # Limit how many terms the search hands back.
    search_config = SearchConfiguration(limit=top_k)
    hits = adapter.basic_search(term, config=search_config)

    # Materialise the (CURIE, label) pairs up front, before any URI expansion.
    labelled_hits = list(adapter.labels(hits))

    candidates = []
    for curie, label in labelled_hits:
        # CURIEs were occasionally observed to be None; skip those entries.
        if curie is None:
            continue
        # Expand the CURIE into a full URI and keep the label next to it.
        candidates.append((adapter.curie_to_uri(curie), label))
    return candidates