Merge pull request #165 from TranslatorSRI/add-bulk-endpoints

This PR adds a POST bulk query endpoint, which can be used to submit multiple strings to NameRes to look up at the same time. Closes #70.
TranslatorSRI · Nov 8, 2024 · 58c67d3 · 58c67d3
2 parents d184b71 + 64ee037
commit 58c67d3
Show file tree

Hide file tree

Showing 2 changed files with 121 additions and 3 deletions.
diff --git a/api/server.py b/api/server.py
@@ -11,12 +11,12 @@
 import logging, warnings
 import os
 import re
-from typing import Dict, List, Union, Annotated
+from typing import Dict, List, Union, Annotated, Optional
 
 from fastapi import Body, FastAPI, Query
 from fastapi.responses import RedirectResponse
 import httpx
-from pydantic import BaseModel, conint
+from pydantic import BaseModel, conint, Field
 from starlette.middleware.cors import CORSMiddleware
 
 from .apidocs import get_app_info, construct_open_api_schema
@@ -498,6 +498,87 @@ async def lookup(string: str,
 
     return outputs
 
+## BULK ENDPOINT
+
+class NameResQuery(BaseModel):
+    """
+    JSON body of a POST /bulk-lookup request: the strings to look up plus the options applied to every one of them.
+    """
+    strings: List[str] = Field(
+        ..., # Ellipsis means field is required
+        description="The strings to search for. The returned results will be in a dictionary with these values as keys."
+    )
+    autocomplete: Optional[bool] = Field(
+        False,
+        description="Is the input string incomplete (autocomplete=true) or a complete phrase (autocomplete=false)?"
+    )
+    highlighting: Optional[bool] = Field(
+        False,
+        description="Return information on which labels and synonyms matched the search query?"
+    )
+    offset: Optional[int] = Field(
+        0,
+        description="The number of results to skip. Can be used to page through the results of a query.",
+        # Offset should be greater than or equal to zero.
+        ge=0
+    )
+    limit: Optional[int] = Field(
+        10,
+        description="The maximum number of results to return for each string. Can be used to page through the results of a query.",
+        # Limit should be greater than or equal to zero and less than or equal to 1000.
+        ge=0,
+        le=1000
+    )
+    biolink_types: Optional[List[str]] = Field(
+        [],
+        description="The Biolink types to filter to (with or without the `biolink:` prefix), "
+                    "e.g. `biolink:Disease` or `Disease`. Multiple types will be combined with OR, i.e. filtering "
+                    "for PhenotypicFeature and Disease will return concepts that are either PhenotypicFeatures OR "
+                    "Disease, not concepts that are both PhenotypicFeature AND Disease.",
+    )
+    only_prefixes: Optional[str] = Field(
+        "",
+        description="Pipe-separated, case-sensitive list of prefixes to filter to, e.g. `MONDO|EFO`.",
+        # We can't use `example` here because otherwise it gets filled in when filling this in.
+        # example="MONDO|EFO"
+    )
+    exclude_prefixes: Optional[str] = Field(
+        "",
+        description="Pipe-separated, case-sensitive list of prefixes to exclude, e.g. `UMLS|EFO`.",
+        # We can't use `example` here because otherwise it gets filled in when filling this in.
+        # example="UMLS|EFO"
+    )
+    only_taxa: Optional[str] = Field(
+        "",
+        description="Pipe-separated, case-sensitive list of taxa to filter, "
+                    "e.g. `NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955`.",
+        # We can't use `example` here because otherwise it gets filled in when filling this in.
+        # example="NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955"
+    )
+
+
+@app.post("/bulk-lookup",
+          summary="Look up cliques for a fragment of multiple names or synonyms.",
+          description="Returns cliques for each query.",
+          response_model=Dict[str, List[LookupResult]],
+          tags=["lookup"]
+)
+async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
+    """
+    Run `lookup` on each string in `query.strings`, passing the same options from `query` to every call.
+
+    Returns a dictionary mapping each distinct input string to the result of its `lookup` call.
+    """
+    result = {}
+    for string in query.strings:
+        # A repeated string would only overwrite its own entry, so look each distinct string up once.
+        if string not in result:
+            result[string] = await lookup(
+                string, query.autocomplete, query.highlighting, query.offset, query.limit,
+                query.biolink_types, query.only_prefixes, query.exclude_prefixes, query.only_taxa)
+    return result
+
+
 # Override open api schema with custom schema
 app.openapi_schema = construct_open_api_schema(app)
 

diff --git a/tests/test_service.py b/tests/test_service.py
@@ -129,6 +129,44 @@ def test_autocomplete():
     assert syns[1]["types"][0] == "biolink:Disease"
 
 
+def test_bulk_lookup():
+    """Test POST /bulk-lookup with two strings, first without and then with a `biolink_types` filter."""
+    client = TestClient(app)
+    params = {'strings': ['beta-secretase', 'Parkinson'], 'limit': 100}
+    response = client.post("/bulk-lookup", json=params)
+    assert response.status_code == 200
+    results = response.json()
+    assert len(results) == 2
+    assert len(results['beta-secretase']) == 2
+    assert results['beta-secretase'][0]['curie'] == 'CHEBI:74925'
+    assert results['beta-secretase'][0]['label'] == 'BACE1 inhibitor'
+    assert len(results['Parkinson']) == 34
+
+    # TODO: this should be MONDO:0005180 "Parkinson disease", but
+    # instead we get MONDO:0000828 "juvenile-onset Parkinson disease"
+    # as the top result.
+    assert results['Parkinson'][0]['curie'] == 'MONDO:0000828'
+    assert results['Parkinson'][0]['label'] == "juvenile-onset Parkinson disease"
+
+    # Try it again with the biolink_types set.
+    params['biolink_types'] = ['biolink:Disease']
+    response = client.post("/bulk-lookup", json=params)
+    assert response.status_code == 200
+    results = response.json()
+    assert len(results) == 2
+    assert len(results['beta-secretase']) == 1
+    # We match MONDO:0011561 "Alzheimer disease 6" because it contains the word "beta".
+    assert results['beta-secretase'][0]['curie'] == 'MONDO:0011561'
+    assert results['beta-secretase'][0]['label'] == 'Alzheimer disease 6'
+
+    assert len(results['Parkinson']) == 33
+    # TODO: this should be MONDO:0005180 "Parkinson disease", but
+    # instead we get MONDO:0000828 "juvenile-onset Parkinson disease"
+    # as the top result.
+    assert results['Parkinson'][0]['curie'] == 'MONDO:0000828'
+    assert results['Parkinson'][0]['label'] == "juvenile-onset Parkinson disease"
+
+
 def test_synonyms():
     """
     Test the /synonyms endpoints -- these are used to look up all the information we know about a preferred CURIE.
@@ -161,4 +199,3 @@ def test_synonyms():
     mondo_0000828_results = results['MONDO:0000828']
     assert mondo_0000828_results['curie'] == 'MONDO:0000828'
     assert mondo_0000828_results['preferred_name'] == 'juvenile-onset Parkinson disease'
-