Skip to content

Commit

Permalink
Merge pull request #165 from TranslatorSRI/add-bulk-endpoints
Browse files Browse the repository at this point in the history
This PR adds a POST bulk query endpoint, which can be used to submit multiple strings to NameRes to look up at the same time. Closes #70.
  • Loading branch information
gaurav authored Nov 8, 2024
2 parents d184b71 + 64ee037 commit 58c67d3
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 3 deletions.
85 changes: 83 additions & 2 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
import logging, warnings
import os
import re
from typing import Dict, List, Union, Annotated
from typing import Dict, List, Union, Annotated, Optional

from fastapi import Body, FastAPI, Query
from fastapi.responses import RedirectResponse
import httpx
from pydantic import BaseModel, conint
from pydantic import BaseModel, conint, Field
from starlette.middleware.cors import CORSMiddleware

from .apidocs import get_app_info, construct_open_api_schema
Expand Down Expand Up @@ -498,6 +498,87 @@ async def lookup(string: str,

return outputs

## BULK ENDPOINT

class NameResQuery(BaseModel):
    """
    A request body for bulk name resolution.

    Every string in `strings` is looked up with the same set of options
    (autocomplete, highlighting, paging and filters) taken from the other
    fields of this model.
    """
    strings: List[str] = Field(
        ...,  # Ellipsis means field is required
        description="The strings to search for. The returned results will be in a dictionary with these values as keys."
    )
    autocomplete: Optional[bool] = Field(
        False,
        description="Is the input string incomplete (autocomplete=true) or a complete phrase (autocomplete=false)?"
    )
    highlighting: Optional[bool] = Field(
        False,
        description="Return information on which labels and synonyms matched the search query?"
    )
    offset: Optional[int] = Field(
        0,
        description="The number of results to skip. Can be used to page through the results of a query.",
        # Offset should be greater than or equal to zero.
        ge=0
    )
    limit: Optional[int] = Field(
        10,
        # This description used to be a copy of the one on `offset`; it now
        # describes what `limit` controls.
        description="The number of results to return. Can be used to page through the results of a query.",
        # Limit should be greater than or equal to zero and less than or equal to 1000.
        ge=0,
        le=1000
    )
    biolink_types: Optional[List[str]] = Field(
        [],
        description="The Biolink types to filter to (with or without the `biolink:` prefix), "
                    "e.g. `biolink:Disease` or `Disease`. Multiple types will be combined with OR, i.e. filtering "
                    "for PhenotypicFeature and Disease will return concepts that are either PhenotypicFeatures OR "
                    "Disease, not concepts that are both PhenotypicFeature AND Disease.",
    )
    only_prefixes: Optional[str] = Field(
        "",
        description="Pipe-separated, case-sensitive list of prefixes to filter to, e.g. `MONDO|EFO`.",
        # We can't use `example` here because otherwise it gets filled in when filling this in.
        # example="MONDO|EFO"
    )
    exclude_prefixes: Optional[str] = Field(
        "",
        description="Pipe-separated, case-sensitive list of prefixes to exclude, e.g. `UMLS|EFO`.",
        # We can't use `example` here because otherwise it gets filled in when filling this in.
        # example="UMLS|EFO"
    )
    # This is an attribute of a request-body model, so it is declared with
    # Field() like every other attribute here rather than with Query(),
    # which is meant for query-string parameters of path operations.
    only_taxa: Optional[str] = Field(
        "",
        description="Pipe-separated, case-sensitive list of taxa to filter, "
                    "e.g. `NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955`.",
        # We can't use `example` here because otherwise it gets filled in when filling this in.
        # example="NCBITaxon:9606|NCBITaxon:10090|NCBITaxon:10116|NCBITaxon:7955"
    )


@app.post(
    "/bulk-lookup",
    summary="Look up cliques for a fragment of multiple names or synonyms.",
    description="Returns cliques for each query.",
    response_model=Dict[str, List[LookupResult]],
    tags=["lookup"],
)
async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
    """
    Run lookup() once for every string in the request.

    :param query: The bulk request: the strings to search for plus the
        options that are applied identically to each of them.
    :return: A dictionary keyed by the input string, each value being
        whatever lookup() returned for that string. If a string occurs more
        than once in the request it is looked up each time and the last
        result is the one kept.
    """
    # These options are the same for every string, so gather them once.
    # They are passed positionally, in the order lookup() is called with.
    shared_options = (
        query.autocomplete,
        query.highlighting,
        query.offset,
        query.limit,
        query.biolink_types,
        query.only_prefixes,
        query.exclude_prefixes,
        query.only_taxa,
    )

    # Lookups are awaited one after another, in the order they were given.
    return {
        search_string: await lookup(search_string, *shared_options)
        for search_string in query.strings
    }


# Override open api schema with custom schema
app.openapi_schema = construct_open_api_schema(app)

Expand Down
39 changes: 38 additions & 1 deletion tests/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,44 @@ def test_autocomplete():
assert syns[1]["types"][0] == "biolink:Disease"


def test_bulk_lookup():
    """
    Test the /bulk-lookup endpoint: post two strings at once and check the
    results for each, first without a filter and then filtered to
    biolink:Disease.
    """
    client = TestClient(app)

    def post_bulk(payload):
        # POST the payload as JSON and hand back the decoded response body.
        return client.post("/bulk-lookup", json=payload).json()

    base_query = {
        'strings': ['beta-secretase', 'Parkinson'],
        'limit': 100,
    }

    # First pass: no Biolink type filter.
    unfiltered = post_bulk(base_query)
    assert len(unfiltered) == 2

    beta_hits = unfiltered['beta-secretase']
    assert len(beta_hits) == 2
    assert beta_hits[0]['curie'] == 'CHEBI:74925'
    assert beta_hits[0]['label'] == 'BACE1 inhibitor'

    parkinson_hits = unfiltered['Parkinson']
    assert len(parkinson_hits) == 34
    # TODO: this should be MONDO:0005180 "Parkinson disease", but
    # instead we get MONDO:0000828 "juvenile-onset Parkinson disease"
    # as the top result.
    assert parkinson_hits[0]['curie'] == 'MONDO:0000828'
    assert parkinson_hits[0]['label'] == "juvenile-onset Parkinson disease"

    # Second pass: the same query with biolink_types set.
    disease_only = post_bulk({**base_query, 'biolink_types': ['biolink:Disease']})
    assert len(disease_only) == 2

    beta_disease_hits = disease_only['beta-secretase']
    assert len(beta_disease_hits) == 1
    # We match MONDO:0011561 "Alzheimer disease 6" because it contains the word "beta".
    assert beta_disease_hits[0]['curie'] == 'MONDO:0011561'
    assert beta_disease_hits[0]['label'] == 'Alzheimer disease 6'

    parkinson_disease_hits = disease_only['Parkinson']
    assert len(parkinson_disease_hits) == 33
    # TODO: this should be MONDO:0005180 "Parkinson disease", but
    # instead we get MONDO:0000828 "juvenile-onset Parkinson disease"
    # as the top result.
    assert parkinson_disease_hits[0]['curie'] == 'MONDO:0000828'
    assert parkinson_disease_hits[0]['label'] == "juvenile-onset Parkinson disease"


def test_synonyms():
"""
Test the /synonyms endpoints -- these are used to look up all the information we know about a preferred CURIE.
Expand Down Expand Up @@ -161,4 +199,3 @@ def test_synonyms():
mondo_0000828_results = results['MONDO:0000828']
assert mondo_0000828_results['curie'] == 'MONDO:0000828'
assert mondo_0000828_results['preferred_name'] == 'juvenile-onset Parkinson disease'

0 comments on commit 58c67d3

Please sign in to comment.