Skip to content

Commit

Permalink
Merge pull request #118 from TranslatorSRI/add-autocomplete-flag
Browse files Browse the repository at this point in the history
NameRes' /lookup endpoint was previously designed for the autocomplete use-case, where we expect to give good results even when the search term is incomplete (e.g. `diab` instead of `diabetes`). We handled this by expanding the search to `({query}) OR ({query}*)`. This PR adds an `autocomplete` flag that allows this expansion to be turned on and off -- this should help improve our accuracy when NameRes is used for named entity linking only.

Closes #108.
  • Loading branch information
gaurav authored Dec 2, 2023
2 parents 54e563d + 3992dab commit e3e2962
Showing 1 changed file with 20 additions and 5 deletions.
25 changes: 20 additions & 5 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ async def lookup_curies_get(
string: Annotated[str, Query(
description="The string to search for."
)],
autocomplete: Annotated[bool, Query(
description="Is the input string incomplete (autocomplete=true) or a complete phrase (autocomplete=false)?"
)] = True,
offset: Annotated[int, Query(
description="The number of results to skip. Can be used to page through the results of a query.",
# Offset should be greater than or equal to zero.
Expand Down Expand Up @@ -153,7 +156,7 @@ async def lookup_curies_get(
"""
Returns cliques with a name or synonym that contains a specified string.
"""
return await lookup(string, offset, limit, biolink_type, only_prefixes, exclude_prefixes)
return await lookup(string, autocomplete, offset, limit, biolink_type, only_prefixes, exclude_prefixes)


@app.post("/lookup",
Expand All @@ -166,6 +169,9 @@ async def lookup_curies_post(
string: Annotated[str, Query(
description="The string to search for."
)],
autocomplete: Annotated[bool, Query(
description="Is the input string incomplete (autocomplete=true) or a complete phrase (autocomplete=false)?"
)] = True,
offset: Annotated[int, Query(
description="The number of results to skip. Can be used to page through the results of a query.",
# Offset should be greater than or equal to zero.
Expand Down Expand Up @@ -196,17 +202,23 @@ async def lookup_curies_post(
"""
Returns cliques with a name or synonym that contains a specified string.
"""
return await lookup(string, offset, limit, biolink_type, only_prefixes, exclude_prefixes)
return await lookup(string, autocomplete, offset, limit, biolink_type, only_prefixes, exclude_prefixes)


async def lookup(string: str,
autocomplete: bool = False,
offset: int = 0,
limit: conint(le=1000) = 10,
biolink_type: str = None,
only_prefixes: str = "",
exclude_prefixes: str = ""
) -> List[LookupResult]:
"""Returns cliques with a name or synonym that contains a specified string."""
"""
Returns cliques with a name or synonym that contains a specified string.
:param autocomplete: Should we do the lookup in autocomplete mode (in which we expect the final word to be
incomplete) or not (in which the entire phrase is expected to be complete, i.e. as an entity linker)?
"""
# This original code tokenizes on spaces, and then removes all other punctuation,
# so x-linked becomes xlinked and beta-secretase becomes betasecretase.
# This turns out to be rarely what is wanted, especially because the tokenizer
Expand All @@ -230,8 +242,11 @@ async def lookup(string: str,
# We need to escape '&&' and '||' specially, since they are double-character sequences.
string_lc_escaped = string_lc_escaped.replace('&&', '\\&\\&').replace('||', '\\|\\|')

# Then we combine it into a query that allows for incomplete words.
query = f"({string_lc_escaped}) OR ({string_lc_escaped}*)"
# If in autocomplete mode, we combine it into a query that allows for incomplete words.
if autocomplete:
query = f"({string_lc_escaped}) OR ({string_lc_escaped}*)"
else:
query = f"({string_lc_escaped})"

# Apply filters as needed.
# Biolink type filter
Expand Down

0 comments on commit e3e2962

Please sign in to comment.