Skip to content

Commit

Permalink
Merge pull request #173 from TranslatorSRI/strip-query-before-on-input
Browse files Browse the repository at this point in the history
Bugfix: with autocomplete=true, a query ending in whitespace would search for everything. This happened because a query such as `"abc "` was expanded to `"abc *"`. This PR strips the query string before it is used in the search, which fixes the issue.
  • Loading branch information
gaurav authored Dec 14, 2024
2 parents 9380971 + 05202a2 commit be63e79
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
8 changes: 4 additions & 4 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,13 +345,13 @@ async def lookup(string: str,
will be returned, rather than filtering to concepts that are both PhenotypicFeature and Disease.
"""

# First, we strip and lowercase the query since all our indexes are case-insensitive.
string_lc = string.strip().lower()

# Do we have a search string at all?
if string.strip() == "":
if string_lc == "":
return []

# First, we lowercase the query since all our indexes are case-insensitive.
string_lc = string.lower()

# For reasons I don't understand, we need to use backslash to escape characters (e.g. "\(") to remove the special
# significance of characters inside round brackets, but not inside double-quotes. So we escape them separately:
# - For a full exact search, we only remove double-quotes and slashes, leaving other special characters as-is.
Expand Down
18 changes: 18 additions & 0 deletions tests/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,24 @@ def test_autocomplete():
assert syns[1]["label"] == 'Alzheimer disease 6'
assert syns[1]["types"][0] == "biolink:Disease"

# Previously, searching for an autocomplete query ending in whitespace
# would trigger a blank search (e.g. `abc ` would be expanded into `abc *`).
params = {'string': 'beta-secretase ', 'autocomplete': 'true'}
response = client.post("/lookup", params=params)
syns = response.json()

# When this bug was around, it would result in the following:
# assert len(syns) == 10
# assert syns[0]['curie'] == 'CHEBI:48407'
# assert syns[0]["label"] == 'antiparkinson agent'
# assert syns[0]["types"] == ["biolink:NamedThing"]

# But now we only get beta-secretase.
assert len(syns) == 1
assert syns[0]['curie'] == 'CHEBI:74925'
assert syns[0]["label"] == 'BACE1 inhibitor'
assert syns[0]["types"] == ["biolink:NamedThing"]


def test_bulk_lookup():
client = TestClient(app)
Expand Down

0 comments on commit be63e79

Please sign in to comment.