Skip to content

Commit

Permalink
fix: improve autocomplete
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Nov 9, 2023
1 parent c0183e9 commit eca6445
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 118 deletions.
24 changes: 13 additions & 11 deletions app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
from app import config
from app._types import SearchResponse
from app.config import check_config_is_defined, settings
from app.postprocessing import load_result_processor
from app.postprocessing import (
load_result_processor,
process_taxonomy_completion_response,
)
from app.query import (
build_completion_query,
build_elasticsearch_query_builder,
Expand All @@ -29,14 +32,10 @@
logger.warning("Main configuration is not set, use CONFIG_PATH envvar")
FILTER_QUERY_BUILDER = None
RESULT_PROCESSOR = None
TAXONOMY_RESULT_PROCESSOR = None
else:
# we cache query builder and result processor here for faster processing
FILTER_QUERY_BUILDER = build_elasticsearch_query_builder(config.CONFIG)
RESULT_PROCESSOR = load_result_processor(config.CONFIG.result_processor)
TAXONOMY_RESULT_PROCESSOR = load_result_processor(
config.CONFIG.taxonomy.autocomplete.result_processor
)


app = FastAPI(
Expand Down Expand Up @@ -168,23 +167,26 @@ def search(
)


@app.get("/taxonomy")
@app.get("/autocomplete")
def taxonomy_autocomplete(
q: Annotated[str, Query(description="User autocomplete query.")],
taxonomy_name: Annotated[
str, Query(description="Name of the taxonomy to search in.")
list[str],
Query(
description="Name(s) of the taxonomy to search in, pass "
"several time the parameter to search in several taxonomies."
),
],
lang: Annotated[
str, Query(description="Language to search in, defaults to 'en'.")
] = "en",
size: Annotated[int, Query(description="Number of results to return.")] = 10,
):
query = build_completion_query(
q=q, taxonomy_name=taxonomy_name, lang=lang, size=size, config=config.CONFIG
q=q, taxonomy_names=taxonomy_name, lang=lang, size=size, config=config.CONFIG
)
results = query.execute()

response = TAXONOMY_RESULT_PROCESSOR.process(results)
es_response = query.execute()
response = process_taxonomy_completion_response(es_response)

return {
**response,
Expand Down
4 changes: 2 additions & 2 deletions app/cli/perform_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def perform_taxonomy_import(config: Config):
# we create a temporary index to import to
# at the end we will change alias to point to it
index_date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
next_index = f"{config.taxonomy.autocomplete.index.name}-{index_date}"
next_index = f"{config.taxonomy.index.name}-{index_date}"

index = generate_taxonomy_index_object(next_index, config)
# create the index
Expand All @@ -281,4 +281,4 @@ def perform_taxonomy_import(config: Config):
import_taxonomies(config, next_index)

# make alias point to new index
update_alias(es, next_index, config.taxonomy.autocomplete.index.name)
update_alias(es, next_index, config.taxonomy.index.name)
28 changes: 5 additions & 23 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,26 +241,6 @@ class TaxonomyIndexConfig(BaseModel):
] = 1


# Settings model for taxonomy autocomplete: the index configuration, an
# optional result processor reference and the list of taxonomy sources.
class TaxonomyAutocompleteConfig(BaseModel):
    # configuration of the index (a single index for all taxonomies)
    index: Annotated[
        TaxonomyIndexConfig,
        Field(
            description="configuration of the taxonomy index. There is a single index for all taxonomies."
        ),
    ]
    # optional: defaults to None when no result processor is configured
    result_processor: Annotated[
        str,
        Field(
            description="The full qualified reference to the Elasticsearch result processor "
            "to use after search query to Elasticsearch."
        ),
    ] | None = None
    # taxonomy sources (taxonomy URLs)
    sources: Annotated[
        list[TaxonomySourceConfig],
        Field(description="configurations of the taxonomy sources (taxonomy URLs)"),
    ]


class TaxonomyConfig(BaseModel):
sources: Annotated[
list[TaxonomySourceConfig],
Expand All @@ -276,9 +256,11 @@ class TaxonomyConfig(BaseModel):
"`taxonomy_langs` field that can be defined in each document."
),
]
autocomplete: Annotated[
TaxonomyAutocompleteConfig,
Field(description="configuration of taxonomy autocomplete"),
index: Annotated[
TaxonomyIndexConfig,
Field(
description="configuration of the taxonomy index. There is a single index for all taxonomies."
),
]


Expand Down
15 changes: 13 additions & 2 deletions app/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,15 +328,26 @@ def generate_taxonomy_mapping_object(config: Config) -> Mapping:
Object(
required=True,
dynamic=False,
properties={lang: Completion() for lang in supported_langs},
properties={
lang: Completion(
contexts=[
{
"name": "taxonomy_name",
"path": "taxonomy_name",
"type": "category",
}
],
)
for lang in supported_langs
},
),
)
return mapping


def generate_taxonomy_index_object(index_name: str, config: Config) -> Index:
index = Index(index_name)
taxonomy_index_config = config.taxonomy.autocomplete.index
taxonomy_index_config = config.taxonomy.index
index.settings(
number_of_shards=taxonomy_index_config.number_of_shards,
number_of_replicas=taxonomy_index_config.number_of_replicas,
Expand Down
26 changes: 13 additions & 13 deletions app/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,16 @@ def load_result_processor(result_processor: str | None) -> BaseResultProcessor |
return result_processor_cls(result_processor)


class CompletionProcessor(BaseResultProcessor):
    """Result processor turning a completion response into a JSON payload."""

    def process(self, response: Response) -> JSONType:
        """Extract the autocomplete options from a suggest response.

        :param response: response of a search carrying a `taxonomy_suggest`
            suggestion
        :return: a dict with the `took` and `timed_out` values of the
            response and an `options` list holding, for each suggested
            option, its `id` and matched `text`
        """
        output = {"took": response.took, "timed_out": response.timed_out}
        # only the first entry of the `taxonomy_suggest` suggestion is read
        suggestion = response.suggest["taxonomy_suggest"][0]
        output["options"] = [
            {
                "id": option._source["id"],
                "text": option.text,
            }
            for option in suggestion.options
        ]
        return output
def process_taxonomy_completion_response(response: Response) -> JSONType:
    """Convert a taxonomy completion response into a JSON payload.

    :param response: response of a search carrying a `taxonomy_suggest`
        suggestion
    :return: a dict with the `took` and `timed_out` values of the response
        and an `options` list holding, for each suggested option, its `id`,
        matched `text` and `taxonomy_name`
    """
    output = {"took": response.took, "timed_out": response.timed_out}
    # only the first entry of the `taxonomy_suggest` suggestion is read
    suggestion = response.suggest["taxonomy_suggest"][0]
    output["options"] = [
        {
            "id": option._source["id"],
            "text": option.text,
            "taxonomy_name": option._source["taxonomy_name"],
        }
        for option in suggestion.options
    ]
    return output
30 changes: 23 additions & 7 deletions app/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,23 +253,39 @@ def build_search_query(


def build_completion_query(
q: str, taxonomy_name: str, lang: str, size: int, config: Config
q: str,
taxonomy_names: list[str],
lang: str,
size: int,
config: Config,
fuzziness: int | None = 2,
):
"""Build an elasticsearch_dsl Query.
"""Build an elasticsearch_dsl completion Query.
:param q: the user raw query
:param taxonomy_name: the taxonomy we want to search in
:param q: the user autocomplete query
:param taxonomy_names: a list of taxonomies we want to search in
:param lang: the language we want to search in
:param size: number of results to return
:param config: configuration to use
:param fuzziness: fuzziness parameter for completion query
:return: the built Query
"""

query = Search(index=config.taxonomy.autocomplete.index.name)
completion_clause = {
"field": f"names.{lang}",
"size": size,
"contexts": {"taxonomy_name": taxonomy_names},
}

if fuzziness is not None:
completion_clause["fuzzy"] = {"fuzziness": fuzziness}

query = Search(index=config.taxonomy.index.name)
query = query.suggest(
"taxonomy_suggest", q, completion={"field": f"names.{lang}", "size": size}
"taxonomy_suggest",
q,
completion=completion_clause,
)
query = query.query("bool", filter=[Q("term", taxonomy_name=taxonomy_name)])
return query


Expand Down
117 changes: 57 additions & 60 deletions data/config/openfoodfacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,73 +134,70 @@ taxonomy:
url: https://static.openfoodfacts.org/data/taxonomies/categories.full.json
- name: label
url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json
- name: additive
url: https://static.openfoodfacts.org/data/taxonomies/additives.full.json
- name: allergen
url: https://static.openfoodfacts.org/data/taxonomies/allergens.full.json
- name: amino_acid
url: https://static.openfoodfacts.org/data/taxonomies/amino_acids.full.json
- name: country
url: https://static.openfoodfacts.org/data/taxonomies/countries.full.json
- name: data_quality
url: https://static.openfoodfacts.org/data/taxonomies/data_quality.full.json
- name: food_group
url: https://static.openfoodfacts.org/data/taxonomies/food_groups.full.json
- name: improvement
url: https://static.openfoodfacts.org/data/taxonomies/improvements.full.json
- name: ingredient
url: https://static.openfoodfacts.org/data/taxonomies/ingredients.full.json
- name: ingredients_analysis
url: https://static.openfoodfacts.org/data/taxonomies/ingredients_analysis.full.json
- name: ingredients_processing
url: https://static.openfoodfacts.org/data/taxonomies/ingredients_processing.full.json
- name: label
url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json
- name: language
url: https://static.openfoodfacts.org/data/taxonomies/languages.full.json
- name: mineral
url: https://static.openfoodfacts.org/data/taxonomies/minerals.full.json
- name: misc
url: https://static.openfoodfacts.org/data/taxonomies/misc.full.json
- name: nova_group
url: https://static.openfoodfacts.org/data/taxonomies/nova_groups.full.json
- name: nucleotide
url: https://static.openfoodfacts.org/data/taxonomies/nucleotides.full.json
- name: nutrient
url: https://static.openfoodfacts.org/data/taxonomies/nutrients.full.json
- name: origin
url: https://static.openfoodfacts.org/data/taxonomies/origins.full.json
- name: other_nutritional_substance
url: https://static.openfoodfacts.org/data/taxonomies/other_nutritional_substances.full.json
- name: packaging_material
url: https://static.openfoodfacts.org/data/taxonomies/packaging_materials.full.json
- name: packaging_recycling
url: https://static.openfoodfacts.org/data/taxonomies/packaging_recycling.full.json
- name: packaging_shape
url: https://static.openfoodfacts.org/data/taxonomies/packaging_shapes.full.json
- name: periods_after_opening
url: https://static.openfoodfacts.org/data/taxonomies/periods_after_opening.full.json
- name: preservation
url: https://static.openfoodfacts.org/data/taxonomies/preservation.full.json
- name: state
url: https://static.openfoodfacts.org/data/taxonomies/states.full.json
- name: vitamin
url: https://static.openfoodfacts.org/data/taxonomies/vitamins.full.json
- name: brand
url: https://static.openfoodfacts.org/data/taxonomies/brands.full.json
exported_langs:
- en
- fr
- es
- de
- it
- nl
autocomplete:
index:
number_of_replicas: 1
number_of_shards: 4
result_processor: app.postprocessing.CompletionProcessor
sources:
- name: additives
url: https://static.openfoodfacts.org/data/taxonomies/additives.full.json
- name: allergens
url: https://static.openfoodfacts.org/data/taxonomies/allergens.full.json
- name: amino_acids
url: https://static.openfoodfacts.org/data/taxonomies/amino_acids.full.json
- name: categories
url: https://static.openfoodfacts.org/data/taxonomies/categories.full.json
- name: countries
url: https://static.openfoodfacts.org/data/taxonomies/countries.full.json
- name: data_quality
url: https://static.openfoodfacts.org/data/taxonomies/data_quality.full.json
- name: food_groups
url: https://static.openfoodfacts.org/data/taxonomies/food_groups.full.json
- name: improvements
url: https://static.openfoodfacts.org/data/taxonomies/improvements.full.json
- name: ingredients
url: https://static.openfoodfacts.org/data/taxonomies/ingredients.full.json
- name: ingredients_analysis
url: https://static.openfoodfacts.org/data/taxonomies/ingredients_analysis.full.json
- name: ingredients_processing
url: https://static.openfoodfacts.org/data/taxonomies/ingredients_processing.full.json
- name: labels
url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json
- name: languages
url: https://static.openfoodfacts.org/data/taxonomies/languages.full.json
- name: minerals
url: https://static.openfoodfacts.org/data/taxonomies/minerals.full.json
- name: misc
url: https://static.openfoodfacts.org/data/taxonomies/misc.full.json
- name: nova_groups
url: https://static.openfoodfacts.org/data/taxonomies/nova_groups.full.json
- name: nucleotides
url: https://static.openfoodfacts.org/data/taxonomies/nucleotides.full.json
- name: nutrients
url: https://static.openfoodfacts.org/data/taxonomies/nutrients.full.json
- name: origins
url: https://static.openfoodfacts.org/data/taxonomies/origins.full.json
- name: other_nutritional_substances
url: https://static.openfoodfacts.org/data/taxonomies/other_nutritional_substances.full.json
- name: packaging_materials
url: https://static.openfoodfacts.org/data/taxonomies/packaging_materials.full.json
- name: packaging_recycling
url: https://static.openfoodfacts.org/data/taxonomies/packaging_recycling.full.json
- name: packaging_shapes
url: https://static.openfoodfacts.org/data/taxonomies/packaging_shapes.full.json
- name: periods_after_opening
url: https://static.openfoodfacts.org/data/taxonomies/periods_after_opening.full.json
- name: preservation
url: https://static.openfoodfacts.org/data/taxonomies/preservation.full.json
- name: states
url: https://static.openfoodfacts.org/data/taxonomies/states.full.json
- name: vitamins
url: https://static.openfoodfacts.org/data/taxonomies/vitamins.full.json
index:
number_of_replicas: 1
number_of_shards: 4
supported_langs:
- aa
- ab
Expand Down

0 comments on commit eca6445

Please sign in to comment.