Skip to content

Commit

Permalink
feat: add basic facet view (term bucket with # of items for each value)
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Oct 17, 2023
1 parent 7735dac commit 930b335
Show file tree
Hide file tree
Showing 11 changed files with 322 additions and 1 deletion.
20 changes: 20 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,14 @@ class FieldConfig(BaseModel):
"field are provided."
),
] = False
# Opt-in flag: when true, a bucket aggregation is requested for this field so
# that the search response can expose per-value result counts (facets).
# The accepted field types are enforced by the model validator
# `bucket_agg_should_be_used_for_keyword_and_numeric_types_only`.
bucket_agg: Annotated[
    bool,
    Field(
        description="whether to add a bucket aggregation to the elasticsearch query for this field. "
        "It is used to return a 'faceted-view' with the number of results for each facet value. "
        "Only valid for keyword, numeric or bool field types."
    ),
] = False
taxonomy_name: Annotated[
str | None, Field(description="only for taxonomy field type")
] = None
Expand All @@ -195,6 +203,18 @@ def taxonomy_name_should_be_used_for_taxonomy_type_only(self):
raise ValueError("taxonomy_name should be provided for taxonomy type only")
return self

@model_validator(mode="after")
def bucket_agg_should_be_used_for_keyword_and_numeric_types_only(self):
    """Validate that `bucket_agg` is only enabled on supported field types.

    Runs as an "after" model validator. `bucket_agg` may only be set on
    fields whose type is numeric (as reported by `self.type.is_numeric()`),
    `keyword` or `bool`.

    Returns:
        The model instance itself, unchanged, when the check passes.

    Raises:
        ValueError: if `bucket_agg` is enabled on any other field type.
    """
    # Nothing to check when the option is not enabled.
    if not self.bucket_agg:
        return self

    if self.type.is_numeric() or self.type in (FieldType.keyword, FieldType.bool):
        return self

    # The message lists the types actually accepted above (it previously
    # mentioned "taxonomy", which this check rejects).
    raise ValueError(
        "bucket_agg should be provided for keyword, bool or numeric type only"
    )

def get_input_field(self):
    """Give the name under which this field is read from input data.

    `input_field` takes precedence; when it is falsy (e.g. `None` or an
    empty string), `name` is returned instead.
    """
    explicit_name = self.input_field
    if explicit_name:
        return explicit_name
    return self.name
Expand Down
1 change: 1 addition & 0 deletions app/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def process(self, response: Response, projection: set[str] | None) -> JSONType:
result = dict((k, v) for k, v in result.items() if k in projection)
hits.append(result)
output["hits"] = hits
output["aggregations"] = response.aggregations.to_dict()
return output

def process_after(self, result: JSONType) -> JSONType:
Expand Down
17 changes: 16 additions & 1 deletion app/query.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import elastic_transport
from elasticsearch_dsl import Q, Search
from elasticsearch_dsl import A, Q, Search
from elasticsearch_dsl.aggs import Agg
from elasticsearch_dsl.query import Query
from luqum import visitor
from luqum.elasticsearch import ElasticsearchQueryBuilder
Expand Down Expand Up @@ -192,6 +193,17 @@ def parse_sort_by_parameter(sort_by: str | None, config: Config) -> str | None:
return sort_by


def create_aggregation_clauses(config: Config) -> dict[str, Agg]:
    """Build the `terms` bucket aggregations requested by the configuration.

    Every field in `config.fields` whose `bucket_agg` flag is truthy gets one
    `terms` aggregation targeting that field. The returned mapping is keyed
    by field name; fields without the flag are left out.
    """
    return {
        field_config.name: A("terms", field=field_config.name)
        for field_config in config.fields.values()
        if field_config.bucket_agg
    }


def build_search_query(
q: str,
langs: set[str],
Expand Down Expand Up @@ -226,6 +238,9 @@ def build_search_query(
if filter_query:
query = query.query("bool", filter=filter_query)

for agg_name, agg in create_aggregation_clauses(config).items():
query.aggs.bucket(agg_name, agg)

sort_by = parse_sort_by_parameter(sort_by, config)
if sort_by is not None:
query = query.sort(sort_by)
Expand Down
1 change: 1 addition & 0 deletions app/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def is_success(self):

class SuccessSearchResponse(BaseModel):
hits: list[JSONType]
aggregations: JSONType
page: int
page_size: int
page_count: int
Expand Down
12 changes: 12 additions & 0 deletions data/config/openfoodfacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ fields:
full_text_search: true
split: true
type: text
brands_tags:
type: keyword
bucket_agg: true
stores:
split: true
type: text
Expand All @@ -41,20 +44,26 @@ fields:
type: text
lang:
type: keyword
bucket_agg: true
lc:
type: keyword
owner:
type: keyword
bucket_agg: true
quantity:
type: text
categories_tags:
type: keyword
bucket_agg: true
labels_tags:
type: keyword
bucket_agg: true
countries_tags:
type: keyword
bucket_agg: true
states_tags:
type: keyword
bucket_agg: true
origins_tags:
type: keyword
ingredients_tags:
Expand All @@ -65,10 +74,13 @@ fields:
type: integer
nutrition_grades:
type: keyword
bucket_agg: true
ecoscore_grade:
type: keyword
bucket_agg: true
nova_groups:
type: keyword
bucket_agg: true
last_modified_t:
type: date
created_t:
Expand Down
52 changes: 52 additions & 0 deletions tests/unit/data/complex_query.json
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,58 @@
"minimum_should_match": 1
}
},
"aggs": {
"brands_tags": {
"terms": {
"field": "brands_tags"
}
},
"lang": {
"terms": {
"field": "lang"
}
},
"owner": {
"terms": {
"field": "owner"
}
},
"categories_tags": {
"terms": {
"field": "categories_tags"
}
},
"labels_tags": {
"terms": {
"field": "labels_tags"
}
},
"countries_tags": {
"terms": {
"field": "countries_tags"
}
},
"states_tags": {
"terms": {
"field": "states_tags"
}
},
"nutrition_grades": {
"terms": {
"field": "nutrition_grades"
}
},
"ecoscore_grade": {
"terms": {
"field": "ecoscore_grade"
}
},
"nova_groups": {
"terms": {
"field": "nova_groups"
}
}
},
"size": 25,
"from": 25
}
52 changes: 52 additions & 0 deletions tests/unit/data/non_existing_filter_field.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,58 @@
]
}
},
"aggs": {
"brands_tags": {
"terms": {
"field": "brands_tags"
}
},
"lang": {
"terms": {
"field": "lang"
}
},
"owner": {
"terms": {
"field": "owner"
}
},
"categories_tags": {
"terms": {
"field": "categories_tags"
}
},
"labels_tags": {
"terms": {
"field": "labels_tags"
}
},
"countries_tags": {
"terms": {
"field": "countries_tags"
}
},
"states_tags": {
"terms": {
"field": "states_tags"
}
},
"nutrition_grades": {
"terms": {
"field": "nutrition_grades"
}
},
"ecoscore_grade": {
"terms": {
"field": "ecoscore_grade"
}
},
"nova_groups": {
"terms": {
"field": "nova_groups"
}
}
},
"size": 25,
"from": 25
}
12 changes: 12 additions & 0 deletions tests/unit/data/openfoodfacts_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ fields:
full_text_search: true
split: true
type: text
brands_tags:
type: keyword
bucket_agg: true
stores:
split: true
type: text
Expand All @@ -41,20 +44,26 @@ fields:
type: text
lang:
type: keyword
bucket_agg: true
lc:
type: keyword
owner:
type: keyword
bucket_agg: true
quantity:
type: text
categories_tags:
type: keyword
bucket_agg: true
labels_tags:
type: keyword
bucket_agg: true
countries_tags:
type: keyword
bucket_agg: true
states_tags:
type: keyword
bucket_agg: true
origins_tags:
type: keyword
ingredients_tags:
Expand All @@ -65,10 +74,13 @@ fields:
type: integer
nutrition_grades:
type: keyword
bucket_agg: true
ecoscore_grade:
type: keyword
bucket_agg: true
nova_groups:
type: keyword
bucket_agg: true
last_modified_t:
type: date
created_t:
Expand Down
52 changes: 52 additions & 0 deletions tests/unit/data/simple_filter_query.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,58 @@
]
}
},
"aggs": {
"brands_tags": {
"terms": {
"field": "brands_tags"
}
},
"lang": {
"terms": {
"field": "lang"
}
},
"owner": {
"terms": {
"field": "owner"
}
},
"categories_tags": {
"terms": {
"field": "categories_tags"
}
},
"labels_tags": {
"terms": {
"field": "labels_tags"
}
},
"countries_tags": {
"terms": {
"field": "countries_tags"
}
},
"states_tags": {
"terms": {
"field": "states_tags"
}
},
"nutrition_grades": {
"terms": {
"field": "nutrition_grades"
}
},
"ecoscore_grade": {
"terms": {
"field": "ecoscore_grade"
}
},
"nova_groups": {
"terms": {
"field": "nova_groups"
}
}
},
"size": 25,
"from": 25
}
Loading

0 comments on commit 930b335

Please sign in to comment.