Skip to content

Commit

Permalink
feat: adding sub fields to object (#271)
Browse files Browse the repository at this point in the history
fixes:
- #237 

Also added the "nested" type (not thoroughly tested)
  • Loading branch information
alexgarel authored Jan 9, 2025
1 parent acd55a1 commit 33368c7
Show file tree
Hide file tree
Showing 10 changed files with 7,839 additions and 20 deletions.
47 changes: 47 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ class FieldType(StrEnum):
* disabled: a field that is not stored nor searchable
(see [Elasticsearch help])
* object: this field contains a dict with sub-fields.
* nested: this field contains an array of objects.
"""

keyword = auto()
Expand All @@ -276,6 +277,7 @@ class FieldType(StrEnum):
# https://www.elastic.co/guide/en/elasticsearch/reference/current/enabled.html
disabled = auto()
object = auto()
nested = auto()

def is_numeric(self):
"""Return whether this field type can be considered numeric"""
Expand Down Expand Up @@ -371,6 +373,25 @@ class FieldConfig(BaseModel):
)
),
] = None
fields: Annotated[
dict[str, "FieldConfig"] | None,
Field(
description=cd_(
"""Sub fields configuration
This is valid only for "object" and "nested" fields,
and must be provided in this case.
Keys are field names,
values contain the field configuration.
Note: that although dynamic fields are supported in Elasticsearch,
we don't support them in Search-a-licious,
because they lead to nasty bugs, and are not meant for production use.
"""
)
),
] = None

@model_validator(mode="after")
def bucket_agg_should_be_used_for_keyword_and_numeric_types_only(self):
Expand All @@ -385,6 +406,23 @@ def bucket_agg_should_be_used_for_keyword_and_numeric_types_only(self):
)
return self

@model_validator(mode="after")
def subfields_only_if_object_or_nested(self):
    """Check that sub fields are present exactly when the type requires them.

    Fields of type ``object`` or ``nested`` must come with a non-empty
    ``fields`` mapping; every other type must leave ``fields`` unset (None).

    :raises ValueError: if an object / nested field has no sub fields,
        or if another field type declares sub fields
    :return: the validated instance itself
    """
    expects_subfields = self.type in (FieldType.object, FieldType.nested)
    # an empty mapping counts as "missing" for object / nested types
    if expects_subfields and not self.fields:
        raise ValueError("(sub) fields must be provided for object and nested type")
    # for other types, anything but None (even an empty mapping) is rejected
    if not expects_subfields and self.fields is not None:
        raise ValueError("(sub) fields are only valid for object and nested type")
    return self

def get_input_field(self):
    """Give the name of the field to read from the input data.

    This is ``input_field`` when it is set to a truthy value,
    and falls back to the field's own ``name`` otherwise.
    """
    explicit_source = self.input_field
    return explicit_source if explicit_source else self.name
Expand All @@ -394,6 +432,15 @@ def has_lang_subfield(self) -> bool:
per languages"""
return self.type in (FieldType.taxonomy, FieldType.text_lang)

@field_validator("fields")
@classmethod
def add_field_name_to_each_field(cls, fields: dict[str, "FieldConfig"]):
    """Copy each mapping key into the ``name`` attribute of its sub field.

    Having the name on the field definition itself is convenient for
    code that only receives the field object.
    A falsy value (None or an empty mapping) is returned untouched.
    """
    # `or {}` makes the loop a no-op when no sub fields were given
    for sub_name, sub_config in (fields or {}).items():
        sub_config.name = sub_name
    return fields


class BaseESIndexConfig(BaseModel):
"""Base class for configuring ElasticSearch indexes"""
Expand Down
16 changes: 14 additions & 2 deletions app/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,20 @@ def generate_dsl_field(
for lang in supported_langs
}
return dsl_field.Object(dynamic=False, properties=properties)
elif field.type == FieldType.object:
return dsl_field.Object(dynamic=True)
elif field.type in (FieldType.object, FieldType.nested):
if not field.fields:
# this should not happen by construction of FieldConfig
raise ValueError("Object fields must have fields")
properties = {
sub_field.name: generate_dsl_field(
sub_field, supported_langs=supported_langs
)
for sub_field in field.fields.values()
}
if field.type == FieldType.nested:
return dsl_field.Nested(properties=properties)
else:
return dsl_field.Object(dynamic=False, properties=properties)
elif field.type == FieldType.disabled:
return dsl_field.Object(enabled=False)
else:
Expand Down
23 changes: 23 additions & 0 deletions data/config/openfoodfacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,29 @@ indices:
type: disabled
nutriments:
type: object
fields:
energy-kcal_100g:
type: float
energy-kj_100g:
type: float
fat_100g:
type: float
saturated-fat_100g:
type: float
carbohydrates_100g:
type: float
sugars_100g:
type: float
proteins_100g:
type: float
fiber_100g:
type: float
salt_100g:
type: float
sodium_100g:
type: float
alcohol_100g:
type: float
nutriscore_data:
type: disabled
nutriscore_grade:
Expand Down
7 changes: 7 additions & 0 deletions tests/int/data/test_off.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ indices:
type: date
nutriments:
type: object
fields:
energy-kcal_100g:
type: float
energy-kj_100g:
type: float
fat_100g:
type: float
completeness:
type: float
lang_separator: _
Expand Down
21 changes: 21 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from pathlib import Path

import orjson
import pytest

from app._types import JSONType
from app.config import Config
from app.query import build_elasticsearch_query_builder
from app.utils.io import dump_json, load_json

DATA_DIR = Path(__file__).parent / "data"
DEFAULT_CONFIG_PATH = DATA_DIR / "openfoodfacts_config.yml"
Expand All @@ -27,3 +30,21 @@ def default_filter_query_builder(default_config):
"""Fixture that returns Luqum elasticsearch query builder based on default
config."""
yield build_elasticsearch_query_builder(default_config)


@pytest.fixture
def load_expected_result(update_results):
    """Provide a helper that loads the expected result of a test,
    optionally saving it first.

    The returned function takes a test id and the data produced by the test.
    When the ``update_results`` fixture is truthy, the data is first written
    to ``DATA_DIR / "<test_id>.json"``. Otherwise, a missing file raises
    ``RuntimeError``. In both cases the content of that file is then loaded
    and returned.
    """

    def load_expected_result_fn(test_id: str, data: JSONType):
        result_path = DATA_DIR / f"{test_id}.json"
        if update_results:
            # refresh the stored reference with what the test just produced
            dump_json(result_path, data, option=orjson.OPT_INDENT_2)
        elif not result_path.exists():
            raise RuntimeError(
                f"No result file for {test_id}, "
                "maybe you need to first run with --update-results."
            )
        return load_json(result_path)

    return load_expected_result_fn
7 changes: 7 additions & 0 deletions tests/unit/data/openfoodfacts_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ indices:
type: disabled
nutriments:
type: object
fields:
energy-kcal_100g:
type: float
energy-kj_100g:
type: float
fat_100g:
type: float
nutriscore_data:
type: disabled
nutriscore_grade:
Expand Down
Loading

0 comments on commit 33368c7

Please sign in to comment.