From a6d60cbbde448524a58f01c50ce3c9fbc6d75309 Mon Sep 17 00:00:00 2001 From: Linefeed Date: Fri, 29 Jul 2022 16:07:32 +0200 Subject: [PATCH] feat: add support for Lucene and Elasticsearch Boolean operations (#71) * Introduce the BoolOperation * add its resolution in ElasticSearch transformer * add it as a possible resolver for the unknown operation (no explicit operator in query) --- luqum/elasticsearch/tree.py | 26 ++++++++++++++++++++++++++ luqum/elasticsearch/visitor.py | 5 ++++- luqum/tree.py | 15 +++++++++++++++ luqum/utils.py | 8 ++++---- tests/test_elasticsearch/tests.py | 25 ++++++++++++++++++++++++- tests/test_utils.py | 15 ++++++++++++++- 6 files changed, 87 insertions(+), 7 deletions(-) diff --git a/luqum/elasticsearch/tree.py b/luqum/elasticsearch/tree.py index 7926308..90702d6 100644 --- a/luqum/elasticsearch/tree.py +++ b/luqum/elasticsearch/tree.py @@ -373,6 +373,32 @@ class EMustNot(AbstractEMustOperation): operation = 'must_not' +class EBoolOperation(EOperation): + + @property + def json(self): + must_items = [] + should_items = [] + must_not_items = [] + for item in self.items: + if isinstance(item, EMust): + must_items.extend(item.items) + elif isinstance(item, EMustNot): + must_not_items.extend(item.items) + else: + should_items.append(item) + bool_query = {} + if must_items: + bool_query["must"] = [item.json for item in must_items] + if should_items: + bool_query["should"] = [item.json for item in should_items] + if must_not_items: + bool_query["must_not"] = [item.json for item in must_not_items] + + query = dict(bool_query, **self.options) + return {'bool': query} + + class ElasticSearchItemFactory: """ Factory to preconfigure EItems and EOperation diff --git a/luqum/elasticsearch/visitor.py b/luqum/elasticsearch/visitor.py index 9bec382..d96fbe6 100644 --- a/luqum/elasticsearch/visitor.py +++ b/luqum/elasticsearch/visitor.py @@ -6,7 +6,7 @@ from luqum.tree import Word # noqa: F401 from .tree import ( EMust, EMustNot, EShould, EWord,
EPhrase, ERange, - ENested) + ENested, EBoolOperation) from ..check import CheckNestedFields from ..naming import get_name from ..utils import ( @@ -342,6 +342,9 @@ def visit_prohibit(self, *args, **kwargs): def visit_plus(self, *args, **kwargs): yield from self._must_operation(*args, **kwargs) + def visit_bool_operation(self, *args, **kwargs): + yield from self._binary_operation(EBoolOperation, *args, **kwargs) + def visit_unknown_operation(self, *args, **kwargs): if self.default_operator == self.SHOULD: yield from self._should_operation(*args, **kwargs) diff --git a/luqum/tree.py b/luqum/tree.py index 2961338..0b1dc5e 100644 --- a/luqum/tree.py +++ b/luqum/tree.py @@ -413,6 +413,21 @@ def children(self, value): self.operands = tuple(value) +class BoolOperation(BaseOperation): + """Lucene Boolean Query. + + This operation assumes that the query builder can utilize a boolean operator + with three possible sections, must, should and must_not. If the + UnknownOperationResolver is asked to resolve_to this operation, the query + builder can utilize this operator directly instead of nested AND/OR. + This also makes it possible to correctly support Lucene queries such as: + "apples +bananas -vegetables" + .. seealso:: + the :py:class:`.utils.UnknownOperationResolver` + """ + op = "" + + class UnknownOperation(BaseOperation): """Unknown Boolean operator. diff --git a/luqum/utils.py b/luqum/utils.py index fe54e59..386fa46 100644 --- a/luqum/utils.py +++ b/luqum/utils.py @@ -8,22 +8,22 @@ from . 
import visitor from .deprecated_utils import ( # noqa: F401 LuceneTreeTransformer, LuceneTreeVisitor, LuceneTreeVisitorV2) -from .tree import AndOperation, BaseOperation, OrOperation +from .tree import AndOperation, BaseOperation, OrOperation, BoolOperation class UnknownOperationResolver(visitor.TreeTransformer): """Transform the UnknownOperation to OR or AND """ - VALID_OPERATIONS = frozenset([None, AndOperation, OrOperation]) + VALID_OPERATIONS = frozenset([None, AndOperation, OrOperation, BoolOperation]) DEFAULT_OPERATION = AndOperation def __init__(self, resolve_to=None, add_head=" "): """Initialize a new resolver - :param resolve_to: must be either None, OrOperation or AndOperation. + :param resolve_to: must be either None, OrOperation, AndOperation, BoolOperation. - for the latter two the UnknownOperation is repalced by specified operation. + for the latter three the UnknownOperation is replaced by specified operation. if it is None, we use the last operation encountered, as would Lucene do """ diff --git a/tests/test_elasticsearch/tests.py b/tests/test_elasticsearch/tests.py index 286f623..c406cde 100644 --- a/tests/test_elasticsearch/tests.py +++ b/tests/test_elasticsearch/tests.py @@ -6,7 +6,7 @@ from luqum.tree import ( AndOperation, Word, Prohibit, OrOperation, Not, Phrase, SearchField, UnknownOperation, Boost, Fuzzy, Proximity, Range, Group, FieldGroup, - Plus) + Plus, BoolOperation) from luqum.elasticsearch.tree import ElasticSearchItemFactory from luqum.elasticsearch.visitor import EWord, ElasticsearchQueryBuilder @@ -64,6 +64,29 @@ def test_should_transform_or(self): ]}} self.assertDictEqual(result, expected) + def test_bool_transform_bool(self): + tree = BoolOperation( + Word("a"), + Word("b"), + Group(BoolOperation(Plus(Word('f')), Plus(Word('g')))), + Prohibit(Group(BoolOperation(Word("c"), Word("d")))), + Plus(Word('e'))) + result = self.transformer(tree) + expected = {'bool': { + 'must': [ + {'term': {'text': {'value': 'e'}}}], + 'should': [ + 
{"term": {"text": {"value": 'a'}}}, + {"term": {"text": {"value": 'b'}}}, + {'bool': {'must': [ + {'term': {'text': {"value": 'f'}}}, + {'term': {'text': {"value": 'g'}}}]}}], + 'must_not': [{"bool": {"should": [ + {"term": {"text": {"value": 'c'}}}, + {"term": {"text": {"value": 'd'}}}]}}], + }} + self.assertDictEqual(result, expected) + def test_should_raise_when_or_and_and_on_same_level(self): tree = OrOperation( Word('spam'), diff --git a/tests/test_utils.py b/tests/test_utils.py index 6691054..5147d3b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,8 @@ from unittest import TestCase from luqum.parser import parser -from luqum.tree import Group, Word, AndOperation, OrOperation, UnknownOperation +from luqum.tree import (Group, Word, AndOperation, OrOperation, BoolOperation, + UnknownOperation, Prohibit, Plus) from luqum.utils import UnknownOperationResolver @@ -49,6 +50,18 @@ def test_lucene_resolution_simple(self): resolver = UnknownOperationResolver(resolve_to=None) self.assertEqual(resolver(tree), expected) + def test_lucene_resolution_bool(self): + tree = parser.parse("a b (+f +g) -(c d) +e") + expected = ( + BoolOperation( + Word("a"), + Word("b"), + Group(BoolOperation(Plus(Word("f")), Plus(Word("g")))), + Prohibit(Group(BoolOperation(Word("c"), Word("d")))), + Plus(Word('e')))) + resolver = UnknownOperationResolver(resolve_to=BoolOperation) + self.assertEqual(resolver(tree), expected) + def test_lucene_resolution_last_op(self): tree = ( OrOperation(