diff --git a/docs/source/api.rst b/docs/source/api.rst index 27f630b..20a1fe0 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -10,7 +10,7 @@ luqum.parser --------------- .. automodule:: luqum.parser - :members: parser + :members: parser, parse luqum.threading --------------- diff --git a/luqum/parser.py b/luqum/parser.py index edd9d7d..900f092 100644 --- a/luqum/parser.py +++ b/luqum/parser.py @@ -412,3 +412,21 @@ def p_error(p): **Note**: The parser by itself is not thread safe (because PLY is not). Use :py:func:`luqum.thread.parse` instead """ + +_orig_parse = parser.parse + + +def parse(input=None, lexer=lexer, debug=False, tracking=False, tokenfunc=None): + """ + A function to parse a text Lucene query provided as ``input``. + + The function signature is based on :func:`ply.yacc.LRParser.parse` except + that ``lexer`` defaults to :data:`luqum.parser.lexer`. + """ + return _orig_parse( + input=input, lexer=lexer, debug=debug, tracking=tracking, tokenfunc=tokenfunc + ) + + +# avoid confusion in lexers by monkey patching +parser.parse = parse diff --git a/luqum/thread.py b/luqum/thread.py index 5a88d17..60d5891 100644 --- a/luqum/thread.py +++ b/luqum/thread.py @@ -1,19 +1,19 @@ import threading -from ply import lex - from . import parser - thread_local = threading.local() def parse(input=None, lexer=None, debug=False, tracking=False): """A (hopefully) thread safe version of :py:meth:`luqum.parser.parse` - PLY is not thread safe because of its lexer state, but cloning it we can be thread safe. - see: https://github.com/jurismarches/luqum/issues/72 + PLY is not thread safe because of its lexer state, but cloning it we can be + thread safe. see: https://github.com/jurismarches/luqum/issues/72 + + Warning: The parameters ``lexer``, ``debug`` and ``tracking`` are not used. + They are still present for signature compatibility. 
""" if not hasattr(thread_local, "lexer"): - thread_local.lexer = lex.lexer.clone() + thread_local.lexer = parser.lexer.clone() return parser.parser.parse(input, lexer=thread_local.lexer) diff --git a/tests/alternative_lexer.py b/tests/alternative_lexer.py new file mode 100644 index 0000000..6cb4bd0 --- /dev/null +++ b/tests/alternative_lexer.py @@ -0,0 +1,46 @@ +""" +Fake Lexer to test: [Multiple Parsers and +Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37) +""" + +# List of token names. This is always required +tokens = ( + "NUMBER", + "PLUS", + "MINUS", + "TIMES", + "DIVIDE", + "LPAREN", + "RPAREN", +) + +# Regular expression rules for simple tokens +t_PLUS = r"\+" +t_MINUS = r"-" +t_TIMES = r"\*" +t_DIVIDE = r"/" +t_LPAREN = r"\(" +t_RPAREN = r"\)" + + +# A regular expression rule with some action code +def t_NUMBER(t): + r"\d+" + t.value = int(t.value) + return t + + +# Define a rule so we can track line numbers +def t_newline(t): + r"\n+" + t.lexer.lineno += len(t.value) + + +# A string containing ignored characters (spaces and tabs) +t_ignore = " \t" + + +# Error handling rule +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) diff --git a/tests/test_parser.py b/tests/test_parser.py index 96d901d..fb17d06 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,12 +1,16 @@ from decimal import Decimal from unittest import TestCase +import ply.lex as lex +import pytest + from luqum.exceptions import IllegalCharacterError, ParseSyntaxError -from luqum.parser import lexer, parser +from luqum.parser import lexer, parser, parse from luqum.tree import ( SearchField, FieldGroup, Group, Word, Phrase, Regex, Proximity, Fuzzy, Boost, Range, From, To, Not, AndOperation, OrOperation, Plus, Prohibit, UnknownOperation) +from tests import alternative_lexer class TestLexer(TestCase): @@ -537,3 +541,24 @@ def test_negative_values_in_ranges(self): # semantically incorrect but correct from the parser's perspective parsed = 
parser.parse("[5 TO -1]") self.assertEqual(str(parsed), "[5 TO -1]") + + +def test_lex_global_state(): + """ + Last Lexer is used globally by default by the parser. If another library + creates another lexer, it should not impact luqum. + + More info: [Multiple Parsers and + Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37) + """ + qs = '(title:"foo bar" AND body:"quick fox")' + + lex.lex(module=alternative_lexer) + + with pytest.raises(ParseSyntaxError): + parser.parse(qs, lexer=lex.lexer) + + # if there is a "luqum.exceptions.ParseSyntaxError", + # the wrong lexer was used. + parse(qs) + parser.parse(qs) diff --git a/tests/test_thread.py b/tests/test_thread.py index 82bf220..b7deba3 100644 --- a/tests/test_thread.py +++ b/tests/test_thread.py @@ -1,8 +1,11 @@ import queue import threading +import ply.lex as lex + from luqum.parser import parser from luqum.thread import parse +from tests import alternative_lexer def test_thread_parse(): @@ -31,3 +34,19 @@ def run(q): assert result_queue.qsize() == 100 for i in range(100): assert result_queue.get() == expected_tree + + +def test_thread_lex_global_state(): + """ + Last Lexer is used globally by default by the parser. If another library + creates another lexer, it should not impact luqum. + + More info: [Multiple Parsers and + Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37) + """ + qs = '(title:"foo bar" AND body:"quick fox")' + + lex.lex(module=alternative_lexer) + # if there is a "luqum.exceptions.ParseSyntaxError", the wrong lexer was + # used. + parse(qs)