Lexer global state management #109

Open · wants to merge 7 commits into master
2 changes: 1 addition & 1 deletion docs/source/api.rst
@@ -10,7 +10,7 @@ luqum.parser
---------------

.. automodule:: luqum.parser
:members: parser
:members: parser, parse

luqum.threading
---------------
18 changes: 18 additions & 0 deletions luqum/parser.py
@@ -412,3 +412,21 @@ def p_error(p):
**Note**: The parser by itself is not thread safe (because PLY is not).
Use :py:func:`luqum.thread.parse` instead
"""

_orig_parse = parser.parse


def parse(input=None, lexer=lexer, debug=False, tracking=False, tokenfunc=None):
"""
A function to parse a text Lucene query provided as ``input``.
The function signature is based on :func:`ply.yacc.LRParser.parse` except
    that ``lexer`` defaults to :data:`luqum.parser.lexer`.
"""
return _orig_parse(
input=input, lexer=lexer, debug=debug, tracking=tracking, tokenfunc=tokenfunc
)


# avoid confusion with other lexers by monkey patching parser.parse
parser.parse = parse
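
For readers of the diff, here is a minimal usage sketch of the new module-level ``parse``; the query string is only an example:

    from luqum.parser import parse

    # parse() always tokenizes with luqum's own lexer, even if another PLY
    # lexer was built later somewhere else in the process
    tree = parse('title:"foo bar" AND body:"quick fox"')
    print(repr(tree))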
12 changes: 6 additions & 6 deletions luqum/thread.py
@@ -1,19 +1,19 @@
import threading

from ply import lex

from . import parser


thread_local = threading.local()


def parse(input=None, lexer=None, debug=False, tracking=False):
"""A (hopefully) thread safe version of :py:meth:`luqum.parser.parse`
PLY is not thread safe because of its lexer state, but cloning it we can be thread safe.
see: https://github.com/jurismarches/luqum/issues/72
PLY is not thread safe because of its lexer state, but by cloning the lexer we
can be thread safe. See: https://github.com/jurismarches/luqum/issues/72
Warning: The parameters ``lexer``, ``debug`` and ``tracking`` are not used.
They are still present for signature compatibility.
"""
if not hasattr(thread_local, "lexer"):
thread_local.lexer = lex.lexer.clone()
thread_local.lexer = parser.lexer.clone()
return parser.parser.parse(input, lexer=thread_local.lexer)
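
A short usage sketch of the thread-safe helper; the thread count and query are illustrative:

    import threading
    from luqum.thread import parse

    qs = '(title:"foo bar" AND body:"quick fox")'

    def worker():
        # each thread lazily clones luqum's lexer into thread-local storage
        tree = parse(qs)
        assert str(tree) == qs  # luqum trees round-trip back to the query text

    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()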
46 changes: 46 additions & 0 deletions tests/alternative_lexer.py
@@ -0,0 +1,46 @@
"""
Fake Lexer to test: [Multiple Parsers and
Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
"""

# List of token names. This is always required
tokens = (
"NUMBER",
"PLUS",
"MINUS",
"TIMES",
"DIVIDE",
"LPAREN",
"RPAREN",
)

# Regular expression rules for simple tokens
t_PLUS = r"\+"
t_MINUS = r"-"
t_TIMES = r"\*"
t_DIVIDE = r"/"
t_LPAREN = r"\("
t_RPAREN = r"\)"


# A regular expression rule with some action code
def t_NUMBER(t):
r"\d+"
t.value = int(t.value)
return t


# Define a rule so we can track line numbers
def t_newline(t):
r"\n+"
t.lexer.lineno += len(t.value)


# A string containing ignored characters (spaces and tabs)
t_ignore = " \t"


# Error handling rule
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
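
For context, PLY keeps the most recently built lexer in the module-global ``lex.lexer``, and a yacc parser falls back to it when called without an explicit ``lexer``. A sketch of the failure mode this fake lexer reproduces (query string illustrative):

    import ply.lex as lex
    from luqum.parser import parser
    from tests import alternative_lexer

    # building any other PLY lexer rebinds the module-global lex.lexer
    lex.lex(module=alternative_lexer)

    # before this change, a bare parser.parse(...) picked up that global lexer
    # and raised ParseSyntaxError on Lucene syntax; with the monkey-patched
    # parse() it keeps using luqum's own lexer
    parser.parse('title:"foo bar"')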
27 changes: 26 additions & 1 deletion tests/test_parser.py
@@ -1,12 +1,16 @@
from decimal import Decimal
from unittest import TestCase

import ply.lex as lex
import pytest

from luqum.exceptions import IllegalCharacterError, ParseSyntaxError
from luqum.parser import lexer, parser
from luqum.parser import lexer, parser, parse
from luqum.tree import (
SearchField, FieldGroup, Group,
Word, Phrase, Regex, Proximity, Fuzzy, Boost, Range, From, To,
Not, AndOperation, OrOperation, Plus, Prohibit, UnknownOperation)
from tests import alternative_lexer


class TestLexer(TestCase):
@@ -537,3 +541,24 @@ def test_negative_values_in_ranges(self):
# semantically incorrect but correct from the parser's perspective
parsed = parser.parse("[5 TO -1]")
self.assertEqual(str(parsed), "[5 TO -1]")


def test_lex_global_state():
"""
    The last lexer built is used globally by the parser by default. If another
    library creates its own lexer, it should not impact luqum.
More info: [Multiple Parsers and
Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
"""
qs = '(title:"foo bar" AND body:"quick fox")'

lex.lex(module=alternative_lexer)

with pytest.raises(ParseSyntaxError):
parser.parse(qs, lexer=lex.lexer)

# if there is a "luqum.exceptions.ParseSyntaxError",
# the wrong lexer was used.
parse(qs)
parser.parse(qs)
19 changes: 19 additions & 0 deletions tests/test_thread.py
@@ -1,8 +1,11 @@
import queue
import threading

import ply.lex as lex

from luqum.parser import parser
from luqum.thread import parse
from tests import alternative_lexer


def test_thread_parse():
@@ -31,3 +34,19 @@ def run(q):
assert result_queue.qsize() == 100
for i in range(100):
assert result_queue.get() == expected_tree


def test_thread_lex_global_state():
"""
    The last lexer built is used globally by the parser by default. If another
    library creates its own lexer, it should not impact luqum.
More info: [Multiple Parsers and
Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
"""
qs = '(title:"foo bar" AND body:"quick fox")'

lex.lex(module=alternative_lexer)
# if there is a "luqum.exceptions.ParseSyntaxError", the wrong lexer was
# used.
parse(qs)