Lexer global state management #109

Open · wants to merge 7 commits into master
2 changes: 1 addition & 1 deletion docs/source/api.rst
@@ -10,7 +10,7 @@ luqum.parser
---------------

.. automodule:: luqum.parser
:members: parser
:members: parser, parse

luqum.threading
---------------
18 changes: 18 additions & 0 deletions luqum/parser.py
@@ -412,3 +412,21 @@ def p_error(p):
**Note**: The parser by itself is not thread safe (because PLY is not).
Use :py:func:`luqum.thread.parse` instead
"""

_orig_parse = parser.parse


def parse(input=None, lexer=lexer, debug=False, tracking=False, tokenfunc=None):
"""
A function to parse a text Lucene query provided as ``input``.
The function signature is based on :func:`ply.yacc.LRParser.parse` except
    that ``lexer`` defaults to :data:`luqum.parser.lexer`.
"""
return _orig_parse(
input=input, lexer=lexer, debug=debug, tracking=tracking, tokenfunc=tokenfunc
)


# avoid confusion with other lexers by monkey patching parser.parse
parser.parse = parse
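
For readers of the diff, here is a minimal usage sketch of the new module-level ``parse``; the query string is only an example:

    from luqum.parser import parse

    # parse() always tokenizes with luqum's own lexer, even if another PLY
    # lexer was built later somewhere else in the process
    tree = parse('title:"foo bar" AND body:"quick fox"')
    print(repr(tree))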
12 changes: 6 additions & 6 deletions luqum/thread.py
@@ -1,19 +1,19 @@
import threading

from ply import lex

from . import parser


thread_local = threading.local()


def parse(input=None, lexer=None, debug=False, tracking=False):
"""A (hopefully) thread safe version of :py:meth:`luqum.parser.parse`
PLY is not thread safe because of its lexer state, but cloning it we can be thread safe.
see: https://github.com/jurismarches/luqum/issues/72
PLY is not thread safe because of its lexer state, but by cloning the lexer we
can be thread safe. See: https://github.com/jurismarches/luqum/issues/72
Warning: The parameters ``lexer``, ``debug`` and ``tracking`` are not used.
They are still present for signature compatibility.
"""
if not hasattr(thread_local, "lexer"):
thread_local.lexer = lex.lexer.clone()
thread_local.lexer = parser.lexer.clone()
return parser.parser.parse(input, lexer=thread_local.lexer)
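
A short usage sketch of the thread-safe helper; the thread count and query are illustrative:

    import threading
    from luqum.thread import parse

    qs = '(title:"foo bar" AND body:"quick fox")'

    def worker():
        # each thread lazily clones luqum's lexer into thread-local storage
        tree = parse(qs)
        assert str(tree) == qs  # luqum trees round-trip back to the query text

    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()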
46 changes: 46 additions & 0 deletions tests/alternative_lexer.py
@@ -0,0 +1,46 @@
"""
Fake Lexer to test: [Multiple Parsers and
Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
"""

# List of token names. This is always required
tokens = (
"NUMBER",
"PLUS",
"MINUS",
"TIMES",
"DIVIDE",
"LPAREN",
"RPAREN",
)

# Regular expression rules for simple tokens
t_PLUS = r"\+"
t_MINUS = r"-"
t_TIMES = r"\*"
t_DIVIDE = r"/"
t_LPAREN = r"\("
t_RPAREN = r"\)"


# A regular expression rule with some action code
def t_NUMBER(t):
r"\d+"
t.value = int(t.value)
return t


# Define a rule so we can track line numbers
def t_newline(t):
r"\n+"
t.lexer.lineno += len(t.value)


# A string containing ignored characters (spaces and tabs)
t_ignore = " \t"


# Error handling rule
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
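
For context, PLY keeps the most recently built lexer in the module-global ``lex.lexer``, and a yacc parser falls back to it when called without an explicit ``lexer``. A sketch of the failure mode this fake lexer reproduces (query string illustrative):

    import ply.lex as lex
    from luqum.parser import parser
    from tests import alternative_lexer

    # building any other PLY lexer rebinds the module-global lex.lexer
    lex.lex(module=alternative_lexer)

    # before this change, a bare parser.parse(...) picked up that global lexer
    # and raised ParseSyntaxError on Lucene syntax; with the monkey-patched
    # parse() it keeps using luqum's own lexer
    parser.parse('title:"foo bar"')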
27 changes: 26 additions & 1 deletion tests/test_parser.py
@@ -1,12 +1,16 @@
from decimal import Decimal
from unittest import TestCase

import ply.lex as lex
import pytest

from luqum.exceptions import IllegalCharacterError, ParseSyntaxError
from luqum.parser import lexer, parser
from luqum.parser import lexer, parser, parse
from luqum.tree import (
SearchField, FieldGroup, Group,
Word, Phrase, Regex, Proximity, Fuzzy, Boost, Range, From, To,
Not, AndOperation, OrOperation, Plus, Prohibit, UnknownOperation)
from tests import alternative_lexer


class TestLexer(TestCase):
@@ -537,3 +541,24 @@ def test_negative_values_in_ranges(self):
# semantically incorrect but correct from the parser's perspective
parsed = parser.parse("[5 TO -1]")
self.assertEqual(str(parsed), "[5 TO -1]")


def test_lex_global_state():
"""
    The last lexer built is used globally by the parser by default. If another
    library creates its own lexer, it should not impact luqum.
More info: [Multiple Parsers and
Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
"""
qs = '(title:"foo bar" AND body:"quick fox")'

lex.lex(module=alternative_lexer)

with pytest.raises(ParseSyntaxError):
parser.parse(qs, lexer=lex.lexer)

# if there is a "luqum.exceptions.ParseSyntaxError",
# the wrong lexer was used.
parse(qs)
parser.parse(qs)
19 changes: 19 additions & 0 deletions tests/test_thread.py
@@ -1,8 +1,11 @@
import queue
import threading

import ply.lex as lex

from luqum.parser import parser
from luqum.thread import parse
from tests import alternative_lexer


def test_thread_parse():
@@ -31,3 +34,19 @@ def run(q):
assert result_queue.qsize() == 100
for i in range(100):
assert result_queue.get() == expected_tree


def test_thread_lex_global_state():
"""
    The last lexer built is used globally by the parser by default. If another
    library creates its own lexer, it should not impact luqum.
More info: [Multiple Parsers and
Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
"""
qs = '(title:"foo bar" AND body:"quick fox")'

lex.lex(module=alternative_lexer)
# if there is a "luqum.exceptions.ParseSyntaxError", the wrong lexer was
# used.
parse(qs)