From 6a06bcc90c66db63aa2617e312d0ef2948d7e944 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 26 Feb 2022 17:39:10 +0100 Subject: [PATCH] Rename lexer_state->lexer_thread, and make a few adjustments for the benefit of Lark-Cython --- lark/lark.py | 2 +- lark/lexer.py | 15 +++++++------ lark/parser_frontends.py | 2 +- lark/parsers/lalr_interactive_parser.py | 28 +++++++++++++++---------- lark/parsers/lalr_parser.py | 2 +- tests/test_parser.py | 8 +++---- 6 files changed, 33 insertions(+), 24 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index dfd06623..b2a94334 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -574,7 +574,7 @@ def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: lexer = self._build_lexer(dont_ignore) else: lexer = self.lexer - lexer_thread = LexerThread(lexer, text) + lexer_thread = LexerThread.from_text(lexer, text) stream = lexer_thread.lex(None) if self.options.postlex: return self.options.postlex.process(stream) diff --git a/lark/lexer.py b/lark/lexer.py index f650fca9..ec71a12d 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -352,18 +352,21 @@ class LexerThread: """A thread that ties a lexer instance and a lexer state, to be used by the parser """ - def __init__(self, lexer, text): + def __init__(self, lexer: 'Lexer', lexer_state: LexerState): self.lexer = lexer - self.state = LexerState(text) + self.state = lexer_state + + @classmethod + def from_text(cls, lexer: 'Lexer', text: str): + return cls(lexer, LexerState(text)) def lex(self, parser_state): return self.lexer.lex(self.state, parser_state) def __copy__(self): - copied = object.__new__(LexerThread) - copied.lexer = self.lexer - copied.state = copy(self.state) - return copied + return type(self)(self.lexer, copy(self.state)) + + _Token = Token _Callback = Callable[[Token], Token] diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index ae5e1ea2..be32603f 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -90,7 +90,7 @@ def _verify_start(self, start=None): def _make_lexer_thread(self, text): cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread - return text if self.skip_lexer else cls(self.lexer, text) + return text if self.skip_lexer else cls.from_text(self.lexer, text) def parse(self, text, start=None, on_error=None): chosen_start = self._verify_start(start) diff --git a/lark/parsers/lalr_interactive_parser.py b/lark/parsers/lalr_interactive_parser.py index 3ad79899..c9658daf 100644 --- a/lark/parsers/lalr_interactive_parser.py +++ b/lark/parsers/lalr_interactive_parser.py @@ -2,9 +2,10 @@ from typing import Iterator, List from copy import copy +import warnings from lark.exceptions import UnexpectedToken -from lark.lexer import Token +from lark.lexer import Token, LexerThread class InteractiveParser: @@ -12,12 +13,17 @@ class InteractiveParser: For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. """ - def __init__(self, parser, parser_state, lexer_state): + def __init__(self, parser, parser_state, lexer_thread: LexerThread): self.parser = parser self.parser_state = parser_state - self.lexer_state = lexer_state + self.lexer_thread = lexer_thread self.result = None + @property + def lexer_state(self) -> LexerThread: + warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning) + return self.lexer_thread + def feed_token(self, token: Token): """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer. @@ -33,7 +39,7 @@ def iter_parse(self) -> Iterator[Token]: When the parse is over, the resulting tree can be found in ``InteractiveParser.result``. """ - for token in self.lexer_state.lex(self.parser_state): + for token in self.lexer_thread.lex(self.parser_state): yield token self.result = self.feed_token(token) @@ -47,7 +53,7 @@ def exhaust_lexer(self) -> List[Token]: def feed_eof(self, last_token=None): """Feed a '$END' Token. Borrows from 'last_token' if given.""" - eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1) + eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1) return self.feed_token(eof) @@ -59,7 +65,7 @@ def __copy__(self): return type(self)( self.parser, copy(self.parser_state), - copy(self.lexer_state), + copy(self.lexer_thread), ) def copy(self): @@ -69,12 +75,12 @@ def __eq__(self, other): if not isinstance(other, InteractiveParser): return False - return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state + return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread def as_immutable(self): """Convert to an ``ImmutableInteractiveParser``.""" p = copy(self) - return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state) + return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread) def pretty(self): """Print the output of ``choices()`` in a way that's easier to read.""" @@ -100,7 +106,7 @@ def accepts(self): if t.isupper(): # is terminal? new_cursor = copy(self) try: - new_cursor.feed_token(Token(t, '')) + new_cursor.feed_token(self.lexer_thread._Token(t, '')) except UnexpectedToken: pass else: @@ -121,7 +127,7 @@ class ImmutableInteractiveParser(InteractiveParser): result = None def __hash__(self): - return hash((self.parser_state, self.lexer_state)) + return hash((self.parser_state, self.lexer_thread)) def feed_token(self, token): c = copy(self) @@ -139,5 +145,5 @@ def exhaust_lexer(self): def as_mutable(self): """Convert to an ``InteractiveParser``.""" p = copy(self) - return InteractiveParser(p.parser, p.parser_state, p.lexer_state) + return InteractiveParser(p.parser, p.parser_state, p.lexer_thread) diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 84032250..2837b296 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -45,7 +45,7 @@ def parse(self, lexer, start, on_error=None): while True: if isinstance(e, UnexpectedCharacters): - s = e.interactive_parser.lexer_state.state + s = e.interactive_parser.lexer_thread.state p = s.line_ctr.char_pos if not on_error(e): diff --git a/tests/test_parser.py b/tests/test_parser.py index 67f242d2..49137476 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2518,12 +2518,12 @@ def test_parser_interactive_parser(self): ip_copy = ip.copy() self.assertEqual(ip_copy.parser_state, ip.parser_state) - self.assertEqual(ip_copy.lexer_state.state, ip.lexer_state.state) + self.assertEqual(ip_copy.lexer_thread.state, ip.lexer_thread.state) self.assertIsNot(ip_copy.parser_state, ip.parser_state) - self.assertIsNot(ip_copy.lexer_state.state, ip.lexer_state.state) - self.assertIsNot(ip_copy.lexer_state.state.line_ctr, ip.lexer_state.state.line_ctr) + self.assertIsNot(ip_copy.lexer_thread.state, ip.lexer_thread.state) + self.assertIsNot(ip_copy.lexer_thread.state.line_ctr, ip.lexer_thread.state.line_ctr) - res = ip.feed_eof(ip.lexer_state.state.last_token) + res = ip.feed_eof(ip.lexer_thread.state.last_token) self.assertEqual(res, Tree('start', ['a', 'b'])) self.assertRaises(UnexpectedToken ,ip.feed_eof)