Skip to content

Commit

Permalink
fix #125: parsing of regex rules in peg syntaxes
Browse files Browse the repository at this point in the history
  • Loading branch information
igordejanovic committed Jan 5, 2024
1 parent 087c3c4 commit 844986e
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ please take a look at related PRs and issues and see if the change affects you.

## [Unreleased]

- Fix parsing of regex rules in peg and cleanpeg syntaxes ([#125]). Thanks
@smurfix for reporting ([#123]).
- **(BIC)** Removed support for Python 3.6. The minimal supported version is 3.7.
- Added support for Python 3.12.
- Migrated to pyproject.toml for project configuration.
Expand Down Expand Up @@ -66,6 +68,8 @@ please take a look at related PRs and issues and see if the change affects you.
- fix: #98 suppressed match in zero-or-more [#98]. Thanks @vpavlu for reporting
the issue.

[#125]: https://github.com/textX/Arpeggio/issues/125
[#123]: https://github.com/textX/Arpeggio/discussions/123
[#101]: https://github.com/textX/Arpeggio/issues/101
[#98]: https://github.com/textX/Arpeggio/issues/98
[#96]: https://github.com/textX/Arpeggio/issues/96
Expand Down
4 changes: 2 additions & 2 deletions arpeggio/cleanpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ def expression(): return [regex, rule_crossref,
str_match], Not(ASSIGNMENT)

# PEG Lexical rules
def regex(): return [("r'", _(r'''[^'\\]*(?:\\.[^'\\]*)*'''), "'"),
('r"', _(r'''[^"\\]*(?:\\.[^"\\]*)*'''), '"')]
def regex(): return _(r"""(r'[^'\\]*(?:\\.[^'\\]*)*')|"""
r'''(r"[^"\\]*(?:\\.[^"\\]*)*")''')
def rule_name(): return _(r"[a-zA-Z_]([a-zA-Z_]|[0-9])*")
def rule_crossref(): return rule_name
def str_match(): return _(r'''(?s)('[^'\\]*(?:\\.[^'\\]*)*')|'''
Expand Down
6 changes: 3 additions & 3 deletions arpeggio/peg.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def expression(): return [regex, rule_crossref,
str_match]

# PEG Lexical rules
def regex(): return [("r'", _(r'''[^'\\]*(?:\\.[^'\\]*)*'''), "'"),
('r"', _(r'''[^"\\]*(?:\\.[^"\\]*)*'''), '"')]
def regex(): return _(r"""(r'[^'\\]*(?:\\.[^'\\]*)*')|"""
r'''(r"[^"\\]*(?:\\.[^"\\]*)*")''')
def rule_name(): return _(r"[a-zA-Z_]([a-zA-Z_]|[0-9])*")
def rule_crossref(): return rule_name
def str_match(): return _(r'''(?s)('[^'\\]*(?:\\.[^'\\]*)*')|'''
Expand Down Expand Up @@ -216,7 +216,7 @@ def visit_rule_crossref(self, node, children):
return CrossRef(node.value)

def visit_regex(self, node, children):
match = _(children[0], ignore_case=self.ignore_case)
match = _(node.value[2:-1], ignore_case=self.ignore_case)
match.compile()
return match

Expand Down
18 changes: 18 additions & 0 deletions arpeggio/tests/regressions/test_issue_123.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from arpeggio.cleanpeg import ParserPEG


# Grammar text handed to ParserPEG by the regression test in this file.
# `Root` matches the literal 'a'; `BlockComment` and `LineComment` are
# regex rules written with the r'...' syntax, and `Comment` is an ordered
# choice between the two.
grammar = r'''
Root = 'a'
BlockComment = r'/\*.*?\*/'
LineComment = r'//[^\r\n]*'
Comment = LineComment / BlockComment
'''

def test_issue_123():
    """Regression test for issue #123 (regex rules in the cleanpeg syntax).

    Builds a ``ParserPEG`` from the module-level ``grammar`` with ``'Root'``
    and ``'Comment'`` as the second and third arguments (presumably the root
    rule name and the comment rule name -- confirm against ``ParserPEG``),
    then parses an input that contains ``//`` line comments around the
    single ``a`` token.

    There are no explicit assertions: the test passes when neither building
    the parser nor parsing the input raises.
    """
    peg_parser = ParserPEG(grammar, 'Root', 'Comment')

    # Input under test: a line comment, the 'a' token, then a bare `//`.
    commented_input = r'''
// This is comment
a
//
'''
    peg_parser.parse(commented_input)
1 change: 1 addition & 0 deletions arpeggio/tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def test_examples():
# Filter out __init__.py
examples = [f for f in glob.glob(examples_pat) if f != '__init__.py']
for e in examples:
print(e)
example_dir = os.path.dirname(e)
sys.path.insert(0, example_dir)
(module_name, _) = os.path.splitext(os.path.basename(e))
Expand Down
2 changes: 1 addition & 1 deletion examples/peg_peg/peg.peg
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

rule_name <- r'[a-zA-Z_]([a-zA-Z_]|[0-9])*';
rule_crossref <- rule_name;
regex <- 'r\'' r'(\\\'|[^\'])*' '\'';
regex <- r'(r\'[^\'\\]*(?:\\.[^\'\\]*)*\')|(r"[^"\\]*(?:\\.[^"\\]*)*")';
str_match <- r'\'(\\\'|[^\'])*\'|"[^"]*"';
LEFT_ARROW <- '<-';
SLASH <- '/';
Expand Down

0 comments on commit 844986e

Please sign in to comment.