From 87bf88b640c17d3247bfa796b1192c3a96b14ad0 Mon Sep 17 00:00:00 2001 From: Eric Milles Date: Fri, 20 Mar 2020 10:01:41 -0500 Subject: [PATCH] GroovyLexer: parse error for control and format characters Inspired by: https://stackoverflow.com/a/59984875 See also: https://github.com/groovy/groovy-eclipse/issues/1060 --- src/antlr/GroovyLexer.g4 | 4 +-- .../parser/antlr4/SyntaxErrorTest.groovy | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/antlr/GroovyLexer.g4 b/src/antlr/GroovyLexer.g4 index fd930cbc17ac..e23808249c46 100644 --- a/src/antlr/GroovyLexer.g4 +++ b/src/antlr/GroovyLexer.g4 @@ -891,7 +891,7 @@ JavaLetter : [a-zA-Z$_] // these are the "java letters" below 0x7F | // covers all characters above 0x7F which are not a surrogate ~[\u0000-\u007F\uD800-\uDBFF] - { Character.isJavaIdentifierStart(_input.LA(-1)) }? + { Character.isJavaIdentifierStart(_input.LA(-1)) && !Character.isIdentifierIgnorable(_input.LA(-1)) }? | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF [\uD800-\uDBFF] [\uDC00-\uDFFF] { Character.isJavaIdentifierStart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }? @@ -907,7 +907,7 @@ JavaLetterOrDigit : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F | // covers all characters above 0x7F which are not a surrogate ~[\u0000-\u007F\uD800-\uDBFF] - { Character.isJavaIdentifierPart(_input.LA(-1)) }? + { Character.isJavaIdentifierPart(_input.LA(-1)) && !Character.isIdentifierIgnorable(_input.LA(-1)) }? | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF [\uD800-\uDBFF] [\uDC00-\uDFFF] { Character.isJavaIdentifierPart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }? 
diff --git a/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy b/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy index 6d15e9801778..eae298ad5d09 100644 --- a/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy +++ b/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy @@ -75,6 +75,38 @@ final class SyntaxErrorTest extends GroovyTestCase { TestUtils.doRunAndShouldFail('fail/UnexpectedCharacter_01x.groovy') } + void 'test groovy core - UnexpectedCharacter 2'() { + def err = expectParseError '''\ + |def \u200Bname = null + |'''.stripMargin() + + // TODO: Could the character be escaped in the error message? + assert err == '''\ + |startup failed: + |test.groovy: 1: Unexpected input: 'def \u200B' @ line 1, column 5. + | def \u200Bname = null + | ^ + | + |1 error + |'''.stripMargin() + + // Second scenario: the same U+200B character, now in the middle of the identifier rather than in front of it. + + err = expectParseError '''\ + |def na\u200Bme = null + |'''.stripMargin() + + // TODO: Could the character be escaped in the error message? + assert err == '''\ + |startup failed: + |test.groovy: 1: Unexpected input: '\u200B'; Expecting @ line 1, column 7. + | def na\u200Bme = null + | ^ + | + |1 error + |'''.stripMargin() + } + void 'test groovy core - ParExpression'() { TestUtils.doRunAndShouldFail('fail/ParExpression_01x.groovy') TestUtils.doRunAndShouldFail('fail/ParExpression_02x.groovy')