# Lexer changes for src/antlr/GroovyLexer.g4 (three hunks).
#
# Hunk 1 (JavaLetter) and hunk 2 (JavaLetterOrDigit): the alternative that
# matches characters above 0x7F which are not high surrogates now also
# requires !Character.isIdentifierIgnorable(...), so an identifier-ignorable
# character is no longer lexed as part of an identifier.
#
# Review amendment in hunk 2: the surrogate-pair alternative of
# JavaLetterOrDigit receives the same exclusion. Character.isJavaIdentifierPart(int)
# is documented to return true for identifier-ignorable code points, and such
# code points exist above U+FFFF (e.g. U+E0001), so without this the two
# alternatives of the same rule disagree. JavaLetter's surrogate alternative is
# left untouched: Character.isJavaIdentifierStart only accepts letters, letter
# numbers, currency symbols and connecting punctuation, none of which are
# identifier-ignorable.
#
# Hunk 3 (WS): form feed (\u000C) is removed from the skipped whitespace set,
# leaving space and tab (plus LineEscape).
#
# NOTE(review): this section was recovered from a whitespace-collapsed copy;
# the leading indentation of the grammar lines is reconstructed and must be
# checked against the real file before applying. The post-image blob id on the
# "index" line predates the amendment above and should be regenerated.
diff --git a/src/antlr/GroovyLexer.g4 b/src/antlr/GroovyLexer.g4
index a6c7e72e24c..9348ef475eb 100644
--- a/src/antlr/GroovyLexer.g4
+++ b/src/antlr/GroovyLexer.g4
@@ -891,7 +891,7 @@ JavaLetter
     :   [a-zA-Z$_] // these are the "java letters" below 0x7F
     |   // covers all characters above 0x7F which are not a surrogate
         ~[\u0000-\u007F\uD800-\uDBFF]
-        { Character.isJavaIdentifierStart(_input.LA(-1)) }?
+        { Character.isJavaIdentifierStart(_input.LA(-1)) && !Character.isIdentifierIgnorable(_input.LA(-1)) }?
     |   // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
         [\uD800-\uDBFF] [\uDC00-\uDFFF]
         { Character.isJavaIdentifierStart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }?
@@ -907,7 +907,7 @@ JavaLetterOrDigit
     :   [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
     |   // covers all characters above 0x7F which are not a surrogate
         ~[\u0000-\u007F\uD800-\uDBFF]
-        { Character.isJavaIdentifierPart(_input.LA(-1)) }?
+        { Character.isJavaIdentifierPart(_input.LA(-1)) && !Character.isIdentifierIgnorable(_input.LA(-1)) }?
     |   // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
         [\uD800-\uDBFF] [\uDC00-\uDFFF]
-        { Character.isJavaIdentifierPart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }?
+        { Character.isJavaIdentifierPart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) && !Character.isIdentifierIgnorable(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }?
@@ -928,7 +928,7 @@ ELLIPSIS : '...';
 //
 // Whitespace, line escape and comments
 //
-WS : ([ \t\u000C]+ | LineEscape+) -> skip
+WS : ([ \t]+ | LineEscape+) -> skip
     ;
 
 // Inside (...) and [...] but not {...}, ignore newlines.
# Test changes for SyntaxErrorTest.groovy (one hunk).
#
# Adds the method 'test groovy core - UnexpectedCharacter 2'. It calls
# expectParseError three times and compares each result against a complete
# expected error report:
#   1. U+200B directly after "def " (error reported at line 1, column 5)
#   2. U+200B inside the name "na...me" (line 1, column 7)
#   3. U+000C (form feed) inside the name "na...me" (line 1, column 7)
# Each case carries a TODO asking whether the offending character could be
# escaped in the error message.
#
# NOTE(review): this section is a whitespace-collapsed copy. The line breaks and
# the column alignment of the source echo and "^" marker inside the '''
# literals cannot be recovered from here - confirm against the real file.
# NOTE(review): "Expecting @ line 1" in cases 2 and 3 reads as if an expected
# token name may have been lost between "Expecting" and "@" - confirm against
# the actual parser output.
diff --git a/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy b/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy index 54de385a5ce..0b789dbf117 100644 --- a/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy +++ b/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy @@ -75,6 +75,54 @@ final class SyntaxErrorTest extends GroovyTestCase { TestUtils.doRunAndShouldFail('fail/UnexpectedCharacter_01x.groovy') } + void 'test groovy core - UnexpectedCharacter 2'() { + def err = expectParseError '''\ + |def \u200Bname = null + |'''.stripMargin() + + // TODO: Could the character be escaped in the error message? + assert err == '''\ + |startup failed: + |test.groovy: 1: Unexpected input: 'def \u200B' @ line 1, column 5. + | def \u200Bname = null + | ^ + | + |1 error + |'''.stripMargin() + + // + + err = expectParseError '''\ + |def na\u200Bme = null + |'''.stripMargin() + + // TODO: Could the character be escaped in the error message? + assert err == '''\ + |startup failed: + |test.groovy: 1: Unexpected input: '\u200B'; Expecting @ line 1, column 7. + | def na\u200Bme = null + | ^ + | + |1 error + |'''.stripMargin() + + // + + err = expectParseError '''\ + |def na\u000Cme = null + |'''.stripMargin() + + // TODO: Could the character be escaped in the error message? + assert err == '''\ + |startup failed: + |test.groovy: 1: Unexpected input: '\u000C'; Expecting @ line 1, column 7. + | def na\u000Cme = null + | ^ + | + |1 error + |'''.stripMargin() + } + void 'test groovy core - ParExpression'() { TestUtils.doRunAndShouldFail('fail/ParExpression_01x.groovy') TestUtils.doRunAndShouldFail('fail/ParExpression_02x.groovy')