Skip to content

Commit

Permalink
Also support \u{...} for universal character names
Browse files (browse the repository at this point in the history)
Add an error message to the failure path

Add `//G` grammar comments

Run regression tests
  • Loading branch information
hsutter committed Jul 12, 2024
1 parent 10115ca commit 4048888
Showing 1 changed file with 53 additions and 10 deletions.
63 changes: 53 additions & 10 deletions source/lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,21 +930,27 @@ auto lex_line(
return 0;
};

//G simple-hexadecimal-digit-sequence:
//G hexadecimal-digit
//G simple-hexadecimal-digit-sequence hexadecimal-digit
//G
//G hexadecimal-escape-sequence:
//G '\x' hexadecimal-digit
//G hexadecimal-escape-sequence hexadecimal-digit
//G '\x{' simple-hexadecimal-digit-sequence '}'
//G
auto peek_is_hexadecimal_escape_sequence = [&](int offset)
{
if (
peek( offset) == '\\'
peek(offset) == '\\'
&& peek(1+offset) == 'x'
&& (is_hexadecimal_digit(peek(2+offset))
|| (peek(2+offset) == '{' && is_hexadecimal_digit(peek(3+offset)))
&& (
is_hexadecimal_digit(peek(2+offset))
|| (peek(2+offset) == '{' && is_hexadecimal_digit(peek(3+offset)))
)
)
)
{
bool has_bracket = peek(2+offset) == '{';
auto has_bracket = peek(2+offset) == '{';
auto j = 3;

if (has_bracket) { ++j; }
Expand All @@ -961,6 +967,11 @@ auto lex_line(
if (peek(j+offset) == '}') {
++j;
} else {
errors.emplace_back(
source_position(lineno, i + offset),
"invalid hexadecimal escape sequence - \\x{ must"
" be followed by hexadecimal digits and a closing }"
);
return 0;
}
}
Expand All @@ -972,6 +983,7 @@ auto lex_line(
//G universal-character-name:
//G '\u' hex-quad
//G '\U' hex-quad hex-quad
//G '\u{' simple-hexadecimal-digit-sequence '}'
//G
//G hex-quad:
//G hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
Expand All @@ -981,6 +993,7 @@ auto lex_line(
if (
peek(offset) == '\\'
&& peek(1 + offset) == 'u'
&& peek(2 + offset) != '{'
)
{
auto j = 2;
Expand All @@ -994,11 +1007,41 @@ auto lex_line(
if (j == 6) { return j; }
errors.emplace_back(
source_position( lineno, i + offset ),
"invalid universal character name (\\u must"
" be followed by 4 hexadecimal digits)"
"invalid universal character name - \\u without { must"
" be followed by 4 hexadecimal digits"
);
}
if (

else if (
peek(offset) == '\\'
&& peek(1 + offset) == 'u'
&& peek(2 + offset) == '{'
)
{
auto j = 4;

while (
peek(j + offset)
&& is_hexadecimal_digit(peek(j + offset))
)
{
++j;
}

if (peek(j + offset) == '}') {
++j;
}
else {
errors.emplace_back(
source_position(lineno, i + offset),
"invalid universal character name - \\u{ must"
" be followed by hexadecimal digits and a closing }"
);
}
return j;
}

else if (
peek(offset) == '\\'
&& peek(1+offset) == 'U'
)
Expand All @@ -1014,8 +1057,8 @@ auto lex_line(
if (j == 10) { return j; }
errors.emplace_back(
source_position(lineno, i+offset),
"invalid universal character name (\\U must"
" be followed by 8 hexadecimal digits)"
"invalid universal character name - \\U must"
" be followed by 8 hexadecimal digits"
);
}
return 0;
Expand Down

0 comments on commit 4048888

Please sign in to comment.