Skip to content

Commit

Permalink
Fix percent delimiter strings with crlfs
Browse files Browse the repository at this point in the history
parse.y treats CRLF as a LF and basically "normalizes" them before
parsing.  That means a string like `%\nfoo\r\n` is actually treated as
`%\nfoo\n` for the purposes of parsing.  This happens on both the
opening side of the percent string as well as on the closing side.  So
for example `%\r\nfoo\n` must be treated as `%\nfoo\n`.

To handle this in Prism, when we start a % string, we check whether it
starts with `\r\n`, and if so we consider the terminator to be `\n`.  Then,
as we lex the string, we check for `\r\n` sequences and treat them as `\n`,
but only when the terminator of the string is `\n`.

Fixes: #3230

[Bug #20938]

Co-authored-by: John Hawthorn <[email protected]>
Co-authored-by: eileencodes <[email protected]>
Co-authored-by: Kevin Newton <[email protected]>
  • Loading branch information
3 people authored and tenderlove committed Dec 11, 2024
1 parent 75a6171 commit e573cea
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 3 deletions.
31 changes: 28 additions & 3 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -10508,6 +10508,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
}

const uint8_t *end = parser->current.end - 1;
assert(end >= start);
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));

token_buffer->cursor = end;
Expand Down Expand Up @@ -10588,9 +10589,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
}

const uint8_t delimiter = *parser->current.end;
parser->current.end += eol_length;
uint8_t delimiter = *parser->current.end;

// If our delimiter is \r\n, we want to treat it as if it's \n.
// For example, %\r\nfoo\r\n should be "foo"
if (eol_length == 2) {
delimiter = *(parser->current.end + 1);
}

parser->current.end += eol_length;
return delimiter;
}

Expand Down Expand Up @@ -12340,10 +12347,28 @@ parser_lex(pm_parser_t *parser) {
continue;
}

bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);

// If the terminator is newline, we need to consider \r\n _also_ a newline
// For example: `%\nfoo\r\n`
// The string should be "foo", not "foo\r"
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
if (lex_mode->as.string.terminator == '\n') {
is_terminator = true;
}

// If the terminator is a CR, but we see a CRLF, we need to
// treat the CRLF as a newline, meaning this is _not_ the
// terminator
if (lex_mode->as.string.terminator == '\r') {
is_terminator = false;
}
}

// Note that we have to check the terminator here first because we could
// potentially be parsing a % string that has a # character as the
// terminator.
if (*breakpoint == lex_mode->as.string.terminator) {
if (is_terminator) {
// If this terminator doesn't actually close the string, then we need
// to continue on past it.
if (lex_mode->as.string.nesting > 0) {
Expand Down
58 changes: 58 additions & 0 deletions test/prism/percent_delimiter_string_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# frozen_string_literal: true

require_relative "test_helper"

module Prism
  # Checks how `%`-delimited string literals are parsed when the delimiter is
  # a line ending: a bare LF, a bare CR, or a CRLF pair. Each test parses a
  # small source string and compares the unescaped value of the first string
  # node found in the resulting tree against the expected content.
  class PercentDelimiterStringTest < TestCase
    # Opened by LF, closed by CRLF: the CRLF terminates the string.
    def test_newline_terminator_with_lf_crlf
      str = "%\n123456\r\n"
      assert_parse "123456", str
    end

    # Opened by LF, followed by CR + CRLF: the lone CR is kept as content and
    # the CRLF terminates the string.
    def test_newline_terminator_with_lf_crlf_with_extra_cr
      str = "%\n123456\r\r\n"
      assert_parse "123456\r", str
    end

    # Opened by CRLF, closed by CRLF.
    def test_newline_terminator_with_crlf_pair
      str = "%\r\n123456\r\n"
      assert_parse "123456", str
    end

    # Opened by CRLF, followed by CR + CRLF: the lone CR is kept as content
    # and the CRLF terminates the string.
    def test_newline_terminator_with_crlf_crlf_with_extra_cr
      str = "%\r\n123456\r\r\n"
      assert_parse "123456\r", str
    end

    # Opened by a bare CR, closed by a bare CR.
    def test_newline_terminator_with_cr_cr
      str = "%\r123456\r;\n"
      assert_parse "123456", str
    end

    # Opened by CRLF, closed by a bare LF.
    def test_newline_terminator_with_crlf_lf
      str = "%\r\n123456\n;\n"
      assert_parse "123456", str
    end

    # Opened by a bare CR: a CRLF inside the body does not close the string
    # and shows up as "\n" in the unescaped value; the final bare CR closes it.
    def test_cr_crlf
      str = "%\r1\r\n \r"
      assert_parse "1\n ", str
    end

    # Opened by LF: the first CRLF closes the string, so only "1" is content.
    def test_lf_crlf
      str = "%\n1\r\n \n"
      assert_parse "1", str
    end

    # Opened by LF: the first LF closes the string, so only "1" is content.
    def test_lf_lf
      str = "%\n1\n \n"
      assert_parse "1", str
    end

    private

    # Parses +str+ and asserts that the first string node found by a
    # breadth-first search of the tree unescapes to +expected+.
    #
    # Fails with a message naming the source when no string node is found,
    # rather than raising NoMethodError on nil.
    def assert_parse(expected, str)
      tree = Prism.parse(str)
      node = tree.value.breadth_first_search { |candidate| candidate.is_a?(Prism::StringNode) }

      assert_kind_of Prism::StringNode, node, "expected a StringNode when parsing #{str.inspect}"
      assert_equal expected, node.unescaped
    end
  end
end

0 comments on commit e573cea

Please sign in to comment.