Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not accept unicode escape characters in byte strings or as byte #23625

Merged
merged 1 commit into from
Mar 28, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 29 additions & 11 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ impl<'a> StringReader<'a> {
let start_bpos = self.last_pos;
let mut accum_int = 0;

let mut valid = true;
for _ in 0..n_digits {
if self.is_eof() {
let last_bpos = self.last_pos;
Expand All @@ -750,13 +751,16 @@ impl<'a> StringReader<'a> {
if self.curr_is(delim) {
let last_bpos = self.last_pos;
self.err_span_(start_bpos, last_bpos, "numeric character escape is too short");
valid = false;
break;
}
let c = self.curr.unwrap_or('\x00');
accum_int *= 16;
accum_int += c.to_digit(16).unwrap_or_else(|| {
self.err_span_char(self.last_pos, self.pos,
"illegal character in numeric character escape", c);

valid = false;
0
});
self.bump();
Expand All @@ -767,10 +771,11 @@ impl<'a> StringReader<'a> {
self.last_pos,
"this form of character escape may only be used \
with characters in the range [\\x00-\\x7f]");
valid = false;
}

match char::from_u32(accum_int) {
Some(_) => true,
Some(_) => valid,
None => {
let last_bpos = self.last_pos;
self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
Expand Down Expand Up @@ -799,7 +804,18 @@ impl<'a> StringReader<'a> {
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
'x' => self.scan_byte_escape(delim, !ascii_only),
'u' if self.curr_is('{') => {
self.scan_unicode_escape(delim)
let valid = self.scan_unicode_escape(delim);
if valid && ascii_only {
self.err_span_(
escaped_pos,
self.last_pos,
"unicode escape sequences cannot be used as a byte or in \
a byte string"
);
false
} else {
valid
}
}
'\n' if delim == '"' => {
self.consume_whitespace();
Expand Down Expand Up @@ -869,6 +885,7 @@ impl<'a> StringReader<'a> {
let start_bpos = self.last_pos;
let mut count = 0;
let mut accum_int = 0;
let mut valid = true;

while !self.curr_is('}') && count <= 6 {
let c = match self.curr {
Expand All @@ -884,29 +901,30 @@ impl<'a> StringReader<'a> {
self.fatal_span_(self.last_pos, self.pos,
"unterminated unicode escape (needed a `}`)");
} else {
self.fatal_span_char(self.last_pos, self.pos,
self.err_span_char(self.last_pos, self.pos,
"illegal character in unicode escape", c);
}
valid = false;
0
});
self.bump();
count += 1;
}

if count > 6 {
self.fatal_span_(start_bpos, self.last_pos,
self.err_span_(start_bpos, self.last_pos,
"overlong unicode escape (can have at most 6 hex digits)");
valid = false;
}

self.bump(); // past the ending }

let mut valid = count >= 1 && count <= 6;
if char::from_u32(accum_int).is_none() {
valid = false;
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape");
valid= false;
}

if !valid {
self.fatal_span_(start_bpos, self.last_pos, "illegal unicode character escape");
}

valid
}

Expand Down Expand Up @@ -1330,7 +1348,7 @@ impl<'a> StringReader<'a> {
"unterminated byte constant".to_string());
}

let id = if valid { self.name_from(start) } else { token::intern("??") };
let id = if valid { self.name_from(start) } else { token::intern("?") };
self.bump(); // advance curr past token
return token::Byte(id);
}
Expand Down
45 changes: 45 additions & 0 deletions src/test/parse-fail/issue-23620-invalid-escapes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Parse-fail test: every literal below contains an escape sequence the lexer
// is expected to reject. The `//~^ ERROR ...` lines following each statement
// list the expected diagnostics (each extra `^` presumably targets one line
// further up, so the annotations must stay directly below their statement).
fn main() {
// Well-formed unicode escape, but inside a byte string.
let _ = b"\u{a66e}";
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

// Well-formed unicode escape, but inside a byte literal.
let _ = b'\u{a66e}';
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

// `\u` with no `{...}` part is reported as an unknown byte escape.
let _ = b'\u';
//~^ ERROR unknown byte escape: u

// `\x` followed by a single hex digit before the closing delimiter.
let _ = b'\x5';
//~^ ERROR numeric character escape is too short

// `\x` followed by two non-hex characters: one error per bad character.
let _ = b'\xxy';
//~^ ERROR illegal character in numeric character escape: x
//~^^ ERROR illegal character in numeric character escape: y

// Same too-short `\x` escape, this time in a char literal.
let _ = '\x5';
//~^ ERROR numeric character escape is too short

// Same non-hex `\x` escape, this time in a char literal.
let _ = '\xxy';
//~^ ERROR illegal character in numeric character escape: x
//~^^ ERROR illegal character in numeric character escape: y

// Several bad escapes in one byte string: each one gets its own error.
let _ = b"\u{a4a4} \xf \u";
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
//~^^ ERROR illegal character in numeric character escape:
//~^^^ ERROR unknown byte escape: u

// Several bad escapes in one ordinary string: each one gets its own error.
let _ = "\u{ffffff} \xf \u";
//~^ ERROR illegal unicode character escape
//~^^ ERROR illegal character in numeric character escape:
//~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
//~^^^^ ERROR unknown character escape: u
}
5 changes: 4 additions & 1 deletion src/test/parse-fail/new-unicode-escapes-4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@
// except according to those terms.

pub fn main() {
let s = "\u{lol}"; //~ ERROR illegal character in unicode escape
let s = "\u{lol}";
//~^ ERROR illegal character in unicode escape: l
//~^^ ERROR illegal character in unicode escape: o
//~^^^ ERROR illegal character in unicode escape: l
}