From 49cdf36d2b993e08833bfdda2563b0c22ee42de7 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Mon, 8 Apr 2013 01:26:51 +1000 Subject: [PATCH 1/3] libcore: from_str_common: correctly signal failure on repeating base 2^n numbers. A number like 0b1_1111_1111 == 511 would be parsed to Some(255u8) rather than None by from_str_common, since 255 * 2 + 1 == 255 (mod 256) so the overflow wasn't detected. Only applied to conversions where the radix was a power of 2, and where all digits repeated. Closes #5770. --- src/libcore/num/strconv.rs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs index 5299203eb4273..687b6344b39b2 100644 --- a/src/libcore/num/strconv.rs +++ b/src/libcore/num/strconv.rs @@ -448,7 +448,7 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u; * - Could accept option to allow ignoring underscores, allowing for numbers * formated like `FF_AE_FF_FF`. */ -pub fn from_str_bytes_common+ +pub fn from_str_bytes_common+ Mul+Sub+Neg+Add+ NumStrConv>( buf: &[u8], radix: uint, negative: bool, fractional: bool, @@ -531,9 +531,12 @@ pub fn from_str_bytes_common+ accum -= cast(digit as int); } - // Detect overflow by comparing to last value - if accum_positive && accum < last_accum { return None; } - if !accum_positive && accum > last_accum { return None; } + // Detect overflow by comparing to last value, except + // if we've not seen any non-zero digits. + if last_accum != _0 { + if accum_positive && accum <= last_accum { return None; } + if !accum_positive && accum >= last_accum { return None; } + } last_accum = accum; } None => match c { @@ -637,7 +640,7 @@ pub fn from_str_bytes_common+ * `from_str_bytes_common()`, for details see there. 
*/ #[inline(always)] -pub fn from_str_common+Mul+ +pub fn from_str_common+Mul+ Sub+Neg+Add+NumStrConv>( buf: &str, radix: uint, negative: bool, fractional: bool, special: bool, exponent: ExponentFormat, empty_zero: bool @@ -645,3 +648,19 @@ pub fn from_str_common+Mul+ from_str_bytes_common(str::to_bytes(buf), radix, negative, fractional, special, exponent, empty_zero) } + +#[cfg(test)] +mod test { + use super::*; + use option::*; + + #[test] + fn from_str_issue5770() { + // try to parse 0b1_1111_1111 = 511 as a u8. Caused problems + // since 255*2+1 == 255 (mod 256) so the overflow wasn't + // detected. + let n : Option = from_str_common("111111111", 2, false, false, false, + ExpNone, false); + assert_eq!(n, None); + } +} From 41c6f67109ed8adee209f28ce7f9dbe1432a45bd Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Mon, 8 Apr 2013 00:23:42 +1000 Subject: [PATCH 2/3] libcore: from_str_common: provide option to ignore underscores. Implement the possible improvement listed in the comment on from_str_bytes_common. 
--- src/libcore/num/f32.rs | 6 ++--- src/libcore/num/f64.rs | 6 ++--- src/libcore/num/float.rs | 6 ++--- src/libcore/num/int-template.rs | 6 ++--- src/libcore/num/strconv.rs | 38 ++++++++++++++++++++++++-------- src/libcore/num/uint-template.rs | 6 ++--- 6 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/libcore/num/f32.rs b/src/libcore/num/f32.rs index 6361a6a5cb75e..fa82001151e2a 100644 --- a/src/libcore/num/f32.rs +++ b/src/libcore/num/f32.rs @@ -507,7 +507,7 @@ impl num::ToStrRadix for f32 { #[inline(always)] pub fn from_str(num: &str) -> Option { strconv::from_str_common(num, 10u, true, true, true, - strconv::ExpDec, false) + strconv::ExpDec, false, false) } /** @@ -540,7 +540,7 @@ pub fn from_str(num: &str) -> Option { #[inline(always)] pub fn from_str_hex(num: &str) -> Option { strconv::from_str_common(num, 16u, true, true, true, - strconv::ExpBin, false) + strconv::ExpBin, false, false) } /** @@ -565,7 +565,7 @@ pub fn from_str_hex(num: &str) -> Option { #[inline(always)] pub fn from_str_radix(num: &str, rdx: uint) -> Option { strconv::from_str_common(num, rdx, true, true, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } impl from_str::FromStr for f32 { diff --git a/src/libcore/num/f64.rs b/src/libcore/num/f64.rs index 9e731e61ec49e..67dfabacd0b0e 100644 --- a/src/libcore/num/f64.rs +++ b/src/libcore/num/f64.rs @@ -529,7 +529,7 @@ impl num::ToStrRadix for f64 { #[inline(always)] pub fn from_str(num: &str) -> Option { strconv::from_str_common(num, 10u, true, true, true, - strconv::ExpDec, false) + strconv::ExpDec, false, false) } /** @@ -562,7 +562,7 @@ pub fn from_str(num: &str) -> Option { #[inline(always)] pub fn from_str_hex(num: &str) -> Option { strconv::from_str_common(num, 16u, true, true, true, - strconv::ExpBin, false) + strconv::ExpBin, false, false) } /** @@ -587,7 +587,7 @@ pub fn from_str_hex(num: &str) -> Option { #[inline(always)] pub fn from_str_radix(num: &str, rdx: uint) -> Option { 
strconv::from_str_common(num, rdx, true, true, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } impl from_str::FromStr for f64 { diff --git a/src/libcore/num/float.rs b/src/libcore/num/float.rs index c80d52f496b49..2508292970303 100644 --- a/src/libcore/num/float.rs +++ b/src/libcore/num/float.rs @@ -242,7 +242,7 @@ impl num::ToStrRadix for float { #[inline(always)] pub fn from_str(num: &str) -> Option { strconv::from_str_common(num, 10u, true, true, true, - strconv::ExpDec, false) + strconv::ExpDec, false, false) } /** @@ -275,7 +275,7 @@ pub fn from_str(num: &str) -> Option { #[inline(always)] pub fn from_str_hex(num: &str) -> Option { strconv::from_str_common(num, 16u, true, true, true, - strconv::ExpBin, false) + strconv::ExpBin, false, false) } /** @@ -300,7 +300,7 @@ pub fn from_str_hex(num: &str) -> Option { #[inline(always)] pub fn from_str_radix(num: &str, radix: uint) -> Option { strconv::from_str_common(num, radix, true, true, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } impl from_str::FromStr for float { diff --git a/src/libcore/num/int-template.rs b/src/libcore/num/int-template.rs index a3cbd9fe7e3a4..db90ec7946537 100644 --- a/src/libcore/num/int-template.rs +++ b/src/libcore/num/int-template.rs @@ -202,21 +202,21 @@ impl ops::Neg for T { #[inline(always)] pub fn from_str(s: &str) -> Option { strconv::from_str_common(s, 10u, true, false, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } /// Parse a string as a number in the given base. #[inline(always)] pub fn from_str_radix(s: &str, radix: uint) -> Option { strconv::from_str_common(s, radix, true, false, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } /// Parse a byte slice as a number in the given base. 
#[inline(always)] pub fn parse_bytes(buf: &[u8], radix: uint) -> Option { strconv::from_str_bytes_common(buf, radix, true, false, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } impl FromStr for T { diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs index 687b6344b39b2..95da5bc29f5df 100644 --- a/src/libcore/num/strconv.rs +++ b/src/libcore/num/strconv.rs @@ -429,6 +429,8 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u; * `FFp128`. The exponent string itself is always base 10. * Can conflict with `radix`, see Failure. * - `empty_zero` - Whether to accept a empty `buf` as a 0 or not. + * - `ignore_underscores` - Whether all underscores within the string should + * be ignored. * * # Return value * Returns `Some(n)` if `buf` parses to a number n without overflowing, and @@ -443,16 +445,13 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u; * between digit and exponent sign `'p'`. * - Fails if `radix` > 18 and `special == true` due to conflict * between digit and lowest first character in `inf` and `NaN`, the `'i'`. - * - * # Possible improvements - * - Could accept option to allow ignoring underscores, allowing for numbers - * formated like `FF_AE_FF_FF`. 
*/ pub fn from_str_bytes_common+ Mul+Sub+Neg+Add+ NumStrConv>( buf: &[u8], radix: uint, negative: bool, fractional: bool, - special: bool, exponent: ExponentFormat, empty_zero: bool + special: bool, exponent: ExponentFormat, empty_zero: bool, + ignore_underscores: bool ) -> Option { match exponent { ExpDec if radix >= DIGIT_E_RADIX // decimal exponent 'e' @@ -540,6 +539,7 @@ pub fn from_str_bytes_common+ last_accum = accum; } None => match c { + '_' if ignore_underscores => {} 'e' | 'E' | 'p' | 'P' => { exp_found = true; break; // start of exponent @@ -583,6 +583,7 @@ pub fn from_str_bytes_common+ last_accum = accum; } None => match c { + '_' if ignore_underscores => {} 'e' | 'E' | 'p' | 'P' => { exp_found = true; break; // start of exponent @@ -610,6 +611,7 @@ pub fn from_str_bytes_common+ if exp_found { let c = buf[i] as char; let base = match (c, exponent) { + // c is never _ so don't need to handle specially ('e', ExpDec) | ('E', ExpDec) => 10u, ('p', ExpBin) | ('P', ExpBin) => 2u, _ => return None // char doesn't fit given exponent format @@ -618,7 +620,8 @@ pub fn from_str_bytes_common+ // parse remaining bytes as decimal integer, // skipping the exponent char let exp: Option = from_str_bytes_common( - buf.slice(i+1, len), 10, true, false, false, ExpNone, false); + buf.slice(i+1, len), 10, true, false, false, ExpNone, false, + ignore_underscores); match exp { Some(exp_pow) => { @@ -643,10 +646,12 @@ pub fn from_str_bytes_common+ pub fn from_str_common+Mul+ Sub+Neg+Add+NumStrConv>( buf: &str, radix: uint, negative: bool, fractional: bool, - special: bool, exponent: ExponentFormat, empty_zero: bool + special: bool, exponent: ExponentFormat, empty_zero: bool, + ignore_underscores: bool ) -> Option { from_str_bytes_common(str::to_bytes(buf), radix, negative, - fractional, special, exponent, empty_zero) + fractional, special, exponent, empty_zero, + ignore_underscores) } #[cfg(test)] @@ -654,13 +659,28 @@ mod test { use super::*; use option::*; + #[test] + fn 
from_str_ignore_underscores() { + let s : Option = from_str_common("__1__", 2, false, false, false, + ExpNone, false, true); + assert_eq!(s, Some(1u8)); + + let n : Option = from_str_common("__1__", 2, false, false, false, + ExpNone, false, false); + assert_eq!(n, None); + + let f : Option = from_str_common("_1_._1_e_1_", 10, false, true, false, + ExpDec, false, true); + assert_eq!(f, Some(1.1e1f32)); + } + #[test] fn from_str_issue5770() { // try to parse 0b1_1111_1111 = 511 as a u8. Caused problems // since 255*2+1 == 255 (mod 256) so the overflow wasn't // detected. let n : Option = from_str_common("111111111", 2, false, false, false, - ExpNone, false); + ExpNone, false, false); assert_eq!(n, None); } } diff --git a/src/libcore/num/uint-template.rs b/src/libcore/num/uint-template.rs index 400417284a279..39af025217c93 100644 --- a/src/libcore/num/uint-template.rs +++ b/src/libcore/num/uint-template.rs @@ -168,21 +168,21 @@ impl ops::Neg for T { #[inline(always)] pub fn from_str(s: &str) -> Option { strconv::from_str_common(s, 10u, false, false, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } /// Parse a string as a number in the given base. #[inline(always)] pub fn from_str_radix(s: &str, radix: uint) -> Option { strconv::from_str_common(s, radix, false, false, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } /// Parse a byte slice as a number in the given base. #[inline(always)] pub fn parse_bytes(buf: &[u8], radix: uint) -> Option { strconv::from_str_bytes_common(buf, radix, false, false, false, - strconv::ExpNone, false) + strconv::ExpNone, false, false) } impl FromStr for T { From 0c2ceb1a2e93e2e7624d384e19da6783cbb720ba Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Mon, 8 Apr 2013 00:39:28 +1000 Subject: [PATCH 3/3] libsyntax: fail lexing with an error message on an int literal larger than 2^64. Stops an ICE. Closes #5544. 
--- src/libsyntax/parse/lexer.rs | 11 +++++++++-- src/test/compile-fail/issue-5544-a.rs | 14 ++++++++++++++ src/test/compile-fail/issue-5544-b.rs | 14 ++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 src/test/compile-fail/issue-5544-a.rs create mode 100644 src/test/compile-fail/issue-5544-b.rs diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 5e06ecf60908e..b6ec15d86413b 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -442,7 +442,11 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { if str::len(num_str) == 0u { rdr.fatal(~"no valid digits found for number"); } - let parsed = u64::from_str_radix(num_str, base as uint).get(); + let parsed = match u64::from_str_radix(num_str, base as uint) { + Some(p) => p, + None => rdr.fatal(~"int literal is too large") + }; + match tp { either::Left(t) => return token::LIT_INT(parsed as i64, t), either::Right(t) => return token::LIT_UINT(parsed, t) @@ -503,7 +507,10 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { if str::len(num_str) == 0u { rdr.fatal(~"no valid digits found for number"); } - let parsed = u64::from_str_radix(num_str, base as uint).get(); + let parsed = match u64::from_str_radix(num_str, base as uint) { + Some(p) => p, + None => rdr.fatal(~"int literal is too large") + }; debug!("lexing %s as an unsuffixed integer literal", num_str); diff --git a/src/test/compile-fail/issue-5544-a.rs b/src/test/compile-fail/issue-5544-a.rs new file mode 100644 index 0000000000000..42a18ba5fb765 --- /dev/null +++ b/src/test/compile-fail/issue-5544-a.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option.
This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + let _i = 18446744073709551616; // 2^64 + //~^ ERROR int literal is too large +} diff --git a/src/test/compile-fail/issue-5544-b.rs b/src/test/compile-fail/issue-5544-b.rs new file mode 100644 index 0000000000000..bbe43e652a800 --- /dev/null +++ b/src/test/compile-fail/issue-5544-b.rs @@ -0,0 +1,14 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + let _i = 0xff_ffff_ffff_ffff_ffff; + //~^ ERROR int literal is too large +}