From 49cdf36d2b993e08833bfdda2563b0c22ee42de7 Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Mon, 8 Apr 2013 01:26:51 +1000
Subject: [PATCH 1/3] libcore: from_str_common: correctly signal failure on
 repeating base 2^n numbers.

A number like 0b1_1111_1111 == 511 would be parsed to Some(255u8) rather than None
by from_str_common, since 255 * 2 + 1 == 255 (mod 256) so the overflow wasn't detected.

The bug only affected conversions where the radix was a power of 2 and where
all of the digits were the same repeated digit.

Closes #5770.
---
 src/libcore/num/strconv.rs | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs
index 5299203eb4273..687b6344b39b2 100644
--- a/src/libcore/num/strconv.rs
+++ b/src/libcore/num/strconv.rs
@@ -448,7 +448,7 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
  * - Could accept option to allow ignoring underscores, allowing for numbers
  *   formated like `FF_AE_FF_FF`.
  */
-pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
+pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
                                     Mul<T,T>+Sub<T,T>+Neg<T>+Add<T,T>+
                                     NumStrConv>(
         buf: &[u8], radix: uint, negative: bool, fractional: bool,
@@ -531,9 +531,12 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
                     accum -= cast(digit as int);
                 }
 
-                // Detect overflow by comparing to last value
-                if accum_positive && accum < last_accum { return None; }
-                if !accum_positive && accum > last_accum { return None; }
+                // Detect overflow by comparing to last value, except
+                // if we've not seen any non-zero digits.
+                if last_accum != _0 {
+                    if accum_positive && accum <= last_accum { return None; }
+                    if !accum_positive && accum >= last_accum { return None; }
+                }
                 last_accum = accum;
             }
             None => match c {
@@ -637,7 +640,7 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
  * `from_str_bytes_common()`, for details see there.
  */
 #[inline(always)]
-pub fn from_str_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+Mul<T,T>+
+pub fn from_str_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+Mul<T,T>+
                               Sub<T,T>+Neg<T>+Add<T,T>+NumStrConv>(
         buf: &str, radix: uint, negative: bool, fractional: bool,
         special: bool, exponent: ExponentFormat, empty_zero: bool
@@ -645,3 +648,19 @@ pub fn from_str_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+Mul<T,T>+
     from_str_bytes_common(str::to_bytes(buf), radix, negative,
                             fractional, special, exponent, empty_zero)
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use option::*;
+
+    #[test]
+    fn from_str_issue5770() {
+        // try to parse 0b1_1111_1111 = 511 as a u8. Caused problems
+        // since 255*2+1 == 255 (mod 256) so the overflow wasn't
+        // detected.
+        let n : Option<u8> = from_str_common("111111111", 2, false, false, false,
+                                             ExpNone, false);
+        assert_eq!(n, None);
+    }
+}

From 41c6f67109ed8adee209f28ce7f9dbe1432a45bd Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Mon, 8 Apr 2013 00:23:42 +1000
Subject: [PATCH 2/3] libcore: from_str_common: provide option to ignore
 underscores.

Implement the possible improvement listed in the comment on
from_str_bytes_common.
---
 src/libcore/num/f32.rs           |  6 ++---
 src/libcore/num/f64.rs           |  6 ++---
 src/libcore/num/float.rs         |  6 ++---
 src/libcore/num/int-template.rs  |  6 ++---
 src/libcore/num/strconv.rs       | 38 ++++++++++++++++++++++++--------
 src/libcore/num/uint-template.rs |  6 ++---
 6 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/src/libcore/num/f32.rs b/src/libcore/num/f32.rs
index 6361a6a5cb75e..fa82001151e2a 100644
--- a/src/libcore/num/f32.rs
+++ b/src/libcore/num/f32.rs
@@ -507,7 +507,7 @@ impl num::ToStrRadix for f32 {
 #[inline(always)]
 pub fn from_str(num: &str) -> Option<f32> {
     strconv::from_str_common(num, 10u, true, true, true,
-                             strconv::ExpDec, false)
+                             strconv::ExpDec, false, false)
 }
 
 /**
@@ -540,7 +540,7 @@ pub fn from_str(num: &str) -> Option<f32> {
 #[inline(always)]
 pub fn from_str_hex(num: &str) -> Option<f32> {
     strconv::from_str_common(num, 16u, true, true, true,
-                             strconv::ExpBin, false)
+                             strconv::ExpBin, false, false)
 }
 
 /**
@@ -565,7 +565,7 @@ pub fn from_str_hex(num: &str) -> Option<f32> {
 #[inline(always)]
 pub fn from_str_radix(num: &str, rdx: uint) -> Option<f32> {
     strconv::from_str_common(num, rdx, true, true, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 impl from_str::FromStr for f32 {
diff --git a/src/libcore/num/f64.rs b/src/libcore/num/f64.rs
index 9e731e61ec49e..67dfabacd0b0e 100644
--- a/src/libcore/num/f64.rs
+++ b/src/libcore/num/f64.rs
@@ -529,7 +529,7 @@ impl num::ToStrRadix for f64 {
 #[inline(always)]
 pub fn from_str(num: &str) -> Option<f64> {
     strconv::from_str_common(num, 10u, true, true, true,
-                             strconv::ExpDec, false)
+                             strconv::ExpDec, false, false)
 }
 
 /**
@@ -562,7 +562,7 @@ pub fn from_str(num: &str) -> Option<f64> {
 #[inline(always)]
 pub fn from_str_hex(num: &str) -> Option<f64> {
     strconv::from_str_common(num, 16u, true, true, true,
-                             strconv::ExpBin, false)
+                             strconv::ExpBin, false, false)
 }
 
 /**
@@ -587,7 +587,7 @@ pub fn from_str_hex(num: &str) -> Option<f64> {
 #[inline(always)]
 pub fn from_str_radix(num: &str, rdx: uint) -> Option<f64> {
     strconv::from_str_common(num, rdx, true, true, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 impl from_str::FromStr for f64 {
diff --git a/src/libcore/num/float.rs b/src/libcore/num/float.rs
index c80d52f496b49..2508292970303 100644
--- a/src/libcore/num/float.rs
+++ b/src/libcore/num/float.rs
@@ -242,7 +242,7 @@ impl num::ToStrRadix for float {
 #[inline(always)]
 pub fn from_str(num: &str) -> Option<float> {
     strconv::from_str_common(num, 10u, true, true, true,
-                             strconv::ExpDec, false)
+                             strconv::ExpDec, false, false)
 }
 
 /**
@@ -275,7 +275,7 @@ pub fn from_str(num: &str) -> Option<float> {
 #[inline(always)]
 pub fn from_str_hex(num: &str) -> Option<float> {
     strconv::from_str_common(num, 16u, true, true, true,
-                             strconv::ExpBin, false)
+                             strconv::ExpBin, false, false)
 }
 
 /**
@@ -300,7 +300,7 @@ pub fn from_str_hex(num: &str) -> Option<float> {
 #[inline(always)]
 pub fn from_str_radix(num: &str, radix: uint) -> Option<float> {
     strconv::from_str_common(num, radix, true, true, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 impl from_str::FromStr for float {
diff --git a/src/libcore/num/int-template.rs b/src/libcore/num/int-template.rs
index a3cbd9fe7e3a4..db90ec7946537 100644
--- a/src/libcore/num/int-template.rs
+++ b/src/libcore/num/int-template.rs
@@ -202,21 +202,21 @@ impl ops::Neg<T> for T {
 #[inline(always)]
 pub fn from_str(s: &str) -> Option<T> {
     strconv::from_str_common(s, 10u, true, false, false,
-                         strconv::ExpNone, false)
+                         strconv::ExpNone, false, false)
 }
 
 /// Parse a string as a number in the given base.
 #[inline(always)]
 pub fn from_str_radix(s: &str, radix: uint) -> Option<T> {
     strconv::from_str_common(s, radix, true, false, false,
-                         strconv::ExpNone, false)
+                         strconv::ExpNone, false, false)
 }
 
 /// Parse a byte slice as a number in the given base.
 #[inline(always)]
 pub fn parse_bytes(buf: &[u8], radix: uint) -> Option<T> {
     strconv::from_str_bytes_common(buf, radix, true, false, false,
-                               strconv::ExpNone, false)
+                               strconv::ExpNone, false, false)
 }
 
 impl FromStr for T {
diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs
index 687b6344b39b2..95da5bc29f5df 100644
--- a/src/libcore/num/strconv.rs
+++ b/src/libcore/num/strconv.rs
@@ -429,6 +429,8 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
  *                  `FFp128`. The exponent string itself is always base 10.
  *                  Can conflict with `radix`, see Failure.
  * - `empty_zero` - Whether to accept a empty `buf` as a 0 or not.
+ * - `ignore_underscores` - Whether all underscores within the string should
+ *                          be ignored.
  *
  * # Return value
  * Returns `Some(n)` if `buf` parses to a number n without overflowing, and
@@ -443,16 +445,13 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
  *   between digit and exponent sign `'p'`.
  * - Fails if `radix` > 18 and `special == true` due to conflict
  *   between digit and lowest first character in `inf` and `NaN`, the `'i'`.
- *
- * # Possible improvements
- * - Could accept option to allow ignoring underscores, allowing for numbers
- *   formated like `FF_AE_FF_FF`.
  */
 pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
                                     Mul<T,T>+Sub<T,T>+Neg<T>+Add<T,T>+
                                     NumStrConv>(
         buf: &[u8], radix: uint, negative: bool, fractional: bool,
-        special: bool, exponent: ExponentFormat, empty_zero: bool
+        special: bool, exponent: ExponentFormat, empty_zero: bool,
+        ignore_underscores: bool
         ) -> Option<T> {
     match exponent {
         ExpDec if radix >= DIGIT_E_RADIX       // decimal exponent 'e'
@@ -540,6 +539,7 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
                 last_accum = accum;
             }
             None => match c {
+                '_' if ignore_underscores => {}
                 'e' | 'E' | 'p' | 'P' => {
                     exp_found = true;
                     break;                       // start of exponent
@@ -583,6 +583,7 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
                     last_accum = accum;
                 }
                 None => match c {
+                    '_' if ignore_underscores => {}
                     'e' | 'E' | 'p' | 'P' => {
                         exp_found = true;
                         break;                   // start of exponent
@@ -610,6 +611,7 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
     if exp_found {
         let c = buf[i] as char;
         let base = match (c, exponent) {
+            // c is never _ so don't need to handle specially
             ('e', ExpDec) | ('E', ExpDec) => 10u,
             ('p', ExpBin) | ('P', ExpBin) => 2u,
             _ => return None // char doesn't fit given exponent format
@@ -618,7 +620,8 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
         // parse remaining bytes as decimal integer,
         // skipping the exponent char
         let exp: Option<int> = from_str_bytes_common(
-            buf.slice(i+1, len), 10, true, false, false, ExpNone, false);
+            buf.slice(i+1, len), 10, true, false, false, ExpNone, false,
+            ignore_underscores);
 
         match exp {
             Some(exp_pow) => {
@@ -643,10 +646,12 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
 pub fn from_str_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+Mul<T,T>+
                               Sub<T,T>+Neg<T>+Add<T,T>+NumStrConv>(
         buf: &str, radix: uint, negative: bool, fractional: bool,
-        special: bool, exponent: ExponentFormat, empty_zero: bool
+        special: bool, exponent: ExponentFormat, empty_zero: bool,
+        ignore_underscores: bool
         ) -> Option<T> {
     from_str_bytes_common(str::to_bytes(buf), radix, negative,
-                            fractional, special, exponent, empty_zero)
+                          fractional, special, exponent, empty_zero,
+                          ignore_underscores)
 }
 
 #[cfg(test)]
@@ -654,13 +659,28 @@ mod test {
     use super::*;
     use option::*;
 
+    #[test]
+    fn from_str_ignore_underscores() {
+        let s : Option<u8> = from_str_common("__1__", 2, false, false, false,
+                                             ExpNone, false, true);
+        assert_eq!(s, Some(1u8));
+
+        let n : Option<u8> = from_str_common("__1__", 2, false, false, false,
+                                             ExpNone, false, false);
+        assert_eq!(n, None);
+
+        let f : Option<f32> = from_str_common("_1_._1_e_1_", 10, false, true, false,
+                                              ExpDec, false, true);
+        assert_eq!(f, Some(1.1e1f32));
+    }
+
     #[test]
     fn from_str_issue5770() {
         // try to parse 0b1_1111_1111 = 511 as a u8. Caused problems
         // since 255*2+1 == 255 (mod 256) so the overflow wasn't
         // detected.
         let n : Option<u8> = from_str_common("111111111", 2, false, false, false,
-                                             ExpNone, false);
+                                             ExpNone, false, false);
         assert_eq!(n, None);
     }
 }
diff --git a/src/libcore/num/uint-template.rs b/src/libcore/num/uint-template.rs
index 400417284a279..39af025217c93 100644
--- a/src/libcore/num/uint-template.rs
+++ b/src/libcore/num/uint-template.rs
@@ -168,21 +168,21 @@ impl ops::Neg<T> for T {
 #[inline(always)]
 pub fn from_str(s: &str) -> Option<T> {
     strconv::from_str_common(s, 10u, false, false, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 /// Parse a string as a number in the given base.
 #[inline(always)]
 pub fn from_str_radix(s: &str, radix: uint) -> Option<T> {
     strconv::from_str_common(s, radix, false, false, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 /// Parse a byte slice as a number in the given base.
 #[inline(always)]
 pub fn parse_bytes(buf: &[u8], radix: uint) -> Option<T> {
     strconv::from_str_bytes_common(buf, radix, false, false, false,
-                                   strconv::ExpNone, false)
+                                   strconv::ExpNone, false, false)
 }
 
 impl FromStr for T {

From 0c2ceb1a2e93e2e7624d384e19da6783cbb720ba Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Mon, 8 Apr 2013 00:39:28 +1000
Subject: [PATCH 3/3] libsyntax: fail lexing with an error message on an int
 literal that does not fit in 64 bits (2^64 or larger).

Stops an ICE.

Closes #5544.
---
 src/libsyntax/parse/lexer.rs          | 11 +++++++++--
 src/test/compile-fail/issue-5544-a.rs | 14 ++++++++++++++
 src/test/compile-fail/issue-5544-b.rs | 14 ++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 src/test/compile-fail/issue-5544-a.rs
 create mode 100644 src/test/compile-fail/issue-5544-b.rs

diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 5e06ecf60908e..b6ec15d86413b 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -442,7 +442,11 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
         if str::len(num_str) == 0u {
             rdr.fatal(~"no valid digits found for number");
         }
-        let parsed = u64::from_str_radix(num_str, base as uint).get();
+        let parsed = match u64::from_str_radix(num_str, base as uint) {
+            Some(p) => p,
+            None => rdr.fatal(~"int literal is too large")
+        };
+
         match tp {
           either::Left(t) => return token::LIT_INT(parsed as i64, t),
           either::Right(t) => return token::LIT_UINT(parsed, t)
@@ -503,7 +507,10 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
         if str::len(num_str) == 0u {
             rdr.fatal(~"no valid digits found for number");
         }
-        let parsed = u64::from_str_radix(num_str, base as uint).get();
+        let parsed = match u64::from_str_radix(num_str, base as uint) {
+            Some(p) => p,
+            None => rdr.fatal(~"int literal is too large")
+        };
 
         debug!("lexing %s as an unsuffixed integer literal",
                num_str);
diff --git a/src/test/compile-fail/issue-5544-a.rs b/src/test/compile-fail/issue-5544-a.rs
new file mode 100644
index 0000000000000..42a18ba5fb765
--- /dev/null
+++ b/src/test/compile-fail/issue-5544-a.rs
@@ -0,0 +1,14 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+    let _i = 18446744073709551616; // 2^64
+    //~^ ERROR int literal is too large
+}
diff --git a/src/test/compile-fail/issue-5544-b.rs b/src/test/compile-fail/issue-5544-b.rs
new file mode 100644
index 0000000000000..bbe43e652a800
--- /dev/null
+++ b/src/test/compile-fail/issue-5544-b.rs
@@ -0,0 +1,14 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+    let _i = 0xff_ffff_ffff_ffff_ffff;
+    //~^ ERROR int literal is too large
+}