Merge pull request #212 from Alexhuszagh/patch-fmt

Patch bug when parsing floats with decimal mantissa radixes and non-decimal exponent radixes.
Alexhuszagh · Jan 12, 2025 · b18cbc3 · b18cbc3
2 parents 44a41f4 + 6b7b33b
commit b18cbc3
Show file tree

Hide file tree

Showing 5 changed files with 111 additions and 40 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -9,27 +9,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Additional trait impls for `f16` and `bf16` to better match Rust's interface.
-- Added `Options::buffer_size_const` for integer and float writers.
-- Added `build_checked` and `build_unchecked` to our `NumberFormatBuilder` API.
-- Added `build_checked` to our `Options` API.
-- Added `has_digit_separator` to `NumberFormat`.
-- Re-export `NumberFormat` to our other crates.
+- Additional trait impls for `f16` and `bf16` to better match Rust's interface (#192).
+- Added `Options::buffer_size_const` for integer and float writers (#204).
+- Added `build_checked` and `build_unchecked` to our `NumberFormatBuilder` API (#204).
+- Added `build_checked` to our `Options` API (#204).
+- Added `has_digit_separator` to `NumberFormat` (#204).
+- Re-export `NumberFormat` to our other crates (#204).
+- Add `Options::from_radix` for all options for similar APIs for each (#208).
 
 ### Changed
 
-- Lowered the MSRV from 1.63.0 to 1.61.0 and adds support for most testing on 1.60.0.
-- Reduced the required buffer size for integer and float writers when using `buffer_size` and `buffer_size_const` for decimal numbers.
-- Deprecated `NumberFormatBuilder::build` due to a lack of validation.
-- Deprecated `Options::set_*` in our write float API since options should be considered immutable.
-- Removed `static_assertions` dependency.
-- Migrate to using an external crate for our half-precision floats.
-- Simplify feature detection internally to make auto-doc more reliable.
+- Lowered the MSRV from 1.63.0 to 1.61.0 and adds support for most testing on 1.60.0 (#204).
+- Reduced the required buffer size for integer and float writers when using `buffer_size` and `buffer_size_const` for decimal numbers (#204).
+- Deprecated `NumberFormatBuilder::build` due to a lack of validation (#204).
+- Deprecated `Options::set_*` in our write float API since options should be considered immutable (#204).
+- Removed `static_assertions` dependency (#204).
+- Migrate to using an external crate for our half-precision floats (#198).
+- Simplify feature detection internally to make auto-doc more reliable (#207).
 
 ### Fixed
 
-- Bug where the `radix` feature wasn't enabling `power-of-two` in `lexical-core` or `lexical`.
+- Bug where the `radix` feature wasn't enabling `power-of-two` in `lexical-core` or `lexical` (#204).
 - Fixed performance issues due to a lack of inlining on the Eisel-Lemire algorithm (#210).
+- Issue with parsing non-decimal exponent radixes when using a decimal mantissa radix for floating-point numbers (#212).
 
 ## [1.0.5] 2024-12-08
 

diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs
@@ -37,7 +37,7 @@ use crate::slow::slow_radix;
 // API
 // ---
 
-/// Check f radix is a power-of-2.
+/// Check if the radix is a power-of-2.
 #[cfg(feature = "power-of-two")]
 macro_rules! is_power_two {
     ($radix:expr) => {
@@ -46,27 +46,31 @@ macro_rules! is_power_two {
 }
 
 /// Check if the radix is valid and error otherwise
+#[cfg(feature = "power-of-two")]
 macro_rules! check_radix {
     ($format:ident) => {{
         let format = NumberFormat::<{ $format }> {};
-        #[cfg(feature = "power-of-two")]
-        {
-            if format.radix() != format.exponent_base() {
-                let valid_radix = matches!(
-                    (format.radix(), format.exponent_base()),
-                    (4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4)
-                );
-                if !valid_radix {
-                    return Err(Error::InvalidRadix);
-                }
+        if format.error() != Error::Success {
+            return Err(Error::InvalidRadix);
+        } else if format.radix() != format.exponent_base() {
+            let valid_radix = matches!(
+                (format.radix(), format.exponent_base()),
+                (4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4)
+            );
+            if !valid_radix {
+                return Err(Error::InvalidRadix);
             }
         }
+    }};
+}
 
-        #[cfg(not(feature = "power-of-two"))]
-        {
-            if format.radix() != format.exponent_base() {
-                return Err(Error::InvalidRadix);
-            }
+/// Check if the decimal radix is valid and error otherwise.
+#[cfg(not(feature = "power-of-two"))]
+macro_rules! check_radix {
+    ($format:ident) => {{
+        let format = NumberFormat::<{ $format }> {};
+        if format.error() != Error::Success {
+            return Err(Error::InvalidRadix);
         }
     }};
 }
@@ -558,7 +562,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
     let start = byte.clone();
     #[cfg(not(feature = "compact"))]
     parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
-    parse_digits::<_, _, FORMAT>(byte.integer_iter(), |digit| {
+    parse_digits(byte.integer_iter(), format.mantissa_radix(), |digit| {
         mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
     });
     let mut n_digits = byte.current_count() - start.current_count();
@@ -615,7 +619,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
         let before = byte.clone();
         #[cfg(not(feature = "compact"))]
         parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa);
-        parse_digits::<_, _, FORMAT>(byte.fraction_iter(), |digit| {
+        parse_digits(byte.fraction_iter(), format.mantissa_radix(), |digit| {
             mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
         });
         n_after_dot = byte.current_count() - before.current_count();
@@ -697,9 +701,9 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
 
         let is_negative_exponent = parse_exponent_sign(&mut byte)?;
         let before = byte.current_count();
-        parse_digits::<_, _, FORMAT>(byte.exponent_iter(), |digit| {
+        parse_digits(byte.exponent_iter(), format.exponent_radix(), |digit| {
             if explicit_exponent < 0x10000000 {
-                explicit_exponent *= format.radix() as i64;
+                explicit_exponent *= format.exponent_radix() as i64;
                 explicit_exponent += digit as i64;
             }
         });
@@ -734,7 +738,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
 
     // Get the number of parsed digits (total), and redo if we had overflow.
     let end = byte.cursor();
-    let mut step = u64_step(format.radix());
+    let mut step = u64_step(format.mantissa_radix());
     let mut many_digits = false;
     #[cfg(feature = "format")]
     if !format.required_mantissa_digits() && n_digits == 0 {
@@ -860,13 +864,11 @@ pub fn parse_complete_number<'a, const FORMAT: u128>(
 
 /// Iteratively parse and consume digits from bytes.
 #[inline(always)]
-pub fn parse_digits<'a, Iter, Cb, const FORMAT: u128>(mut iter: Iter, mut cb: Cb)
+pub fn parse_digits<'a, Iter, Cb>(mut iter: Iter, radix: u32, mut cb: Cb)
 where
     Iter: DigitsIter<'a>,
     Cb: FnMut(u32),
 {
-    let format = NumberFormat::<{ FORMAT }> {};
-    let radix = format.radix();
     while let Some(&c) = iter.peek() {
         match char_to_digit_const(c, radix) {
             Some(v) => cb(v),
@@ -881,6 +883,10 @@ where
 }
 
 /// Iteratively parse and consume digits in intervals of 8.
+///
+/// # Preconditions
+///
+/// The iterator must be of the significant digits, not the exponent.
 #[inline(always)]
 #[cfg(not(feature = "compact"))]
 pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64)
@@ -904,7 +910,8 @@ where
 ///
 /// # Preconditions
 ///
-/// There must be at least `step` digits left in iterator.
+/// There must be at least `step` digits left in iterator. The iterator almost
+/// must be of the significant digits, not the exponent.
 #[cfg_attr(not(feature = "compact"), inline(always))]
 pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
     mut iter: Iter,

diff --git a/lexical-parse-float/tests/parse_tests.rs b/lexical-parse-float/tests/parse_tests.rs
@@ -154,7 +154,7 @@ fn parse_digits_test() {
     let mut mantissa: u64 = 0;
     let digits = b"1234567890123456789012345";
     let mut byte = digits.bytes::<{ FORMAT }>();
-    parse::parse_digits::<_, _, FORMAT>(byte.integer_iter(), |digit| {
+    parse::parse_digits(byte.integer_iter(), 10, |digit| {
         mantissa = mantissa.wrapping_mul(10).wrapping_add(digit as _);
     });
     assert_eq!(mantissa, 1096246371337559929);

diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs
@@ -68,24 +68,41 @@ impl<const FORMAT: u128> NumberFormat<FORMAT> {
     /// Create new instance (for methods and validation).
     ///
     /// This uses the same settings as in the `FORMAT` packed struct.
+    #[inline(always)]
     pub const fn new() -> Self {
         Self {}
     }
 
     // VALIDATION
 
     /// Determine if the number format is valid.
+    #[inline(always)]
     pub const fn is_valid(&self) -> bool {
         self.error().is_success()
     }
 
     /// Get the error type from the format.
     ///
     /// If [`Error::Success`] is returned, then no error occurred.
+    #[inline(always)]
     pub const fn error(&self) -> Error {
         format_error_impl(FORMAT)
     }
 
+    /// Determine if the radixes in the number format are valid.
+    #[inline(always)]
+    pub const fn is_valid_radix(&self) -> bool {
+        self.error_radix().is_success()
+    }
+
+    /// Get the error type from the radix-only for the format.
+    ///
+    /// If [`Error::Success`] is returned, then no error occurred.
+    #[inline(always)]
+    pub const fn error_radix(&self) -> Error {
+        radix_error_impl(FORMAT)
+    }
+
     // NON-DIGIT SEPARATOR FLAGS & MASKS
 
     /// If digits are required before the decimal point.
@@ -1372,6 +1389,20 @@ impl<const FORMAT: u128> Default for NumberFormat<FORMAT> {
     }
 }
 
+/// Get if the radix is valid.
+#[inline(always)]
+pub(crate) const fn radix_error_impl(format: u128) -> Error {
+    if !flags::is_valid_radix(flags::mantissa_radix(format)) {
+        Error::InvalidMantissaRadix
+    } else if !flags::is_valid_radix(flags::exponent_base(format)) {
+        Error::InvalidExponentBase
+    } else if !flags::is_valid_radix(flags::exponent_radix(format)) {
+        Error::InvalidExponentRadix
+    } else {
+        Error::Success
+    }
+}
+
 /// Get the error type from the format.
 #[inline(always)]
 #[allow(clippy::if_same_then_else)] // reason="all are different logic conditions"

diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs
@@ -74,22 +74,39 @@ impl<const FORMAT: u128> NumberFormat<FORMAT> {
     /// Create new instance (for methods and validation).
     ///
     /// This uses the same settings as in the `FORMAT` packed struct.
+    #[inline(always)]
     pub const fn new() -> Self {
         Self {}
     }
 
     // VALIDATION
 
     /// Determine if the number format is valid.
+    #[inline(always)]
     pub const fn is_valid(&self) -> bool {
         self.error().is_success()
     }
 
     /// Get the error type from the format.
+    #[inline(always)]
     pub const fn error(&self) -> Error {
         format_error_impl(FORMAT)
     }
 
+    /// Determine if the radixes in the number format are valid.
+    #[inline(always)]
+    pub const fn is_valid_radix(&self) -> bool {
+        self.error_radix().is_success()
+    }
+
+    /// Get the error type from the radix-only for the format.
+    ///
+    /// If [`Error::Success`] is returned, then no error occurred.
+    #[inline(always)]
+    pub const fn error_radix(&self) -> Error {
+        radix_error_impl(FORMAT)
+    }
+
     // NON-DIGIT SEPARATOR FLAGS & MASKS
 
     /// If digits are required before the decimal point.
@@ -1376,6 +1393,20 @@ impl<const FORMAT: u128> Default for NumberFormat<FORMAT> {
     }
 }
 
+/// Get if the radix is valid.
+#[inline(always)]
+pub(crate) const fn radix_error_impl(format: u128) -> Error {
+    if !flags::is_valid_radix(flags::mantissa_radix(format)) {
+        Error::InvalidMantissaRadix
+    } else if !flags::is_valid_radix(flags::exponent_base(format)) {
+        Error::InvalidExponentBase
+    } else if !flags::is_valid_radix(flags::exponent_radix(format)) {
+        Error::InvalidExponentRadix
+    } else {
+        Error::Success
+    }
+}
+
 /// Get the error type from the format.
 #[inline(always)]
 pub(crate) const fn format_error_impl(format: u128) -> Error {