Skip to content

Commit

Permalink
Merge pull request #212 from Alexhuszagh/patch-fmt
Browse files Browse the repository at this point in the history
Patch bug when parsing floats with decimal mantissa radixes and non-decimal exponent radixes.
  • Loading branch information
Alexhuszagh authored Jan 12, 2025
2 parents 44a41f4 + 6b7b33b commit b18cbc3
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 40 deletions.
30 changes: 16 additions & 14 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Additional trait impls for `f16` and `bf16` to better match Rust's interface.
- Added `Options::buffer_size_const` for integer and float writers.
- Added `build_checked` and `build_unchecked` to our `NumberFormatBuilder` API.
- Added `build_checked` to our `Options` API.
- Added `has_digit_separator` to `NumberFormat`.
- Re-export `NumberFormat` to our other crates.
- Additional trait impls for `f16` and `bf16` to better match Rust's interface (#192).
- Added `Options::buffer_size_const` for integer and float writers (#204).
- Added `build_checked` and `build_unchecked` to our `NumberFormatBuilder` API (#204).
- Added `build_checked` to our `Options` API (#204).
- Added `has_digit_separator` to `NumberFormat` (#204).
- Re-export `NumberFormat` to our other crates (#204).
- Add `Options::from_radix` for all options for similar APIs for each (#208).

### Changed

- Lowered the MSRV from 1.63.0 to 1.61.0 and adds support for most testing on 1.60.0.
- Reduced the required buffer size for integer and float writers when using `buffer_size` and `buffer_size_const` for decimal numbers.
- Deprecated `NumberFormatBuilder::build` due to a lack of validation.
- Deprecated `Options::set_*` in our write float API since options should be considered immutable.
- Removed `static_assertions` dependency.
- Migrate to using an external crate for our half-precision floats.
- Simplify feature detection internally to make auto-doc more reliable.
- Lowered the MSRV from 1.63.0 to 1.61.0 and adds support for most testing on 1.60.0 (#204).
- Reduced the required buffer size for integer and float writers when using `buffer_size` and `buffer_size_const` for decimal numbers (#204).
- Deprecated `NumberFormatBuilder::build` due to a lack of validation (#204).
- Deprecated `Options::set_*` in our write float API since options should be considered immutable (#204).
- Removed `static_assertions` dependency (#204).
- Migrate to using an external crate for our half-precision floats (#198).
- Simplify feature detection internally to make auto-doc more reliable (#207).

### Fixed

- Bug where the `radix` feature wasn't enabling `power-of-two` in `lexical-core` or `lexical`.
- Bug where the `radix` feature wasn't enabling `power-of-two` in `lexical-core` or `lexical` (#204).
- Fixed performance issues due to a lack of inlining on the Eisel-Lemire algorithm (#210).
- Issue with parsing non-decimal exponent radixes when using a decimal mantissa radix for floating-point numbers (#212).

## [1.0.5] 2024-12-08

Expand Down
57 changes: 32 additions & 25 deletions lexical-parse-float/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use crate::slow::slow_radix;
// API
// ---

/// Check f radix is a power-of-2.
/// Check if the radix is a power-of-2.
#[cfg(feature = "power-of-two")]
macro_rules! is_power_two {
($radix:expr) => {
Expand All @@ -46,27 +46,31 @@ macro_rules! is_power_two {
}

/// Check if the radix is valid and error otherwise
#[cfg(feature = "power-of-two")]
macro_rules! check_radix {
($format:ident) => {{
let format = NumberFormat::<{ $format }> {};
#[cfg(feature = "power-of-two")]
{
if format.radix() != format.exponent_base() {
let valid_radix = matches!(
(format.radix(), format.exponent_base()),
(4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4)
);
if !valid_radix {
return Err(Error::InvalidRadix);
}
if format.error() != Error::Success {
return Err(Error::InvalidRadix);
} else if format.radix() != format.exponent_base() {
let valid_radix = matches!(
(format.radix(), format.exponent_base()),
(4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4)
);
if !valid_radix {
return Err(Error::InvalidRadix);
}
}
}};
}

#[cfg(not(feature = "power-of-two"))]
{
if format.radix() != format.exponent_base() {
return Err(Error::InvalidRadix);
}
/// Check if the decimal radix is valid and error otherwise.
#[cfg(not(feature = "power-of-two"))]
macro_rules! check_radix {
($format:ident) => {{
let format = NumberFormat::<{ $format }> {};
if format.error() != Error::Success {
return Err(Error::InvalidRadix);
}
}};
}
Expand Down Expand Up @@ -558,7 +562,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
let start = byte.clone();
#[cfg(not(feature = "compact"))]
parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
parse_digits::<_, _, FORMAT>(byte.integer_iter(), |digit| {
parse_digits(byte.integer_iter(), format.mantissa_radix(), |digit| {
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
});
let mut n_digits = byte.current_count() - start.current_count();
Expand Down Expand Up @@ -615,7 +619,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
let before = byte.clone();
#[cfg(not(feature = "compact"))]
parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa);
parse_digits::<_, _, FORMAT>(byte.fraction_iter(), |digit| {
parse_digits(byte.fraction_iter(), format.mantissa_radix(), |digit| {
mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
});
n_after_dot = byte.current_count() - before.current_count();
Expand Down Expand Up @@ -697,9 +701,9 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(

let is_negative_exponent = parse_exponent_sign(&mut byte)?;
let before = byte.current_count();
parse_digits::<_, _, FORMAT>(byte.exponent_iter(), |digit| {
parse_digits(byte.exponent_iter(), format.exponent_radix(), |digit| {
if explicit_exponent < 0x10000000 {
explicit_exponent *= format.radix() as i64;
explicit_exponent *= format.exponent_radix() as i64;
explicit_exponent += digit as i64;
}
});
Expand Down Expand Up @@ -734,7 +738,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(

// Get the number of parsed digits (total), and redo if we had overflow.
let end = byte.cursor();
let mut step = u64_step(format.radix());
let mut step = u64_step(format.mantissa_radix());
let mut many_digits = false;
#[cfg(feature = "format")]
if !format.required_mantissa_digits() && n_digits == 0 {
Expand Down Expand Up @@ -860,13 +864,11 @@ pub fn parse_complete_number<'a, const FORMAT: u128>(

/// Iteratively parse and consume digits from bytes.
#[inline(always)]
pub fn parse_digits<'a, Iter, Cb, const FORMAT: u128>(mut iter: Iter, mut cb: Cb)
pub fn parse_digits<'a, Iter, Cb>(mut iter: Iter, radix: u32, mut cb: Cb)
where
Iter: DigitsIter<'a>,
Cb: FnMut(u32),
{
let format = NumberFormat::<{ FORMAT }> {};
let radix = format.radix();
while let Some(&c) = iter.peek() {
match char_to_digit_const(c, radix) {
Some(v) => cb(v),
Expand All @@ -881,6 +883,10 @@ where
}

/// Iteratively parse and consume digits in intervals of 8.
///
/// # Preconditions
///
/// The iterator must be of the significant digits, not the exponent.
#[inline(always)]
#[cfg(not(feature = "compact"))]
pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64)
Expand All @@ -904,7 +910,8 @@ where
///
/// # Preconditions
///
/// There must be at least `step` digits left in iterator.
/// There must be at least `step` digits left in iterator. The iterator almost
/// must be of the significant digits, not the exponent.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
mut iter: Iter,
Expand Down
2 changes: 1 addition & 1 deletion lexical-parse-float/tests/parse_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ fn parse_digits_test() {
let mut mantissa: u64 = 0;
let digits = b"1234567890123456789012345";
let mut byte = digits.bytes::<{ FORMAT }>();
parse::parse_digits::<_, _, FORMAT>(byte.integer_iter(), |digit| {
parse::parse_digits(byte.integer_iter(), 10, |digit| {
mantissa = mantissa.wrapping_mul(10).wrapping_add(digit as _);
});
assert_eq!(mantissa, 1096246371337559929);
Expand Down
31 changes: 31 additions & 0 deletions lexical-util/src/feature_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,24 +68,41 @@ impl<const FORMAT: u128> NumberFormat<FORMAT> {
/// Create new instance (for methods and validation).
///
/// This uses the same settings as in the `FORMAT` packed struct.
#[inline(always)]
pub const fn new() -> Self {
Self {}
}

// VALIDATION

/// Determine if the number format is valid.
#[inline(always)]
pub const fn is_valid(&self) -> bool {
self.error().is_success()
}

/// Get the error type from the format.
///
/// If [`Error::Success`] is returned, then no error occurred.
#[inline(always)]
pub const fn error(&self) -> Error {
format_error_impl(FORMAT)
}

/// Determine if the radixes in the number format are valid.
#[inline(always)]
pub const fn is_valid_radix(&self) -> bool {
self.error_radix().is_success()
}

/// Get the error type from the radix-only for the format.
///
/// If [`Error::Success`] is returned, then no error occurred.
#[inline(always)]
pub const fn error_radix(&self) -> Error {
radix_error_impl(FORMAT)
}

// NON-DIGIT SEPARATOR FLAGS & MASKS

/// If digits are required before the decimal point.
Expand Down Expand Up @@ -1372,6 +1389,20 @@ impl<const FORMAT: u128> Default for NumberFormat<FORMAT> {
}
}

/// Get if the radix is valid.
#[inline(always)]
pub(crate) const fn radix_error_impl(format: u128) -> Error {
if !flags::is_valid_radix(flags::mantissa_radix(format)) {
Error::InvalidMantissaRadix
} else if !flags::is_valid_radix(flags::exponent_base(format)) {
Error::InvalidExponentBase
} else if !flags::is_valid_radix(flags::exponent_radix(format)) {
Error::InvalidExponentRadix
} else {
Error::Success
}
}

/// Get the error type from the format.
#[inline(always)]
#[allow(clippy::if_same_then_else)] // reason="all are different logic conditions"
Expand Down
31 changes: 31 additions & 0 deletions lexical-util/src/not_feature_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,39 @@ impl<const FORMAT: u128> NumberFormat<FORMAT> {
/// Create new instance (for methods and validation).
///
/// This uses the same settings as in the `FORMAT` packed struct.
#[inline(always)]
pub const fn new() -> Self {
Self {}
}

// VALIDATION

/// Determine if the number format is valid.
#[inline(always)]
pub const fn is_valid(&self) -> bool {
self.error().is_success()
}

/// Get the error type from the format.
#[inline(always)]
pub const fn error(&self) -> Error {
format_error_impl(FORMAT)
}

/// Determine if the radixes in the number format are valid.
#[inline(always)]
pub const fn is_valid_radix(&self) -> bool {
self.error_radix().is_success()
}

/// Get the error type from the radix-only for the format.
///
/// If [`Error::Success`] is returned, then no error occurred.
#[inline(always)]
pub const fn error_radix(&self) -> Error {
radix_error_impl(FORMAT)
}

// NON-DIGIT SEPARATOR FLAGS & MASKS

/// If digits are required before the decimal point.
Expand Down Expand Up @@ -1376,6 +1393,20 @@ impl<const FORMAT: u128> Default for NumberFormat<FORMAT> {
}
}

/// Get if the radix is valid.
#[inline(always)]
pub(crate) const fn radix_error_impl(format: u128) -> Error {
if !flags::is_valid_radix(flags::mantissa_radix(format)) {
Error::InvalidMantissaRadix
} else if !flags::is_valid_radix(flags::exponent_base(format)) {
Error::InvalidExponentBase
} else if !flags::is_valid_radix(flags::exponent_radix(format)) {
Error::InvalidExponentRadix
} else {
Error::Success
}
}

/// Get the error type from the format.
#[inline(always)]
pub(crate) const fn format_error_impl(format: u128) -> Error {
Expand Down

0 comments on commit b18cbc3

Please sign in to comment.