From e4887220f2ba4bb90e1ea065662e4ba6d1be4d38 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 19 Jun 2022 21:47:52 -0700 Subject: [PATCH 01/10] Add Decimal256 --- arrow/src/array/array_binary.rs | 2 +- arrow/src/util/decimal.rs | 269 ++++++++++++++++++++++++-------- 2 files changed, 205 insertions(+), 66 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 3efb25888be2..ba3fa13ff32f 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -33,7 +33,7 @@ use crate::datatypes::{ }; use crate::error::{ArrowError, Result}; use crate::util::bit_util; -use crate::util::decimal::Decimal128; +use crate::util::decimal::{BasicDecimal, Decimal128}; use crate::{buffer::MutableBuffer, datatypes::DataType}; /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index b78af3acc6cd..b135ba339778 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -17,109 +17,211 @@ //! Decimal related utils +use crate::error::{ArrowError, Result}; +use num::bigint::BigInt; use std::cmp::Ordering; +pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { + /// The bit-width of the internal representation. + const BIT_WIDTH: usize; + + /// Tries to create a decimal value from precision, scale and bytes. + /// If the length of bytes isn't same as the bit width of this decimal, + /// returning an error. The bytes should be stored in little-endian order. + /// + /// Safety: + /// This method doesn't validate if the decimal value represented by the bytes + /// can be fitted into the specified precision. + fn try_new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Result + where + Self: Sized, + { + if bytes.len() == Self::BIT_WIDTH / 8 { + Ok(Self::new(precision, scale, bytes)) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "Input to Decimal{} must be {} bytes", + Self::BIT_WIDTH, + Self::BIT_WIDTH / 8 + ))) + } + } + + /// Creates a decimal value from precision, scale, and bytes. + /// + /// Safety: + /// This method doesn't check if the length of bytes is compatible with this decimal. + /// Use `try_new_from_bytes` for safe constructor. + fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self; + + /// Returns the raw bytes of the integer representation of the decimal. + fn raw_value(&self) -> &[u8]; + + /// Returns the precision of the decimal. + fn precision(&self) -> usize; + + /// Returns the scale of the decimal. + fn scale(&self) -> usize; + + /// Returns the string representation of the decimal. + fn as_string(&self) -> String { + let raw_bytes = self.raw_value(); + let integer = BigInt::from_signed_bytes_le(raw_bytes); + let value_str = integer.to_string(); + + if self.scale() == 0 { + value_str + } else { + let (sign, rest) = + value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); + + if rest.len() > self.scale() { + // Decimal separator is in the middle of the string + let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); + format!("{}.{}", whole, decimal) + } else { + // String has to be padded + format!("{}0.{:0>width$}", sign, rest, width = self.scale()) + } + } + } +} + /// Represents a decimal value with precision and scale. -/// The decimal value is represented by a signed 128-bit integer. +/// The decimal value could represented by a signed 128-bit integer. #[derive(Debug)] pub struct Decimal128 { #[allow(dead_code)] precision: usize, scale: usize, - value: i128, + value: [u8; 16], } -impl PartialOrd for Decimal128 { - fn partial_cmp(&self, other: &Self) -> Option { - assert_eq!( - self.scale, other.scale, - "Cannot compare two Decimal128 with different scale: {}, {}", - self.scale, other.scale - ); - self.value.partial_cmp(&other.value) +impl BasicDecimal for Decimal128 { + const BIT_WIDTH: usize = 128; + + fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self { + Decimal128 { + precision, + scale, + value: bytes.try_into().unwrap(), + } } -} -impl Ord for Decimal128 { - fn cmp(&self, other: &Self) -> Ordering { - assert_eq!( - self.scale, other.scale, - "Cannot compare two Decimal128 with different scale: {}, {}", - self.scale, other.scale - ); - self.value.cmp(&other.value) + fn raw_value(&self) -> &[u8] { + &self.value } -} -impl PartialEq for Decimal128 { - fn eq(&self, other: &Self) -> bool { - assert_eq!( - self.scale, other.scale, - "Cannot compare two Decimal128 with different scale: {}, {}", - self.scale, other.scale - ); - self.value.eq(&other.value) + fn precision(&self) -> usize { + self.precision } -} -impl Eq for Decimal128 {} + fn scale(&self) -> usize { + self.scale + } +} impl Decimal128 { - pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Self { - let as_array = bytes.try_into(); - let value = match as_array { - Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v), - _ => panic!("Input to Decimal128 is not 128bit integer."), - }; - + /// Creates `Decimal128` from an `i128` value. + pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { Decimal128 { precision, scale, - value, + value: value.to_le_bytes(), } } - pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { - Decimal128 { + /// Returns `i128` representation of the decimal. + pub fn as_i128(&self) -> i128 { + i128::from_le_bytes(self.value) + } +} + +impl From for i128 { + fn from(decimal: Decimal128) -> Self { + decimal.as_i128() + } +} + +/// Represents a decimal value with precision and scale. +/// The decimal value could be represented by a signed 256-bit integer. +#[derive(Debug)] +pub struct Decimal256 { + #[allow(dead_code)] + precision: usize, + scale: usize, + value: [u8; 32], +} + +impl BasicDecimal for Decimal256 { + const BIT_WIDTH: usize = 256; + + fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self { + Decimal256 { precision, scale, - value, + value: bytes.try_into().unwrap(), } } - pub fn as_i128(&self) -> i128 { - self.value + fn raw_value(&self) -> &[u8] { + &self.value } - pub fn as_string(&self) -> String { - let value_str = self.value.to_string(); + fn precision(&self) -> usize { + self.precision + } - if self.scale == 0 { - value_str - } else { - let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } else { 1 }); + fn scale(&self) -> usize { + self.scale + } +} - if rest.len() > self.scale { - // Decimal separator is in the middle of the string - let (whole, decimal) = value_str.split_at(value_str.len() - self.scale); - format!("{}.{}", whole, decimal) - } else { - // String has to be padded - format!("{}0.{:0>width$}", sign, rest, width = self.scale) +macro_rules! def_eq_ord_for_decimal { + ($ty:ident) => { + impl PartialOrd for $ty { + fn partial_cmp(&self, other: &Self) -> Option { + assert_eq!( + self.scale, other.scale, + "Cannot compare two Decimals with different scale: {}, {}", + self.scale, other.scale + ); + self.value.partial_cmp(&other.value) } } - } -} -impl From for i128 { - fn from(decimal: Decimal128) -> Self { - decimal.as_i128() - } + impl Ord for $ty { + fn cmp(&self, other: &Self) -> Ordering { + assert_eq!( + self.scale, other.scale, + "Cannot compare two Decimals with different scale: {}, {}", + self.scale, other.scale + ); + self.value.cmp(&other.value) + } + } + + impl PartialEq for $ty { + fn eq(&self, other: &Self) -> bool { + assert_eq!( + self.scale, other.scale, + "Cannot compare two Decimals with different scale: {}, {}", + self.scale, other.scale + ); + self.value.eq(&other.value) + } + } + + impl Eq for $ty {} + }; } +def_eq_ord_for_decimal!(Decimal128); +def_eq_ord_for_decimal!(Decimal256); + #[cfg(test)] mod tests { - use crate::util::decimal::Decimal128; + use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256}; #[test] fn decimal_128_to_string() { @@ -132,9 +234,46 @@ mod tests { #[test] fn decimal_128_from_bytes() { - let bytes = 100_i128.to_le_bytes(); - let value = Decimal128::new_from_bytes(5, 2, &bytes); + let mut bytes = 100_i128.to_le_bytes(); + let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); + assert_eq!(value.as_string(), "1.00"); + + bytes = (-1_i128).to_le_bytes(); + let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); + assert_eq!(value.as_string(), "-0.01"); + + bytes = i128::MAX.to_le_bytes(); + let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); + assert_eq!( + value.as_string(), + "1701411834604692317316873037158841057.27" + ); + + bytes = i128::MIN.to_le_bytes(); + let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); + assert_eq!( + value.as_string(), + "-1701411834604692317316873037158841057.28" + ); + } + + #[test] + fn decimal_256_from_bytes() { + let mut bytes = vec![0; 32]; + bytes[0..16].clone_from_slice(&100_i128.to_le_bytes()); + let value = Decimal256::try_new_from_bytes(5, 2, bytes.as_slice()).unwrap(); assert_eq!(value.as_string(), "1.00"); + + bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes()); + let value = Decimal256::try_new_from_bytes(5, 4, &bytes).unwrap(); + assert_eq!( + value.as_string(), + "17014118346046923173168730371588410.5727" + ); + + bytes = vec![255; 32]; + let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap(); + assert_eq!(value.as_string(), "-0.01"); } fn i128_func(value: impl Into) -> i128 { From 0c369fc2ed3653c0f8a5c07637e09c7755e7a215 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 20 Jun 2022 10:54:26 -0700 Subject: [PATCH 02/10] Dedup --- arrow/src/util/decimal.rs | 72 +++++++++++++-------------------------- 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index b135ba339778..e5fa70d877aa 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -97,30 +97,6 @@ pub struct Decimal128 { value: [u8; 16], } -impl BasicDecimal for Decimal128 { - const BIT_WIDTH: usize = 128; - - fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self { - Decimal128 { - precision, - scale, - value: bytes.try_into().unwrap(), - } - } - - fn raw_value(&self) -> &[u8] { - &self.value - } - - fn precision(&self) -> usize { - self.precision - } - - fn scale(&self) -> usize { - self.scale - } -} - impl Decimal128 { /// Creates `Decimal128` from an `i128` value. pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { @@ -153,32 +129,32 @@ pub struct Decimal256 { value: [u8; 32], } -impl BasicDecimal for Decimal256 { - const BIT_WIDTH: usize = 256; - - fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self { - Decimal256 { - precision, - scale, - value: bytes.try_into().unwrap(), - } - } +macro_rules! def_decimal { + ($ty:ident, $bit:expr) => { + impl BasicDecimal for $ty { + const BIT_WIDTH: usize = $bit; + + fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self { + $ty { + precision, + scale, + value: bytes.try_into().unwrap(), + } + } - fn raw_value(&self) -> &[u8] { - &self.value - } + fn raw_value(&self) -> &[u8] { + &self.value + } - fn precision(&self) -> usize { - self.precision - } + fn precision(&self) -> usize { + self.precision + } - fn scale(&self) -> usize { - self.scale - } -} + fn scale(&self) -> usize { + self.scale + } + } -macro_rules! def_eq_ord_for_decimal { - ($ty:ident) => { impl PartialOrd for $ty { fn partial_cmp(&self, other: &Self) -> Option { assert_eq!( @@ -216,8 +192,8 @@ macro_rules! def_eq_ord_for_decimal { }; } -def_eq_ord_for_decimal!(Decimal128); -def_eq_ord_for_decimal!(Decimal256); +def_decimal!(Decimal128, 128); +def_decimal!(Decimal256, 256); #[cfg(test)] mod tests { From cb4c555a26caff7f199025636377587d27e1f3c7 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 20 Jun 2022 12:40:28 -0700 Subject: [PATCH 03/10] Truncate string representation by precision --- arrow/src/util/decimal.rs | 47 ++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index e5fa70d877aa..67e684963264 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -19,7 +19,7 @@ use crate::error::{ArrowError, Result}; use num::bigint::BigInt; -use std::cmp::Ordering; +use std::cmp::{min, Ordering}; pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { /// The bit-width of the internal representation. @@ -36,6 +36,13 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { where Self: Sized, { + if precision < scale { + return Err(ArrowError::InvalidArgumentError(format!( + "Precision {} is less than scale {}", + precision, scale + ))); + } + if bytes.len() == Self::BIT_WIDTH / 8 { Ok(Self::new(precision, scale, bytes)) } else { @@ -64,6 +71,8 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { fn scale(&self) -> usize; /// Returns the string representation of the decimal. + /// If the string representation cannot be fitted with the precision of the decimal, + /// the string will be truncated. fn as_string(&self) -> String { let raw_bytes = self.raw_value(); let integer = BigInt::from_signed_bytes_le(raw_bytes); @@ -77,6 +86,11 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { if rest.len() > self.scale() { // Decimal separator is in the middle of the string + let mut bound = min(self.precision(), rest.len()); + if sign.len() == 1 { + bound += 1; + } + let value_str = value_str[0..bound].to_string(); let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); format!("{}.{}", whole, decimal) } else { @@ -99,7 +113,7 @@ pub struct Decimal128 { impl Decimal128 { /// Creates `Decimal128` from an `i128` value. - pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { + pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { Decimal128 { precision, scale, @@ -208,6 +222,13 @@ mod tests { assert_eq!(value.as_string(), "0.100"); } + #[test] + fn decimal_invalid_precision_scale() { + let bytes = 100_i128.to_le_bytes(); + let err = Decimal128::try_new_from_bytes(5, 6, &bytes); + assert!(err.is_err()); + } + #[test] fn decimal_128_from_bytes() { let mut bytes = 100_i128.to_le_bytes(); @@ -219,18 +240,24 @@ mod tests { assert_eq!(value.as_string(), "-0.01"); bytes = i128::MAX.to_le_bytes(); - let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!( - value.as_string(), - "1701411834604692317316873037158841057.27" - ); + let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap(); + assert_eq!(value.as_string(), "170141183460469231731687303715884105.72"); bytes = i128::MIN.to_le_bytes(); - let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); + let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap(); assert_eq!( value.as_string(), - "-1701411834604692317316873037158841057.28" + "-170141183460469231731687303715884105.72" ); + + // Truncated + bytes = 12345_i128.to_le_bytes(); + let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap(); + assert_eq!(value.as_string(), "1.23"); + + bytes = (-12345_i128).to_le_bytes(); + let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap(); + assert_eq!(value.as_string(), "-1.23"); } #[test] @@ -241,7 +268,7 @@ mod tests { assert_eq!(value.as_string(), "1.00"); bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes()); - let value = Decimal256::try_new_from_bytes(5, 4, &bytes).unwrap(); + let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap(); assert_eq!( value.as_string(), "17014118346046923173168730371588410.5727" From d75366ccc46369b9c98bf749e1afe6dfbf0d4212 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 13:37:13 -0700 Subject: [PATCH 04/10] Update arrow/src/util/decimal.rs Co-authored-by: Remzi Yang <59198230+HaoYang670@users.noreply.github.com> --- arrow/src/util/decimal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 67e684963264..61cf0b161249 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -86,7 +86,7 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { if rest.len() > self.scale() { // Decimal separator is in the middle of the string - let mut bound = min(self.precision(), rest.len()); + let bound = min(self.precision(), rest.len()) + sign.len(); if sign.len() == 1 { bound += 1; } From 84a2075a0b9585b0e5bcf6f8b82d372f583b2b24 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 13:38:07 -0700 Subject: [PATCH 05/10] Update arrow/src/util/decimal.rs Co-authored-by: Remzi Yang <59198230+HaoYang670@users.noreply.github.com> --- arrow/src/util/decimal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 61cf0b161249..2bd23e65f32b 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -90,7 +90,7 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { if sign.len() == 1 { bound += 1; } - let value_str = value_str[0..bound].to_string(); + let value_str = &value_str[0..bound]; let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); format!("{}.{}", whole, decimal) } else { From 3346246f9dee53171ee8d60efb4a523a4410f7fe Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 13:38:15 -0700 Subject: [PATCH 06/10] Update arrow/src/util/decimal.rs Co-authored-by: Andrew Lamb --- arrow/src/util/decimal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 2bd23e65f32b..fa85c893b23d 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -73,7 +73,7 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { /// Returns the string representation of the decimal. /// If the string representation cannot be fitted with the precision of the decimal, /// the string will be truncated. - fn as_string(&self) -> String { + fn to_string(&self) -> String { let raw_bytes = self.raw_value(); let integer = BigInt::from_signed_bytes_le(raw_bytes); let value_str = integer.to_string(); From 40c1084194ae974b55cfb043252517eac40e09fb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 14:05:00 -0700 Subject: [PATCH 07/10] For review --- arrow/src/array/array_binary.rs | 2 +- arrow/src/util/decimal.rs | 52 +++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index ba3fa13ff32f..378f48757687 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -823,7 +823,7 @@ impl DecimalArray { #[inline] pub fn value_as_string(&self, row: usize) -> String { - self.value(row).as_string() + self.value(row).to_string() } pub fn from_fixed_size_list_array( diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index fa85c893b23d..e88193b6ba3e 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -77,19 +77,17 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { let raw_bytes = self.raw_value(); let integer = BigInt::from_signed_bytes_le(raw_bytes); let value_str = integer.to_string(); + let (sign, rest) = + value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); if self.scale() == 0 { - value_str + let bound = min(self.precision(), rest.len()) + sign.len(); + let value_str = &value_str[0..bound]; + value_str.to_string() } else { - let (sign, rest) = - value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); - if rest.len() > self.scale() { // Decimal separator is in the middle of the string let bound = min(self.precision(), rest.len()) + sign.len(); - if sign.len() == 1 { - bound += 1; - } let value_str = &value_str[0..bound]; let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); format!("{}.{}", whole, decimal) @@ -216,10 +214,10 @@ mod tests { #[test] fn decimal_128_to_string() { let mut value = Decimal128::new_from_i128(5, 2, 100); - assert_eq!(value.as_string(), "1.00"); + assert_eq!(value.to_string(), "1.00"); value = Decimal128::new_from_i128(5, 3, 100); - assert_eq!(value.as_string(), "0.100"); + assert_eq!(value.to_string(), "0.100"); } #[test] @@ -233,31 +231,31 @@ mod tests { fn decimal_128_from_bytes() { let mut bytes = 100_i128.to_le_bytes(); let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.as_string(), "1.00"); + assert_eq!(value.to_string(), "1.00"); bytes = (-1_i128).to_le_bytes(); let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.as_string(), "-0.01"); + assert_eq!(value.to_string(), "-0.01"); bytes = i128::MAX.to_le_bytes(); let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap(); - assert_eq!(value.as_string(), "170141183460469231731687303715884105.72"); + assert_eq!(value.to_string(), "170141183460469231731687303715884105.72"); bytes = i128::MIN.to_le_bytes(); let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap(); assert_eq!( - value.as_string(), + value.to_string(), "-170141183460469231731687303715884105.72" ); // Truncated bytes = 12345_i128.to_le_bytes(); let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap(); - assert_eq!(value.as_string(), "1.23"); + assert_eq!(value.to_string(), "1.23"); bytes = (-12345_i128).to_le_bytes(); let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap(); - assert_eq!(value.as_string(), "-1.23"); + assert_eq!(value.to_string(), "-1.23"); } #[test] @@ -265,18 +263,36 @@ mod tests { let mut bytes = vec![0; 32]; bytes[0..16].clone_from_slice(&100_i128.to_le_bytes()); let value = Decimal256::try_new_from_bytes(5, 2, bytes.as_slice()).unwrap(); - assert_eq!(value.as_string(), "1.00"); + assert_eq!(value.to_string(), "1.00"); bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes()); let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap(); assert_eq!( - value.as_string(), + value.to_string(), "17014118346046923173168730371588410.5727" ); + // i128 maximum + 1 + bytes[0..16].clone_from_slice(&0_i128.to_le_bytes()); + bytes[15] = 128; + let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap(); + assert_eq!( + value.to_string(), + "17014118346046923173168730371588410.5728" + ); + + // smaller than i128 minimum + bytes = vec![255; 32]; + bytes[31] = 128; + let value = Decimal256::try_new_from_bytes(79, 4, &bytes).unwrap(); + assert_eq!( + value.to_string(), + "-5744373177007483132341216834415376678658315645522012356644966081642565415.7313" + ); + bytes = vec![255; 32]; let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.as_string(), "-0.01"); + assert_eq!(value.to_string(), "-0.01"); } fn i128_func(value: impl Into) -> i128 { From 31554412c1b7af25685fba3151b32af61646a69b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 14:14:27 -0700 Subject: [PATCH 08/10] Fix clippy --- arrow/src/util/decimal.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index e88193b6ba3e..da8455d30176 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -84,17 +84,15 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { let bound = min(self.precision(), rest.len()) + sign.len(); let value_str = &value_str[0..bound]; value_str.to_string() + } else if rest.len() > self.scale() { + // Decimal separator is in the middle of the string + let bound = min(self.precision(), rest.len()) + sign.len(); + let value_str = &value_str[0..bound]; + let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); + format!("{}.{}", whole, decimal) } else { - if rest.len() > self.scale() { - // Decimal separator is in the middle of the string - let bound = min(self.precision(), rest.len()) + sign.len(); - let value_str = &value_str[0..bound]; - let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); - format!("{}.{}", whole, decimal) - } else { - // String has to be padded - format!("{}0.{:0>width$}", sign, rest, width = self.scale()) - } + // String has to be padded + format!("{}0.{:0>width$}", sign, rest, width = self.scale()) } } } From d0677df347b0e5f7ff296c18a6c5e9de1af74185 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 21:06:21 -0700 Subject: [PATCH 09/10] For review --- arrow/src/util/decimal.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index da8455d30176..2abf4fbf65ed 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -79,14 +79,13 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { let value_str = integer.to_string(); let (sign, rest) = value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); + let bound = min(self.precision(), rest.len()) + sign.len(); if self.scale() == 0 { - let bound = min(self.precision(), rest.len()) + sign.len(); let value_str = &value_str[0..bound]; value_str.to_string() } else if rest.len() > self.scale() { // Decimal separator is in the middle of the string - let bound = min(self.precision(), rest.len()) + sign.len(); let value_str = &value_str[0..bound]; let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); format!("{}.{}", whole, decimal) From 32be11ad98d0fe8e86ce752c4eb9dfd6e6860c3d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jun 2022 23:11:36 -0700 Subject: [PATCH 10/10] Move another one --- arrow/src/util/decimal.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 2abf4fbf65ed..7649b6b417fc 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -80,13 +80,12 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq { let (sign, rest) = value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); let bound = min(self.precision(), rest.len()) + sign.len(); + let value_str = &value_str[0..bound]; if self.scale() == 0 { - let value_str = &value_str[0..bound]; value_str.to_string() } else if rest.len() > self.scale() { // Decimal separator is in the middle of the string - let value_str = &value_str[0..bound]; let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); format!("{}.{}", whole, decimal) } else {