From 32f3605abd69c12deecb8b989b8885f69cab09ed Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 29 Jul 2022 11:19:58 -0700 Subject: [PATCH 1/2] Rename DataType::Decimal to DataType::Decimal128 --- arrow/src/array/array.rs | 4 +- arrow/src/array/array_decimal.rs | 22 ++-- arrow/src/array/builder/decimal_builder.rs | 4 +- arrow/src/array/builder/struct_builder.rs | 2 +- arrow/src/array/data.rs | 14 +-- arrow/src/array/equal/decimal.rs | 2 +- arrow/src/array/equal/mod.rs | 2 +- arrow/src/array/ord.rs | 2 +- arrow/src/array/transform/mod.rs | 6 +- arrow/src/compute/kernels/cast.rs | 34 ++--- arrow/src/compute/kernels/sort.rs | 2 +- arrow/src/compute/kernels/take.rs | 2 +- arrow/src/csv/reader.rs | 6 +- arrow/src/csv/writer.rs | 2 +- arrow/src/datatypes/datatype.rs | 15 ++- arrow/src/datatypes/ffi.rs | 8 +- arrow/src/datatypes/field.rs | 2 +- arrow/src/ffi.rs | 4 +- arrow/src/ipc/convert.rs | 6 +- arrow/src/ipc/reader.rs | 2 +- arrow/src/util/display.rs | 2 +- arrow/src/util/integration_util.rs | 2 +- integration-testing/src/lib.rs | 2 +- parquet/src/arrow/array_reader/builder.rs | 116 ++++++++---------- .../src/arrow/array_reader/primitive_array.rs | 2 +- parquet/src/arrow/arrow_writer/levels.rs | 2 +- parquet/src/arrow/arrow_writer/mod.rs | 6 +- parquet/src/arrow/schema.rs | 17 +-- parquet/src/arrow/schema/primitive.rs | 12 +- 29 files changed, 154 insertions(+), 148 deletions(-) diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index 5c5231296316..422916996cde 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -482,7 +482,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef { dt => panic!("Unexpected dictionary key type {:?}", dt), }, DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef, - DataType::Decimal(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef, + DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef, DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef, dt => panic!("Unexpected data type {:?}", dt), } @@ -647,7 +647,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { ) }) } - DataType::Decimal(_, _) => { + DataType::Decimal128(_, _) => { new_null_sized_decimal(data_type, length, std::mem::size_of::()) } DataType::Decimal256(_, _) => new_null_sized_decimal(data_type, length, 32), diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 473160858201..6a453fc922e0 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -56,7 +56,7 @@ use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256}; /// .with_precision_and_scale(23, 6) /// .unwrap(); /// -/// assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type()); +/// assert_eq!(&DataType::Decimal128(23, 6), decimal_array.data_type()); /// assert_eq!(8_887_000_000_i128, decimal_array.value(0).as_i128()); /// assert_eq!("8887.000000", decimal_array.value_as_string(0)); /// assert_eq!(3, decimal_array.len()); @@ -170,7 +170,7 @@ pub trait BasicDecimalArray>: Self::VALUE_LENGTH, ); let data_type = if Self::VALUE_LENGTH == 16 { - DataType::Decimal(precision, scale) + DataType::Decimal128(precision, scale) } else { DataType::Decimal256(precision, scale) }; @@ -206,7 +206,7 @@ pub trait BasicDecimalArray>: let list_offset = v.offset(); let child_offset = child_data.offset(); let data_type = if Self::VALUE_LENGTH == 16 { - DataType::Decimal(precision, scale) + DataType::Decimal128(precision, scale) } else { DataType::Decimal256(precision, scale) }; @@ -314,11 +314,11 @@ impl Decimal128Array { assert_eq!( self.data.data_type(), - &DataType::Decimal(self.precision, self.scale) + &DataType::Decimal128(self.precision, self.scale) ); // safety: self.data is valid DataType::Decimal as checked above - let new_data_type = DataType::Decimal(precision, scale); + let new_data_type = DataType::Decimal128(precision, scale); self.precision = precision; self.scale = scale; self.data = self.data.with_data_type(new_data_type); @@ -328,7 +328,7 @@ impl Decimal128Array { /// The default precision and scale used when not specified. pub fn default_type() -> DataType { // Keep maximum precision - DataType::Decimal(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE) + DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE) } } @@ -341,7 +341,7 @@ impl From for Decimal128Array { ); let values = data.buffers()[0].as_ptr(); let (precision, scale) = match data.data_type() { - DataType::Decimal(precision, scale) => (*precision, *scale), + DataType::Decimal128(precision, scale) => (*precision, *scale), _ => panic!("Expected data type to be Decimal"), }; Self { @@ -523,7 +523,7 @@ mod tests { 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]; - let array_data = ArrayData::builder(DataType::Decimal(38, 6)) + let array_data = ArrayData::builder(DataType::Decimal128(38, 6)) .len(2) .add_buffer(Buffer::from(&values[..])) .build() @@ -580,7 +580,7 @@ mod tests { fn test_decimal_from_iter_values() { let array = Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); assert_eq!(array.len(), 3); - assert_eq!(array.data_type(), &DataType::Decimal(38, 10)); + assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); assert_eq!(-100_i128, array.value(0).into()); assert!(!array.is_null(0)); assert_eq!(0_i128, array.value(1).into()); @@ -594,7 +594,7 @@ mod tests { let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect(); assert_eq!(array.len(), 3); - assert_eq!(array.data_type(), &DataType::Decimal(38, 10)); + assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); assert_eq!(-100_i128, array.value(0).into()); assert!(!array.is_null(0)); assert!(array.is_null(1)); @@ -665,7 +665,7 @@ mod tests { .with_precision_and_scale(20, 2) .unwrap(); - assert_eq!(arr.data_type(), &DataType::Decimal(20, 2)); + assert_eq!(arr.data_type(), &DataType::Decimal128(20, 2)); assert_eq!(arr.precision(), 20); assert_eq!(arr.scale(), 2); diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index d015d3dcecda..81b37764b846 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -248,7 +248,7 @@ mod tests { builder.append_value(-8_887_000_000_i128).unwrap(); let decimal_array: Decimal128Array = builder.finish(); - assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type()); + assert_eq!(&DataType::Decimal128(38, 6), decimal_array.data_type()); assert_eq!(3, decimal_array.len()); assert_eq!(1, decimal_array.null_count()); assert_eq!(32, decimal_array.value_offset(2)); @@ -268,7 +268,7 @@ mod tests { .unwrap(); let decimal_array: Decimal128Array = builder.finish(); - assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type()); + assert_eq!(&DataType::Decimal128(38, 6), decimal_array.data_type()); assert_eq!(3, decimal_array.len()); assert_eq!(1, decimal_array.null_count()); assert_eq!(32, decimal_array.value_offset(2)); diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs index 373a84582831..554e3c553db5 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow/src/array/builder/struct_builder.rs @@ -112,7 +112,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) } - DataType::Decimal(precision, scale) => { + DataType::Decimal128(precision, scale) => { Box::new(Decimal128Builder::new(capacity, *precision, *scale)) } DataType::Utf8 => Box::new(StringBuilder::new(capacity)), diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index c38107b25875..b927c32c7c6e 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -193,7 +193,7 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff DataType::FixedSizeList(_, _) | DataType::Struct(_) => { [empty_buffer, MutableBuffer::new(0)] } - DataType::Decimal(_, _) | DataType::Decimal256(_, _) => [ + DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [ MutableBuffer::new(capacity * mem::size_of::()), empty_buffer, ], @@ -385,11 +385,11 @@ impl ArrayData { #[inline] pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self { assert!( - matches!(self.data_type, DataType::Decimal(_, _)), + matches!(self.data_type, DataType::Decimal128(_, _)), "only DecimalType is supported for existing type" ); assert!( - matches!(new_data_type, DataType::Decimal(_, _)), + matches!(new_data_type, DataType::Decimal128(_, _)), "only DecimalType is supported for new datatype" ); self.data_type = new_data_type; @@ -582,7 +582,7 @@ impl ArrayData { | DataType::LargeBinary | DataType::Interval(_) | DataType::FixedSizeBinary(_) - | DataType::Decimal(_, _) + | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => vec![], DataType::List(field) => { vec![Self::new_empty(field.data_type())] @@ -1015,7 +1015,7 @@ impl ArrayData { pub fn validate_values(&self) -> Result<()> { match &self.data_type { - DataType::Decimal(p, _) => { + DataType::Decimal128(p, _) => { let values_buffer: &[i128] = self.typed_buffer(0, self.len)?; for value in values_buffer { validate_decimal_precision(*value, *p)?; @@ -1345,7 +1345,7 @@ pub(crate) fn layout(data_type: &DataType) -> DataTypeLayout { } } DataType::Dictionary(key_type, _value_type) => layout(key_type), - DataType::Decimal(_, _) => { + DataType::Decimal128(_, _) => { // Decimals are always some fixed width; The rust implementation // always uses 16 bytes / size of i128 DataTypeLayout::new_fixed_width(size_of::()) @@ -2818,7 +2818,7 @@ mod tests { let fixed_size_array = fixed_size_builder.finish(); // Build ArrayData for Decimal - let builder = ArrayData::builder(DataType::Decimal(5, 3)) + let builder = ArrayData::builder(DataType::Decimal128(5, 3)) .len(fixed_size_array.len()) .add_buffer(fixed_size_array.data_ref().child_data()[0].buffers()[0].clone()); let array_data = unsafe { builder.build_unchecked() }; diff --git a/arrow/src/array/equal/decimal.rs b/arrow/src/array/equal/decimal.rs index 7c44037be398..42a7d29e27d2 100644 --- a/arrow/src/array/equal/decimal.rs +++ b/arrow/src/array/equal/decimal.rs @@ -29,7 +29,7 @@ pub(super) fn decimal_equal( len: usize, ) -> bool { let size = match lhs.data_type() { - DataType::Decimal(_, _) => 16, + DataType::Decimal128(_, _) => 16, DataType::Decimal256(_, _) => 32, _ => unreachable!(), }; diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index 270147eaeec3..6fdc06f837c0 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -187,7 +187,7 @@ fn equal_values( DataType::FixedSizeBinary(_) => { fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len) } - DataType::Decimal(_, _) | DataType::Decimal256(_, _) => { + DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { decimal_equal(lhs, rhs, lhs_start, rhs_start, len) } DataType::List(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), diff --git a/arrow/src/array/ord.rs b/arrow/src/array/ord.rs index 888c31c5d955..1e19c7cc2fca 100644 --- a/arrow/src/array/ord.rs +++ b/arrow/src/array/ord.rs @@ -226,7 +226,7 @@ pub fn build_compare(left: &dyn Array, right: &dyn Array) -> Result { + (Decimal128(_, _), Decimal128(_, _)) => { let left: Decimal128Array = Decimal128Array::from(left.data().clone()); let right: Decimal128Array = Decimal128Array::from(right.data().clone()); Box::new(move |i, j| left.value(i).cmp(&right.value(j))) diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index 570be29ed336..3664a2055210 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -205,7 +205,7 @@ fn build_extend_dictionary( fn build_extend(array: &ArrayData) -> Extend { use crate::datatypes::*; match array.data_type() { - DataType::Decimal(_, _) => primitive::build_extend::(array), + DataType::Decimal128(_, _) => primitive::build_extend::(array), DataType::Null => null::build_extend(array), DataType::Boolean => boolean::build_extend(array), DataType::UInt8 => primitive::build_extend::(array), @@ -256,7 +256,7 @@ fn build_extend(array: &ArrayData) -> Extend { fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { use crate::datatypes::*; Box::new(match data_type { - DataType::Decimal(_, _) => primitive::extend_nulls::, + DataType::Decimal128(_, _) => primitive::extend_nulls::, DataType::Null => null::extend_nulls, DataType::Boolean => boolean::extend_nulls, DataType::UInt8 => primitive::extend_nulls::, @@ -410,7 +410,7 @@ impl<'a> MutableArrayData<'a> { }; let child_data = match &data_type { - DataType::Decimal(_, _) + DataType::Decimal128(_, _) | DataType::Decimal256(_, _) | DataType::Null | DataType::Boolean diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 25aa525b4520..7e803766bb11 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -72,11 +72,11 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { match (from_type, to_type) { // TODO UTF8/unsigned numeric to decimal // cast one decimal type to another decimal type - (Decimal(_, _), Decimal(_, _)) => true, + (Decimal128(_, _), Decimal128(_, _)) => true, // signed numeric to decimal - (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal(_, _)) | + (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) | // decimal to signed numeric - (Decimal(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) + (Decimal128(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) | ( Null, Boolean @@ -109,8 +109,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { | Map(_, _) | Dictionary(_, _) ) => true, - (Decimal(_, _), _) => false, - (_, Decimal(_, _)) => false, + (Decimal128(_, _), _) => false, + (_, Decimal128(_, _)) => false, (Struct(_), _) => false, (_, Struct(_)) => false, (LargeList(list_from), LargeList(list_to)) => { @@ -410,8 +410,10 @@ pub fn cast_with_options( return Ok(array.clone()); } match (from_type, to_type) { - (Decimal(_, s1), Decimal(p2, s2)) => cast_decimal_to_decimal(array, s1, p2, s2), - (Decimal(_, scale), _) => { + (Decimal128(_, s1), Decimal128(p2, s2)) => { + cast_decimal_to_decimal(array, s1, p2, s2) + } + (Decimal128(_, scale), _) => { // cast decimal to other type match to_type { Int8 => { @@ -439,7 +441,7 @@ pub fn cast_with_options( ))), } } - (_, Decimal(precision, scale)) => { + (_, Decimal128(precision, scale)) => { // cast data to decimal match from_type { // TODO now just support signed numeric to decimal, support decimal to numeric later @@ -2205,8 +2207,8 @@ mod tests { #[test] fn test_cast_decimal_to_decimal() { - let input_type = DataType::Decimal(20, 3); - let output_type = DataType::Decimal(20, 4); + let input_type = DataType::Decimal128(20, 3); + let output_type = DataType::Decimal128(20, 4); assert!(can_cast_types(&input_type, &output_type)); let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; let input_decimal_array = create_decimal_array(&array, 20, 3).unwrap(); @@ -2226,7 +2228,7 @@ mod tests { let array = vec![Some(123456), None]; let input_decimal_array = create_decimal_array(&array, 10, 0).unwrap(); let array = Arc::new(input_decimal_array) as ArrayRef; - let result = cast(&array, &DataType::Decimal(2, 2)); + let result = cast(&array, &DataType::Decimal128(2, 2)); assert!(result.is_err()); assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal of precision 2. Max is 99", result.unwrap_err().to_string()); @@ -2234,7 +2236,7 @@ mod tests { #[test] fn test_cast_decimal_to_numeric() { - let decimal_type = DataType::Decimal(38, 2); + let decimal_type = DataType::Decimal128(38, 2); // negative test assert!(!can_cast_types(&decimal_type, &DataType::UInt8)); let value_array: Vec> = @@ -2355,7 +2357,7 @@ mod tests { #[test] fn test_cast_numeric_to_decimal() { // test negative cast type - let decimal_type = DataType::Decimal(38, 6); + let decimal_type = DataType::Decimal128(38, 6); assert!(!can_cast_types(&DataType::UInt64, &decimal_type)); // i8, i16, i32, i64 @@ -2408,7 +2410,7 @@ mod tests { // the 100 will be converted to 1000_i128, but it is out of range for max value in the precision 3. let array = Int8Array::from(vec![1, 2, 3, 4, 100]); let array = Arc::new(array) as ArrayRef; - let casted_array = cast(&array, &DataType::Decimal(3, 1)); + let casted_array = cast(&array, &DataType::Decimal128(3, 1)); assert!(casted_array.is_err()); assert_eq!("Invalid argument error: 1000 is too large to store in a Decimal of precision 3. Max is 999", casted_array.unwrap_err().to_string()); @@ -4282,7 +4284,7 @@ mod tests { #[test] fn test_cast_null_array_to_from_decimal_array() { - let data_type = DataType::Decimal(12, 4); + let data_type = DataType::Decimal128(12, 4); let array = new_null_array(&DataType::Null, 4); assert_eq!(array.data_type(), &DataType::Null); let cast_array = cast(&array, &data_type).expect("cast failed"); @@ -4804,7 +4806,7 @@ mod tests { Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)), Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)), Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), - Decimal(38, 0), + Decimal128(38, 0), ] } diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 912733cf1f26..0a3d0541ce3c 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -145,7 +145,7 @@ pub fn sort_to_indices( let (v, n) = partition_validity(values); Ok(match values.data_type() { - DataType::Decimal(_, _) => sort_decimal(values, v, n, cmp, &options, limit), + DataType::Decimal128(_, _) => sort_decimal(values, v, n, cmp, &options, limit), DataType::Boolean => sort_boolean(values, v, n, &options, limit), DataType::Int8 => { sort_primitive::(values, v, n, cmp, &options, limit) diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 6c217a3d8a29..5bfd257fcf46 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -148,7 +148,7 @@ where let values = values.as_any().downcast_ref::().unwrap(); Ok(Arc::new(take_boolean(values, indices)?)) } - DataType::Decimal(_, _) => { + DataType::Decimal128(_, _) => { let decimal_values = values.as_any().downcast_ref::().unwrap(); Ok(Arc::new(take_decimal128(decimal_values, indices)?)) diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index d00bd729c096..7c533a8f8b24 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -544,7 +544,7 @@ fn parse( let field = &fields[i]; match field.data_type() { DataType::Boolean => build_boolean_array(line_number, rows, i), - DataType::Decimal(precision, scale) => { + DataType::Decimal128(precision, scale) => { build_decimal_array(line_number, rows, i, *precision, *scale) } DataType::Int8 => { @@ -1206,8 +1206,8 @@ mod tests { fn test_csv_reader_with_decimal() { let schema = Schema::new(vec![ Field::new("city", DataType::Utf8, false), - Field::new("lat", DataType::Decimal(38, 6), false), - Field::new("lng", DataType::Decimal(38, 6), false), + Field::new("lat", DataType::Decimal128(38, 6), false), + Field::new("lng", DataType::Decimal128(38, 6), false), ]); let file = File::open("test/data/decimal_test.csv").unwrap(); diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs index 6735d9668560..394047cac55c 100644 --- a/arrow/src/csv/writer.rs +++ b/arrow/src/csv/writer.rs @@ -223,7 +223,7 @@ impl Writer { DataType::Timestamp(time_unit, time_zone) => { self.handle_timestamp(time_unit, time_zone.as_ref(), row_index, col)? } - DataType::Decimal(..) => make_string_from_decimal(col, row_index)?, + DataType::Decimal128(..) => make_string_from_decimal(col, row_index)?, t => { // List and Struct arrays not supported by the writer, any // other type needs to be implemented diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index 429a94f24b9c..88ab3edbd71e 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -189,14 +189,19 @@ pub enum DataType { /// This type mostly used to represent low cardinality string /// arrays or a limited set of primitive types as integers. Dictionary(Box, Box), - /// Exact decimal value with precision and scale + /// Exact 128-bit width decimal value with precision and scale + /// + /// * precision is the total number of digits + /// * scale is the number of digits past the decimal + /// + /// For example the number 123.45 has precision 5 and scale 2. + Decimal128(usize, usize), + /// Exact 256-bit width decimal value with precision and scale /// /// * precision is the total number of digits /// * scale is the number of digits past the decimal /// /// For example the number 123.45 has precision 5 and scale 2. - Decimal(usize, usize), - /// Exact decimal value with 256 bits width Decimal256(usize, usize), /// A Map is a logical nested type that is represented as /// @@ -563,7 +568,7 @@ impl DataType { }; if bit_width == 128 { - Ok(DataType::Decimal(precision, scale)) + Ok(DataType::Decimal128(precision, scale)) } else if bit_width == 256 { Ok(DataType::Decimal256(precision, scale)) } else { @@ -850,7 +855,7 @@ impl DataType { TimeUnit::Nanosecond => "NANOSECOND", }}), DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), - DataType::Decimal(precision, scale) => { + DataType::Decimal128(precision, scale) => { json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128}) } DataType::Decimal256(precision, scale) => { diff --git a/arrow/src/datatypes/ffi.rs b/arrow/src/datatypes/ffi.rs index 7ad468b5ed9e..60d285315c0b 100644 --- a/arrow/src/datatypes/ffi.rs +++ b/arrow/src/datatypes/ffi.rs @@ -108,7 +108,7 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { "The decimal type requires an integer scale".to_string(), ) })?; - DataType::Decimal(parsed_precision, parsed_scale) + DataType::Decimal128(parsed_precision, parsed_scale) }, [precision, scale, bits] => { if *bits != "128" { @@ -124,7 +124,7 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { "The decimal type requires an integer scale".to_string(), ) })?; - DataType::Decimal(parsed_precision, parsed_scale) + DataType::Decimal128(parsed_precision, parsed_scale) } _ => { return Err(ArrowError::CDataInterface(format!( @@ -253,7 +253,9 @@ fn get_format_string(dtype: &DataType) -> Result { DataType::LargeUtf8 => Ok("U".to_string()), DataType::FixedSizeBinary(num_bytes) => Ok(format!("w:{}", num_bytes)), DataType::FixedSizeList(_, num_elems) => Ok(format!("+w:{}", num_elems)), - DataType::Decimal(precision, scale) => Ok(format!("d:{},{}", precision, scale)), + DataType::Decimal128(precision, scale) => { + Ok(format!("d:{},{}", precision, scale)) + } DataType::Date32 => Ok("tdD".to_string()), DataType::Date64 => Ok("tdm".to_string()), DataType::Time32(TimeUnit::Second) => Ok("tts".to_string()), diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs index 42fb8ce1db9e..abb80d64aaf3 100644 --- a/arrow/src/datatypes/field.rs +++ b/arrow/src/datatypes/field.rs @@ -675,7 +675,7 @@ impl Field { | DataType::FixedSizeBinary(_) | DataType::Utf8 | DataType::LargeUtf8 - | DataType::Decimal(_, _) + | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { if self.data_type != from.data_type { return Err(ArrowError::SchemaError( diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 2d95b4ea639a..2d529317801d 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -322,7 +322,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { (DataType::Int64, 1) | (DataType::Date64, 1) | (DataType::Time64(_), 1) => size_of::() * 8, (DataType::Float32, 1) => size_of::() * 8, (DataType::Float64, 1) => size_of::() * 8, - (DataType::Decimal(..), 1) => size_of::() * 8, + (DataType::Decimal128(..), 1) => size_of::() * 8, (DataType::Timestamp(..), 1) => size_of::() * 8, (DataType::Duration(..), 1) => size_of::() * 8, // primitive types have a single buffer @@ -337,7 +337,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { (DataType::Int64, _) | (DataType::Date64, _) | (DataType::Time64(_), _) | (DataType::Float32, _) | (DataType::Float64, _) | - (DataType::Decimal(..), _) | + (DataType::Decimal128(..), _) | (DataType::Timestamp(..), _) | (DataType::Duration(..), _) => { return Err(ArrowError::CDataInterface(format!( diff --git a/arrow/src/ipc/convert.rs b/arrow/src/ipc/convert.rs index dbbb6b961a10..705bd5cb3012 100644 --- a/arrow/src/ipc/convert.rs +++ b/arrow/src/ipc/convert.rs @@ -322,7 +322,7 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT let fsb = field.type_as_decimal().unwrap(); let bit_width = fsb.bitWidth(); if bit_width == 128 { - DataType::Decimal(fsb.precision() as usize, fsb.scale() as usize) + DataType::Decimal128(fsb.precision() as usize, fsb.scale() as usize) } else if bit_width == 256 { DataType::Decimal256(fsb.precision() as usize, fsb.scale() as usize) } else { @@ -667,7 +667,7 @@ pub(crate) fn get_fb_field_type<'a>( // type in the DictionaryEncoding metadata in the parent field get_fb_field_type(value_type, is_nullable, fbb) } - Decimal(precision, scale) => { + Decimal128(precision, scale) => { let mut builder = ipc::DecimalBuilder::new(fbb); builder.add_precision(*precision as i32); builder.add_scale(*scale as i32); @@ -965,7 +965,7 @@ mod tests { 123, true, ), - Field::new("decimal", DataType::Decimal(10, 6), false), + Field::new("decimal", DataType::Decimal128(10, 6), false), ], md, ); diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs index a9d28bd67f4e..f3af214cee0a 100644 --- a/arrow/src/ipc/reader.rs +++ b/arrow/src/ipc/reader.rs @@ -506,7 +506,7 @@ fn create_primitive_array( unsafe { builder.build_unchecked() } } - Decimal(_, _) | Decimal256(_, _) => { + Decimal128(_, _) | Decimal256(_, _) => { // read 3 buffers let builder = ArrayData::builder(data_type.clone()) .len(length) diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index c97e0b1aa444..26bc8a1923a6 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -319,7 +319,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result make_string!(array::Float16Array, column, row), DataType::Float32 => make_string!(array::Float32Array, column, row), DataType::Float64 => make_string!(array::Float64Array, column, row), - DataType::Decimal(..) => make_string_from_decimal(column, row), + DataType::Decimal128(..) => make_string_from_decimal(column, row), DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => { make_string_datetime!(array::TimestampSecondArray, column, row) } diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs index aadf0327734d..0077b2fb72aa 100644 --- a/arrow/src/util/integration_util.rs +++ b/arrow/src/util/integration_util.rs @@ -360,7 +360,7 @@ impl ArrowJsonBatch { let arr = arr.as_any().downcast_ref::().unwrap(); arr.equals_json(&json_array.iter().collect::>()[..]) } - DataType::Decimal(_, _) => { + DataType::Decimal128(_, _) => { let arr = arr.as_any().downcast_ref::().unwrap(); arr.equals_json(&json_array.iter().collect::>()[..]) } diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs index 781416e67521..212c254d93aa 100644 --- a/integration-testing/src/lib.rs +++ b/integration-testing/src/lib.rs @@ -594,7 +594,7 @@ fn array_from_json( ))), } } - DataType::Decimal(precision, scale) => { + DataType::Decimal128(precision, scale) => { let mut b = Decimal128Builder::new(json_col.count, *precision, *scale); // C++ interop tests involve incompatible decimal values unsafe { diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs index 7a19d5fbc601..d9c1bedb246c 100644 --- a/parquet/src/arrow/array_reader/builder.rs +++ b/parquet/src/arrow/array_reader/builder.rs @@ -25,11 +25,19 @@ use crate::arrow::array_reader::{ ComplexObjectArrayReader, ListArrayReader, MapArrayReader, NullArrayReader, PrimitiveArrayReader, RowGroupCollection, StructArrayReader, }; -use crate::arrow::buffer::converter::{DecimalArrayConverter, DecimalByteArrayConvert, DecimalFixedLengthByteArrayConverter, FixedLenBinaryConverter, FixedSizeArrayConverter, Int96ArrayConverter, Int96Converter, IntervalDayTimeArrayConverter, IntervalDayTimeConverter, IntervalYearMonthArrayConverter, IntervalYearMonthConverter}; +use crate::arrow::buffer::converter::{ + DecimalArrayConverter, DecimalByteArrayConvert, DecimalFixedLengthByteArrayConverter, + FixedLenBinaryConverter, FixedSizeArrayConverter, Int96ArrayConverter, + Int96Converter, IntervalDayTimeArrayConverter, IntervalDayTimeConverter, + IntervalYearMonthArrayConverter, IntervalYearMonthConverter, +}; use crate::arrow::schema::{convert_schema, ParquetField, ParquetFieldType}; use crate::arrow::ProjectionMask; use crate::basic::Type as PhysicalType; -use crate::data_type::{BoolType, ByteArrayType, DoubleType, FixedLenByteArrayType, FloatType, Int32Type, Int64Type, Int96Type}; +use crate::data_type::{ + BoolType, ByteArrayType, DoubleType, FixedLenByteArrayType, FloatType, Int32Type, + Int64Type, Int96Type, +}; use crate::errors::Result; use crate::schema::types::{ColumnDescriptor, ColumnPath, SchemaDescPtr, Type}; @@ -155,13 +163,11 @@ fn build_primitive_reader( let arrow_type = Some(field.arrow_type.clone()); match physical_type { - PhysicalType::BOOLEAN => Ok(Box::new( - PrimitiveArrayReader::::new( - page_iterator, - column_desc, - arrow_type, - )?, - )), + PhysicalType::BOOLEAN => Ok(Box::new(PrimitiveArrayReader::::new( + page_iterator, + column_desc, + arrow_type, + )?)), PhysicalType::INT32 => { if let Some(DataType::Null) = arrow_type { Ok(Box::new(NullArrayReader::::new( @@ -169,22 +175,18 @@ fn build_primitive_reader( column_desc, )?)) } else { - Ok(Box::new( - PrimitiveArrayReader::::new( - page_iterator, - column_desc, - arrow_type, - )?, - )) + Ok(Box::new(PrimitiveArrayReader::::new( + page_iterator, + column_desc, + arrow_type, + )?)) } } - PhysicalType::INT64 => Ok(Box::new( - PrimitiveArrayReader::::new( - page_iterator, - column_desc, - arrow_type, - )?, - )), + PhysicalType::INT64 => Ok(Box::new(PrimitiveArrayReader::::new( + page_iterator, + column_desc, + arrow_type, + )?)), PhysicalType::INT96 => { // get the optional timezone information from arrow type let timezone = arrow_type.as_ref().and_then(|data_type| { @@ -205,50 +207,40 @@ fn build_primitive_reader( arrow_type, )?)) } - PhysicalType::FLOAT => Ok(Box::new( - PrimitiveArrayReader::::new( - page_iterator, - column_desc, - arrow_type, - )?, - )), - PhysicalType::DOUBLE => Ok(Box::new( - PrimitiveArrayReader::::new( - page_iterator, - column_desc, - arrow_type, - )?, - )), + PhysicalType::FLOAT => Ok(Box::new(PrimitiveArrayReader::::new( + page_iterator, + column_desc, + arrow_type, + )?)), + PhysicalType::DOUBLE => Ok(Box::new(PrimitiveArrayReader::::new( + page_iterator, + column_desc, + arrow_type, + )?)), PhysicalType::BYTE_ARRAY => match arrow_type { - Some(DataType::Dictionary(_, _)) => make_byte_array_dictionary_reader( - page_iterator, - column_desc, - arrow_type, - ), - Some(DataType::Decimal(precision, scale)) => { + Some(DataType::Dictionary(_, _)) => { + make_byte_array_dictionary_reader(page_iterator, column_desc, arrow_type) + } + Some(DataType::Decimal128(precision, scale)) => { // read decimal data from parquet binary physical type - let convert = DecimalByteArrayConvert::new(DecimalArrayConverter::new(precision as i32, scale as i32)); - Ok(Box::new( - ComplexObjectArrayReader::::new( - page_iterator, - column_desc, - convert, - arrow_type - )? - )) - }, - _ => make_byte_array_reader( - page_iterator, - column_desc, - arrow_type, - ), - }, - PhysicalType::FIXED_LEN_BYTE_ARRAY => match field.arrow_type { - DataType::Decimal(precision, scale) => { - let converter = DecimalFixedLengthByteArrayConverter::new(DecimalArrayConverter::new( + let convert = DecimalByteArrayConvert::new(DecimalArrayConverter::new( precision as i32, scale as i32, )); + Ok(Box::new(ComplexObjectArrayReader::< + ByteArrayType, + DecimalByteArrayConvert, + >::new( + page_iterator, column_desc, convert, arrow_type + )?)) + } + _ => make_byte_array_reader(page_iterator, column_desc, arrow_type), + }, + PhysicalType::FIXED_LEN_BYTE_ARRAY => match field.arrow_type { + DataType::Decimal128(precision, scale) => { + let converter = DecimalFixedLengthByteArrayConverter::new( + DecimalArrayConverter::new(precision as i32, scale as i32), + ); Ok(Box::new(ComplexObjectArrayReader::< FixedLenByteArrayType, DecimalFixedLengthByteArrayConverter, diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index 700b12b0a0b3..ca7522b86d66 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -183,7 +183,7 @@ where let a = arrow::compute::cast(&array, &ArrowType::Date32)?; arrow::compute::cast(&a, &target_type)? } - ArrowType::Decimal(p, s) => { + ArrowType::Decimal128(p, s) => { let array = match array.data_type() { ArrowType::Int32 => array .as_any() diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index f88d986ea9e9..9a90d40d5a85 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -88,7 +88,7 @@ fn is_leaf(data_type: &DataType) -> bool { | DataType::Interval(_) | DataType::Binary | DataType::LargeBinary - | DataType::Decimal(_, _) + | DataType::Decimal128(_, _) | DataType::FixedSizeBinary(_) ) } diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 8a79a116f548..a46c5859a1f6 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -302,7 +302,7 @@ fn write_leaves( | ArrowDataType::Binary | ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 - | ArrowDataType::Decimal(_, _) + | ArrowDataType::Decimal128(_, _) | ArrowDataType::Decimal256(_, _) | ArrowDataType::FixedSizeBinary(_) => { let mut writer = get_writer(row_group_writer)?; @@ -595,7 +595,7 @@ fn write_leaf( .unwrap(); get_fsb_array_slice(array, indices) } - ArrowDataType::Decimal(_, _) => { + ArrowDataType::Decimal128(_, _) => { let array = column .as_any() .downcast_ref::() @@ -952,7 +952,7 @@ mod tests { #[test] fn arrow_writer_decimal() { - let decimal_field = Field::new("a", DataType::Decimal(5, 2), false); + let decimal_field = Field::new("a", DataType::Decimal128(5, 2), false); let schema = Schema::new(vec![decimal_field]); let decimal_values = vec![10_000, 50_000, 0, -100] diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs index 53d67d380412..2cb47bc00e7e 100644 --- a/parquet/src/arrow/schema.rs +++ b/parquet/src/arrow/schema.rs @@ -380,7 +380,8 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_length(*length) .build() } - DataType::Decimal(precision, scale) | DataType::Decimal256(precision, scale) => { + DataType::Decimal128(precision, scale) + | DataType::Decimal256(precision, scale) => { // Decimal precision determines the Parquet physical type to use. // TODO(ARROW-12018): Enable the below after ARROW-10818 Decimal support // @@ -549,10 +550,10 @@ mod tests { parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let arrow_fields = vec![ - Field::new("decimal1", DataType::Decimal(4,2), false), - Field::new("decimal2", DataType::Decimal(12,2), false), - Field::new("decimal3", DataType::Decimal(30,2), false), - Field::new("decimal4", DataType::Decimal(33,2), false), + Field::new("decimal1", DataType::Decimal128(4, 2), false), + Field::new("decimal2", DataType::Decimal128(12, 2), false), + Field::new("decimal3", DataType::Decimal128(30, 2), false), + Field::new("decimal4", DataType::Decimal128(33, 2), false), ]; assert_eq!(&arrow_fields, converted_arrow_schema.fields()); } @@ -1575,9 +1576,9 @@ mod tests { // true, // ), Field::new("c35", DataType::Null, true), - Field::new("c36", DataType::Decimal(2, 1), false), - Field::new("c37", DataType::Decimal(50, 20), false), - Field::new("c38", DataType::Decimal(18, 12), true), + Field::new("c36", DataType::Decimal128(2, 1), false), + Field::new("c37", DataType::Decimal128(50, 20), false), + Field::new("c38", DataType::Decimal128(18, 12), true), Field::new( "c39", DataType::Map( diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index 4bf6876d09da..c05a13565b12 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -112,7 +112,7 @@ fn decimal_type(scale: i32, precision: i32) -> Result { .try_into() .map_err(|_| arrow_err!("precision cannot be negative: {}", precision))?; - Ok(DataType::Decimal(precision, scale)) + Ok(DataType::Decimal128(precision, scale)) } fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result { @@ -224,7 +224,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result Result { +fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result { match (info.logical_type(), info.converted_type()) { (Some(LogicalType::String), _) => Ok(DataType::Utf8), (Some(LogicalType::Json), _) => Ok(DataType::Binary), @@ -235,8 +235,12 @@ fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32 ) -> Result< (None, ConvertedType::BSON) => Ok(DataType::Binary), (None, ConvertedType::ENUM) => Ok(DataType::Binary), (None, ConvertedType::UTF8) => Ok(DataType::Utf8), - (Some(LogicalType::Decimal {precision, scale}), _) => Ok(DataType::Decimal(precision as usize, scale as usize)), - (None, ConvertedType::DECIMAL) => Ok(DataType::Decimal(precision as usize, scale as usize)), + (Some(LogicalType::Decimal { precision, scale }), _) => { + Ok(DataType::Decimal128(precision as usize, scale as usize)) + } + (None, ConvertedType::DECIMAL) => { + Ok(DataType::Decimal128(precision as usize, scale as usize)) + } (logical, converted) => Err(arrow_err!( "Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}", logical, From c92d2abdbff3252286c3bff4acccc19a2263e3ac Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 29 Jul 2022 13:38:04 -0700 Subject: [PATCH 2/2] Update doc --- arrow/src/array/data.rs | 2 +- arrow/src/datatypes/datatype.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index b927c32c7c6e..985f9cc36744 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -380,7 +380,7 @@ impl ArrayData { /// panic's if the new DataType is not compatible with the /// existing type. /// - /// Note: currently only changing a [DataType::Decimal]s precision + /// Note: currently only changing a [DataType::Decimal128]s precision /// and scale are supported #[inline] pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self { diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index 88ab3edbd71e..9f23e6f790ac 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -264,7 +264,7 @@ impl fmt::Display for DataType { } /// `MAX_DECIMAL_FOR_EACH_PRECISION[p]` holds the maximum `i128` value -/// that can be stored in [DataType::Decimal] value of precision `p` +/// that can be stored in [DataType::Decimal128] value of precision `p` pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ 9, 99, @@ -350,7 +350,7 @@ pub const MAX_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [ ]; /// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value -/// that can be stored in a [DataType::Decimal] value of precision `p` +/// that can be stored in a [DataType::Decimal128] value of precision `p` pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ -9, -99, @@ -435,10 +435,10 @@ pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [ "-999999999999999999999999999999999999999999999999999999999999999999999999999", ]; -/// The maximum precision for [DataType::Decimal] values +/// The maximum precision for [DataType::Decimal128] values pub const DECIMAL128_MAX_PRECISION: usize = 38; -/// The maximum scale for [DataType::Decimal] values +/// The maximum scale for [DataType::Decimal128] values pub const DECIMAL128_MAX_SCALE: usize = 38; /// The maximum precision for [DataType::Decimal256] values @@ -447,7 +447,7 @@ pub const DECIMAL256_MAX_PRECISION: usize = 76; /// The maximum scale for [DataType::Decimal256] values pub const DECIMAL256_MAX_SCALE: usize = 76; -/// The default scale for [DataType::Decimal] and [DataType::Decimal256] values +/// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values pub const DECIMAL_DEFAULT_SCALE: usize = 10; /// Validates that the specified `i128` value can be properly