diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index 4671d82673fd..4d5575b47533 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -78,18 +78,13 @@ impl<'a> _MutableArrayData<'a> { } }; - let mut array_data_builder = ArrayDataBuilder::new(self.data_type) + ArrayDataBuilder::new(self.data_type) .offset(0) .len(self.len) .null_count(self.null_count) .buffers(buffers) - .child_data(child_data); - if self.null_count > 0 { - array_data_builder = - array_data_builder.null_bit_buffer(Some(self.null_buffer.into())); - } - - array_data_builder + .child_data(child_data) + .null_bit_buffer((self.null_count > 0).then(|| self.null_buffer.into())) } } @@ -184,48 +179,23 @@ fn build_extend_dictionary( max: usize, ) -> Option { use crate::datatypes::*; + macro_rules! validate_and_build { + ($dt: ty) => {{ + let _: $dt = max.try_into().ok()?; + let offset: $dt = offset.try_into().ok()?; + Some(primitive::build_extend_with_offset(array, offset)) + }}; + } match array.data_type() { DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() { - DataType::UInt8 => { - let _: u8 = max.try_into().ok()?; - let offset: u8 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::UInt16 => { - let _: u16 = max.try_into().ok()?; - let offset: u16 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::UInt32 => { - let _: u32 = max.try_into().ok()?; - let offset: u32 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::UInt64 => { - let _: u64 = max.try_into().ok()?; - let offset: u64 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int8 => { - let _: i8 = max.try_into().ok()?; - let offset: i8 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int16 => { - let _: i16 = max.try_into().ok()?; - let offset: i16 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int32 => { - let _: i32 = max.try_into().ok()?; - let offset: i32 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int64 => { - let _: i64 = max.try_into().ok()?; - let offset: i64 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } + DataType::UInt8 => validate_and_build!(u8), + DataType::UInt16 => validate_and_build!(u16), + DataType::UInt32 => validate_and_build!(u32), + DataType::UInt64 => validate_and_build!(u64), + DataType::Int8 => validate_and_build!(i8), + DataType::Int16 => validate_and_build!(i16), + DataType::Int32 => validate_and_build!(i32), + DataType::Int64 => validate_and_build!(i64), _ => unreachable!(), }, _ => None, @@ -394,7 +364,7 @@ impl<'a> MutableArrayData<'a> { /// a [Capacities] variant is not yet supported. pub fn with_capacities( arrays: Vec<&'a ArrayData>, - mut use_nulls: bool, + use_nulls: bool, capacities: Capacities, ) -> Self { let data_type = arrays[0].data_type(); @@ -402,20 +372,22 @@ impl<'a> MutableArrayData<'a> { // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. - if arrays.iter().any(|array| array.null_count() > 0) { - use_nulls = true; - }; + let use_nulls = use_nulls | arrays.iter().any(|array| array.null_count() > 0); let mut array_capacity; let [buffer1, buffer2] = match (data_type, &capacities) { - (DataType::LargeUtf8, Capacities::Binary(capacity, Some(value_cap))) - | (DataType::LargeBinary, Capacities::Binary(capacity, Some(value_cap))) => { + ( + DataType::LargeUtf8 | DataType::LargeBinary, + Capacities::Binary(capacity, Some(value_cap)), + ) => { array_capacity = *capacity; preallocate_offset_and_binary_buffer::(*capacity, *value_cap) } - (DataType::Utf8, Capacities::Binary(capacity, Some(value_cap))) - | (DataType::Binary, Capacities::Binary(capacity, Some(value_cap))) => { + ( + DataType::Utf8 | DataType::Binary, + Capacities::Binary(capacity, Some(value_cap)), + ) => { array_capacity = *capacity; preallocate_offset_and_binary_buffer::(*capacity, *value_cap) }