From f749e1d9f19a5da9249b8e1d2429b10acde97805 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Wed, 23 Nov 2022 18:48:53 +0000 Subject: [PATCH] Return slice from GenericByteArray::value_data (#3171) --- arrow-array/src/array/byte_array.rs | 8 +++---- arrow-cast/src/cast.rs | 23 ++++++++------------ arrow/src/compute/kernels/concat_elements.rs | 13 +++-------- arrow/src/compute/kernels/substring.rs | 6 ++--- 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 8dd206bd2639..8c2616624c0c 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -21,7 +21,7 @@ use crate::raw_pointer::RawPtrBox; use crate::types::bytes::ByteArrayNativeType; use crate::types::ByteArrayType; use crate::{Array, ArrayAccessor, OffsetSizeTrait}; -use arrow_buffer::{ArrowNativeType, Buffer}; +use arrow_buffer::ArrowNativeType; use arrow_data::ArrayData; use arrow_schema::DataType; use std::any::Any; @@ -55,9 +55,9 @@ impl GenericByteArray { offsets[i + 1] - offsets[i] } - /// Returns a clone of the value data buffer - pub fn value_data(&self) -> Buffer { - self.data.buffers()[1].clone() + /// Returns the raw value data + pub fn value_data(&self) -> &[u8] { + self.data.buffers()[1].as_slice() } /// Returns the offset values in the offsets buffer diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 61be2171b7c1..3f17758255c7 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -3402,14 +3402,13 @@ where OffsetSizeFrom: OffsetSizeTrait + ToPrimitive, OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType, { - let str_array = array - .as_any() - .downcast_ref::>() - .unwrap(); - let list_data = array.data(); - let str_values_buf = str_array.value_data(); - - let offsets = list_data.buffers()[0].typed_data::(); + let data = array.data(); + assert_eq!( + data.data_type(), + &GenericStringArray::::DATA_TYPE + ); + let str_values_buf = data.buffers()[1].clone(); + let offsets = data.buffers()[0].typed_data::(); let mut offset_builder = BufferBuilder::::new(offsets.len()); offsets @@ -3426,18 +3425,14 @@ where let offset_buffer = offset_builder.finish(); - let dtype = if matches!(std::mem::size_of::(), 8) { - DataType::LargeUtf8 - } else { - DataType::Utf8 - }; + let dtype = GenericStringArray::::DATA_TYPE; let builder = ArrayData::builder(dtype) .offset(array.offset()) .len(array.len()) .add_buffer(offset_buffer) .add_buffer(str_values_buf) - .null_bit_buffer(list_data.null_buffer().cloned()); + .null_bit_buffer(data.null_buffer().cloned()); let array_data = unsafe { builder.build_unchecked() }; diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index 1c0a0925df74..a908ba9ab5d8 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -50,10 +50,8 @@ pub fn concat_elements_utf8( let left_offsets = left.value_offsets(); let right_offsets = right.value_offsets(); - let left_buffer = left.value_data(); - let right_buffer = right.value_data(); - let left_values = left_buffer.as_slice(); - let right_values = right_buffer.as_slice(); + let left_values = left.value_data(); + let right_values = right.value_data(); let mut output_values = BufferBuilder::::new( left_values.len() + right_values.len() @@ -115,16 +113,11 @@ pub fn concat_elements_utf8_many( size, )?; - let data_buffers = arrays + let data_values = arrays .iter() .map(|array| array.value_data()) .collect::>(); - let data_values = data_buffers - .iter() - .map(|buffer| buffer.as_slice()) - .collect::>(); - let mut offsets = arrays .iter() .map(|a| a.value_offsets().iter().peekable()) diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index 76568ae0dac0..23cb2c19fddf 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -253,8 +253,7 @@ fn binary_substring( length: Option, ) -> Result { let offsets = array.value_offsets(); - let values = array.value_data(); - let data = values.as_slice(); + let data = array.value_data(); let zero = OffsetSize::zero(); // start and end offsets of all substrings @@ -364,8 +363,7 @@ fn utf8_substring( length: Option, ) -> Result { let offsets = array.value_offsets(); - let values = array.value_data(); - let data = values.as_slice(); + let data = array.value_data(); let zero = OffsetSize::zero(); // Check if `offset` is at a valid char boundary.