Skip to content

Commit

Permalink
Return slice from GenericByteArray::value_data (#3171)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Nov 23, 2022
1 parent 78ab0ef commit f749e1d
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 32 deletions.
8 changes: 4 additions & 4 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::raw_pointer::RawPtrBox;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
Expand Down Expand Up @@ -55,9 +55,9 @@ impl<T: ByteArrayType> GenericByteArray<T> {
offsets[i + 1] - offsets[i]
}

/// Returns a clone of the value data buffer
pub fn value_data(&self) -> Buffer {
self.data.buffers()[1].clone()
/// Returns the raw value data as a borrowed byte slice.
///
/// The slice is a view over the second buffer (`buffers()[1]`) of the
/// underlying array data; the buffer itself is not cloned, so callers that
/// need an owned `Buffer` must clone `data().buffers()[1]` themselves.
///
/// NOTE(review): presumably this is the whole values buffer and is not
/// adjusted for the array's offset, so individual values should be located
/// via the value offsets — confirm.
pub fn value_data(&self) -> &[u8] {
self.data.buffers()[1].as_slice()
}

/// Returns the offset values in the offsets buffer
Expand Down
23 changes: 9 additions & 14 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3402,14 +3402,13 @@ where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType,
{
let str_array = array
.as_any()
.downcast_ref::<GenericStringArray<OffsetSizeFrom>>()
.unwrap();
let list_data = array.data();
let str_values_buf = str_array.value_data();

let offsets = list_data.buffers()[0].typed_data::<OffsetSizeFrom>();
let data = array.data();
assert_eq!(
data.data_type(),
&GenericStringArray::<OffsetSizeFrom>::DATA_TYPE
);
let str_values_buf = data.buffers()[1].clone();
let offsets = data.buffers()[0].typed_data::<OffsetSizeFrom>();

let mut offset_builder = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
offsets
Expand All @@ -3426,18 +3425,14 @@ where

let offset_buffer = offset_builder.finish();

let dtype = if matches!(std::mem::size_of::<OffsetSizeTo>(), 8) {
DataType::LargeUtf8
} else {
DataType::Utf8
};
let dtype = GenericStringArray::<OffsetSizeTo>::DATA_TYPE;

let builder = ArrayData::builder(dtype)
.offset(array.offset())
.len(array.len())
.add_buffer(offset_buffer)
.add_buffer(str_values_buf)
.null_bit_buffer(list_data.null_buffer().cloned());
.null_bit_buffer(data.null_buffer().cloned());

let array_data = unsafe { builder.build_unchecked() };

Expand Down
13 changes: 3 additions & 10 deletions arrow/src/compute/kernels/concat_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
let left_offsets = left.value_offsets();
let right_offsets = right.value_offsets();

let left_buffer = left.value_data();
let right_buffer = right.value_data();
let left_values = left_buffer.as_slice();
let right_values = right_buffer.as_slice();
let left_values = left.value_data();
let right_values = right.value_data();

let mut output_values = BufferBuilder::<u8>::new(
left_values.len() + right_values.len()
Expand Down Expand Up @@ -115,16 +113,11 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
size,
)?;

let data_buffers = arrays
let data_values = arrays
.iter()
.map(|array| array.value_data())
.collect::<Vec<_>>();

let data_values = data_buffers
.iter()
.map(|buffer| buffer.as_slice())
.collect::<Vec<_>>();

let mut offsets = arrays
.iter()
.map(|a| a.value_offsets().iter().peekable())
Expand Down
6 changes: 2 additions & 4 deletions arrow/src/compute/kernels/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,7 @@ fn binary_substring<OffsetSize: OffsetSizeTrait>(
length: Option<OffsetSize>,
) -> Result<ArrayRef> {
let offsets = array.value_offsets();
let values = array.value_data();
let data = values.as_slice();
let data = array.value_data();
let zero = OffsetSize::zero();

// start and end offsets of all substrings
Expand Down Expand Up @@ -364,8 +363,7 @@ fn utf8_substring<OffsetSize: OffsetSizeTrait>(
length: Option<OffsetSize>,
) -> Result<ArrayRef> {
let offsets = array.value_offsets();
let values = array.value_data();
let data = values.as_slice();
let data = array.value_data();
let zero = OffsetSize::zero();

// Check if `offset` is at a valid char boundary.
Expand Down

0 comments on commit f749e1d

Please sign in to comment.