Skip to content

Commit

Permalink
Add Array::to_data and Array::nulls (apache#3880) (apache#3881)
Browse files Browse the repository at this point in the history
* Add Array::to_data and Array::nulls (apache#3880)

* Review feedback

* Format
  • Loading branch information
tustvold authored and spebern committed Mar 25, 2023
1 parent 5c14cfc commit 33f8881
Show file tree
Hide file tree
Showing 35 changed files with 301 additions and 95 deletions.
8 changes: 4 additions & 4 deletions arrow-arith/src/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ where
.map(|i| unsafe { array.value_unchecked(i) })
.reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
} else {
let nulls = array.data().nulls().unwrap();
let nulls = array.nulls().unwrap();
let iter = BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len());
unsafe {
let idx = iter.reduce(|acc_idx, idx| {
Expand Down Expand Up @@ -288,7 +288,7 @@ where

let data: &[T::Native] = array.values();

match array.data().nulls() {
match array.nulls() {
None => {
let sum = data.iter().fold(T::default_value(), |accumulator, value| {
accumulator.add_wrapping(*value)
Expand Down Expand Up @@ -347,7 +347,7 @@ where

let data: &[T::Native] = array.values();

match array.data().nulls() {
match array.nulls() {
None => {
let sum = data
.iter()
Expand Down Expand Up @@ -665,7 +665,7 @@ mod simd {
let mut chunk_acc = A::init_accumulator_chunk();
let mut rem_acc = A::init_accumulator_scalar();

match array.data().nulls() {
match array.nulls() {
None => {
let data_chunks = data.chunks_exact(64);
let remainder = data_chunks.remainder();
Expand Down
3 changes: 1 addition & 2 deletions arrow-arith/src/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,7 @@ where

let array_builder = builder
.finish()
.data()
.clone()
.into_data()
.into_builder()
.null_bit_buffer(null_buffer)
.null_count(null_count);
Expand Down
24 changes: 12 additions & 12 deletions arrow-arith/src/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ mod tests {
let a = BooleanArray::from(vec![false, false, false, true, true, true]);

// ensure null bitmap of a is absent
assert!(a.data().nulls().is_none());
assert!(a.nulls().is_none());

let b = BooleanArray::from(vec![
Some(true),
Expand All @@ -622,7 +622,7 @@ mod tests {
]);

// ensure null bitmap of b is present
assert!(b.data().nulls().is_some());
assert!(b.nulls().is_some());

let c = or_kleene(&a, &b).unwrap();

Expand Down Expand Up @@ -650,12 +650,12 @@ mod tests {
]);

// ensure null bitmap of b is absent
assert!(a.data().nulls().is_some());
assert!(a.nulls().is_some());

let b = BooleanArray::from(vec![false, false, false, true, true, true]);

// ensure null bitmap of a is present
assert!(b.data().nulls().is_none());
assert!(b.nulls().is_none());

let c = or_kleene(&a, &b).unwrap();

Expand Down Expand Up @@ -852,7 +852,7 @@ mod tests {
let expected = BooleanArray::from(vec![false, false, false, false]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand All @@ -865,7 +865,7 @@ mod tests {
let expected = BooleanArray::from(vec![false, false, false, false]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand All @@ -877,7 +877,7 @@ mod tests {
let expected = BooleanArray::from(vec![true, true, true, true]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand All @@ -890,7 +890,7 @@ mod tests {
let expected = BooleanArray::from(vec![true, true, true, true]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand All @@ -902,7 +902,7 @@ mod tests {
let expected = BooleanArray::from(vec![false, true, false, true]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand Down Expand Up @@ -933,7 +933,7 @@ mod tests {
let expected = BooleanArray::from(vec![false, true, false, true]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand All @@ -945,7 +945,7 @@ mod tests {
let expected = BooleanArray::from(vec![true, false, true, false]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}

#[test]
Expand Down Expand Up @@ -976,6 +976,6 @@ mod tests {
let expected = BooleanArray::from(vec![true, false, true, false]);

assert_eq!(expected, res);
assert!(res.data().nulls().is_none());
assert!(res.nulls().is_none());
}
}
2 changes: 1 addition & 1 deletion arrow-array/src/array/binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
.offset(v.offset())
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(child_data.buffers()[0].slice(child_data.offset()))
.nulls(v.data().nulls().cloned());
.nulls(v.nulls().cloned());

let data = unsafe { builder.build_unchecked() };
Self::from(data)
Expand Down
17 changes: 16 additions & 1 deletion arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
use crate::array::print_long_array;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::{Array, ArrayAccessor};
use crate::{Array, ArrayAccessor, ArrayRef};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Array of bools
///
Expand Down Expand Up @@ -265,9 +267,22 @@ impl Array for BooleanArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

fn into_data(self) -> ArrayData {
self.into()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
// TODO: Slice buffers directly (#3880)
Arc::new(Self::from(self.data.slice(offset, length)))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a> ArrayAccessor for &'a BooleanArray {
Expand Down
18 changes: 16 additions & 2 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ use crate::builder::GenericByteBuilder;
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::ByteArrayType;
use crate::{Array, ArrayAccessor, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use crate::{Array, ArrayAccessor, ArrayRef, OffsetSizeTrait};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_buffer::{NullBuffer, OffsetBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Generic struct for variable-size byte arrays
///
Expand Down Expand Up @@ -237,9 +238,22 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

fn into_data(self) -> ArrayData {
self.into()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
// TODO: Slice buffers directly (#3880)
Arc::new(Self::from(self.data.slice(offset, length)))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
Expand Down
27 changes: 27 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ use crate::{
make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, PrimitiveArray,
StringArray,
};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::ArrowNativeType;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

///
/// A dictionary array where each element is a single value indexed by an integer key.
Expand Down Expand Up @@ -590,9 +592,22 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

fn into_data(self) -> ArrayData {
self.into()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
// TODO: Slice buffers directly (#3880)
Arc::new(Self::from(self.data.slice(offset, length)))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
Expand Down Expand Up @@ -669,9 +684,21 @@ impl<'a, K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'a,
&self.dictionary.data
}

fn to_data(&self) -> ArrayData {
self.dictionary.to_data()
}

fn into_data(self) -> ArrayData {
self.dictionary.into_data()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
self.dictionary.slice(offset, length)
}

fn nulls(&self) -> Option<&NullBuffer> {
self.dictionary.nulls()
}
}

impl<'a, K, V> IntoIterator for TypedDictionaryArray<'a, K, V>
Expand Down
17 changes: 16 additions & 1 deletion arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@

use crate::array::print_long_array;
use crate::iterator::FixedSizeBinaryIter;
use crate::{Array, ArrayAccessor, FixedSizeListArray};
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;

/// An array where each element is a fixed-size sequence of bytes.
///
Expand Down Expand Up @@ -462,9 +464,22 @@ impl Array for FixedSizeBinaryArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

fn into_data(self) -> ArrayData {
self.into()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
// TODO: Slice buffers directly (#3880)
Arc::new(Self::from(self.data.slice(offset, length)))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
Expand Down
15 changes: 15 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
use crate::array::print_long_array;
use crate::builder::{FixedSizeListBuilder, PrimitiveBuilder};
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType};
use arrow_buffer::buffer::NullBuffer;
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// A list array where each element is a fixed-size sequence of values with the same
/// type whose maximum length is represented by a i32.
Expand Down Expand Up @@ -205,9 +207,22 @@ impl Array for FixedSizeListArray {
&self.data
}

fn to_data(&self) -> ArrayData {
self.data.clone()
}

fn into_data(self) -> ArrayData {
self.into()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
// TODO: Slice buffers directly (#3880)
Arc::new(Self::from(self.data.slice(offset, length)))
}

fn nulls(&self) -> Option<&NullBuffer> {
self.data.nulls()
}
}

impl ArrayAccessor for FixedSizeListArray {
Expand Down
Loading

0 comments on commit 33f8881

Please sign in to comment.