Skip to content

Commit

Permalink
Merge 85d9229 into c47d14c
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Jun 30, 2022
2 parents c47d14c + 85d9229 commit 954916d
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 36 deletions.
11 changes: 11 additions & 0 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,17 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
) -> impl Iterator<Item = Option<T::Native>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
}

/// Returns the backing [`ArrayData`] of this [`PrimitiveArray`]
pub fn into_data(self) -> ArrayData {
self.into()
}
}

impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData {
fn from(a: PrimitiveArray<T>) -> Self {
a.data
}
}

impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
Expand Down
11 changes: 11 additions & 0 deletions arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,11 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
) -> impl Iterator<Item = Option<&str>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
}

/// Returns the backing [`ArrayData`] of this [`GenericStringArray`]
pub fn into_data(self) -> ArrayData {
self.into()
}
}

impl<'a, Ptr, OffsetSize: OffsetSizeTrait> FromIterator<&'a Option<Ptr>>
Expand Down Expand Up @@ -336,6 +341,12 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for GenericStringArray<Offse
}
}

impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>> for ArrayData {
fn from(a: GenericStringArray<OffsetSize>) -> Self {
a.data
}
}

/// An array where each element is a variable-sized sequence of bytes representing a string
/// whose maximum length (in bytes) is represented by a i32.
///
Expand Down
26 changes: 0 additions & 26 deletions arrow/src/array/builder/primitive_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@ use std::sync::Arc;

use crate::array::ArrayData;
use crate::array::ArrayRef;
use crate::array::DictionaryArray;
use crate::array::PrimitiveArray;
use crate::datatypes::ArrowPrimitiveType;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};

use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder};
Expand Down Expand Up @@ -197,30 +195,6 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
PrimitiveArray::<T>::from(array_data)
}

/// Builds the `DictionaryArray` and reset this builder.
pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
let len = self.len();
let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
let null_count = len
- null_bit_buffer
.as_ref()
.map(|b| b.count_set_bits())
.unwrap_or(len);
let data_type = DataType::Dictionary(
Box::new(T::DATA_TYPE),
Box::new(values.data_type().clone()),
);
let mut builder = ArrayData::builder(data_type)
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder.null_bit_buffer(null_bit_buffer);
}
builder = builder.add_child_data(values.data().clone());
let array_data = unsafe { builder.build_unchecked() };
DictionaryArray::<T>::from(array_data)
}

fn materialize_bitmap_builder(&mut self) {
if self.bitmap_builder.is_some() {
return;
Expand Down
22 changes: 15 additions & 7 deletions arrow/src/array/builder/primitive_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;

use crate::array::ArrayRef;
use crate::array::ArrowPrimitiveType;
use crate::array::DictionaryArray;
use crate::datatypes::ArrowNativeType;
use crate::datatypes::ToByteSlice;
use crate::array::{ArrayRef, ArrowPrimitiveType, DictionaryArray};
use crate::datatypes::{ArrowNativeType, DataType, ToByteSlice};
use crate::error::{ArrowError, Result};

use super::ArrayBuilder;
Expand Down Expand Up @@ -164,8 +161,19 @@ where
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.map.clear();
let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
self.keys_builder.finish_dict(value_ref)
let values = self.values_builder.finish();
let keys = self.keys_builder.finish();

let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));

let builder = keys
.into_data()
.into_builder()
.data_type(data_type)
.child_data(vec![values.into_data()]);

DictionaryArray::from(unsafe { builder.build_unchecked() })
}
}

Expand Down
17 changes: 14 additions & 3 deletions arrow/src/array/builder/string_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use super::PrimitiveBuilder;
use crate::array::{
Array, ArrayBuilder, ArrayRef, DictionaryArray, StringArray, StringBuilder,
};
use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType};
use crate::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType};
use crate::error::{ArrowError, Result};
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
Expand Down Expand Up @@ -250,8 +250,19 @@ where
/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.dedup.clear();
let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
self.keys_builder.finish_dict(value_ref)
let values = self.values_builder.finish();
let keys = self.keys_builder.finish();

let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8));

let builder = keys
.into_data()
.into_builder()
.data_type(data_type)
.child_data(vec![values.into_data()]);

DictionaryArray::from(unsafe { builder.build_unchecked() })
}
}

Expand Down
25 changes: 25 additions & 0 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,11 @@ impl ArrayData {
.zip(other.child_data.iter())
.all(|(a, b)| a.ptr_eq(b))
}

/// Converts this [`ArrayData`] into an [`ArrayDataBuilder`]
pub fn into_builder(self) -> ArrayDataBuilder {
self.into()
}
}

/// Return the expected [`DataTypeLayout`] Arrays of this data
Expand Down Expand Up @@ -1408,6 +1413,10 @@ impl ArrayDataBuilder {
}
}

pub fn data_type(self, data_type: DataType) -> Self {
Self { data_type, ..self }
}

#[inline]
#[allow(clippy::len_without_is_empty)]
pub const fn len(mut self, n: usize) -> Self {
Expand Down Expand Up @@ -1482,6 +1491,22 @@ impl ArrayDataBuilder {
}
}

impl From<ArrayData> for ArrayDataBuilder {
fn from(d: ArrayData) -> Self {
// TODO: Store Bitmap on ArrayData (#1799)
let null_bit_buffer = d.null_buffer().cloned();
Self {
null_bit_buffer,
data_type: d.data_type,
len: d.len,
null_count: Some(d.null_count),
offset: d.offset,
buffers: d.buffers,
child_data: d.child_data,
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down

0 comments on commit 954916d

Please sign in to comment.