Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Investigate usages of ArrayData::new (WIP) #813

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 91 additions & 72 deletions arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,15 +377,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
DataType::Null => Arc::new(NullArray::new(length)),
DataType::Boolean => {
let null_buf: Buffer = MutableBuffer::new_null(length).into();
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(null_buf.clone()),
0,
vec![null_buf],
vec![],
))
unsafe {
make_array(ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(null_buf.clone()),
0,
vec![null_buf],
vec![],
))
}
}
DataType::Int8 => new_null_sized_array::<Int8Type>(data_type, length),
DataType::UInt8 => new_null_sized_array::<UInt8Type>(data_type, length),
Expand Down Expand Up @@ -414,15 +416,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
new_null_sized_array::<IntervalDayTimeType>(data_type, length)
}
},
DataType::FixedSizeBinary(value_len) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; *value_len as usize * length])],
vec![],
)),
DataType::FixedSizeBinary(value_len) => unsafe {
make_array(ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; *value_len as usize * length])],
vec![],
))
},
DataType::Binary | DataType::Utf8 => {
new_null_binary_array::<i32>(data_type, length)
}
Expand All @@ -435,19 +439,21 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
DataType::LargeList(field) => {
new_null_list_array::<i64>(data_type, field.data_type(), length)
}
DataType::FixedSizeList(field, value_len) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![],
vec![
new_null_array(field.data_type(), *value_len as usize * length)
.data()
.clone(),
],
)),
DataType::FixedSizeList(field, value_len) => unsafe {
make_array(ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![],
vec![
new_null_array(field.data_type(), *value_len as usize * length)
.data()
.clone(),
],
))
},
DataType::Struct(fields) => {
let fields: Vec<_> = fields
.iter()
Expand All @@ -467,15 +473,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
let keys = new_null_array(key, length);
let keys = keys.data();

make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
keys.null_buffer().cloned(),
0,
keys.buffers().into(),
vec![new_empty_array(value.as_ref()).data().clone()],
))
unsafe {
make_array(ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
keys.null_buffer().cloned(),
0,
keys.buffers().into(),
vec![new_empty_array(value.as_ref()).data().clone()],
))
}
}
DataType::Decimal(_, _) => {
unimplemented!("Creating null Decimal array not yet supported")
Expand All @@ -489,52 +497,63 @@ fn new_null_list_array<OffsetSize: OffsetSizeTrait>(
child_data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(
vec![OffsetSize::zero(); length + 1].to_byte_slice(),
)],
vec![ArrayData::new_empty(child_data_type)],
))
// Safety: buffers are created with the correct length
let data = unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(
vec![OffsetSize::zero(); length + 1].to_byte_slice(),
)],
vec![ArrayData::new_empty(child_data_type)],
)
};
make_array(data)
}

/// Builds an array of `length` entries for a variable-length binary/string
/// `data_type`, passing `length` as the null count.
///
/// The array is assembled from three freshly allocated buffers: a null buffer
/// from `MutableBuffer::new_null(length)`, an offsets buffer holding
/// `length + 1` zero offsets of type `OffsetSize`, and an empty values buffer.
#[inline]
fn new_null_binary_array<OffsetSize: OffsetSizeTrait>(
data_type: &DataType,
length: usize,
) -> ArrayRef {
// NOTE(review): this text is a rendered diff; the `ArrayData::new` call
// directly below looks like the removed (pre-change) side and the
// `new_unchecked` version after it the added side — confirm against the file.
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![
Buffer::from(vec![OffsetSize::zero(); length + 1].to_byte_slice()),
MutableBuffer::new(0).into(),
],
vec![],
))
// Safety: the offsets buffer holds `length + 1` zeroed offsets, so every
// slot is an empty range into the empty values buffer, and the null buffer
// is allocated for `length` slots.
let data = unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![
// `length + 1` offsets, all zero
Buffer::from(vec![OffsetSize::zero(); length + 1].to_byte_slice()),
// zero-capacity values buffer: no value bytes are stored
MutableBuffer::new(0).into(),
],
vec![],
)
};
make_array(data)
}

/// Builds an array of `length` entries for the primitive type `T`, passing
/// `length` as the null count.
///
/// The values buffer is `length * T::get_byte_width()` zero bytes and the null
/// buffer comes from `MutableBuffer::new_null(length)`.
#[inline]
fn new_null_sized_array<T: ArrowPrimitiveType>(
data_type: &DataType,
length: usize,
) -> ArrayRef {
// NOTE(review): this text is a rendered diff; the `ArrayData::new` call
// directly below looks like the removed (pre-change) side and the
// `new_unchecked` version after it the added side — confirm against the file.
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; length * T::get_byte_width()])],
vec![],
))
// Safety: buffers are created with the correct length
// (one zeroed value of `T::get_byte_width()` bytes per slot, and a null
// buffer allocated for `length` slots).
let data = unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; length * T::get_byte_width()])],
vec![],
)
};
make_array(data)
}

/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
Expand Down
20 changes: 11 additions & 9 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,15 +520,17 @@ impl FixedSizeBinaryArray {
}

let size = size.unwrap_or(0);
let array_data = ArrayData::new(
DataType::FixedSizeBinary(size as i32),
len,
None,
Some(null_buf.into()),
0,
vec![buffer.into()],
vec![],
);
let array_data = unsafe {
ArrayData::new_unchecked(
DataType::FixedSizeBinary(size as i32),
len,
None,
Some(null_buf.into()),
0,
vec![buffer.into()],
vec![],
)
};
Ok(FixedSizeBinaryArray::from(array_data))
}

Expand Down
8 changes: 2 additions & 6 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,10 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
}
});

let data = ArrayData::new(
DataType::Boolean,
let data = ArrayData::new_boolean(
data_len,
None,
Some(null_buf.into()),
0,
vec![val_buf.into()],
vec![],
val_buf.into(),
);
BooleanArray::from(data)
}
Expand Down
21 changes: 12 additions & 9 deletions arrow/src/array/array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,18 @@ impl<T: ArrowPrimitiveType> From<ArrayData> for DictionaryArray<T> {
panic!("DictionaryArray's data type must match.")
};
// create a zero-copy of the keys' data
let keys = PrimitiveArray::<T>::from(ArrayData::new(
T::DATA_TYPE,
data.len(),
Some(data.null_count()),
data.null_buffer().cloned(),
data.offset(),
data.buffers().to_vec(),
vec![],
));
let keys_data = unsafe {
ArrayData::new_unchecked(
T::DATA_TYPE,
data.len(),
Some(data.null_count()),
data.null_buffer().cloned(),
data.offset(),
data.buffers().to_vec(),
vec![],
)
};
let keys = PrimitiveArray::<T>::from(keys_data);
let values = make_array(data.child_data()[0].clone());
Self {
data,
Expand Down
26 changes: 7 additions & 19 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// Creates a PrimitiveArray based on an iterator of values without nulls.
///
/// The values are collected into a single `Buffer`; the element count is then
/// derived from that buffer's byte length.
pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
let val_buf: Buffer = iter.into_iter().collect();
// NOTE(review): this text is a rendered diff with both sides fused; the
// `ArrayData::new(` / `T::DATA_TYPE,` lines and the `0,` / `vec![val_buf],` /
// `vec![],` arguments look like the removed side, while
// `ArrayData::new_primitive::<T>(` and the bare `val_buf,` look like the
// added side — confirm against the file.
let data = ArrayData::new(
T::DATA_TYPE,
let data = ArrayData::new_primitive::<T>(
// element count = byte length of the buffer / size of one native value
val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>(),
None,
None,
0,
vec![val_buf],
vec![],
val_buf,
);
PrimitiveArray::from(data)
}
Expand All @@ -140,14 +136,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
pub fn from_value(value: T::Native, count: usize) -> Self {
// Safety: the iterator is `(0..count)` mapped through a closure, so its
// length is known up front, as `from_trusted_len_iter` requires.
let val_buf = unsafe { Buffer::from_trusted_len_iter((0..count).map(|_| value)) };
// NOTE(review): this text is a rendered diff with both sides fused; the
// `ArrayData::new(` / `T::DATA_TYPE,` lines and the `0,` / `vec![val_buf],` /
// `vec![],` arguments look like the removed side, while
// `ArrayData::new_primitive::<T>(` and the bare `val_buf,` look like the
// added side — confirm against the file.
let data = ArrayData::new(
T::DATA_TYPE,
let data = ArrayData::new_primitive::<T>(
// element count = byte length of the buffer / size of one native value
val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>(),
None,
None,
0,
vec![val_buf],
vec![],
val_buf,
);
PrimitiveArray::from(data)
}
Expand Down Expand Up @@ -338,14 +330,10 @@ impl<T: ArrowPrimitiveType, Ptr: Borrow<Option<<T as ArrowPrimitiveType>::Native
})
.collect();

let data = ArrayData::new(
T::DATA_TYPE,
let data = ArrayData::new_primitive::<T>(
null_buf.len(),
None,
Some(null_buf.into()),
0,
vec![buffer],
vec![],
buffer,
);
PrimitiveArray::from(data)
}
Expand All @@ -369,7 +357,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
let (null, buffer) = trusted_len_unzip(iterator);

let data =
ArrayData::new(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]);
ArrayData::new_primitive::<T>(len, Some(null), buffer);
PrimitiveArray::from(data)
}
}
Expand Down
Loading