Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Array for ArrayRef, Improve as_* kernels to take &dyn Array #1129

Merged
merged 3 commits into from
Jan 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,69 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual {
/// A reference-counted reference to a generic `Array`.
pub type ArrayRef = Arc<dyn Array>;

/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
impl Array for ArrayRef {
fn as_any(&self) -> &dyn Any {
self.as_ref().as_any()
}

fn data(&self) -> &ArrayData {
self.as_ref().data()
}

fn data_ref(&self) -> &ArrayData {
self.as_ref().data_ref()
}

fn data_type(&self) -> &DataType {
self.as_ref().data_type()
}

fn slice(&self, offset: usize, length: usize) -> ArrayRef {
self.as_ref().slice(offset, length)
}

fn len(&self) -> usize {
self.as_ref().len()
}

fn is_empty(&self) -> bool {
self.as_ref().is_empty()
}

fn offset(&self) -> usize {
self.as_ref().offset()
}

fn is_null(&self, index: usize) -> bool {
self.as_ref().is_null(index)
}

fn is_valid(&self, index: usize) -> bool {
self.as_ref().is_valid(index)
}

fn null_count(&self) -> usize {
self.as_ref().null_count()
}

fn get_buffer_memory_size(&self) -> usize {
self.as_ref().get_buffer_memory_size()
}

fn get_array_memory_size(&self) -> usize {
self.as_ref().get_array_memory_size()
}

fn to_raw(
&self,
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> {
let data = self.data().clone();
let array = ffi::ArrowArray::try_from(data)?;
Ok(ffi::ArrowArray::into_raw(array))
}
}

/// Constructs an array using the input `data`.
/// Returns a reference-counted `Array` instance.
pub fn make_array(data: ArrayData) -> ArrayRef {
Expand Down Expand Up @@ -843,4 +906,22 @@ mod tests {
expected_size
);
}

/// Test function that takes an &dyn Array
fn compute_my_thing(arr: &dyn Array) -> bool {
!arr.is_empty()
}

#[test]
fn test_array_ref_as_array() {
let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

// works well!
assert!(compute_my_thing(&arr));

// Should also work when wrapped as an ArrayRef
let arr: ArrayRef = Arc::new(arr);
assert!(compute_my_thing(&arr));
assert!(compute_my_thing(arr.as_ref()));
}
}
43 changes: 36 additions & 7 deletions arrow/src/array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::array::*;
use crate::datatypes::*;

/// Force downcast ArrayRef to PrimitiveArray<T>
pub fn as_primitive_array<T>(arr: &ArrayRef) -> &PrimitiveArray<T>
pub fn as_primitive_array<T>(arr: &dyn Array) -> &PrimitiveArray<T>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As ArrayRef now implements Array all previous calls to this function will still compile and we can also pass &dyn Array as well

Copy link
Contributor Author

@alamb alamb Jan 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW this change means that calls that pass in a &ArrayRef will generate an extra dispatch as pointed out by @tustvold on #1128 (comment) -- this can be avoided by the user calling as_primitive_array(arr.as_ref()) or as_primitive_array(&*arr) if it is important to avoid the double dispatch

where
T: ArrowPrimitiveType,
{
Expand All @@ -31,7 +31,7 @@ where
}

/// Force downcast ArrayRef to DictionaryArray<T>
pub fn as_dictionary_array<T>(arr: &ArrayRef) -> &DictionaryArray<T>
pub fn as_dictionary_array<T>(arr: &dyn Array) -> &DictionaryArray<T>
where
T: ArrowDictionaryKeyType,
{
Expand All @@ -41,28 +41,30 @@ where
}

#[doc = "Force downcast ArrayRef to GenericListArray"]
pub fn as_generic_list_array<S: OffsetSizeTrait>(arr: &ArrayRef) -> &GenericListArray<S> {
pub fn as_generic_list_array<S: OffsetSizeTrait>(
arr: &dyn Array,
) -> &GenericListArray<S> {
arr.as_any()
.downcast_ref::<GenericListArray<S>>()
.expect("Unable to downcast to list array")
}

#[doc = "Force downcast ArrayRef to ListArray"]
#[inline]
pub fn as_list_array(arr: &ArrayRef) -> &ListArray {
pub fn as_list_array(arr: &dyn Array) -> &ListArray {
as_generic_list_array::<i32>(arr)
}

#[doc = "Force downcast ArrayRef to LargeListArray"]
#[inline]
pub fn as_large_list_array(arr: &ArrayRef) -> &LargeListArray {
pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray {
as_generic_list_array::<i64>(arr)
}

#[doc = "Force downcast ArrayRef to GenericBinaryArray"]
#[inline]
pub fn as_generic_binary_array<S: BinaryOffsetSizeTrait>(
arr: &ArrayRef,
arr: &dyn Array,
) -> &GenericBinaryArray<S> {
arr.as_any()
.downcast_ref::<GenericBinaryArray<S>>()
Expand All @@ -73,7 +75,7 @@ macro_rules! array_downcast_fn {
($name: ident, $arrty: ty, $arrty_str:expr) => {
#[doc = "Force downcast ArrayRef to "]
#[doc = $arrty_str]
pub fn $name(arr: &ArrayRef) -> &$arrty {
pub fn $name(arr: &dyn Array) -> &$arrty {
arr.as_any().downcast_ref::<$arrty>().expect(concat!(
"Unable to downcast to typed array through ",
stringify!($name)
Expand All @@ -93,3 +95,30 @@ array_downcast_fn!(as_boolean_array, BooleanArray);
array_downcast_fn!(as_null_array, NullArray);
array_downcast_fn!(as_struct_array, StructArray);
array_downcast_fn!(as_union_array, UnionArray);

#[cfg(test)]
mod tests {
use std::sync::Arc;

use super::*;

#[test]
fn test_as_primitive_array_ref() {
let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
assert!(!as_primitive_array::<Int32Type>(&array).is_empty());

// should also work when wrapped in an Arc
let array: ArrayRef = Arc::new(array);
assert!(!as_primitive_array::<Int32Type>(&array).is_empty());
}

#[test]
fn test_as_string_array_ref() {
let array: StringArray = vec!["foo", "bar"].into_iter().map(Some).collect();
assert!(!as_string_array(&array).is_empty());

// should also work when wrapped in an Arc
let array: ArrayRef = Arc::new(array);
assert!(!as_string_array(&array).is_empty())
}
}
6 changes: 6 additions & 0 deletions arrow/src/array/equal_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,12 @@ impl PartialEq<Value> for NullArray {
}
}

impl JsonEqual for ArrayRef {
fn equals_json(&self, json: &[&Value]) -> bool {
self.as_ref().equals_json(json)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
12 changes: 1 addition & 11 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use pyo3::import_exception;
use pyo3::prelude::*;
use pyo3::types::PyList;

use crate::array::{make_array, Array, ArrayData, ArrayRef};
use crate::array::{Array, ArrayData, ArrayRef};
use crate::datatypes::{DataType, Field, Schema};
use crate::error::ArrowError;
use crate::ffi;
Expand Down Expand Up @@ -148,16 +148,6 @@ impl PyArrowConvert for ArrayData {
}
}

impl PyArrowConvert for ArrayRef {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is now covered by the impl of Array for ArrayRef

fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
Ok(make_array(ArrayData::from_pyarrow(value)?))
}

fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
self.data().to_pyarrow(py)
}
}

impl<T> PyArrowConvert for T
where
T: Array + From<ArrayData>,
Expand Down