Skip to content

Commit

Permalink
ARROW-5721: [Rust] Move array related code into a separate module
Browse files Browse the repository at this point in the history
This moves all array-related code into a separate module `array` and re-exports its public interfaces.
This allows us to:
1. Split array related implementations into multiple files (e.g., equality, display, etc).
2. Via re-exporting, we can keep the common structs/traits under the `arrow::array` namespace. So,
instead of saying:
```rust
use arrow::builder::Int32Builder;
```
We can say:
```rust
use arrow::array::Int32Builder;
```
which is more accurate.

Author: Chao Sun <[email protected]>
Author: Chao Sun <[email protected]>

Closes #4687 from sunchao/ARROW-5721 and squashes the following commits:

9cf953206 <Chao Sun> Fix lint
266d36e6d <Chao Sun> Fix array_data and test warnings
14b8fcb0c <Chao Sun> Rename array_data.rs to data.rs
1083d3660 <Chao Sun>  Move array related code into a separate module
  • Loading branch information
sunchao authored and kszucs committed Jun 26, 2019
1 parent 51b5a7e commit 945084b
Show file tree
Hide file tree
Showing 19 changed files with 162 additions and 136 deletions.
2 changes: 1 addition & 1 deletion rust/arrow/examples/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
///! Many builders are available to easily create different types of arrow arrays
extern crate arrow;

use arrow::builder::Int32Builder;
use arrow::array::Int32Builder;

fn main() {
// Primitive Arrays
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/examples/dynamic_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn main() -> Result<()> {
let nested = StructArray::from(vec![
(
Field::new("a", DataType::Utf8, false),
Arc::new(BinaryArray::from(vec!["a", "b", "c", "d", "e"])) as Arc<Array>,
Arc::new(BinaryArray::from(vec!["a", "b", "c", "d", "e"])) as Arc<dyn Array>,
),
(
Field::new("b", DataType::Float64, false),
Expand Down
69 changes: 2 additions & 67 deletions rust/arrow/src/array.rs → rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,45 +15,6 @@
// specific language governing permissions and limitations
// under the License.

//! Defines public types representing Apache Arrow arrays. Arrow's specification defines
//! an array as "a sequence of values with known length all having the same type." For
//! example, the type `Int16Array` represents an Apache Arrow array of 16-bit integers.
//!
//! ```
//! extern crate arrow;
//!
//! use arrow::array::Int16Array;
//!
//! // Create a new builder with a capacity of 100
//! let mut builder = Int16Array::builder(100);
//!
//! // Append a single primitive value
//! builder.append_value(1).unwrap();
//!
//! // Append a null value
//! builder.append_null().unwrap();
//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]).unwrap();
//!
//! // Build the array
//! let array = builder.finish();
//!
//! assert_eq!(
//! 5,
//! array.len(),
//! "The array has 5 values, counting the null value"
//! );
//!
//! assert_eq!(2, array.value(2), "Get the value with index 2");
//!
//! assert_eq!(
//! array.value_slice(3, 2),
//! &[3, 4],
//! "Get slice of len 2 starting at idx 3"
//! )
//! ```
use std::any::Any;
use std::convert::From;
use std::fmt;
Expand All @@ -63,9 +24,8 @@ use std::sync::Arc;

use chrono::prelude::*;

use crate::array_data::{ArrayData, ArrayDataRef};
use super::*;
use crate::buffer::{Buffer, MutableBuffer};
use crate::builder::*;
use crate::datatypes::*;
use crate::memory;
use crate::util::bit_util;
Expand Down Expand Up @@ -131,7 +91,7 @@ pub type ArrayRef = Arc<Array>;

/// Constructs an array using the input `data`. Returns a reference-counted `Array`
/// instance.
pub(crate) fn make_array(data: ArrayDataRef) -> ArrayRef {
pub fn make_array(data: ArrayDataRef) -> ArrayRef {
// TODO: here data_type() needs to clone the type - maybe add a type tag enum to
// avoid the cloning.
match data.data_type().clone() {
Expand Down Expand Up @@ -234,30 +194,6 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
raw_values: RawPtrBox<T::Native>,
}

pub type BooleanArray = PrimitiveArray<BooleanType>;
pub type Int8Array = PrimitiveArray<Int8Type>;
pub type Int16Array = PrimitiveArray<Int16Type>;
pub type Int32Array = PrimitiveArray<Int32Type>;
pub type Int64Array = PrimitiveArray<Int64Type>;
pub type UInt8Array = PrimitiveArray<UInt8Type>;
pub type UInt16Array = PrimitiveArray<UInt16Type>;
pub type UInt32Array = PrimitiveArray<UInt32Type>;
pub type UInt64Array = PrimitiveArray<UInt64Type>;
pub type Float32Array = PrimitiveArray<Float32Type>;
pub type Float64Array = PrimitiveArray<Float64Type>;

pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
pub type Date32Array = PrimitiveArray<Date32Type>;
pub type Date64Array = PrimitiveArray<Date64Type>;
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
// TODO add interval

impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
fn as_any(&self) -> &Any {
self
Expand Down Expand Up @@ -1041,7 +977,6 @@ mod tests {
use std::sync::Arc;
use std::thread;

use crate::array_data::ArrayData;
use crate::buffer::Buffer;
use crate::datatypes::{DataType, Field};
use crate::memory;
Expand Down
47 changes: 0 additions & 47 deletions rust/arrow/src/builder.rs → rust/arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use std::mem;
use std::sync::Arc;

use crate::array::*;
use crate::array_data::ArrayData;
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
Expand All @@ -38,29 +37,6 @@ pub struct BufferBuilder<T: ArrowPrimitiveType> {
_marker: PhantomData<T>,
}

pub type BooleanBufferBuilder = BufferBuilder<BooleanType>;
pub type Int8BufferBuilder = BufferBuilder<Int8Type>;
pub type Int16BufferBuilder = BufferBuilder<Int16Type>;
pub type Int32BufferBuilder = BufferBuilder<Int32Type>;
pub type Int64BufferBuilder = BufferBuilder<Int64Type>;
pub type UInt8BufferBuilder = BufferBuilder<UInt8Type>;
pub type UInt16BufferBuilder = BufferBuilder<UInt16Type>;
pub type UInt32BufferBuilder = BufferBuilder<UInt32Type>;
pub type UInt64BufferBuilder = BufferBuilder<UInt64Type>;
pub type Float32BufferBuilder = BufferBuilder<Float32Type>;
pub type Float64BufferBuilder = BufferBuilder<Float64Type>;

pub type TimestampSecondBufferBuilder = BufferBuilder<TimestampSecondType>;
pub type TimestampMillisecondBufferBuilder = BufferBuilder<TimestampMillisecondType>;
pub type TimestampMicrosecondBufferBuilder = BufferBuilder<TimestampMicrosecondType>;
pub type TimestampNanosecondBufferBuilder = BufferBuilder<TimestampNanosecondType>;
pub type Date32BufferBuilder = BufferBuilder<Date32Type>;
pub type Date64BufferBuilder = BufferBuilder<Date64Type>;
pub type Time32SecondBufferBuilder = BufferBuilder<Time32SecondType>;
pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;

// Trait for buffer builder. This is used mainly to offer separate implementations for
// numeric types and boolean types, while still be able to call methods on buffer builder
// with generic primitive type.
Expand Down Expand Up @@ -255,29 +231,6 @@ pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
bitmap_builder: BooleanBufferBuilder,
}

pub type BooleanBuilder = PrimitiveBuilder<BooleanType>;
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
pub type Date64Builder = PrimitiveBuilder<Date64Type>;
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
/// Returns the builder as an non-mutable `Any` reference.
fn as_any(&self) -> &Any {
Expand Down
File renamed without changes.
152 changes: 152 additions & 0 deletions rust/arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Defines public types representing Apache Arrow arrays. Arrow's specification defines
//! an array as "a sequence of values with known length all having the same type." For
//! example, the type `Int16Array` represents an Apache Arrow array of 16-bit integers.
//!
//! ```
//! extern crate arrow;
//!
//! use arrow::array::Int16Array;
//!
//! // Create a new builder with a capacity of 100
//! let mut builder = Int16Array::builder(100);
//!
//! // Append a single primitive value
//! builder.append_value(1).unwrap();
//!
//! // Append a null value
//! builder.append_null().unwrap();
//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]).unwrap();
//!
//! // Build the array
//! let array = builder.finish();
//!
//! assert_eq!(
//! 5,
//! array.len(),
//! "The array has 5 values, counting the null value"
//! );
//!
//! assert_eq!(2, array.value(2), "Get the value with index 2");
//!
//! assert_eq!(
//! array.value_slice(3, 2),
//! &[3, 4],
//! "Get slice of len 2 starting at idx 3"
//! )
//! ```
mod array;
mod builder;
mod data;

pub use self::array::Array;
pub use self::array::ArrayRef;
pub use self::data::ArrayData;
pub use self::data::ArrayDataBuilder;
pub use self::data::ArrayDataRef;

use crate::datatypes::*;

pub use self::builder::BufferBuilder;
pub use self::builder::BufferBuilderTrait;

// Convenience aliases that pin `BufferBuilder<T>` to one concrete Arrow primitive type
// each, so callers can write e.g. `Int32BufferBuilder` instead of
// `BufferBuilder<Int32Type>`. The type parameters come from `crate::datatypes`.

// Boolean, signed/unsigned integer and floating-point buffer builders.
pub type BooleanBufferBuilder = BufferBuilder<BooleanType>;
pub type Int8BufferBuilder = BufferBuilder<Int8Type>;
pub type Int16BufferBuilder = BufferBuilder<Int16Type>;
pub type Int32BufferBuilder = BufferBuilder<Int32Type>;
pub type Int64BufferBuilder = BufferBuilder<Int64Type>;
pub type UInt8BufferBuilder = BufferBuilder<UInt8Type>;
pub type UInt16BufferBuilder = BufferBuilder<UInt16Type>;
pub type UInt32BufferBuilder = BufferBuilder<UInt32Type>;
pub type UInt64BufferBuilder = BufferBuilder<UInt64Type>;
pub type Float32BufferBuilder = BufferBuilder<Float32Type>;
pub type Float64BufferBuilder = BufferBuilder<Float64Type>;

// Timestamp, date and time buffer builders, one alias per unit/width variant.
pub type TimestampSecondBufferBuilder = BufferBuilder<TimestampSecondType>;
pub type TimestampMillisecondBufferBuilder = BufferBuilder<TimestampMillisecondType>;
pub type TimestampMicrosecondBufferBuilder = BufferBuilder<TimestampMicrosecondType>;
pub type TimestampNanosecondBufferBuilder = BufferBuilder<TimestampNanosecondType>;
pub type Date32BufferBuilder = BufferBuilder<Date32Type>;
pub type Date64BufferBuilder = BufferBuilder<Date64Type>;
pub type Time32SecondBufferBuilder = BufferBuilder<Time32SecondType>;
pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;

pub use self::builder::PrimitiveBuilder;
// Convenience aliases that pin `PrimitiveBuilder<T>` to one concrete Arrow primitive
// type each (e.g. `Int32Builder` is `PrimitiveBuilder<Int32Type>`). These previously
// lived in `builder.rs`; defining them here exposes them under `arrow::array`.

// Boolean, signed/unsigned integer and floating-point array builders.
pub type BooleanBuilder = PrimitiveBuilder<BooleanType>;
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

// Timestamp, date and time array builders, one alias per unit/width variant.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
pub type Date64Builder = PrimitiveBuilder<Date64Type>;
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

pub use self::builder::BinaryBuilder;
pub use self::builder::ListBuilder;
pub use self::builder::StructBuilder;

pub use self::array::BinaryArray;
pub use self::array::ListArray;
pub use self::array::PrimitiveArray;
pub use self::array::StructArray;

pub(crate) use self::array::make_array;

// Convenience aliases that pin `PrimitiveArray<T>` to one concrete Arrow primitive type
// each (e.g. `Int16Array` is `PrimitiveArray<Int16Type>`). These previously lived in
// `array.rs`; defining them here keeps them reachable as `arrow::array::*`.

// Boolean, signed/unsigned integer and floating-point arrays.
pub type BooleanArray = PrimitiveArray<BooleanType>;
pub type Int8Array = PrimitiveArray<Int8Type>;
pub type Int16Array = PrimitiveArray<Int16Type>;
pub type Int32Array = PrimitiveArray<Int32Type>;
pub type Int64Array = PrimitiveArray<Int64Type>;
pub type UInt8Array = PrimitiveArray<UInt8Type>;
pub type UInt16Array = PrimitiveArray<UInt16Type>;
pub type UInt32Array = PrimitiveArray<UInt32Type>;
pub type UInt64Array = PrimitiveArray<UInt64Type>;
pub type Float32Array = PrimitiveArray<Float32Type>;
pub type Float64Array = PrimitiveArray<Float64Type>;

// Timestamp, date and time arrays, one alias per unit/width variant.
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
pub type Date32Array = PrimitiveArray<Date32Type>;
pub type Date64Array = PrimitiveArray<Date64Type>;
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
// TODO add interval
2 changes: 1 addition & 1 deletion rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use std::ops::{BitAnd, BitOr, Not};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::Arc;

use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder};
use crate::array::{BufferBuilderTrait, UInt8BufferBuilder};
use crate::datatypes::ArrowNativeType;
use crate::error::{ArrowError, Result};
use crate::memory;
Expand Down
1 change: 0 additions & 1 deletion rust/arrow/src/compute/array_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ pub fn limit(array: &ArrayRef, num_elements: usize) -> Result<ArrayRef> {
mod tests {
use super::*;
use crate::array::*;
use crate::array_data::ArrayData;
use crate::buffer::Buffer;
use crate::datatypes::{Field, ToByteSlice};
use crate::util::bit_util;
Expand Down
2 changes: 0 additions & 2 deletions rust/arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ use std::sync::Arc;
use num::Zero;

use crate::array::*;
use crate::array_data::ArrayData;
use crate::buffer::MutableBuffer;
use crate::builder::PrimitiveBuilder;
use crate::compute::util::apply_bin_op_to_option_bitmap;
use crate::datatypes;
use crate::error::{ArrowError, Result};
Expand Down
3 changes: 1 addition & 2 deletions rust/arrow/src/compute/kernels/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
use std::sync::Arc;

use crate::array::{Array, BooleanArray};
use crate::array_data::ArrayData;
use crate::array::{Array, ArrayData, BooleanArray};
use crate::buffer::Buffer;
use crate::compute::util::apply_bin_op_to_option_bitmap;
use crate::datatypes::DataType;
Expand Down
2 changes: 0 additions & 2 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@
use std::sync::Arc;

use crate::array::*;
use crate::array_data::ArrayData;
use crate::buffer::Buffer;
use crate::builder::*;
use crate::compute::kernels::arithmetic::{divide, multiply};
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
Expand Down
2 changes: 0 additions & 2 deletions rust/arrow/src/compute/kernels/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
use std::sync::Arc;

use crate::array::*;
use crate::array_data::ArrayData;
use crate::builder::{BooleanBufferBuilder, BufferBuilderTrait};
use crate::compute::util::apply_bin_op_to_option_bitmap;
use crate::datatypes::{ArrowNumericType, BooleanType, DataType};
use crate::error::{ArrowError, Result};
Expand Down
Loading

0 comments on commit 945084b

Please sign in to comment.