Skip to content

Commit

Permalink
chore: add docs, part of #37 (apache#6433)
Browse files Browse the repository at this point in the history
* chore: add docs, part of #37

- add the crate-level lint attribute `#![warn(missing_docs)]` to the following crates:
  - `arrow-array`
  - `arrow-cast`
  - `arrow-csv`
  - `arrow-data`
  - `arrow-json`
  - `arrow-ord`
  - `arrow-pyarrow-integration-testing`
  - `arrow-row`
  - `arrow-schema`
  - `arrow-select`
  - `arrow-string`
  - `arrow`
  - `parquet_derive`

- add docs to those that generated lint warnings

- Remove `bitflags` workaround in `arrow-schema`
At some point, a change in `bitflags v2.3.0` started
generating lint warnings in `arrow-schema`.

This was handled using a
[workaround](apache#4233)

[Issue](bitflags/bitflags#356)

`bitflags v2.3.1` fixed the issue, so the
workaround is no longer needed.

* fix: resolve comments on PR apache#6433
  • Loading branch information
ByteBaker authored Sep 23, 2024
1 parent 7191f4d commit de6a759
Show file tree
Hide file tree
Showing 30 changed files with 147 additions and 35 deletions.
2 changes: 1 addition & 1 deletion arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ mod tests {
write!(builder, "buz").unwrap();
builder.append_value("");
let a = builder.finish();
let r: Vec<_> = a.iter().map(|x| x.unwrap()).collect();
let r: Vec<_> = a.iter().flatten().collect();
assert_eq!(r, &["foo", "bar\n", "fizbuz"])
}
}
12 changes: 7 additions & 5 deletions arrow-array/src/builder/generic_bytes_view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,8 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {

#[cfg(test)]
mod tests {
use core::str;

use super::*;
use crate::Array;

Expand Down Expand Up @@ -642,7 +644,7 @@ mod tests {
let array = v.finish_cloned();
array.to_data().validate_full().unwrap();
assert_eq!(array.data_buffers().len(), 5);
let actual: Vec<_> = array.iter().map(Option::unwrap).collect();
let actual: Vec<_> = array.iter().flatten().collect();
assert_eq!(
actual,
&[
Expand Down Expand Up @@ -692,13 +694,13 @@ mod tests {
let mut exp_builder = StringViewBuilder::new();
let mut fixed_builder = StringViewBuilder::new().with_fixed_block_size(STARTING_BLOCK_SIZE);

let long_string = String::from_utf8(vec![b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();
let long_string = str::from_utf8(&[b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();

for i in 0..9 {
// 8k, 16k, 32k, 64k, 128k, 256k, 512k, 1M, 2M
for _ in 0..(2_u32.pow(i)) {
exp_builder.append_value(&long_string);
fixed_builder.append_value(&long_string);
exp_builder.append_value(long_string);
fixed_builder.append_value(long_string);
}
exp_builder.flush_in_progress();
fixed_builder.flush_in_progress();
Expand All @@ -721,7 +723,7 @@ mod tests {
}

// Add one more value, and the buffer stop growing.
exp_builder.append_value(&long_string);
exp_builder.append_value(long_string);
exp_builder.flush_in_progress();
assert_eq!(
exp_builder.completed.last().unwrap().capacity(),
Expand Down
2 changes: 1 addition & 1 deletion arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9394,7 +9394,7 @@ mod tests {
Some(vec![Some(0), None, Some(2)]),
]);
let a = cast_with_options(&array, &DataType::Utf8, &options).unwrap();
let r: Vec<_> = a.as_string::<i32>().iter().map(|x| x.unwrap()).collect();
let r: Vec<_> = a.as_string::<i32>().iter().flatten().collect();
assert_eq!(r, &["[0, 1, 2]", "[0, null, 2]"]);
}
#[test]
Expand Down
5 changes: 3 additions & 2 deletions arrow-csv/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ mod tests {
};
use arrow_array::types::*;
use arrow_buffer::i256;
use core::str;
use std::io::{Cursor, Read, Seek};
use std::sync::Arc;

Expand Down Expand Up @@ -508,7 +509,7 @@ Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
"#;
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
assert_eq!(expected, str::from_utf8(&buffer).unwrap());
}

#[test]
Expand Down Expand Up @@ -558,7 +559,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
,
0.290472,0.290472
"#;
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
assert_eq!(expected, str::from_utf8(&buffer).unwrap());
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions arrow-data/src/byte_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub struct ByteView {

impl ByteView {
#[inline(always)]
/// Convert `ByteView` to `u128` by concatenating the fields
pub fn as_u128(self) -> u128 {
(self.length as u128)
| ((self.prefix as u128) << 32)
Expand Down
41 changes: 31 additions & 10 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ pub struct ArrayData {
nulls: Option<NullBuffer>,
}

/// A shared, reference-counted ([`Arc`]) handle to an [`ArrayData`].
pub type ArrayDataRef = Arc<ArrayData>;

impl ArrayData {
Expand Down Expand Up @@ -1747,7 +1748,12 @@ pub enum BufferSpec {
/// for array slicing and interoperability with `Vec`, which cannot be over-aligned.
///
/// Note that these alignment requirements will vary between architectures
FixedWidth { byte_width: usize, alignment: usize },
FixedWidth {
/// The width of each element in bytes
byte_width: usize,
/// The alignment required by Rust for an array of the corresponding primitive
alignment: usize,
},
/// Variable width, such as string data for utf8 data
VariableWidth,
/// Buffer holds a bitmap.
Expand Down Expand Up @@ -1783,6 +1789,7 @@ pub struct ArrayDataBuilder {

impl ArrayDataBuilder {
#[inline]
/// Creates a new array data builder
pub const fn new(data_type: DataType) -> Self {
Self {
data_type,
Expand All @@ -1796,61 +1803,72 @@ impl ArrayDataBuilder {
}
}

/// Returns this builder with its data type replaced by `data_type`; every
/// other field is carried over unchanged
pub fn data_type(self, data_type: DataType) -> Self {
    let mut builder = self;
    builder.data_type = data_type;
    builder
}

/// Sets the length of the [ArrayData] to `n`
///
/// When a `null_bit_buffer` has been supplied, `build_impl` also passes this
/// length to `BooleanBuffer::new` while constructing the null buffer.
// NOTE(review): the lint is presumably silenced because `len` is a builder
// setter here, not a size accessor that would warrant an `is_empty` — confirm.
#[inline]
#[allow(clippy::len_without_is_empty)]
pub const fn len(mut self, n: usize) -> Self {
self.len = n;
self
}

/// Sets the null buffer of the [ArrayData]
///
/// Also resets any previously configured `null_count` and `null_bit_buffer`
/// to `None`, so the supplied [`NullBuffer`] is the only null information
/// left on the builder.
pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
self.nulls = nulls;
self.null_count = None;
self.null_bit_buffer = None;
self
}

/// Sets the null count of the [ArrayData]
///
/// The value is stored as `Some(null_count)`. Note that a later call to
/// [`Self::nulls`] resets it to `None`.
pub fn null_count(mut self, null_count: usize) -> Self {
self.null_count = Some(null_count);
self
}

/// Sets the `null_bit_buffer` of the [ArrayData]
///
/// Also clears any [`NullBuffer`] previously supplied via [`Self::nulls`].
/// A previously set `null_count` is left untouched.
pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
self.nulls = None;
self.null_bit_buffer = buf;
self
}

/// Sets the offset of the [ArrayData] to `n`
///
/// When a `null_bit_buffer` has been supplied, `build_impl` also passes this
/// offset to `BooleanBuffer::new` while constructing the null buffer.
#[inline]
pub const fn offset(mut self, n: usize) -> Self {
self.offset = n;
self
}

/// Sets the buffers of the [ArrayData]
///
/// This replaces, rather than extends, any buffers already held by the builder.
pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
self.buffers = v;
self
}

/// Adds a single buffer to the [ArrayData]'s buffers
///
/// The buffer is appended after any buffers already held by the builder.
pub fn add_buffer(mut self, b: Buffer) -> Self {
self.buffers.push(b);
self
}

pub fn add_buffers(mut self, bs: Vec<Buffer>) -> Self {
/// Adds multiple buffers to the [ArrayData]'s buffers
pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
self.buffers.extend(bs);
self
}

/// Sets the child data of the [ArrayData]
///
/// This replaces, rather than extends, any child data already held by the builder.
pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
self.child_data = v;
self
}

/// Adds a single child data to the [ArrayData]'s child data
pub fn add_child_data(mut self, r: ArrayData) -> Self {
self.child_data.push(r);
self
Expand All @@ -1873,22 +1891,25 @@ impl ArrayDataBuilder {

/// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag
unsafe fn build_impl(self) -> ArrayData {
let nulls = self.nulls.or_else(|| {
let buffer = self.null_bit_buffer?;
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
Some(match self.null_count {
Some(n) => NullBuffer::new_unchecked(buffer, n),
None => NullBuffer::new(buffer),
let nulls = self
.nulls
.or_else(|| {
let buffer = self.null_bit_buffer?;
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
Some(match self.null_count {
Some(n) => NullBuffer::new_unchecked(buffer, n),
None => NullBuffer::new(buffer),
})
})
});
.filter(|b| b.null_count() != 0);

ArrayData {
data_type: self.data_type,
len: self.len,
offset: self.offset,
buffers: self.buffers,
child_data: self.child_data,
nulls: nulls.filter(|b| b.null_count() != 0),
nulls,
}
}

Expand Down
4 changes: 4 additions & 0 deletions arrow-data/src/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
// specific language governing permissions and limitations
// under the License.

//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions.
//!
//! Also provides functions to validate if a given decimal value is within the valid range of the decimal type.
use arrow_buffer::i256;
use arrow_schema::ArrowError;

Expand Down
1 change: 1 addition & 0 deletions arrow-data/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//!
//! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array)
#![warn(missing_docs)]
mod data;
pub use data::*;

Expand Down
5 changes: 5 additions & 0 deletions arrow-data/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
// specific language governing permissions and limitations
// under the License.

//! Low-level array data abstractions.
//!
//! Provides utilities for creating, manipulating, and converting Arrow arrays
//! made of primitive types, strings, and nested types.
use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView};
use crate::bit_mask::set_bits;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,7 @@ mod tests {
let map_values = map.values().as_list::<i32>();
assert_eq!(map.value_offsets(), &[0, 1, 3, 5]);

let k: Vec<_> = map_keys.iter().map(|x| x.unwrap()).collect();
let k: Vec<_> = map_keys.iter().flatten().collect();
assert_eq!(&k, &["a", "a", "b", "c", "a"]);

let list_values = map_values.values().as_string::<i32>();
Expand Down
1 change: 1 addition & 0 deletions arrow-ord/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
//! ```
//!
#![warn(missing_docs)]
pub mod cmp;
#[doc(hidden)]
pub mod comparison;
Expand Down
2 changes: 2 additions & 0 deletions arrow-ord/src/rank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

//! Provides `rank` function to assign a rank to each value in an array
use arrow_array::cast::AsArray;
use arrow_array::types::*;
use arrow_array::{downcast_primitive_array, Array, ArrowNativeTypeOp, GenericByteArray};
Expand Down
2 changes: 2 additions & 0 deletions arrow-ord/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,9 @@ where
/// One column to be used in lexicographical sort
///
/// Pairs the array holding the column's values with the (optional) options
/// controlling how that column is ordered.
#[derive(Clone, Debug)]
pub struct SortColumn {
/// The column (array of values) to sort
pub values: ArrayRef,
/// Sort options for this column
// NOTE(review): `None` presumably means "use the default `SortOptions`" —
// confirm against the lexicographical sort implementation.
pub options: Option<SortOptions>,
}

Expand Down
1 change: 1 addition & 0 deletions arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! This library demonstrates a minimal usage of Rust's C data interface to pass
//! arrays from and to Python.
#![warn(missing_docs)]
use std::sync::Arc;

use arrow::array::new_empty_array;
Expand Down
1 change: 1 addition & 0 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
//! [compared]: PartialOrd
//! [compare]: PartialOrd
#![warn(missing_docs)]
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
Expand Down
4 changes: 3 additions & 1 deletion arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,13 @@ pub enum IntervalUnit {
MonthDayNano,
}

// Sparse or Dense union layouts
/// Sparse or Dense union layouts
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum UnionMode {
/// Sparse union layout
Sparse,
/// Dense union layout
Dense,
}

Expand Down
16 changes: 16 additions & 0 deletions arrow-schema/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,39 @@ use std::error::Error;
pub enum ArrowError {
/// Returned when functionality is not yet available.
NotYetImplemented(String),
/// Wraps an external error (any boxed `Error + Send + Sync`).
ExternalError(Box<dyn Error + Send + Sync>),
/// Error during casting from one type to another.
CastError(String),
/// Memory or buffer error.
MemoryError(String),
/// Error during parsing from a string.
ParseError(String),
/// Error during schema-related operations.
SchemaError(String),
/// Error during computation.
ComputeError(String),
/// Error when a division by zero is attempted.
DivideByZero,
/// Error when an arithmetic operation overflows.
ArithmeticOverflow(String),
/// Error during CSV-related operations.
CsvError(String),
/// Error during JSON-related operations.
JsonError(String),
/// Error during IO operations; holds a `String` together with the
/// underlying [`std::io::Error`].
IoError(String, std::io::Error),
/// Error during IPC operations in `arrow-ipc` or `arrow-flight`.
IpcError(String),
/// Error indicating that an unexpected or bad argument was passed to a function.
InvalidArgumentError(String),
/// Error during Parquet operations.
ParquetError(String),
/// Error during import or export to/from the C Data Interface.
CDataInterface(String),
/// Error when a dictionary key is bigger than the key type.
DictionaryKeyOverflowError,
/// Error when the run-end index in a run-end encoded (REE) array is bigger
/// than the array length.
RunEndIndexOverflowError,
}

Expand Down
Loading

0 comments on commit de6a759

Please sign in to comment.