Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move functional accessors to core module #151

Merged
merged 4 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 1 addition & 104 deletions arro3-compute/python/arro3/compute/_compute.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Sequence, overload
from typing import overload

# Note: importing with
# `from arro3.core import Array`
Expand Down Expand Up @@ -54,109 +54,6 @@ def dictionary_encode(
The dictionary-encoded array.
"""

@overload
def dictionary_dictionary(array: types.ArrowArrayExportable) -> core.Array: ...
@overload
def dictionary_dictionary(array: types.ArrowStreamExportable) -> core.ArrayReader: ...
def dictionary_dictionary(
array: types.ArrowArrayExportable | types.ArrowStreamExportable,
) -> core.Array | core.ArrayReader:
"""
Access the `dictionary` of a dictionary array.

This is equivalent to the [`.dictionary`][pyarrow.DictionaryArray.dictionary]
attribute on a PyArrow [DictionaryArray][pyarrow.DictionaryArray].

Args:
array: Argument to compute function.

Returns:
The dictionary (values) of a dictionary-encoded array.
"""

@overload
def dictionary_indices(array: types.ArrowArrayExportable) -> core.Array: ...
@overload
def dictionary_indices(array: types.ArrowStreamExportable) -> core.ArrayReader: ...
def dictionary_indices(
array: types.ArrowArrayExportable | types.ArrowStreamExportable,
) -> core.Array | core.ArrayReader:
"""
Access the indices of a dictionary array.

This is equivalent to the [`.indices`][pyarrow.DictionaryArray.indices]
attribute on a PyArrow [DictionaryArray][pyarrow.DictionaryArray].

Args:
array: Argument to compute function.

Returns:
The indices of a dictionary-encoded array.
"""

@overload
def list_flatten(input: types.ArrowArrayExportable) -> core.Array: ...
@overload
def list_flatten(input: types.ArrowStreamExportable) -> core.ArrayReader: ...
def list_flatten(
input: types.ArrowArrayExportable | types.ArrowStreamExportable,
) -> core.Array | core.ArrayReader:
"""Unnest this ListArray, LargeListArray or FixedSizeListArray.

Args:
input: Input data.

Raises:
Exception if not a list-typed array.

Returns:
The flattened Arrow data.
"""

@overload
def list_offsets(
input: types.ArrowArrayExportable, *, logical: bool = True
) -> core.Array: ...
@overload
def list_offsets(
input: types.ArrowStreamExportable, *, logical: bool = True
) -> core.ArrayReader: ...
def list_offsets(
input: types.ArrowArrayExportable | types.ArrowStreamExportable,
*,
logical: bool = True,
) -> core.Array | core.ArrayReader:
"""Access the offsets of this ListArray or LargeListArray

Args:
input: Input data.
logical: If False, return the physical (unsliced) offsets of the provided list array. If True, adjust the list offsets for the current array slicing. Defaults to `True`.

Raises:
Exception if not a list-typed array.

Returns:
The offsets of the input list array.
"""

def struct_field(
values: types.ArrowArrayExportable,
/,
indices: int | Sequence[int],
) -> core.Array:
"""Access a column within a StructArray by index

Args:
values: Argument to compute function.
indices: A single index, or a list of indices for chained field lookup. For example, `[4, 1]` will look up the second nested field in the fifth outer field.

Raises:
Exception if not a struct-typed array.

Returns:
The selected field (column) of the struct array.
"""

def take(
values: types.ArrowArrayExportable, indices: types.ArrowArrayExportable
) -> core.Array:
Expand Down
6 changes: 0 additions & 6 deletions arro3-compute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ use pyo3::prelude::*;
mod cast;
mod concat;
mod dictionary;
mod list_flatten;
mod list_offsets;
mod struct_field;
mod take;

const VERSION: &str = env!("CARGO_PKG_VERSION");
Expand All @@ -24,9 +21,6 @@ fn _compute(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(dictionary::dictionary_dictionary))?;
m.add_wrapped(wrap_pyfunction!(dictionary::dictionary_encode))?;
m.add_wrapped(wrap_pyfunction!(dictionary::dictionary_indices))?;
m.add_wrapped(wrap_pyfunction!(list_flatten::list_flatten))?;
m.add_wrapped(wrap_pyfunction!(list_offsets::list_offsets))?;
m.add_wrapped(wrap_pyfunction!(struct_field::struct_field))?;
m.add_wrapped(wrap_pyfunction!(take::take))?;

Ok(())
Expand Down
32 changes: 2 additions & 30 deletions arro3-core/python/arro3/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,4 @@
from ._core import (
Array,
ArrayReader,
ChunkedArray,
DataType,
Field,
RecordBatch,
RecordBatchReader,
Schema,
Table,
fixed_size_list_array,
list_array,
struct_array,
___version, # noqa,
)
from ._core import *
from ._core import ___version

__version__: str = ___version()

__all__ = (
"Array",
"ArrayReader",
"ChunkedArray",
"DataType",
"Field",
"RecordBatch",
"RecordBatchReader",
"Schema",
"Table",
"fixed_size_list_array",
"list_array",
"struct_array",
)
144 changes: 127 additions & 17 deletions arro3-core/python/arro3/core/_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class Array:
/,
type: ArrowSchemaExportable | None = None,
) -> None:
"""Create arro3.core.Array instance from a sequence of Python objects.
"""Create arro3.Array instance from a sequence of Python objects.

Args:
obj: A sequence of input objects.
Expand Down Expand Up @@ -119,7 +119,7 @@ class ArrayReader:
"""A stream of Arrow `Array`s.

This is similar to the [`RecordBatchReader`][arro3.core.RecordBatchReader] but each
item yielded from the stream is an [`Array`][arro3.core.Array], not a
item yielded from the stream is an [`Array`][arro3.Array], not a
[`RecordBatch`][arro3.core.RecordBatch].
"""
def __arrow_c_schema__(self) -> object:
Expand Down Expand Up @@ -167,7 +167,7 @@ class ArrayReader:
"""Construct this from an existing Arrow object.

This is an alias of and has the same behavior as
[`from_arrow`][arro3.core.ArrayReader.from_arrow], but is included for parity
[`from_arrow`][arro3.ArrayReader.from_arrow], but is included for parity
with [`pyarrow.RecordBatchReader`][pyarrow.RecordBatchReader].
"""
@property
Expand Down Expand Up @@ -1719,21 +1719,124 @@ class Table:
_description_
"""

@overload
def dictionary_dictionary(array: ArrowArrayExportable) -> Array: ...
@overload
def dictionary_dictionary(array: ArrowStreamExportable) -> ArrayReader: ...
def dictionary_dictionary(
array: ArrowArrayExportable | ArrowStreamExportable,
) -> Array | ArrayReader:
"""
Access the `dictionary` of a dictionary array.

This is equivalent to the [`.dictionary`][pyarrow.DictionaryArray.dictionary]
attribute on a PyArrow [DictionaryArray][pyarrow.DictionaryArray].

Args:
array: Argument to compute function.

Returns:
The dictionary (values) of a dictionary-encoded array.
"""

@overload
def dictionary_indices(array: ArrowArrayExportable) -> Array: ...
@overload
def dictionary_indices(array: ArrowStreamExportable) -> ArrayReader: ...
def dictionary_indices(
array: ArrowArrayExportable | ArrowStreamExportable,
) -> Array | ArrayReader:
"""
Access the indices of a dictionary array.

This is equivalent to the [`.indices`][pyarrow.DictionaryArray.indices]
attribute on a PyArrow [DictionaryArray][pyarrow.DictionaryArray].

Args:
array: Argument to compute function.

Returns:
The indices of a dictionary-encoded array.
"""

@overload
def list_flatten(input: ArrowArrayExportable) -> Array: ...
@overload
def list_flatten(input: ArrowStreamExportable) -> ArrayReader: ...
def list_flatten(
input: ArrowArrayExportable | ArrowStreamExportable,
) -> Array | ArrayReader:
"""Unnest this ListArray, LargeListArray or FixedSizeListArray.

Args:
input: Input data.

Raises:
Exception if not a list-typed array.

Returns:
The flattened Arrow data.
"""

@overload
def list_offsets(input: ArrowArrayExportable, *, logical: bool = True) -> Array: ...
@overload
def list_offsets(
input: ArrowStreamExportable, *, logical: bool = True
) -> ArrayReader: ...
def list_offsets(
input: ArrowArrayExportable | ArrowStreamExportable,
*,
logical: bool = True,
) -> Array | ArrayReader:
"""Access the offsets of this ListArray or LargeListArray

Args:
input: Input data.
logical: If False, return the physical (unsliced) offsets of the provided list array. If True, adjust the list offsets for the current array slicing. Defaults to `True`.

Raises:
Exception if not a list-typed array.

Returns:
The offsets of the input list array.
"""

def struct_field(
values: ArrowArrayExportable,
/,
indices: int | Sequence[int],
) -> Array:
"""Access a column within a StructArray by index

Args:
values: Argument to compute function.
indices: A single index, or a list of indices for chained field lookup. For example, `[4, 1]` will look up the second nested field in the fifth outer field.

Raises:
Exception if not a struct-typed array.

Returns:
The selected field (column) of the struct array.
"""

def fixed_size_list_array(
values: ArrowArrayExportable,
list_size: int,
*,
type: ArrowSchemaExportable | None = None,
) -> Array:
"""_summary_
"""Construct a new fixed size list array

Args:
values: _description_
list_size: _description_
type: _description_. Defaults to None.
values: the values of the new fixed size list array
list_size: the number of elements in each item of the list.

Keyword Args:
type: the type of output array. This must have fixed size list type. You may pass a `Field` into this parameter to associate extension metadata with the created array. Defaults to None, in which case it is inferred.

Returns:
_description_
a new Array with fixed size list type
"""

def list_array(
Expand All @@ -1742,28 +1845,35 @@ def list_array(
*,
type: ArrowSchemaExportable | None = None,
) -> Array:
"""_summary_
"""Construct a new list array

Args:
offsets: _description_
values: _description_
type: _description_. Defaults to None.
offsets: the offsets for the output list array. This array must have type int32 or int64, depending on whether you wish to create a list array or large list array.
values: the values for the output list array.

Keyword Args:
type: the type of output array. This must have list or large list type. You may pass a `Field` into this parameter to associate extension metadata with the created array. Defaults to None, in which case it is inferred.

Returns:
_description_
a new Array with list or large list type
"""

def struct_array(
arrays: Sequence[ArrowArrayExportable],
*,
fields: Sequence[ArrowSchemaExportable],
type: ArrowSchemaExportable | None = None,
) -> Array:
"""_summary_
"""Construct a new struct array

Args:
arrays: _description_
fields: _description_
arrays: a sequence of arrays for the struct children

Keyword Args:
fields: a sequence of fields that represent each of the struct children
type: the type of output array. This must have struct type. You may pass a `Field` into this parameter to associate extension metadata with the created array. Defaults to None, in which case it is inferred.


Returns:
_description_
a new Array with struct type
"""
Loading