-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add option to return stores in apply_blockwise function to support icechunk
* Add store_icechunk
* Add icechunk optional dependency
* Run icechunk tests in CI
* Typing improvements
* Don't include icechunk in coverage
* Update to Icechunk 0.1.0-alpha.10 and Zarr 3.0.0
* Move icechunk CI tests to own workflow
* Fix mypy
- Loading branch information
Showing
8 changed files
with
236 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Workflow that installs the package with its test extras plus icechunk and
# runs only the tests whose names match "icechunk".
name: Icechunk tests

on:
  push:
    branches:
      - "main"
  pull_request:
  workflow_dispatch:

# A newer run for the same workflow/ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.12"]

    steps:
      - name: Checkout source
        # v4 runs on a supported Node runtime (v3 used the deprecated Node 16)
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Install
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e '.[test]' 'icechunk'

      - name: Run tests
        run: |
          pytest -v -k icechunk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from typing import TYPE_CHECKING, Any, List, Sequence, Union | ||
|
||
import zarr | ||
from icechunk import Session | ||
|
||
from cubed import compute | ||
from cubed.core.array import CoreArray | ||
from cubed.core.ops import blockwise | ||
from cubed.runtime.types import Callback | ||
|
||
if TYPE_CHECKING: | ||
from cubed.array_api.array_object import Array | ||
|
||
|
||
def store_icechunk(
    session: Session,
    *,
    sources: Union["Array", Sequence["Array"]],
    targets: List[zarr.Array],
    executor=None,
    **kwargs: Any,
) -> None:
    """Write cubed arrays into zarr arrays and merge the writes into ``session``.

    Each source is copied into its matching target by an identity
    ``blockwise`` operation whose ``target_store`` is the target array. The
    stores returned by the tasks are collected by an
    ``IcechunkStoreCallback``, and the session it accumulates is merged into
    ``session`` once the computation has finished.

    Parameters
    ----------
    session
        Session that receives the merged result of all task writes.
    sources
        A single cubed array, or a sequence of cubed arrays.
    targets
        The zarr array(s) to write to, one per source. When ``sources`` is a
        single array, ``targets`` is treated as a single array as well.
    executor
        Passed through to ``compute``.
    **kwargs
        Passed through to ``compute``. Any ``callbacks`` entry is kept and
        placed after the session-merging callback.

    Raises
    ------
    ValueError
        If any source is not a cubed array, or if the number of sources and
        targets differ.
    """
    if isinstance(sources, CoreArray):
        sources = [sources]
        targets = [targets]  # type: ignore

    if any(not isinstance(s, CoreArray) for s in sources):
        raise ValueError("All sources must be cubed array objects")

    if len(sources) != len(targets):
        raise ValueError(
            f"Different number of sources ({len(sources)}) and targets ({len(targets)})"
        )

    # one identity function is enough for every source/target pair
    def _identity(a):
        return a

    arrays = []
    for source, target in zip(sources, targets):
        ind = tuple(range(source.ndim))
        array = blockwise(
            _identity,
            ind,
            source,
            ind,
            dtype=source.dtype,
            align_arrays=False,
            target_store=target,
            return_writes_stores=True,
        )
        arrays.append(array)

    # use a callback to merge icechunk sessions
    store_callback = IcechunkStoreCallback()
    # add to other callbacks the user may have set; an explicit
    # ``callbacks=None`` is treated the same as no callbacks
    user_callbacks = kwargs.pop("callbacks", None) or []
    callbacks = [store_callback, *user_callbacks]

    compute(
        *arrays,
        executor=executor,
        _return_in_memory_array=False,
        callbacks=callbacks,
        **kwargs,
    )

    # merge back into the session passed into this function; there is nothing
    # to merge when no task handed back a store
    merged_session = getattr(store_callback, "session", None)
    if merged_session is not None:
        session.merge(merged_session)
|
||
|
||
class IcechunkStoreCallback(Callback):
    """Callback that folds the sessions of stores returned by tasks into one."""

    def on_compute_start(self, event):
        """Reset the accumulated session at the start of a computation."""
        self.session = None

    def on_task_end(self, event):
        """Merge the session of every store in the task result, if any."""
        stores = event.result
        if stores is None:
            # task produced no stores, nothing to merge
            return
        for store in stores:
            if self.session is not None:
                self.session.merge(store.session)
            else:
                # first store seen: its session becomes the accumulator
                self.session = store.session
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from typing import Iterable | ||
|
||
import numpy as np | ||
import pytest | ||
import zarr | ||
from numpy.testing import assert_array_equal | ||
|
||
import cubed | ||
import cubed.array_api as xp | ||
import cubed.random | ||
from cubed.tests.utils import MAIN_EXECUTORS | ||
|
||
icechunk = pytest.importorskip("icechunk") | ||
|
||
from icechunk import Repository, Storage | ||
|
||
from cubed.icechunk import store_icechunk | ||
|
||
|
||
@pytest.fixture(
    scope="module",
    params=MAIN_EXECUTORS,
    ids=[ex.name for ex in MAIN_EXECUTORS],
)
def executor(request):
    """Parametrized fixture yielding each executor in ``MAIN_EXECUTORS``."""
    return request.param
|
||
|
||
@pytest.fixture(scope="function")
def icechunk_storage(tmpdir) -> "Storage":
    """Local-filesystem icechunk storage rooted at the test's ``tmpdir``."""
    root = str(tmpdir)
    return Storage.new_local_filesystem(root)
|
||
|
||
def create_icechunk(a, icechunk_storage, /, *, dtype=None, chunks=None):
    """Create a repository in ``icechunk_storage`` holding ``a`` as array "a".

    The data is written through a writable session on the "main" branch,
    which is then committed.
    """
    # from dask.asarray
    has_shape = isinstance(getattr(a, "shape", None), Iterable)
    if not has_shape:
        # ensure blocks are arrays
        a = np.asarray(a, dtype=dtype)
    if dtype is None:
        dtype = a.dtype

    session = Repository.create(storage=icechunk_storage).writable_session("main")

    root = zarr.group(store=session.store, overwrite=True)
    arr = root.create_array("a", shape=a.shape, dtype=dtype, chunks=chunks)
    arr[...] = a

    session.commit("commit 1")
|
||
|
||
def test_from_zarr_icechunk(icechunk_storage, executor):
    """An array committed to icechunk can be read back with ``from_zarr``."""
    create_icechunk(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        icechunk_storage,
        chunks=(2, 2),
    )

    # read through a read-only session on the committed branch
    readonly = Repository.open(icechunk_storage).readonly_session(branch="main")

    a = cubed.from_zarr(readonly.store, path="a")
    expected = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    assert_array_equal(a.compute(executor=executor), expected)
|
||
|
||
def test_store_icechunk(icechunk_storage, executor):
    """``store_icechunk`` writes a cubed array that survives a commit/reopen."""
    a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], chunks=(2, 2))

    write_session = Repository.create(storage=icechunk_storage).writable_session(
        "main"
    )
    write_group = zarr.group(store=write_session.store, overwrite=True)
    target = write_group.create_array(
        "a", shape=a.shape, dtype=a.dtype, chunks=a.chunksize
    )
    store_icechunk(write_session, sources=a, targets=target, executor=executor)
    write_session.commit("commit 1")

    # reopen store and check contents of array
    read_session = Repository.open(icechunk_storage).readonly_session(branch="main")
    read_group = zarr.open_group(store=read_session.store, mode="r")

    expected = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    assert_array_equal(cubed.from_array(read_group["a"])[:], expected)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters