diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..d3397f9d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +data/**/ +target* +.netcdf-java +.tensorstore +.git diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bec61101..57a5b4cd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,38 +4,44 @@ name: Build on: [push, pull_request] jobs: - test: - name: ${{ matrix.platform }} ${{ matrix.python-version }} - runs-on: ${{ matrix.platform }} - strategy: - fail-fast: false - matrix: - platform: [ubuntu-latest] - python-version: [3.7] + build: + runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 + + - name: Cache conda + uses: actions/cache@v3 + env: + # Increase this value to reset cache if etc/example-environment.yml has not changed + CACHE_NUMBER: 0 + with: + path: ~/conda_pkgs_dir - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v1 + uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - channels: conda-forge,ome + activate-environment: ZI + channels: conda-forge,defaults + channel-priority: true environment-file: environment.yml - python-version: ${{ matrix.python-version }} + mamba-version: "*" env: ACTIONS_ALLOW_UNSECURE_COMMANDS: true - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Run tests - shell: bash -l {0} - run: make + - name: Run build + shell: bash -el {0} + run: | + make write; + pytest test/test_read_all.py diff --git a/.github/workflows/impl.yml b/.github/workflows/impl.yml new file mode 100644 index 00000000..3287f3df --- /dev/null +++ b/.github/workflows/impl.yml @@ -0,0 +1,90 @@ +--- +name: Implementation tests + +on: + workflow_call: + inputs: + implementation: + required: true + type: string + platform: + 
required: true + type: string + python-version: + required: true + type: string + zarr-python: + required: true + type: string + action: + required: true + type: string + +jobs: + + impl: + name: ${{ inputs.implementation }}-${{ inputs.action }}-${{ inputs.platform }}-py${{ inputs.python-version }}-${{ inputs.zarr-python }} + runs-on: ${{ inputs.platform }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Cache conda + uses: actions/cache@v3 + env: + # Increase this value to reset cache if etc/example-environment.yml has not changed + CACHE_NUMBER: 0 + with: + path: ~/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{hashFiles( format('{0}/{1}/{2}', 'implementations/', inputs.implementation, '/environment.yml') ) }} + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + channels: conda-forge,defaults + channel-priority: true + environment-file: implementations/${{ inputs.implementation }}/environment.yml + mamba-version: "*" + python-version: ${{ inputs.python-version }} + activate-environment: ZI_${{ inputs.implementation }} + env: + ACTIONS_ALLOW_UNSECURE_COMMANDS: true + + - name: Install zarr dev (optional) + shell: bash -l {0} + if: ${{ inputs.zarr-python == 'pre' }} + run: | + python -m pip install git+https://github.com/zarr-developers/zarr-python.git + + - name: Cache local Maven repository + uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + # + # Choose read or write based on {{ action }} + # + + - name: Download previous output for testing + uses: actions/download-artifact@v3 + if: ${{ inputs.action == 'read' }} + with: + name: ${{ inputs.implementation }}-${{ inputs.zarr-python }}-data + path: data + + - name: Run build + shell: bash -l {0} + run: make implementations/${{ inputs.implementation }}-${{ inputs.action }} + + - name: Save output 
for testing + uses: actions/upload-artifact@v3 + if: ${{ inputs.action == 'write' }} + with: + name: ${{ inputs.implementation }}-${{ inputs.zarr-python }}-data + path: data diff --git a/.github/workflows/zarr-dev.yml b/.github/workflows/zarr-dev.yml deleted file mode 100644 index c9cc2025..00000000 --- a/.github/workflows/zarr-dev.yml +++ /dev/null @@ -1,51 +0,0 @@ ---- -name: Build with Zarr dev - -on: [push, pull_request] - -jobs: - test: - name: ${{ matrix.platform }} ${{ matrix.python-version }} - runs-on: ${{ matrix.platform }} - - strategy: - fail-fast: true - matrix: - platform: [ubuntu-latest] - python-version: [3.7] - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - channels: conda-forge,ome - environment-file: environment.yml - python-version: ${{ matrix.python-version }} - env: - ACTIONS_ALLOW_UNSECURE_COMMANDS: true - - - name: Install zarr dev - shell: bash -l {0} - run: | - python -m pip install git+https://github.com/zarr-developers/zarr-python.git - - - name: Run tests - shell: bash -l {0} - run: make - - - name: Generate Report - shell: bash -l {0} - run: | - python test/test_read_all.py - - - name: Archive test report artifacts - uses: actions/upload-artifact@v2 - with: - name: test-report - path: | - report.md - report.html diff --git a/.gitignore b/.gitignore index 03b3105c..c9cee769 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ target* __pycache__/ -generate_data/xtensor_zarr/build +implementations/xtensor_zarr/build node_modules/ report.* # ignore data subdirectories data/**/ +*iml diff --git a/Dockerfile b/Dockerfile index ee308a08..d551d51d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM continuumio/miniconda3 WORKDIR /src COPY environment.yml /src/environment.yml -RUN apt-get update -y && apt install -y freeglut3-dev +RUN apt-get update -y && apt install -y freeglut3-dev # Unsure why freeglut is here 
RUN conda env create -f environment.yml -n z SHELL ["conda", "run", "-n", "z", "/bin/bash", "-c"] diff --git a/Makefile b/Makefile index e1888d6f..ad31904a 100644 --- a/Makefile +++ b/Makefile @@ -1,46 +1,107 @@ +IMPLEMENTATIONS=$(wildcard implementations/*) +CURRENT_DIR = $(shell pwd) + +define HELP_TEXT +make [target] + +Target(s): + write generate data for all implementations + list print directory\tpath pairs for all written data + read read all data written by all implementations + read-fast like read, but do not re-run `write` + +Notes: + - Each implementation has a conda environment named ZI_<implementation> + +endef + +export HELP_TEXT + +.PHONY: help +help: + @echo "$$HELP_TEXT" + +.PHONY: report + +report: data + python test/test_read_all.py + +.PHONY: test data + +ifeq ($(TEST),) ################################################# +# If TEST is not set, by default build everything, generate +# data for all implementations, and then run all pytests. + test: data - pytest -v + pytest -v -k W + +data: $(IMPLEMENTATIONS) + +else +# Otherwise, focus on a single implementation, only generating +# its data and using the "-k W-" keyword to limit which pytests +# get run + +test: implementations/$(TEST) + pytest -v -k W-$(TEST) + +data: implementations/$(TEST) + +endif ########################################################## + data/reference_image.png: python generate_reference_image.py -.PHONY: jzarr -jzarr: data/reference_image.png - bash generate_data/jzarr/generate_data.sh +define mk-impl-target +# For each of the items in our "implementations" directory, +# create targets which depend on the reference data and +# call the "driver.sh" script as necessary. 
-.PHONY: n5java -n5java: data/reference_image.png - bash generate_data/n5-java/generate_data.sh +.PHONY: write list read-fast read $1 $1/ $1-list $1-read-fast $1-read $1-write $1-destroy clean -.PHONY: pyn5 -pyn5: data/reference_image.png - python generate_data/generate_pyn5.py +write: $1-write +list: $1-list -.PHONY: z5py -z5py: data/reference_image.png - python generate_data/generate_z5py.py +read-fast: $1-read-fast +read: $1-read -.PHONY: zarr -zarr: data/reference_image.png - python generate_data/generate_zarr.py +$1-write: data/reference_image.png + @if test -e $1/.skip; \ + then >&2 echo "Skipping $1 -- $$(shell test -e $1/.skip && cat $1/.skip)"; \ + else \ + bash $1/driver.sh write; \ + fi -.PHONY: zarrita -zarrita: data/reference_image.png - python generate_data/generate_zarrita.py +$1-list: + @if test -e $1/.skip; \ + then >&2 echo "Skipping $1 -- $$(shell test -e $1/.skip && cat $1/.skip)"; \ + else \ + bash $1/driver.sh list; \ + fi -.PHONY: js -js: data/reference_image.png - bash generate_data/js/generate_data.sh +$1-read-fast: + @if test -e $1/.skip; \ + then >&2 echo "Skipping $1 -- $$(shell test -e $1/.skip && cat $1/.skip)"; \ + else \ + bash $1/driver.sh read $(DIR) $(DATASET); \ + fi -.PHONY: xtensor_zarr -xtensor_zarr: data/reference_image.png - bash generate_data/xtensor_zarr/generate_data.sh -.PHONY: data -data: jzarr n5java pyn5 z5py zarr js xtensor_zarr zarrita +$1-read: write $1-read-fast -.PHONY: test -.PHONY: report -report: data - python test/test_read_all.py +# Alias for read & write +$1: $1-write $1-read + +# Alias in case the trailing slash is included +$1/: $1 + +# Additional target to cleanup the environment +$1-destroy: + bash $1/driver.sh destroy + +clean: $1-destroy + +endef +$(foreach impl,$(IMPLEMENTATIONS),$(eval $(call mk-impl-target,$(impl)))) diff --git a/README.md b/README.md index cd0bda30..5b1a22d7 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,6 @@ Test for compatibility. 
See [doc/development_overview.md](doc/development_overvi ## Implementations currently tested - * https://github.com/bcdev/jzarr * https://github.com/saalfeldlab/n5-zarr * https://github.com/aschampion/rust-n5 @@ -13,7 +12,7 @@ Test for compatibility. See [doc/development_overview.md](doc/development_overvi * https://github.com/gzuidhof/zarr.js * https://github.com/zarr-developers/zarr-python * https://github.com/constantinpape/z5 - +* https://github.com/grimbough/Rarr ## Other implementations @@ -23,3 +22,13 @@ Test for compatibility. See [doc/development_overview.md](doc/development_overvi * [Zarr.jl](https://github.com/meggart/Zarr.jl) #42 * https://github.com/freeman-lab/zarr-js * [GDAL >= 3.4](https://gdal.org/drivers/raster/zarr.html) + +## Running locally + +* Create an environment: `conda env create -n ZI -f environment.yml` +* Run `make` + +## Other features + +* `touch implementations/{impl}/.skip` to disable an implementation +* Use `make NODEBUG=1 ...` to quiet output diff --git a/generate_data/generate_reference_image.py b/data/generate_reference_image.py similarity index 77% rename from generate_data/generate_reference_image.py rename to data/generate_reference_image.py index 1780d7b1..dec8da3d 100644 --- a/generate_data/generate_reference_image.py +++ b/data/generate_reference_image.py @@ -1,9 +1,11 @@ +#!/usr/bin/env python + from skimage.data import astronaut from skimage.io import imsave def write_reference_image(): - path = '../data/reference_image.png' + path = 'reference_image.png' im = astronaut() imsave(path, im) diff --git a/doc/development_overview.md b/doc/development_overview.md index 2a503a04..0d7d7d43 100644 --- a/doc/development_overview.md +++ b/doc/development_overview.md @@ -2,7 +2,7 @@ ## Overview -This repository contains scripts to generate datasets in zarr-v2, zarr-v3 and +This repository contains scripts to generate datasets in Zarr-v2, Zarr-v3 and N5 formats via a number of different implementations. 
Once generated, the test suite will attempt to have each library read all datasets with a supported format. @@ -44,7 +44,7 @@ conda activate zarr_impl_dev ## Data Generation All test data is currently generated from a common image file that gets -generated by the script `generate_data/generate_reference_image.py`. Each +generated by the script `data/generate_reference_image.py`. Each library that generates data currently has either a python script or a shell script that will generate datasets for the supported file types and codecs. Some implementations also generate versions using both flat and nested file diff --git a/environment.yml b/environment.yml index 8f651a0f..f3dad3f1 100644 --- a/environment.yml +++ b/environment.yml @@ -2,21 +2,10 @@ channels: - conda-forge - defaults dependencies: - - openjdk - - maven - make - - cmake - - xtensor-zarr >=0.0.8|=0.0.7=*_1 - - openimageio - - nodejs - - z5py >= 2.0.10 - - python == 3.7.9 + - python - scikit-image - pytest - - zarr >= 2.8.3 - pip - pandas - tabulate - - pip: - - pyn5 - - git+https://github.com/grlee77/zarrita.git@codec_update diff --git a/generate_data/generate_pyn5.py b/generate_data/generate_pyn5.py deleted file mode 100755 index 99b10c34..00000000 --- a/generate_data/generate_pyn5.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python -from pathlib import Path -import pyn5 -from skimage.io import imread - -# choose chunks s.t. 
we do have overhanging edge-chunks -CHUNKS = (100, 100, 1) - - -def generate_n5_format(compressors=pyn5.CompressionType): - here = Path(__file__).resolve().parent - data_dir = here.parent / "data" - path = data_dir / "pyn5.n5" - - im = imread(data_dir / "reference_image.png") - - f = pyn5.File(path, pyn5.Mode.CREATE_TRUNCATE) - for compressor in compressors: - name = str(compressor) - f.create_dataset(name, data=im, chunks=CHUNKS, compression=compressor) - - -if __name__ == '__main__': - generate_n5_format() diff --git a/generate_data/generate_z5py.py b/generate_data/generate_z5py.py deleted file mode 100644 index 8106cf71..00000000 --- a/generate_data/generate_z5py.py +++ /dev/null @@ -1,41 +0,0 @@ -import z5py -from skimage.data import astronaut - -# choose chunks s.t. we do have overhanging edge-chunks -CHUNKS = (100, 100, 1) - -# options for the different compressors -COMPRESSION_OPTIONS = {"blosc": {"codec": "lz4"}} - - -# TODO support more compressors: -# - more compressors in numcodecs -# - more blosc codecs -def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', 'raw']): - path = 'data/z5py.zr' - im = astronaut() - - f = z5py.File(path, mode='w') - for compressor in compressors: - copts = COMPRESSION_OPTIONS.get(compressor, {}) - name = ( - compressor - if compressor != "blosc" - else "%s/%s" % (compressor, copts.get("codec")) - ) - f.create_dataset(name, data=im, compression=compressor, chunks=CHUNKS, **copts) - - -def generate_n5_format(compressors=['gzip', 'raw']): - path = 'data/z5py.n5' - im = astronaut() - - f = z5py.File(path, mode='w') - for compressor in compressors: - name = compressor - f.create_dataset(name, data=im, chunks=CHUNKS, compression=compressor) - - -if __name__ == '__main__': - generate_zarr_format() - generate_n5_format() diff --git a/generate_data/generate_zarrita.py b/generate_data/generate_zarrita.py deleted file mode 100644 index 2765dea4..00000000 --- a/generate_data/generate_zarrita.py +++ /dev/null @@ -1,42 +0,0 @@ 
-import zarrita -import numcodecs -from skimage.data import astronaut - -# choose chunks s.t. we do have overhanging edge-chunks -CHUNKS = (100, 100, 1) -STR_TO_COMPRESSOR = { - "gzip": numcodecs.GZip, - "blosc": numcodecs.Blosc, - "zlib": numcodecs.Zlib, -} -COMPRESSION_OPTIONS = {"blosc": {"cname": "lz4"}} - - -def generate_zr3_format(compressors=['gzip', 'blosc', 'zlib', None], - nested=True): - im = astronaut() - if nested: - chunk_separator = '/' - fname = 'data/zarrita_nested.zr3' - else: - chunk_separator = '.' - fname = 'data/zarrita.zr3' - h = zarrita.create_hierarchy(fname) - for compressor in compressors: - copts = COMPRESSION_OPTIONS.get(compressor, {}) - if compressor is None: - name = "raw" - elif compressor == "blosc": - name = "%s/%s" % (compressor, copts.get("cname")) - else: - name = compressor - compressor_impl = STR_TO_COMPRESSOR[compressor](**copts) if compressor is not None else None - a = h.create_array('/' + name, shape=im.shape, chunk_shape=CHUNKS, - chunk_separator=chunk_separator, dtype=im.dtype, - compressor=compressor_impl) - a[...] 
= im - - -if __name__ == '__main__': - for nested in [False, True]: - generate_zr3_format(nested=nested) diff --git a/generate_data/js/generate_data.sh b/generate_data/js/generate_data.sh deleted file mode 100755 index c70e51f0..00000000 --- a/generate_data/js/generate_data.sh +++ /dev/null @@ -1,6 +0,0 @@ -# cd to this directory -# https://stackoverflow.com/a/6393573/2700168 -cd "${0%/*}" - -npm install -npm start diff --git a/generate_data/jzarr/generate_data.sh b/generate_data/jzarr/generate_data.sh deleted file mode 100755 index c860ea03..00000000 --- a/generate_data/jzarr/generate_data.sh +++ /dev/null @@ -1,19 +0,0 @@ -# cd to this directory -# https://stackoverflow.com/a/6393573/2700168 -cd "${0%/*}" - -set -e -set -u -set -x - -MVN_FLAGS=${MVN_FLAGS:-"--no-transfer-progress"} -mvn "${MVN_FLAGS}" clean package - -java -cp target/jzarr-1.0.0.jar zarr_implementations.jzarr.App "$@" && { - # Workaround for: https://github.com/bcdev/jzarr/issues/25 - find ../../data/jzarr* -name .zarray -exec sed -ibak 's/>u1/|u1/' {} \; -} || { - echo jzarr failed - exit 2 -} - diff --git a/generate_data/n5-java/generate_data.sh b/generate_data/n5-java/generate_data.sh deleted file mode 100755 index 2607bea3..00000000 --- a/generate_data/n5-java/generate_data.sh +++ /dev/null @@ -1,8 +0,0 @@ -# cd to this directory -# https://stackoverflow.com/a/6393573/2700168 -cd "${0%/*}" - -MVN_FLAGS=${MVN_FLAGS:-"--no-transfer-progress"} -mvn "${MVN_FLAGS}" clean package - -java -cp target/n5_java-1.0.0.jar zarr_implementations.n5_java.App diff --git a/generate_data/xtensor_zarr/generate_data.sh b/generate_data/xtensor_zarr/generate_data.sh deleted file mode 100755 index b6e66582..00000000 --- a/generate_data/xtensor_zarr/generate_data.sh +++ /dev/null @@ -1,11 +0,0 @@ -# cd to this directory -# https://stackoverflow.com/a/6393573/2700168 -cd "${0%/*}" - -rm -rf build -mkdir build -cd build -export LDFLAGS="${LDFLAGS} -Wl,-rpath,$CONDA_PREFIX/lib -Wl,-rpath,$PWD" -export 
LINKFLAGS="${LDFLAGS}" -cmake .. -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_INSTALL_LIBDIR=lib -make run diff --git a/implementations/.bash_driver.sh b/implementations/.bash_driver.sh new file mode 100644 index 00000000..f52650b0 --- /dev/null +++ b/implementations/.bash_driver.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# This is re-usable driver code for all of the implementations. +# + +set -e +set -o pipefail + +argparse(){ + case "${1}" in + write) + [[ -z "${NODEBUG}" ]] && >&2 echo "Generating data..." + zi_write;; + list) + shift; + zi_list "$@";; + read) + shift; + [[ -z "${NODEBUG}" ]] && >&2 echo "Verifying data..." + zi_read "$@";; + destroy) + [[ -z "${NODEBUG}" ]] && >&2 echo "Tearing down..." + zi_destroy;; + *) + [[ -z "${NODEBUG}" ]] && >&2 echo "Unknown command: ${1}" + exit 2;; + esac +} diff --git a/implementations/.conda_driver.sh b/implementations/.conda_driver.sh new file mode 100644 index 00000000..592487eb --- /dev/null +++ b/implementations/.conda_driver.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# +# This is re-usable driver code for all of the implementations +# that make use of a conda environment file. +# + +set -e +set -o pipefail + +## Setup based on mamba versus conda installation +if command -v mamba &> /dev/null +then + COMMAND=mamba +else + COMMAND=conda +fi + +create_or_activate(){ + + if { $COMMAND env list | grep $ENVNAME; } >/dev/null 2>&1; then + [[ -z "${NODEBUG}" ]] && >&2 echo "Using $ENVNAME" + else + [[ -z "${NODEBUG}" ]] && >&2 echo "Creating $ENVNAME" + $COMMAND env create -n $ENVNAME -f $IMPL/environment.yml + fi + export MAMBA_ROOT_PREFIX=$(conda info --base -q) + export MAMBA_EXE=${MAMBA_ROOT_PREFIX}/bin/mamba + export CONDA_EXE=${MAMBA_ROOT_PREFIX}/bin/conda + . $MAMBA_ROOT_PREFIX/etc/profile.d/conda.sh + . 
$MAMBA_ROOT_PREFIX/etc/profile.d/mamba.sh + + [[ -z "${NODEBUG}" ]] && >&2 echo "Activating $ENVNAME" + $COMMAND activate $ENVNAME +} + +zi_destroy(){ + + if { $COMMAND env list | grep $ENVNAME; } >/dev/null 2>&1; then + [[ -z "${NODEBUG}" ]] && >&2 echo "Destroying $ENVNAME" + $COMMAND env remove -y -n $ENVNAME + else + >&2 echo "No known env: $ENVNAME" + fi +} diff --git a/implementations/Rarr/.skip b/implementations/Rarr/.skip new file mode 100644 index 00000000..523b9b00 --- /dev/null +++ b/implementations/Rarr/.skip @@ -0,0 +1 @@ +built under R version 4.3.2 diff --git a/implementations/Rarr/driver.sh b/implementations/Rarr/driver.sh new file mode 100755 index 00000000..9858ec4b --- /dev/null +++ b/implementations/Rarr/driver.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +ENVNAME=ZI_Rarr + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + create_or_activate + + cd "${IMPL}" + + Rscript install_packages.R + Rscript generate_Rarr.R +} + +zi_list(){ + create_or_activate + + cd "${IMPL}" + + Rscript install_packages.R + Rscript generate_Rarr.R -list +} + +zi_read(){ + create_or_activate + + cd "${IMPL}" + + Rscript install_packages.R + Rscript verify_data.R "$@" +} + +. $ROOT/.conda_driver.sh +. 
$ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/Rarr/environment.yml b/implementations/Rarr/environment.yml new file mode 100644 index 00000000..36a94516 --- /dev/null +++ b/implementations/Rarr/environment.yml @@ -0,0 +1,7 @@ +channels: + - defaults + - bioconda +dependencies: + - bioconductor-rarr +variables: + LC_ALL: C.UTF-8 diff --git a/implementations/Rarr/generate_Rarr.R b/implementations/Rarr/generate_Rarr.R new file mode 100644 index 00000000..d41e7c7a --- /dev/null +++ b/implementations/Rarr/generate_Rarr.R @@ -0,0 +1,33 @@ +library(loder) +library(Rarr) + +img <- loder::readPng("../../data/reference_image.png") + +chunk_dim <- c(100, 100, 1) + +for (sep in c("_flat", "_nested")) { + for (codec in c("blosc/lz4", "zlib", "gzip", "raw")) { + dir_name <- paste0("Rarr", sep, ".zr") + output_name <- file.path("../../data", dir_name, codec) + + dir.create(output_name, recursive = TRUE, showWarnings = FALSE) + + dim_sep <- ifelse(sep == "_flat", yes = ".", no = "/") + + compressor <- switch(codec, + "blosc/lz4" = Rarr:::use_blosc(), + "zlib" = Rarr:::use_zlib(), + "gzip" = Rarr::use_gzip(), + "raw" = NULL + ) + + write_zarr_array( + x = img, + zarr_array_path = output_name, + chunk_dim = chunk_dim, + compressor = compressor, + dimension_separator = dim_sep, + order = "C" + ) + } +} diff --git a/implementations/Rarr/install_packages.R b/implementations/Rarr/install_packages.R new file mode 100644 index 00000000..651f46b6 --- /dev/null +++ b/implementations/Rarr/install_packages.R @@ -0,0 +1,8 @@ +chooseCRANmirror(ind = 1) +if (!require("loder", quietly = TRUE)) { + install.packages("loder", ) +} +if (!require("Rarr", quietly = TRUE)) { + #remotes::install_git("https://git.bioconductor.org/packages/Rarr", upgrade = "never") + remotes::install_git("https://github.com/grimbough/Rarr.git/", ref = "main", upgrade = "never") +} diff --git a/implementations/Rarr/verify_data.R b/implementations/Rarr/verify_data.R new file mode 100644 index 
00000000..8b5fde72 --- /dev/null +++ b/implementations/Rarr/verify_data.R @@ -0,0 +1,17 @@ +args <- commandArgs(trailingOnly = TRUE) + +stopifnot(length(args) == 2) + +fpath <- args[1] +ds_name <- args[2] + +library(Rarr) +library(reticulate) +np <- import("numpy") + +zarr_array <- file.path(fpath, ds_name) + +x <- read_zarr_array(zarr_array_path = zarr_array) + +a <- np$array(x) +np$save("a.npy", a) diff --git a/implementations/Rarr/verify_data_internal.R b/implementations/Rarr/verify_data_internal.R new file mode 100644 index 00000000..99f3f266 --- /dev/null +++ b/implementations/Rarr/verify_data_internal.R @@ -0,0 +1,27 @@ +args <- commandArgs(trailingOnly = TRUE) + +stopifnot(length(args) == 2) + +fpath <- args[1] +ds_name <- args[2] + +source("implementations/Rarr/install_packages.R") + +library(loder) +library(Rarr) + +## Read the reference image. We strip the "loder" class so it is a +## regular array, making the comparison easier +reference_img <- loder::readPng("data/reference_image.png") +class(reference_img) <- "array" + +## read the zarr input +zarr_array <- file.path(fpath, ds_name) +x <- read_zarr_array(zarr_array_path = zarr_array) + +## if the values are different quit +if(!isTRUE(all.equal(reference_img, x, check.attributes = FALSE))) { + stop("Input and reference image are different") +} + +quit(save = "no", status = 0) diff --git a/implementations/js/driver.sh b/implementations/js/driver.sh new file mode 100755 index 00000000..1821c3eb --- /dev/null +++ b/implementations/js/driver.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +ENVNAME=ZI_js + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + create_or_activate + + cd "${IMPL}" + + npm install + npm start +} + +zi_list(){ + create_or_activate + + cd "${IMPL}" + + npm install --silent + npm run --silent start -- --list "$@" +} + +zi_read(){ + cd "${IMPL}" + npm run start -- --verify "$@" +} + +. 
$ROOT/.conda_driver.sh +. $ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/js/environment.yml b/implementations/js/environment.yml new file mode 100644 index 00000000..4c3e61e9 --- /dev/null +++ b/implementations/js/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - defaults +dependencies: + - nodejs diff --git a/generate_data/js/package-lock.json b/implementations/js/package-lock.json similarity index 60% rename from generate_data/js/package-lock.json rename to implementations/js/package-lock.json index c03611b7..1128d74e 100644 --- a/generate_data/js/package-lock.json +++ b/implementations/js/package-lock.json @@ -1,59 +1,91 @@ { + "name": "js", + "lockfileVersion": 3, "requires": true, - "lockfileVersion": 1, - "dependencies": { - "eventemitter3": { + "packages": { + "": { + "dependencies": { + "minimist": "1.2.8", + "pngjs": "^6.0.0", + "zarr": "^0.4.0" + } + }, + "node_modules/eventemitter3": { "version": "4.0.7", "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==" }, - "numcodecs": { + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/numcodecs": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/numcodecs/-/numcodecs-0.1.1.tgz", "integrity": "sha512-UjKulZ6GIFKLdBIczEbsoXNZQmiHafpoIdo39YcdecHVGyMKh0+azsfHTrybXm5RZwepqLZv24mkjqGdZGm24Q==", - "requires": { + "dependencies": { "pako": "^1.0.11" } }, - "p-finally": { + "node_modules/p-finally": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", - "integrity": "sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=" + 
"integrity": "sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=", + "engines": { + "node": ">=4" + } }, - "p-queue": { + "node_modules/p-queue": { "version": "6.2.0", "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.2.0.tgz", "integrity": "sha512-B2LXNONcyn/G6uz2UBFsGjmSa0e/br3jznlzhEyCXg56c7VhEpiT2pZxGOfv32Q3FSyugAdys9KGpsv3kV+Sbg==", - "requires": { + "dependencies": { "eventemitter3": "^4.0.0", "p-timeout": "^3.1.0" + }, + "engines": { + "node": ">=8" } }, - "p-timeout": { + "node_modules/p-timeout": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", "integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", - "requires": { + "dependencies": { "p-finally": "^1.0.0" + }, + "engines": { + "node": ">=8" } }, - "pako": { + "node_modules/pako": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" }, - "pngjs": { + "node_modules/pngjs": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-6.0.0.tgz", - "integrity": "sha512-TRzzuFRRmEoSW/p1KVAmiOgPco2Irlah+bGFCeNfJXxxYGwSw7YwAOAcd7X28K/m5bjBWKsC29KyoMfHbypayg==" + "integrity": "sha512-TRzzuFRRmEoSW/p1KVAmiOgPco2Irlah+bGFCeNfJXxxYGwSw7YwAOAcd7X28K/m5bjBWKsC29KyoMfHbypayg==", + "engines": { + "node": ">=12.13.0" + } }, - "zarr": { + "node_modules/zarr": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/zarr/-/zarr-0.4.0.tgz", "integrity": "sha512-zvxdX3aRWxjy6H3OtA7R05NNZvRKxn/7bkNJhUsVKKbNoJ3DBqYERQfzI4WfAV1OTcclqvlYwkQ7DWsGJA5QEw==", - "requires": { + "dependencies": { "numcodecs": "^0.1.0", "p-queue": "6.2.0" + }, + "engines": { + "node": ">=8.0.0" } } } diff --git a/generate_data/js/package.json b/implementations/js/package.json similarity index 85% rename from generate_data/js/package.json rename to implementations/js/package.json index 
60d38ffc..7d67cecb 100644 --- a/generate_data/js/package.json +++ b/implementations/js/package.json @@ -4,6 +4,7 @@ "start": "node src/index.js" }, "dependencies": { + "minimist": "1.2.8", "pngjs": "^6.0.0", "zarr": "^0.4.0" } diff --git a/generate_data/js/src/fsstore.js b/implementations/js/src/fsstore.js similarity index 100% rename from generate_data/js/src/fsstore.js rename to implementations/js/src/fsstore.js diff --git a/generate_data/js/src/index.js b/implementations/js/src/index.js similarity index 67% rename from generate_data/js/src/index.js rename to implementations/js/src/index.js index 02998710..da1d68b6 100644 --- a/generate_data/js/src/index.js +++ b/implementations/js/src/index.js @@ -1,6 +1,7 @@ import fs from "fs"; import p from "path"; import pkg from "pngjs"; +import minimist from "minimist"; const { PNG } = pkg; import { openGroup, NestedArray, slice } from "zarr"; @@ -34,21 +35,39 @@ function getName(config) { return config.id; } -async function generateZarrFormat(codecIds = ["gzip", "blosc", "zlib", null]) { +async function generateZarrFormat(listOnly, codecIds = ["gzip", "blosc", "zlib", null]) { const path = p.join("..", "..", "data", "js.zr"); const img = imread(p.join("..", "..", "data", "reference_image.png")); - fs.rmdirSync(path, { recursive: true, force: true }); + if (!listOnly && fs.existsSync(path)) { + fs.rmdirSync(path, { recursive: true, force: true }); + } + const grp = await open(path); for (const id of codecIds) { const config = id ? 
STR_TO_COMPRESSOR[id] : null; const name = getName(config); - grp.createDataset(name, undefined, img, { - compressor: config, - chunks: CHUNKS, - fillValue: 0, - }); + if (listOnly) { + console.log(path + "\t" + name); + } else { + grp.createDataset(name, undefined, img, { + compressor: config, + chunks: CHUNKS, + fillValue: 0, + }); + } } } -generateZarrFormat(); +function main(){ + var argv = minimist(process.argv.slice(2), { + boolean: ["list", "verify"], + }); + if (argv.verify) { + verifyZarrFormat(verify); + } else { + generateZarrFormat(argv.list); + } +} + +main(); diff --git a/implementations/jzarr/driver.sh b/implementations/jzarr/driver.sh new file mode 100755 index 00000000..df8a830f --- /dev/null +++ b/implementations/jzarr/driver.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +ENVNAME=ZI_jzarr + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + create_or_activate + + cd "${IMPL}" + + MVN_FLAGS=${MVN_FLAGS:-"-q --no-transfer-progress"} + mvn ${MVN_FLAGS} package + + java -cp target/jzarr-1.0.0.jar zarr_implementations.jzarr.App "$@" && { + # Workaround for: https://github.com/bcdev/jzarr/issues/25 + find ../../data/jzarr* -name .zarray -exec sed -ibak 's/>u1/|u1/' {} \; + } || { + echo jzarr failed + exit 2 + } +} + +zi_list(){ + create_or_activate + + cd "${IMPL}" + + MVN_FLAGS=${MVN_FLAGS:-"-q --no-transfer-progress"} + mvn ${MVN_FLAGS} package + #notrap_outerr + + java -cp target/jzarr-1.0.0.jar zarr_implementations.jzarr.App -list +} + +zi_read(){ + create_or_activate + + cd "${IMPL}" + + MVN_FLAGS=${MVN_FLAGS:-"-q --no-transfer-progress"} + mvn ${MVN_FLAGS} package + #notrap_outerr + + java -cp target/jzarr-1.0.0.jar zarr_implementations.jzarr.App -verify "$@" +} + + +. $ROOT/.conda_driver.sh +. 
$ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/jzarr/environment.yml b/implementations/jzarr/environment.yml new file mode 100644 index 00000000..a7a62cbb --- /dev/null +++ b/implementations/jzarr/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - defaults +dependencies: + - openjdk + - maven + - blosc diff --git a/generate_data/jzarr/pom.xml b/implementations/jzarr/pom.xml similarity index 100% rename from generate_data/jzarr/pom.xml rename to implementations/jzarr/pom.xml diff --git a/generate_data/jzarr/src/main/java/zarr_implementations/jzarr/App.java b/implementations/jzarr/src/main/java/zarr_implementations/jzarr/App.java similarity index 87% rename from generate_data/jzarr/src/main/java/zarr_implementations/jzarr/App.java rename to implementations/jzarr/src/main/java/zarr_implementations/jzarr/App.java index 4cc39d15..2a8a90b7 100644 --- a/generate_data/jzarr/src/main/java/zarr_implementations/jzarr/App.java +++ b/implementations/jzarr/src/main/java/zarr_implementations/jzarr/App.java @@ -80,11 +80,14 @@ private static int[] getArrayData(ZarrArray zarr) throws Exception { public static void main(String args[]) throws Exception { - if (args.length != 0 && args.length != 3) { + if (args.length != 0 && args.length != 1 && args.length != 3) { System.out.println("usage: App"); + System.out.println("usage: App -list"); System.out.println("usage: App -verify fpath dsname"); System.exit(2); // EARLY EXIT - } else if (args.length == 3) { + } + + if (args.length == 3) { String fpath = args[1]; String dsname = args[2]; ZarrArray verification = ZarrGroup.open(fpath).openArray(dsname); @@ -105,6 +108,11 @@ public static void main(String args[]) throws Exception { return; // EARLY EXIT } + boolean listOnly = false; + if (args.length == 1) { + listOnly = true; + } + int[] data = getTestData(); final ZarrGroup container = ZarrGroup.create(OUT_PATH); @@ -121,9 +129,13 @@ public static void main(String args[]) throws Exception { dsname = 
"blosc/lz4"; // FIXME: better workaround? } Path subdir = OUT_PATH.resolve(dsname); - ZarrArray zArray = ZarrArray.create(subdir, arrayParams); - // final ZarrArray zarr = ZarrArray.open(getRootPath().resolve(pathName)); - zArray.write(data, SHAPE, new int[]{0, 0, 0}); + if (listOnly) { + System.out.println(OUT_PATH + "\t" + dsname); + } else { + ZarrArray zArray = ZarrArray.create(subdir, arrayParams); + // final ZarrArray zarr = ZarrArray.open(getRootPath().resolve(pathName)); + zArray.write(data, SHAPE, new int[]{0, 0, 0}); + } } } } diff --git a/implementations/n5-java/driver.sh b/implementations/n5-java/driver.sh new file mode 100755 index 00000000..912e111f --- /dev/null +++ b/implementations/n5-java/driver.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +ENVNAME=ZI_n5_java + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + create_or_activate + + cd "${IMPL}" + MVN_FLAGS=${MVN_FLAGS:-"-q --no-transfer-progress"} + mvn ${MVN_FLAGS} package + + java -cp target/n5_java-1.0.0.jar zarr_implementations.n5_java.App +} + +zi_list(){ + create_or_activate + + cd "${IMPL}" + MVN_FLAGS=${MVN_FLAGS:-"-q --no-transfer-progress"} + mvn ${MVN_FLAGS} package + + java -cp target/n5_java-1.0.0.jar zarr_implementations.n5_java.App -list +} + +zi_read(){ + create_or_activate + + cd "${IMPL}" + MVN_FLAGS=${MVN_FLAGS:-"-q --no-transfer-progress"} + mvn ${MVN_FLAGS} package + + java -cp target/n5_java-1.0.0.jar zarr_implementations.n5_java.App -verify "$@" +} + +. $ROOT/.conda_driver.sh +. 
$ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/n5-java/environment.yml b/implementations/n5-java/environment.yml new file mode 100644 index 00000000..a7a62cbb --- /dev/null +++ b/implementations/n5-java/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - defaults +dependencies: + - openjdk + - maven + - blosc diff --git a/generate_data/n5-java/pom.xml b/implementations/n5-java/pom.xml similarity index 100% rename from generate_data/n5-java/pom.xml rename to implementations/n5-java/pom.xml diff --git a/generate_data/n5-java/src/main/java/zarr_implementations/n5_java/App.java b/implementations/n5-java/src/main/java/zarr_implementations/n5_java/App.java similarity index 90% rename from generate_data/n5-java/src/main/java/zarr_implementations/n5_java/App.java rename to implementations/n5-java/src/main/java/zarr_implementations/n5_java/App.java index bc6202c3..eea10f9d 100644 --- a/generate_data/n5-java/src/main/java/zarr_implementations/n5_java/App.java +++ b/implementations/n5-java/src/main/java/zarr_implementations/n5_java/App.java @@ -58,13 +58,18 @@ private static RandomAccessibleInterval getData() throws IOExc public static void main(String args[]) throws IOException { + boolean listOnly = (args.length == 1 && args[0].equals("-list")); RandomAccessibleInterval data = getData(); final N5FSWriter container = new N5FSWriter(OUT_PATH); for (final Compression compression : getCompressions()) { final DatasetAttributes attrs = new DatasetAttributes(Intervals.dimensionsAsLongArray(data), BLOCK_SIZE, DataType.UINT8, compression); final String dataset = compression.getType(); - container.createDataset(dataset, attrs); - N5Utils.save(data, container, dataset, BLOCK_SIZE, compression); + if (listOnly) { + System.out.println(OUT_PATH + "\t" + dataset); + } else { + container.createDataset(dataset, attrs); + N5Utils.save(data, container, dataset, BLOCK_SIZE, compression); + } } } diff --git a/implementations/pyn5/.skip b/implementations/pyn5/.skip 
new file mode 100644 index 00000000..bb855dd7 --- /dev/null +++ b/implementations/pyn5/.skip @@ -0,0 +1 @@ +pyn5 requires "n5" version attribute diff --git a/implementations/pyn5/driver.sh b/implementations/pyn5/driver.sh new file mode 100755 index 00000000..290d5a1a --- /dev/null +++ b/implementations/pyn5/driver.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +ENVNAME=ZI_pyn5 + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_pyn5.py +} + +zi_list(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_pyn5.py -list +} + +zi_read(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_pyn5.py -verify "$@" +} + +. $ROOT/.conda_driver.sh +. $ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/pyn5/environment.yml b/implementations/pyn5/environment.yml new file mode 100644 index 00000000..44fbebf7 --- /dev/null +++ b/implementations/pyn5/environment.yml @@ -0,0 +1,11 @@ +channels: + - conda-forge + - defaults +dependencies: + - rust + - maturin + - python + - scikit-image + - pip + - pip: + - "git+https://github.com/pattonw/rust-pyn5@master#egg=pyn5" diff --git a/implementations/pyn5/generate_pyn5.py b/implementations/pyn5/generate_pyn5.py new file mode 100755 index 00000000..f375b5d2 --- /dev/null +++ b/implementations/pyn5/generate_pyn5.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +from pathlib import Path +import pyn5 +from skimage.io import imread + +# choose chunks s.t. 
we do have overhanging edge-chunks +CHUNKS = (100, 100, 1) + + +def generate_n5_format(list_only: bool, compressors=pyn5.CompressionType): + data_dir = Path("../..") / "data" + path = data_dir / "pyn5.n5" + + im = imread(data_dir / "reference_image.png") + + f = pyn5.File(path, pyn5.Mode.CREATE_TRUNCATE) + for compressor in compressors: + name = str(compressor) + if list_only: + print(f"{path}\t{name}") + else: + f.create_dataset(name, data=im, chunks=CHUNKS, compression=compressor) + + +def verify_format(directory: str, dataset: str): + f = pyn5.File(f"{directory}/{dataset}")# TODO, mode="r") + return f[:] + + +if __name__ == '__main__': + import sys + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-list", action="store_true") + parser.add_argument("-verify", action="store_true") + parser.add_argument("args", nargs="*") + ns = parser.parse_args() + if ns.verify: + verify_format(*ns.args) + else: + generate_n5_format(ns.list) diff --git a/implementations/tensorstore/.skip b/implementations/tensorstore/.skip new file mode 100644 index 00000000..5b9a6bfa --- /dev/null +++ b/implementations/tensorstore/.skip @@ -0,0 +1 @@ +compile issues diff --git a/implementations/tensorstore/driver.sh b/implementations/tensorstore/driver.sh new file mode 100755 index 00000000..2abbfc57 --- /dev/null +++ b/implementations/tensorstore/driver.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +ENVNAME=ZI_tensorstore + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_tensorstore.py +} + +zi_list(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_tensorstore.py -list +} + +zi_read(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_tensorstore.py -verify "$@" +} + +. $ROOT/.conda_driver.sh +. 
$ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/tensorstore/environment.yml b/implementations/tensorstore/environment.yml new file mode 100644 index 00000000..a0ebaddc --- /dev/null +++ b/implementations/tensorstore/environment.yml @@ -0,0 +1,10 @@ +channels: + - conda-forge +dependencies: + - tensorstore + # Test generation + - numcodecs + - python + - scikit-image + - zarr + - fsspec diff --git a/implementations/tensorstore/generate_tensorstore.py b/implementations/tensorstore/generate_tensorstore.py new file mode 100755 index 00000000..b2e00e11 --- /dev/null +++ b/implementations/tensorstore/generate_tensorstore.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +import tensorstore as ts +import numcodecs +from skimage.data import astronaut + +# choose chunks s.t. we do have overhanging edge-chunks +CHUNKS = (100, 100, 1) +STR_TO_COMPRESSOR = { + "gzip": numcodecs.GZip, + "blosc": numcodecs.Blosc, + "zlib": numcodecs.Zlib, +} +COMPRESSION_OPTIONS = {"blosc": {"cname": "lz4"}} + +im = astronaut() +SIZE = im.shape + + +def n5_metadata(compression: str): + return { + 'compression': { + 'type': compression, + }, + 'dataType': 'uint32', + 'dimensions': SIZE, + 'blockSize': CHUNKS, + } + +def zr_metadata(): + if True: + chunk_grid = {"name": "regular", "configuration": {"chunk_shape": chunks}} # read size + + sharding_codec = { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": chunks, # write size + "codecs": [{"name": "bytes", "configuration": {"endian": "little"}}, + {"name": "blosc", "configuration": {"cname": "zstd", "clevel": 5}}], + "index_codecs": [{"name": "bytes", "configuration": {"endian": "little"}}, + {"name": "crc32c"}], + "index_location": "end" + } + } + codecs = [sharding_codec] + else: + # Alternative without sharding... 
+ chunk_grid = {"name": "regular", "configuration": {"chunk_shape": chunks}} + blosc_codec = {"name": "blosc", "configuration": { "cname": "lz4", "clevel": 5}} + codecs = [blosc_codec] + + base_config = { + "driver": "zarr3", + "kvstore": CONFIGS[1], + "metadata": { + "shape": shape, + "chunk_grid": chunk_grid, + "chunk_key_encoding": {"name": "default"}, # "configuration": {"separator": "/"}}, + "codecs": codecs, + "data_type": read.dtype, + "dimension_names": dimension_names, + } + } + + +def ts_write(driver: str, path: str, metadata: dict, data: List): + arr = ts.open({ + 'driver': driver, + 'kvstore': { + 'driver': 'file', + 'path': path, + }, + 'metadata': metadata, + 'create': True, + 'delete_existing': True, + }).result() + write_future = arr.write(data) + write_future.result() + + +# TODO use more compressors from numcodecs and more blosc filter_ids +def generate_zarr_format(list_only:bool, compressors=['gzip', 'blosc', 'zlib', None]): + for compressor in compressors: + copts = COMPRESSION_OPTIONS.get(compressor, {}) + if compressor is None: + name = "raw" + elif compressor == "blosc": + name = "%s/%s" % (compressor, copts.get("cname")) + else: + name = compressor + compressor_impl = STR_TO_COMPRESSOR[compressor](**copts) if compressor is not None else None + # V2. 
TODO: add method for v3 everywhere + if list_only: + print(f"data/tensorstore.zr\t{name}") + else: + ts_write('zarr', f'data/tensorstore.zr/{name}', zr_metadata(), im) + + +def generate_n5_format(list_only:bool, compressors=['gzip', None]): + im = astronaut() + for compressor in compressors: + name = compressor if compressor is not None else 'raw' + compressor_impl = STR_TO_COMPRESSOR[compressor]() if compressor is not None else None + if list_only: + print(f"data/tensorstore.n5\t{name}") + else: + ts_write('zarr', f'data/tensorstore.n5/{name}', n5_metadata(), im) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-list", action="store_true") + parser.add_argument("-verify", action="store_true") + parser.add_argument("args", nargs="*") + ns = parser.parse_args() + if ns.verify: + verify_format(*ns.args) + else: + generate_zarr_format(ns.list) + generate_n5_format(ns.list) diff --git a/implementations/xtensor_zarr/.gitignore b/implementations/xtensor_zarr/.gitignore new file mode 100644 index 00000000..378eac25 --- /dev/null +++ b/implementations/xtensor_zarr/.gitignore @@ -0,0 +1 @@ +build diff --git a/implementations/xtensor_zarr/.skip b/implementations/xtensor_zarr/.skip new file mode 100644 index 00000000..3fa6019f --- /dev/null +++ b/implementations/xtensor_zarr/.skip @@ -0,0 +1 @@ +no amd64; core dump on GH diff --git a/generate_data/xtensor_zarr/CMakeLists.txt b/implementations/xtensor_zarr/CMakeLists.txt similarity index 98% rename from generate_data/xtensor_zarr/CMakeLists.txt rename to implementations/xtensor_zarr/CMakeLists.txt index 7ee11e5e..619eebaf 100644 --- a/generate_data/xtensor_zarr/CMakeLists.txt +++ b/implementations/xtensor_zarr/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.8) if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) - project(generate_data) + project(implementations) find_package(xtensor-zarr REQUIRED CONFIG) set(XTENSOR_ZARR_INCLUDE_DIR 
${xtensor_zarr_INCLUDE_DIRS}) diff --git a/implementations/xtensor_zarr/driver.sh b/implementations/xtensor_zarr/driver.sh new file mode 100755 index 00000000..8cf4ea6b --- /dev/null +++ b/implementations/xtensor_zarr/driver.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +ENVNAME=ZI_xtensor_zarr + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + create_or_activate + + cd "${IMPL}" + + set +u # Due to GDAL_DATA + + rm -rf build + mkdir build + cd build + export LDFLAGS="${LDFLAGS} -Wl,-rpath,$CONDA_PREFIX/lib -Wl,-rpath,$PWD" + export LINKFLAGS="${LDFLAGS}" + cmake .. -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_INSTALL_LIBDIR=lib + make run +} + +zi_list(){ + echo "skipping list" + exit 1 +} + +zi_read(){ + echo "skipping read" + exit 1 +} + +. $ROOT/.conda_driver.sh +. $ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/xtensor_zarr/environment.yml b/implementations/xtensor_zarr/environment.yml new file mode 100644 index 00000000..a991ba1a --- /dev/null +++ b/implementations/xtensor_zarr/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - defaults +dependencies: + - python=3.8 + - cmake + - xtensor-zarr>=0.0.7 + - gdal>=3.0.4 + - tbb=2020.2 diff --git a/generate_data/xtensor_zarr/modules/FindBlosc.cmake b/implementations/xtensor_zarr/modules/FindBlosc.cmake similarity index 100% rename from generate_data/xtensor_zarr/modules/FindBlosc.cmake rename to implementations/xtensor_zarr/modules/FindBlosc.cmake diff --git a/generate_data/xtensor_zarr/modules/FindOIIO.cmake b/implementations/xtensor_zarr/modules/FindOIIO.cmake similarity index 100% rename from generate_data/xtensor_zarr/modules/FindOIIO.cmake rename to implementations/xtensor_zarr/modules/FindOIIO.cmake diff --git a/generate_data/xtensor_zarr/src/main.cpp b/implementations/xtensor_zarr/src/main.cpp similarity index 100% rename from 
generate_data/xtensor_zarr/src/main.cpp rename to implementations/xtensor_zarr/src/main.cpp diff --git a/implementations/z5py/.skip b/implementations/z5py/.skip new file mode 100644 index 00000000..6129b3da --- /dev/null +++ b/implementations/z5py/.skip @@ -0,0 +1 @@ +read_metadata failure diff --git a/implementations/z5py/driver.sh b/implementations/z5py/driver.sh new file mode 100755 index 00000000..ffc9ae6f --- /dev/null +++ b/implementations/z5py/driver.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +ENVNAME=ZI_z5py + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_z5py.py +} + +zi_list(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_z5py.py -list +} + +zi_read(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_z5py.py -verify "$@" +} + +. $ROOT/.conda_driver.sh +. $ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/z5py/environment.yml b/implementations/z5py/environment.yml new file mode 100644 index 00000000..71cadd41 --- /dev/null +++ b/implementations/z5py/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - defaults +dependencies: + - z5py + - python + - scikit-image diff --git a/implementations/z5py/generate_z5py.py b/implementations/z5py/generate_z5py.py new file mode 100644 index 00000000..3a40d61b --- /dev/null +++ b/implementations/z5py/generate_z5py.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +import z5py +from skimage.data import astronaut + +# choose chunks s.t. 
we do have overhanging edge-chunks +CHUNKS = (100, 100, 1) + +# options for the different compressors +COMPRESSION_OPTIONS = {"blosc": {"codec": "lz4"}} + + +# TODO support more compressors: +# - more compressors in numcodecs +# - more blosc codecs +def generate_zarr_format(list_only:bool, compressors=['gzip', 'blosc', 'zlib', 'raw']): + path = '../../data/z5py.zr' + im = astronaut() + + f = z5py.File(path, mode='w') + for compressor in compressors: + copts = COMPRESSION_OPTIONS.get(compressor, {}) + name = ( + compressor + if compressor != "blosc" + else "%s/%s" % (compressor, copts.get("codec")) + ) + if list_only: + print(f"{path}\t{name}") + else: + f.create_dataset(name, data=im, compression=compressor, chunks=CHUNKS, **copts) + + +def generate_n5_format(list_only:bool, compressors=['gzip', 'raw']): + path = '../../data/z5py.n5' + im = astronaut() + + f = z5py.File(path, mode='w') + for compressor in compressors: + name = compressor + if list_only: + print(f"{path}\t{name}") + else: + f.create_dataset(name, data=im, chunks=CHUNKS, compression=compressor) + + +def verify_format(directory: str, dataset: str): + f = z5py.File(f"{directory}/{dataset}", mode="r") + return f[:] + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-list", action="store_true") + parser.add_argument("-verify", action="store_true") + parser.add_argument("args", nargs="*") + ns = parser.parse_args() + if ns.verify: + verify_format(*ns.args) + else: + generate_zarr_format(ns.list) + generate_n5_format(ns.list) diff --git a/implementations/zarr-python/driver.sh b/implementations/zarr-python/driver.sh new file mode 100755 index 00000000..5c98dc29 --- /dev/null +++ b/implementations/zarr-python/driver.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +ENVNAME=ZI_zarr-python + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + cd "${IMPL}" + 
create_or_activate + python $IMPL/generate_zarr.py +} + +zi_list(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_zarr.py -list +} + +zi_read(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_zarr.py -verify "$@" +} + +. $ROOT/.conda_driver.sh +. $ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/zarr-python/environment.yml b/implementations/zarr-python/environment.yml new file mode 100644 index 00000000..1fd13515 --- /dev/null +++ b/implementations/zarr-python/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - defaults +dependencies: + - python + - scikit-image + - zarr + - fsspec diff --git a/generate_data/generate_zarr.py b/implementations/zarr-python/generate_zarr.py similarity index 61% rename from generate_data/generate_zarr.py rename to implementations/zarr-python/generate_zarr.py index a0e0c4ca..b3b81ec4 100644 --- a/generate_data/generate_zarr.py +++ b/implementations/zarr-python/generate_zarr.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + import zarr import numcodecs from skimage.data import astronaut @@ -13,7 +15,7 @@ # TODO use more compressors from numcodecs and more blosc filter_ids -def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]): +def generate_zarr_format(list_only:bool, compressors=['gzip', 'blosc', 'zlib', None]): for nested, StoreClass, store_kwargs in [ (False, zarr.storage.DirectoryStore, {}), @@ -24,7 +26,7 @@ def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]): ]: nested_str = '_nested' if nested else '_flat' - path = f'data/zarr_{StoreClass.__name__}{nested_str}.zr' + path = f'../../data/zarr_{StoreClass.__name__}{nested_str}.zr' store = StoreClass(path, **store_kwargs) im = astronaut() @@ -38,21 +40,41 @@ def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]): else: name = compressor compressor_impl = STR_TO_COMPRESSOR[compressor](**copts) if compressor is not None else None - f.create_dataset(name, data=im, chunks=CHUNKS, - 
compressor=compressor_impl) + if list_only: + print(f"{path}\t{name}") + else: + f.create_dataset(name, data=im, chunks=CHUNKS, + compressor=compressor_impl) -def generate_n5_format(compressors=['gzip', None]): +def generate_n5_format(list_only:bool, compressors=['gzip', None]): im = astronaut() - f = zarr.open('data/zarr.n5', mode='w') + path = "../../data/zarr.n5" + f = zarr.open(path, mode='w') for compressor in compressors: name = compressor if compressor is not None else 'raw' compressor_impl = STR_TO_COMPRESSOR[compressor]() if compressor is not None else None - f.create_dataset(name, data=im, chunks=CHUNKS, - compressor=compressor_impl) + if list_only: + print(f"{path}\t{name}") + else: + f.create_dataset(name, data=im, chunks=CHUNKS, + compressor=compressor_impl) + + +def verify_format(path: str, group: str): + print(path, group) if __name__ == '__main__': - generate_zarr_format() - generate_n5_format() + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-list", action="store_true") + parser.add_argument("-verify", action="store_true") + parser.add_argument("args", nargs="*") + ns = parser.parse_args() + if ns.verify: + verify_format(*ns.args) + else: + generate_zarr_format(ns.list) + generate_n5_format(ns.list) diff --git a/implementations/zarrita/driver.sh b/implementations/zarrita/driver.sh new file mode 100755 index 00000000..480b393d --- /dev/null +++ b/implementations/zarrita/driver.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +ENVNAME=ZI_zarrita + +# Standard bootstrapping +IMPL=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$( dirname $IMPL) + +zi_write(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_zarrita.py +} + +zi_list(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_zarrita.py -list +} + +zi_read(){ + cd "${IMPL}" + create_or_activate + python $IMPL/generate_zarrita.py -verify "$@" +} + +. $ROOT/.conda_driver.sh +. 
$ROOT/.bash_driver.sh +argparse "$@" diff --git a/implementations/zarrita/environment.yml b/implementations/zarrita/environment.yml new file mode 100644 index 00000000..66901648 --- /dev/null +++ b/implementations/zarrita/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - defaults +dependencies: + - python + - pip + - pip: + - scikit-image + - zarrita diff --git a/implementations/zarrita/generate_zarrita.py b/implementations/zarrita/generate_zarrita.py new file mode 100644 index 00000000..5ed76c9c --- /dev/null +++ b/implementations/zarrita/generate_zarrita.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +import zarrita +from skimage.data import astronaut + +# choose chunks s.t. we do have overhanging edge-chunks +CHUNK_SHAPE = (100, 100, 1) +SHARD_SHAPE = (1000, 1000, 3) +STR_TO_CODEC = { + "gzip": zarrita.codecs.gzip_codec(), + "blosc": zarrita.codecs.blosc_codec(cname="lz4", typesize=10), +} + + +def generate_zr3_format(list_only:bool, codecs=["gzip", "blosc", None], nested=True, sharded=True): + im = astronaut() + fname = "zarrita" + if nested: + chunk_separator = "/" + fname += "_nested" + else: + chunk_separator = "." 
+ if sharded: + fname += "_sharded" + + path = f"../../data/{fname}" + store = zarrita.LocalStore(path) + g = zarrita.Group.create(store, exists_ok=True) + for codec in codecs: + if codec is None: + name = "raw" + elif codec == "blosc": + name = f"{codec}/{STR_TO_CODEC[codec].configuration.cname}" + else: + name = codec + + codecs_impl = [zarrita.codecs.bytes_codec()] + if codec is not None: + codecs_impl.append(STR_TO_CODEC[codec]) + + if sharded: + codecs_impl = [ + zarrita.codecs.sharding_codec( + chunk_shape=CHUNK_SHAPE, codecs=codecs_impl + ), + ] + + if list_only: + print(f"{path}\t{name}") + else: + a = g.create_array( + name, + shape=im.shape, + chunk_shape=SHARD_SHAPE if sharded else CHUNK_SHAPE, + chunk_key_encoding=("default", chunk_separator), + dtype=im.dtype, + codecs=codecs_impl, + exists_ok=True, + ) + a[:, :, :]= im + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-list", action="store_true") + parser.add_argument("-verify", action="store_true") + parser.add_argument("args", nargs="*") + ns = parser.parse_args() + if ns.verify: + verify_format(*ns.args) + else: + for nested in [False, True]: + for sharded in [False, True]: + generate_zr3_format(ns.list, nested=nested, sharded=sharded) diff --git a/test/test_read_all.py b/test/test_read_all.py index c8e77049..8f740c93 100644 --- a/test/test_read_all.py +++ b/test/test_read_all.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Usage ===== @@ -33,6 +34,7 @@ """ import os +import sys import subprocess from typing import Dict, List from pathlib import Path @@ -73,72 +75,71 @@ "zarr-v3": ["blosc", "gzip", "raw", "zlib"], "N5": [], }, + "Rarr": { + "zarr": ["blosc", "gzip", "raw", "zlib"], + "zarr-v3": [], + "N5": [], + }, } -def read_with_jzarr(fpath, ds_name, nested=None): - if ds_name == "blosc": - ds_name = "blosc/lz4" - +def make_read(name, fpath, ds_name, nested=None): cmd = ( - f"generate_data/jzarr/generate_data.sh " - f"-verify {str(fpath)} 
{ds_name}" + f"make implementations/{name}-read-fast " + f"NODEBUG=1 DIR={str(fpath)} DATASET={ds_name}" ) # will raise subprocess.CalledProcessError if return code is not 0 - subprocess.check_output(cmd, shell=True) - return None + result = subprocess.run(cmd, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if result.returncode != 0: + raise Exception( + { + "command": cmd, + "stdout": result.stdout, + "stderr": result.stderr, + } + ) + return result + +def read_with_jzarr(fpath, ds_name, nested=None): + if ds_name == "blosc": + ds_name = "blosc/lz4" + return make_read("jzarr", fpath, ds_name, nested) def read_with_zarr(fpath, ds_name, nested): - import zarr if ds_name == "blosc": ds_name = "blosc/lz4" - if str(fpath).endswith('.zr'): - if nested: - if 'FSStore' in str(fpath): - store = zarr.storage.FSStore( - os.fspath(fpath), dimension_separator='/', mode='r' - ) - else: - store = zarr.storage.NestedDirectoryStore(os.fspath(fpath)) - else: - if 'FSStore' in str(fpath): - store = zarr.storage.FSStore(os.fspath(fpath)) - else: - store = zarr.storage.DirectoryStore(fpath) - else: - store = os.fspath(fpath) - return zarr.open(store)[ds_name][:] + return make_read("zarr-python", fpath, ds_name, nested) def read_with_pyn5(fpath, ds_name, nested): - import pyn5 - return pyn5.File(fpath)[ds_name][:] + return make_read("pyn5", fpath, ds_name, nested) def read_with_z5py(fpath, ds_name, nested): - import z5py if ds_name == "blosc": ds_name = "blosc/lz4" - return z5py.File(fpath)[ds_name][:] + return make_read("z5py", fpath, ds_name, nested) def read_with_zarrita(fpath, ds_name, nested): - import zarrita if ds_name == "blosc": ds_name = "blosc/lz4" - h = zarrita.get_hierarchy(str(fpath.absolute())) - return h["/" + ds_name][:] + return make_read("zarrita", fpath, ds_name, nested) def read_with_xtensor_zarr(fpath, ds_name, nested): + if ds_name == "blosc": + ds_name = "blosc/lz4" + return make_read("xtensor_zarr", fpath, ds_name, nested) + +def 
read_with_Rarr(fpath, ds_name, nested): + if ds_name == "blosc": ds_name = "blosc/lz4" - fname = "a.npz" - if os.path.exists(fname): - os.remove(fname) - subprocess.check_call(["generate_data/xtensor_zarr/build/run_xtensor_zarr", fpath, ds_name]) - return np.load(fname)["a"] + return make_read("Rarr", fpath, ds_name, nested) EXTENSIONS = {"zarr": ".zr", "N5": ".n5", "zarr-v3": ".zr3"} @@ -220,8 +221,8 @@ def create_params(): if write_attrs: write_attrs = ' (' + write_attrs + ')' ids.append( - f"read {writing_library}{write_attrs} {fmt} using " - f"{reading_library}, {codec}" + f"W-{writing_library}{write_attrs} {fmt}_R-" + f"{reading_library}_{codec}" ) return argnames, params, ids @@ -237,6 +238,7 @@ def _get_read_fn(reading_library): "z5py": read_with_z5py, "zarrita": read_with_zarrita, "xtensor_zarr": read_with_xtensor_zarr, + "Rarr": read_with_Rarr, }[reading_library] return read_fn @@ -254,11 +256,13 @@ def test_correct_read(fmt, writing_library, reading_library, codec, nested, f"file not found: {fpath}. Make sure you have generated the data " "using 'make data'" ) - test = read_fn(fpath, codec, nested) - # Assume if None is returned, the read function has verified. - if test is not None: - assert test.shape == reference.shape - assert np.allclose(test, reference) + result = read_fn(fpath, codec, nested) + + if b"Skipping" in result.stderr: + pytest.skip(result.stderr.decode()) + else: + print(result.stdout) + print(result.stderr, file=sys.stderr) def tabulate_test_results(params, per_codec_tables=False):