Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Axes v0.4 #93

Merged
merged 16 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/omero_zarr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ._version import version as __version__

ngff_version = "0.3"
ngff_version = "0.4"
will-moore marked this conversation as resolved.
Show resolved Hide resolved

__all__ = [
"__version__",
Expand Down
15 changes: 8 additions & 7 deletions src/omero_zarr/masks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from skimage.draw import polygon as sk_polygon
from zarr.hierarchy import open_group

from .util import open_store, print_status
from .util import marshal_axes, open_store, print_status

# Mapping of dimension names to axes in the Zarr
DIMENSION_ORDER: Dict[str, int] = {
Expand Down Expand Up @@ -215,6 +215,7 @@ def set_image(
:param plate_path: The zarr path to the image
:return: None
"""
self.image = image
self.size_t = image.getSizeT()
self.size_c = image.getSizeC()
self.size_z = image.getSizeZ()
Expand Down Expand Up @@ -303,30 +304,30 @@ def save(self, masks: List[omero.model.Shape], name: str) -> None:
ignored_dimensions,
check_overlaps=True,
)
# For v0.3 ngff we want to reduce the number of dimensions to

metadata = marshal_axes(self.image, levels=1)

# For v0.3+ ngff we want to reduce the number of dimensions to
# match the dims of the Image.
dims_to_squeeze = []
axes = []
for dim, size in enumerate(self.image_shape):
if size == 1:
dims_to_squeeze.append(dim)
else:
axes.append("tczyx"[dim])
labels = np.squeeze(labels, axis=tuple(dims_to_squeeze))

scaler = Scaler(max_layer=input_pyramid_levels)
label_pyramid = scaler.nearest(labels)
pyramid_grp = out_labels.require_group(name)

write_multiscale(
label_pyramid, pyramid_grp, axes=axes
label_pyramid, pyramid_grp, **metadata
) # TODO: dtype, chunks, overwrite

# Specify and store metadata
image_label_colors: List[JSONDict] = []
label_properties: List[JSONDict] = []
image_label = {
"version": "0.3",
"version": "0.4",
will-moore marked this conversation as resolved.
Show resolved Hide resolved
"colors": image_label_colors,
"properties": label_properties,
"source": {"image": source_image_link},
Expand Down
78 changes: 32 additions & 46 deletions src/omero_zarr/raw_pixels.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
import numpy as np
import omero.clients # noqa
import omero.gateway # required to allow 'from omero_zarr import raw_pixels'
from ome_zarr.writer import write_multiscales_metadata, write_plate_metadata
from omero.rtypes import unwrap
from skimage.transform import resize
from zarr.hierarchy import Array, Group, open_group

from . import __version__
from . import ngff_version as VERSION
from .util import open_store, print_status
from .util import marshal_axes, open_store, print_status


def image_to_zarr(image: omero.gateway.ImageWrapper, args: argparse.Namespace) -> None:
Expand All @@ -24,16 +25,15 @@ def image_to_zarr(image: omero.gateway.ImageWrapper, args: argparse.Namespace) -
print(f"Exporting to {name} ({VERSION})")
store = open_store(name)
root = open_group(store)
n_levels, axes = add_image(image, root, cache_dir=cache_dir)
add_multiscales_metadata(root, axes, n_levels)
add_image(image, root, cache_dir=cache_dir)
add_omero_metadata(root, image)
add_toplevel_metadata(root)
print("Finished.")


def add_image(
image: omero.gateway.ImageWrapper, parent: Group, cache_dir: Optional[str] = None
) -> Tuple[int, List[str]]:
) -> Tuple[int, List[Dict[str, Any]]]:
"""Adds an OMERO image pixel data as array to the given parent zarr group.
Optionally caches the pixel data in the given cache_dir directory.
Returns the number of resolution levels generated for the image.
Expand Down Expand Up @@ -79,7 +79,7 @@ def planeGen() -> np.ndarray:
longest = longest // 2
level_count += 1

return add_raw_image(
paths = add_raw_image(
planes=planes,
size_z=size_z,
size_c=size_c,
Expand All @@ -91,6 +91,12 @@ def planeGen() -> np.ndarray:
cache_file_name_func=get_cache_filename,
)

metadata = marshal_axes(image, len(paths))

write_multiscales_metadata(parent, paths, **metadata)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I see this API (introduced in ome/ome-zarr-py#124) in action, I am slightly confused. I think this is primarily because transformations is generated from the knowledge of paths above and then later re-zipped with paths when writing the metadata.

An alternative workflow would be to support datasets either as lists of string or dictionaries in write_multiscales_metadata.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps slightly related to my thoughts in ome/ome-zarr-py#161 (review)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since datasets would include paths, we would just have write_multiscales_metadata(group, datasets) and remove support for paths? Or if datasets is a list of strings, treat it as paths (wouldn't break the API, but could be more confusing).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar questions and concerns around the usability came up in ome/ome-zarr-py#157 when adding support for plate.wells as list of dictionaries alongside list of strings (also corresponding to individual well paths).

At least, I find List[Union[str, dict]] to be a fairly good compromise that allows to handle both the simple use case where a minimal valid spec is generated from a list of paths and the extensible scenario where the caller wants to store additional metadata (and not necessarily specified by the OME-NGFF spec) to the datasets element.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also wonder about the API for reading the transformations in ome-zarr-py. These are currently a 2D list in the node.metadata (not in the form of datasets) since paths are not returned by the reader:

            paths = [d["path"] for d in datasets]
            self.datasets: List[str] = paths
            transformations = [d.get("transformations") for d in datasets]
            if any(trans is not None for trans in transformations):
                node.metadata["transformations"] = transformations

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


return (level_count, metadata["axes"])


def add_raw_image(
*,
Expand All @@ -103,7 +109,7 @@ def add_raw_image(
level_count: int,
cache_dir: Optional[str] = None,
cache_file_name_func: Callable[[int, int, int], str] = None,
) -> Tuple[int, List[str]]:
) -> List[str]:
"""Adds the raw image pixel data as array to the given parent zarr group.
Optionally caches the pixel data in the given cache_dir directory.
Returns the number of resolution levels generated for the image.
Expand All @@ -121,14 +127,8 @@ def add_raw_image(
cache_dir = ""

dims = [dim for dim in [size_t, size_c, size_z] if dim != 1]
axes = []
if size_t > 1:
axes.append("t")
if size_c > 1:
axes.append("c")
if size_z > 1:
axes.append("z")

paths: List[str] = []
field_groups: List[Array] = []
for t in range(size_t):
for c in range(size_c):
Expand All @@ -151,9 +151,11 @@ def add_raw_image(
size_x = plane.shape[1]
# If on first plane, create a new group for this resolution level
if len(field_groups) <= level:
path = str(level)
paths.append(path)
field_groups.append(
parent.create(
str(level),
path,
shape=tuple(dims + [size_y, size_x]),
chunks=tuple([1] * len(dims) + [size_y, size_x]),
dtype=d_type,
Expand All @@ -179,7 +181,8 @@ def add_raw_image(
preserve_range=True,
anti_aliasing=False,
).astype(plane.dtype)
return (level_count, axes + ["y", "x"])

return paths


def marshal_acquisition(acquisition: omero.gateway._PlateAcquisitionWrapper) -> Dict:
Expand Down Expand Up @@ -222,20 +225,14 @@ def plate_to_zarr(plate: omero.gateway._PlateWrapper, args: argparse.Namespace)

well_paths = set()

col_names = plate.getColumnLabels()
row_names = plate.getRowLabels()
col_names = [str(name) for name in plate.getColumnLabels()]
row_names = [str(name) for name in plate.getRowLabels()]

plate_metadata = {
"name": plate.name,
"rows": [{"name": str(name)} for name in row_names],
"columns": [{"name": str(name)} for name in col_names],
"version": VERSION,
}
# Add acquisitions key if at least one plate acquisition exists
acquisitions = list(plate.listPlateAcquisitions())
plate_acq = None
if acquisitions:
plate_metadata["acquisitions"] = [marshal_acquisition(x) for x in acquisitions]
root.attrs["plate"] = plate_metadata
plate_acq = [marshal_acquisition(x) for x in acquisitions]

for well in plate.listChildren():
row = plate.getRowLabels()[well.row]
Expand All @@ -256,39 +253,28 @@ def plate_to_zarr(plate: omero.gateway._PlateWrapper, args: argparse.Namespace)
row_group = root.require_group(row)
col_group = row_group.require_group(col)
field_group = col_group.require_group(field_name)
n_levels, axes = add_image(img, field_group, cache_dir=cache_dir)
add_multiscales_metadata(field_group, axes, n_levels)
add_image(img, field_group, cache_dir=cache_dir)
add_omero_metadata(field_group, img)
# Update Well metadata after each image
col_group.attrs["well"] = {"images": fields, "version": VERSION}
will-moore marked this conversation as resolved.
Show resolved Hide resolved
max_fields = max(max_fields, field + 1)
print_status(int(t0), int(time.time()), count, total)

# Update plate_metadata after each Well
plate_metadata["wells"] = [{"path": x} for x in well_paths]
plate_metadata["field_count"] = max_fields
root.attrs["plate"] = plate_metadata
write_plate_metadata(
sbesson marked this conversation as resolved.
Show resolved Hide resolved
root,
row_names,
col_names,
wells=list(well_paths),
field_count=max_fields,
acquisitions=plate_acq,
name=plate.name,
)

add_toplevel_metadata(root)
print("Finished.")


def add_multiscales_metadata(
    zarr_root: Group,
    axes: List[str],
    resolutions: int = 1,
) -> None:
    """Store a minimal v0.3 'multiscales' attribute on the given zarr group.

    :param zarr_root: the zarr group whose attrs receive the metadata
    :param axes: ordered axis names matching the image dimensions
    :param resolutions: number of resolution levels; dataset paths are
        the level indices "0" .. str(resolutions - 1)
    """
    dataset_entries = [{"path": str(level)} for level in range(resolutions)]
    zarr_root.attrs["multiscales"] = [
        {
            "version": "0.3",
            "datasets": dataset_entries,
            "axes": axes,
        }
    ]


def add_omero_metadata(zarr_root: Group, image: omero.gateway.ImageWrapper) -> None:

image_data = {
Expand Down
68 changes: 68 additions & 0 deletions src/omero_zarr/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import time
from typing import Dict, List

from omero.gateway import ImageWrapper
from zarr.storage import FSStore


Expand Down Expand Up @@ -33,3 +35,69 @@ def open_store(name: str) -> FSStore:
normalize_keys=False,
mode="w",
)


def marshal_axes(
    image: ImageWrapper, levels: int = 1, multiscales_zoom: float = 2.0
) -> Dict[str, List]:
    """Build NGFF 'axes' and per-level 'transformations' metadata for an image.

    :param image: the OMERO image whose dimensions and pixel sizes are read
    :param levels: number of resolution levels to generate transformations for
    :param multiscales_zoom: downsampling factor between successive levels
    :return: dict with "axes" (list of axis dicts) and "transformations"
        (one list of scale transformations per level)
    """
    # Collect physical pixel sizes keyed by axis name. OMERO unit names
    # lower-cased are valid UDUNITS-2 identifiers, as the NGFF spec requires.
    pixel_sizes: Dict[str, Dict] = {}
    for axis_name, pix_size in (
        ("x", image.getPixelSizeX(units=True)),
        ("y", image.getPixelSizeY(units=True)),
        ("z", image.getPixelSizeZ(units=True)),
    ):
        if pix_size is not None:
            pixel_sizes[axis_name] = {
                "units": str(pix_size.getUnit()).lower(),
                "value": pix_size.getValue(),
            }

    # Only include t/c/z axes when that dimension has more than one plane;
    # the trailing y and x axes are always present.
    axes: List[Dict] = []
    if image.getSizeT() > 1:
        axes.append({"name": "t", "type": "time"})
    if image.getSizeC() > 1:
        axes.append({"name": "c", "type": "channel"})
    if image.getSizeZ() > 1:
        axes.append({"name": "z", "type": "space"})
    axes.append({"name": "y", "type": "space"})
    axes.append({"name": "x", "type": "space"})
    for axis in axes:
        if axis["name"] in pixel_sizes:
            axis["units"] = pixel_sizes[axis["name"]]["units"]

    # Each resolution level gets a list holding a single 'scale'
    # transformation covering the spatial axes that have a pixel size, e.g.
    # {"type": "scale", "scale": [2.0, 2.0, 2.0], "axisIndices": [2, 3, 4]}
    transformations: List[List[Dict]] = []
    zooms = {"x": 1.0, "y": 1.0, "z": 1.0}
    for _ in range(levels):
        scale_values: List[float] = []
        scaled_indices: List[int] = []
        for index, axis in enumerate(axes):
            name = axis["name"]
            if name in pixel_sizes:
                scale_values.append(zooms[name] * pixel_sizes[name]["value"])
                scaled_indices.append(index)
        if scale_values:
            transformations.append(
                [
                    {
                        "type": "scale",
                        "scale": scale_values,
                        "axisIndices": scaled_indices,
                    }
                ]
            )
        # X and Y are downsampled at every level; Z resolution is unchanged.
        zooms["x"] *= multiscales_zoom
        zooms["y"] *= multiscales_zoom

    return {"axes": axes, "transformations": transformations}