From 022f3e866325f1c31c2cc865e7d35ae0a0bcb5b1 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 8 Mar 2023 11:02:09 +0100 Subject: [PATCH 01/98] Prototype new loader code Add error handling Fix reading of fields Add NXobject subclass handling Refactor for more automatic child handling Introduce multiple map layers, I think this does not work Refactor to new approach Customize field dims via strategy Refactor Rename Basic NXdata support Cleanup Absorb errors handling into Field Cleanup Allow for dtype override Pass (base) definitions from parents Translate sel for children Bin-edge handling Undo change to unrelated file --- src/scippnexus/nx2.py | 452 ++++++++++++++++++++++++++++++++ src/scippnexus/nxevent_data2.py | 170 ++++++++++++ tests/nx2_test.py | 220 ++++++++++++++++ 3 files changed, 842 insertions(+) create mode 100644 src/scippnexus/nx2.py create mode 100644 src/scippnexus/nxevent_data2.py create mode 100644 tests/nx2_test.py diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py new file mode 100644 index 00000000..ad3e6c52 --- /dev/null +++ b/src/scippnexus/nx2.py @@ -0,0 +1,452 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from __future__ import annotations + +import datetime +import re +import warnings +from collections.abc import Mapping +from dataclasses import dataclass +from functools import cached_property +from typing import Any, Dict, Iterable, Iterator, Optional, Tuple, Union + +import dateutil.parser +import numpy as np +import scipp as sc + +from ._common import convert_time_to_datetime64, to_child_select, to_plain_index +from ._hdf5_nexus import _warn_latin1_decode +from .typing import H5Dataset, H5Group, ScippIndex + +# What we know: +# 1. We must not do a recursive read, or we will get in trouble for files with many +# entries. User may just want to access subgroup recursively. +# 2. 
Some child access needs info from parent: +# - Field dims +# - NXevent_data +# - NXoff_geometry +# Maybe not... parent can modify dims/customize assembly +# 3. Unless we read shape, attrs, children only once, we will suffer too much overhead. +# This includes dims/sizes computation. +# 4. Must be able to load coord before loading rest, for label-based indexing + +# Desired behaviors: +# - Field should encapsulate "errors" handling +# - NXtransformations should load depends_on as chain (scalar variable with next) +# - NXobject.__setitem__ to set `axes` and `name_indices` attributes? + +# Consider: +# - Non-legacy mode would make dim parsing simpler and faster? + + +class NexusStructureError(Exception): + """Invalid or unsupported class and field structure in Nexus. + """ + pass + + +def is_dataset(obj: Union[H5Group, H5Dataset]) -> bool: + """Return true if the object is an h5py.Dataset or equivalent. + + Use this instead of isinstance(obj, h5py.Dataset) to ensure that code is compatible + with other h5py-alike interfaces. + """ + return hasattr(obj, 'shape') + + +def _is_time(obj): + if (unit := obj.unit) is None: + return False + return unit.to_dict()['powers'] == {'time': 1} + + +def _as_datetime(obj: Any): + if isinstance(obj, str): + try: + # NumPy and scipp cannot handle timezone information. We therefore apply it, + # i.e., convert to UTC. + # Would like to use dateutil directly, but with Python's datetime we do not + # get nanosecond precision. Therefore we combine numpy and dateutil parsing. + date_only = 'T' not in obj + if date_only: + return sc.datetime(obj) + date, time = obj.split('T') + time_and_timezone_offset = re.split(r'Z|\+|-', time) + time = time_and_timezone_offset[0] + if len(time_and_timezone_offset) == 1: + # No timezone, parse directly (scipp based on numpy) + return sc.datetime(f'{date}T{time}') + else: + # There is timezone info. Parse with dateutil. 
+ dt = dateutil.parser.isoparse(obj) + dt = dt.replace(microsecond=0) # handled by numpy + dt = dt.astimezone(datetime.timezone.utc) + dt = dt.replace(tzinfo=None).isoformat() + # We operate with string operations here and thus end up parsing date + # and time twice. The reason is that the timezone-offset arithmetic + # cannot be done, e.g., in nanoseconds without causing rounding errors. + if '.' in time: + dt += f".{time.split('.')[1]}" + return sc.datetime(dt) + except ValueError: + pass + return None + + +_scipp_dtype = { + np.dtype('int8'): sc.DType.int32, + np.dtype('int16'): sc.DType.int32, + np.dtype('uint8'): sc.DType.int32, + np.dtype('uint16'): sc.DType.int32, + np.dtype('uint32'): sc.DType.int32, + np.dtype('uint64'): sc.DType.int64, + np.dtype('int32'): sc.DType.int32, + np.dtype('int64'): sc.DType.int64, + np.dtype('float32'): sc.DType.float32, + np.dtype('float64'): sc.DType.float64, + np.dtype('bool'): sc.DType.bool, +} + + +def _dtype_fromdataset(dataset: H5Dataset) -> sc.DType: + return _scipp_dtype.get(dataset.dtype, sc.DType.string) + + +@dataclass +class Field: + dataset: H5Dataset + dims: Optional[Tuple[str, ...]] = None + dtype: Optional[sc.DType] = None + errors: Optional[H5Dataset] = None + _is_time: Optional[bool] = None + """NeXus field. + + In HDF5 fields are represented as dataset. + """ + + @cached_property + def attrs(self) -> Dict[str, Any]: + return dict(self.dataset.attrs) if self.dataset.attrs else dict() + + #def __init__(self, + # dataset: H5Dataset, + # errors: Optional[H5Dataset] = None, + # *, + # ancestor, + # dims=None, + # dtype: Optional[sc.DType] = None, + # is_time=None): + # self._ancestor = ancestor # Usually the parent, but may be grandparent, etc. 
+ # self.dataset = dataset + # self._errors = errors + # self._dtype = _dtype_fromdataset(dataset) if dtype is None else dtype + # self._shape = self.dataset.shape + # if self._dtype == sc.DType.vector3: + # self._shape = self._shape[:-1] + # self._is_time = is_time + # # NeXus treats [] and [1] interchangeably. In general this is ill-defined, but + # # the best we can do appears to be squeezing unless the file provides names for + # # dimensions. The shape property of this class does thus not necessarily return + # # the same as the shape of the underlying dataset. + # # TODO Should this logic be in FieldInfo? Or in NXdataInfo? + # if dims is not None: + # self._dims = tuple(dims) + # if len(self._dims) < len(self._shape): + # # The convention here is that the given dimensions apply to the shapes + # # starting from the left. So we only squeeze dimensions that are after + # # len(dims). + # self._shape = self._shape[:len(self._dims)] + tuple( + # size for size in self._shape[len(self._dims):] if size != 1) + # elif (axes := self.attrs.get('axes')) is not None: + # self._dims = tuple(axes.split(',')) + # else: + # self._shape = tuple(size for size in self._shape if size != 1) + # self._dims = tuple(f'dim_{i}' for i in range(self.ndim)) + + @property + def shape(self) -> Tuple[int, ...]: + return self.dataset.shape + + @property + def sizes(self) -> Dict[str, int]: + return {dim: size for dim, size in zip(self.dims, self.shape)} + + def _load_variances(self, var, index): + stddevs = sc.empty(dims=var.dims, + shape=var.shape, + dtype=var.dtype, + unit=var.unit) + try: + self.errors.read_direct(stddevs.values, source_sel=index) + except TypeError: + stddevs.values = self.errors[index].squeeze() + # According to the standard, errors must have the same shape as the data. + # This is not the case in all files we observed, is there any harm in + # attempting a broadcast? 
+ var.variances = np.broadcast_to(sc.pow(stddevs, sc.scalar(2)).values, + shape=var.shape) + + def __getitem__(self, select) -> Union[Any, sc.Variable]: + """Load the field as a :py:class:`scipp.Variable` or Python object. + + If the shape is empty and no unit is given this returns a Python object, such + as a string or integer. Otherwise a :py:class:`scipp.Variable` is returned. + """ + index = to_plain_index(self.dims, select) + if isinstance(index, (int, slice)): + index = (index, ) + + base_dims = self.dims + base_shape = self.shape + dims = [] + shape = [] + for i, ind in enumerate(index): + if not isinstance(ind, int): + dims.append(base_dims[i]) + shape.append(len(range(*ind.indices(base_shape[i])))) + + variable = sc.empty(dims=dims, + shape=shape, + dtype=self.dtype, + unit=self.unit, + with_variances=self.errors is not None) + + # If the variable is empty, return early + if np.prod(shape) == 0: + return variable + + if self.dtype == sc.DType.string: + try: + strings = self.dataset.asstr()[index] + except UnicodeDecodeError as e: + strings = self.dataset.asstr(encoding='latin-1')[index] + _warn_latin1_decode(self.dataset, strings, str(e)) + variable.values = np.asarray(strings).flatten() + elif variable.values.flags["C_CONTIGUOUS"]: + # On versions of h5py prior to 3.2, a TypeError occurs in some cases + # where h5py cannot broadcast data with e.g. shape (20, 1) to a buffer + # of shape (20,). Note that broadcasting (1, 20) -> (20,) does work + # (see https://github.com/h5py/h5py/pull/1796). + # Therefore, we manually squeeze here. + # A pin of h5py<3.2 is currently required by Mantid and hence scippneutron + # (see https://github.com/h5py/h5py/issues/1880#issuecomment-823223154) + # hence this workaround. Once we can use a more recent h5py with Mantid, + # this try/except can be removed. 
+ try: + self.dataset.read_direct(variable.values, source_sel=index) + except TypeError: + variable.values = self.dataset[index].squeeze() + if self.errors is not None: + self._load_variances(variable, index) + else: + variable.values = self.dataset[index] + if _is_time(variable): + starts = [] + for name in self.attrs: + if (dt := _as_datetime(self.attrs[name])) is not None: + starts.append(dt) + if self._is_time and len(starts) == 0: + starts.append(sc.epoch(unit=self.unit)) + if len(starts) == 1: + variable = convert_time_to_datetime64( + variable, + start=starts[0], + scaling_factor=self.attrs.get('scaling_factor')) + if variable.ndim == 0 and variable.unit is None: + # Work around scipp/scipp#2815, and avoid returning NumPy bool + if isinstance(variable.values, np.ndarray) and variable.dtype != 'bool': + return variable.values[()] + else: + return variable.value + return variable + + def __repr__(self) -> str: + return f'' + + @property + def name(self) -> str: + return self.dataset.name + + @property + def ndim(self) -> int: + """Total number of dimensions in the dataset. + + See the shape property for potential differences to the value returned by the + underlying h5py.Dataset.ndim. + """ + return len(self.shape) + + @cached_property + def unit(self) -> Union[sc.Unit, None]: + if (unit := self.attrs.get('units')) is not None: + try: + return sc.Unit(unit) + except sc.UnitError: + warnings.warn(f"Unrecognized unit '{unit}' for value dataset " + f"in '{self.name}'; setting unit as 'dimensionless'") + return sc.units.one + return None + + +class NXobject: + + def __init__(self, group: Group): + self._group = group + + @property + def sizes(self) -> Dict[str, int]: + # exclude geometry/tansform groups? 
+ return sc.DataGroup(self._group).sizes + + def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: + return tuple(f'dim_{i}' for i in range(len(dataset.shape))) + + def field_dtype(self, name: str, dataset: H5Dataset) -> sc.dtype: + return _dtype_fromdataset(dataset) + + def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + # Note that this will be similar in NXdata, but there we need to handle + # bin edges as well. + child_sel = to_child_select(self.sizes.keys(), child.dims, sel) + return child[child_sel] + + def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: + return sc.DataGroup( + {name: self.index_child(child, sel) + for name, child in obj.items()}) + + def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: + return dg + + +# Group adds children/dims caching, removes __setitem__? +# class Group(WriteableGroup): +class Group(Mapping): + + def __init__(self, + group: H5Group, + definitions: Optional[Dict[str, NXobject]] = None): + self._group = group + self._definitions = {} if definitions is None else definitions + + @cached_property + def attrs(self) -> Dict[str, Any]: + # Attrs are not read until needed, to avoid reading all attrs for all subgroups. + # We may expected a per-subgroup overhead of 1 ms for reading attributes, so if + # all we want is access one attribute, we may save, e.g., a second for a group + # with 1000 subgroups. + return dict(self._group.attrs) if self._group.attrs else dict() + + @cached_property + def _children(self) -> Dict[str, Union[Field, Group]]: + # split off special children here? 
+ # - depends_on + # - NXoff_geometry and NXcylindrical_geometry + # - legacy NXgeometry + # - NXtransformations + items = { + name: + Field(obj) if is_dataset(obj) else Group(obj, definitions=self._definitions) + for name, obj in self._group.items() + } + suffix = '_errors' + field_with_errors = [name for name in items if f'{name}{suffix}' in items] + for name in field_with_errors: + values = items[name] + errors = items[f'{name}{suffix}'] + if values.unit == errors.unit and values.shape == errors.shape: + values.errors = errors.dataset + del items[f'{name}{suffix}'] + return items + + @cached_property + def _nexus(self) -> NXobject: + return self._definitions.get(self.attrs.get('NX_class'), NXobject)(self) + + def _populate_field(self, name: str, field: Field) -> None: + if field.dims is not None: + return + field.dims = self._nexus.field_dims(name, field.dataset) + field.dtype = self._nexus.field_dtype(name, field.dataset) + + def __len__(self) -> int: + return len(self._children) + + def __iter__(self) -> Iterator[str]: + return self._children.__iter__() + + def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: + if isinstance(sel, str): + child = self._children[sel] + if isinstance(child, Field): + self._populate_field(sel, child) + return child + # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. 
+ # (not scipp.DataArray, as that does not support lazy data) + dg = self._nexus.read_children(self, sel) + # TODO assemble geometry/transforms/events + try: + return self._nexus.assemble(dg) + except NexusStructureError as e: + return dg + + @cached_property + def sizes(self) -> Dict[str, int]: + return self._nexus.sizes + + @property + def dims(self) -> Tuple[str, ...]: + return tuple(self.sizes) + + +class NXdata(NXobject): + + def __init__(self, group: Group): + super().__init__(group) + self._signal = group.attrs['signal'] + self._dims = tuple(group.attrs['axes']) + indices_suffix = '_indices' + indices_attrs = { + key[:-len(indices_suffix)]: attr + for key, attr in group.attrs.items() if key.endswith(indices_suffix) + } + + dims = np.array(self._dims) + self._coord_dims = { + key: tuple(dims[np.array(indices).flatten()]) + for key, indices in indices_attrs.items() + } + + @property + def sizes(self) -> Dict[str, int]: + # TODO We should only do this if we know that assembly into DataArray is possible. 
+ return dict(zip(self._dims, self._group[self._signal].shape)) + + def _bin_edge_dim(self, coord: Field) -> Union[None, str]: + sizes = self.sizes + for dim, size in zip(coord.dims, coord.shape): + if (sz := sizes.get(dim)) is not None and sz + 1 == size: + return dim + return None + + def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + child_sel = to_child_select(self._group.dims, + child.dims, + sel, + bin_edge_dim=self._bin_edge_dim(child)) + return child[child_sel] + + def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: + if name == self._signal: + return self._dims + return self._coord_dims[name] + + def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: + coords = sc.DataGroup(dg) + signal = coords.pop(self._signal) + return sc.DataArray(data=signal, coords=coords) + + +base_definitions = {} +base_definitions['NXdata'] = NXdata \ No newline at end of file diff --git a/src/scippnexus/nxevent_data2.py b/src/scippnexus/nxevent_data2.py new file mode 100644 index 00000000..e7494d98 --- /dev/null +++ b/src/scippnexus/nxevent_data2.py @@ -0,0 +1,170 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import Dict, List, Tuple, Union + +import numpy as np +import scipp as sc + +from ._common import to_plain_index +from .nx2 import ( + Group, + H5Dataset, + NexusStructureError, + NXobject, + ScippIndex, + base_definitions, +) + +_event_dimension = "event" +_pulse_dimension = "event_time_zero" + + +def _check_for_missing_fields(fields): + for field in ("event_time_zero", "event_index", "event_time_offset"): + if field not in fields: + raise NexusStructureError( + f"Required field {field} not found in NXevent_data") + + +class _NXevent_data(Group): + _field_names = [ + 'event_time_zero', 'event_index', 'event_time_offset', 'event_id', + 'cue_timestamp_zero', 'cue_index', 'pulse_height' + ] + + 
#@staticmethod + #def _make_class_info(info: GroupContentInfo) -> NXobjectInfo: + # """Create info object for this NeXus class.""" + # children = {} + # for name in NXevent_data._field_names: + # if (di := info.datasets.pop(name, None)) is not None: + # children[name] = FieldInfo(values=di.value, + # dims=NXevent_data._get_field_dims(name)) + # return NXobjectInfo(children=children) + + @property + def unit(self) -> None: + # Binned data, bins do not have a unit + return None + + +class NXevent_data(NXobject): + + @property + def shape(self) -> Tuple[int]: + if (event_index := self._group.get('event_index')) is not None: + return event_index.shape + return () + + @property + def dims(self) -> List[str]: + return (_pulse_dimension, )[:len(self.shape)] + + @property + def sizes(self) -> Dict[str, int]: + return dict(zip(self.dims, self.shape)) + + def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: + if name in ['event_time_zero', 'event_index']: + return (_pulse_dimension, ) + if name in ['event_time_offset', 'event_id']: + return (_event_dimension, ) + return None + + def read_children(self, obj: Group, select: ScippIndex) -> sc.DataGroup: + children = obj + index = to_plain_index([_pulse_dimension], select) + + if not children: # TODO Check that select is trivial? 
+ return sc.DataGroup() + + max_index = self.shape[0] + event_time_zero = children['event_time_zero'][index] + if index is Ellipsis or index == tuple(): + last_loaded = False + else: + if isinstance(index, int): + start, stop, _ = slice(index, None).indices(max_index) + if start == stop: + raise IndexError('Index {start} is out of range') + index = slice(start, start + 1) + start, stop, stride = index.indices(max_index) + if stop + stride > max_index: + last_loaded = False + else: + stop += stride + last_loaded = True + index = slice(start, stop, stride) + + event_index = children['event_index'][index].values + + num_event = children["event_time_offset"].shape[0] + # Some files contain uint64 "max" indices, which turn into negatives during + # conversion to int64. This is a hack to get around this. + event_index[event_index < 0] = num_event + + if len(event_index) > 0: + event_select = slice(event_index[0], + event_index[-1] if last_loaded else num_event) + else: + event_select = slice(None) + + if (event_id := children.get('event_id')) is not None: + event_id = event_id[event_select] + if event_id.dtype not in [sc.DType.int32, sc.DType.int64]: + raise NexusStructureError( + "NXevent_data contains event_id field with non-integer values") + + event_time_offset = children['event_time_offset'][event_select] + + event_index = sc.array(dims=[_pulse_dimension], + values=event_index, + dtype=sc.DType.int64, + unit=None) + + event_index -= event_index.min() + + dg = sc.DataGroup(event_time_zero=event_time_zero, + event_index=event_index, + event_time_offset=event_time_offset) + if event_id is not None: + dg['event_id'] = event_id + return dg + + def assemble(self, children: sc.DataGroup) -> sc.DataGroup: + _check_for_missing_fields(children) + event_time_offset = children['event_time_offset'] + event_time_zero = children['event_time_zero'] + event_index = children['event_index'] + + # Weights are not stored in NeXus, so use 1s + weights = sc.ones(dims=[_event_dimension], + 
shape=event_time_offset.shape, + unit='counts', + dtype=np.float32) + + events = sc.DataArray(data=weights, + coords={'event_time_offset': event_time_offset}) + if (event_id := children.get('event_id')) is not None: + events.coords['event_id'] = event_id + + # There is some variation in the last recorded event_index in files from + # different institutions. We try to make sure here that it is what would be the + # first index of the next pulse. In other words, ensure that event_index + # includes the bin edge for the last pulse. + if event_time_zero.ndim == 0: + begins = event_index[_pulse_dimension, 0] + else: + begins = event_index + + try: + binned = sc.bins(data=events, dim=_event_dimension, begin=begins) + except IndexError as e: + raise NexusStructureError( + f"Invalid index in NXevent_data at {self.name}/event_index:\n{e}.") + + return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) + + +base_definitions['NXevent_data'] = NXevent_data \ No newline at end of file diff --git a/tests/nx2_test.py b/tests/nx2_test.py new file mode 100644 index 00000000..203d3271 --- /dev/null +++ b/tests/nx2_test.py @@ -0,0 +1,220 @@ +import h5py +import numpy as np +import pytest +import scipp as sc + +import scippnexus.nx2 as snx +from scippnexus.nxevent_data2 import NXevent_data + + +@pytest.fixture() +def h5root(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield f + + +def test_does_not_see_changes(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + signal = data['signal'] = np.arange(4) + coord = data['time'] = np.arange(4) + obj = snx.Group(entry) + dg = obj[()] + print(list(dg.items())) + assert obj.sizes == {'dim_0': 4} + assert 'data' in dg + entry.create_group('data2') + assert 'data2' not in dg # inserted after NXobject creation + + +def test_read_recursive(h5root): + entry = 
h5root.create_group('entry') + data = entry.create_group('data') + data['signal'] = np.arange(4) + data['signal'].attrs['units'] = 'm' + data['time'] = np.arange(5) + data['time'].attrs['units'] = 's' + obj = snx.Group(entry) + dg = obj[()] + print(list(dg.items())) + assert obj.sizes == {'dim_0': None} + assert 'data' in dg + + +def test_errors_read_as_variances(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data['signal'] = np.arange(4.0) + data['signal'].attrs['units'] = 'm' + data['signal_errors'] = np.arange(4.0) + data['signal_errors'].attrs['units'] = 'm' + data['time'] = np.arange(5.0) + data['time'].attrs['units'] = 's' + data['time_errors'] = np.arange(5.0) + data['time_errors'].attrs['units'] = 's' + obj = snx.Group(data) + assert set(obj._children.keys()) == {'signal', 'time'} + dg = obj[()] + assert dg['signal'].variances is not None + assert dg['time'].variances is not None + assert np.array_equal(dg['signal'].variances, np.arange(4.0)**2) + assert np.array_equal(dg['time'].variances, np.arange(5.0)**2) + + +def test_read_field(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data['signal'] = np.arange(4) + data['signal'].attrs['units'] = 'm' + obj = snx.Group(data) + var = obj['signal'][()] + assert sc.identical(var, sc.array(dims=['dim_0'], values=np.arange(4), unit='m')) + + +def test_nx_class_attribute_sets_NXobject_subclass(h5root): + entry = h5root.create_group('entry') + events = entry.create_group('events') + events.attrs['NX_class'] = 'NXevent_data' + root = snx.Group(entry) + assert isinstance(root['events'], NXevent_data) + + +def test_read_empty_nxevent_data(h5root): + entry = h5root.create_group('entry') + events = entry.create_group('events') + events.attrs['NX_class'] = 'NXevent_data' + root = snx.Group(entry) + event_data = root['events'] + dg = event_data[()] + assert sc.identical(dg, sc.DataGroup()) + + +def make_event_data(h5root): + entry = 
h5root.create_group('entry') + events = entry.create_group('events') + events.attrs['NX_class'] = 'NXevent_data' + rng = np.random.default_rng(0) + events['event_id'] = rng.integers(0, 2, size=4) + events['event_time_offset'] = np.arange(4) + events['event_time_offset'].attrs['units'] = 'ns' + events['event_time_zero'] = np.array([100, 200]) + events['event_time_zero'].attrs['units'] = 'ms' + events['event_index'] = np.array([0, 3]) + return entry + + +def test_nxevent_data_keys(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry) + event_data = root['events'] + assert set(event_data.keys()) == { + 'event_id', 'event_time_offset', 'event_time_zero', 'event_index' + } + + +def test_nxevent_data_children_read_as_variables_with_correct_dims(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry, definitions=snx.base_definitions) + event_data = root['events'] + assert sc.identical(event_data['event_id'][()], + sc.array(dims=['event'], values=[1, 1, 1, 0], unit=None)) + assert sc.identical(event_data['event_time_offset'][()], + sc.array(dims=['event'], values=[0, 1, 2, 3], unit='ns')) + assert sc.identical( + event_data['event_time_zero'][()], + sc.array(dims=['event_time_zero'], values=[100, 200], unit='ms')) + assert sc.identical(event_data['event_index'][()], + sc.array(dims=['event_time_zero'], values=[0, 3], unit=None)) + + +def test_nxevent_data_dims_and_sizes_ignore_pulse_contents(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry, definitions=snx.base_definitions) + event_data = root['events'] + assert event_data.dims == ('event_time_zero', ) + assert event_data.sizes == {'event_time_zero': 2} + + +def test_read_nxevent_data(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry, definitions=snx.base_definitions) + event_data = root['events'] + da = event_data[()] + assert sc.identical(da.data.bins.size(), + sc.array(dims=['event_time_zero'], values=[3, 1], unit=None)) + + +def 
test_nxdata_with_signal_axes_indices_reads_as_data_array(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data.attrs['NX_class'] = 'NXdata' + data.attrs['signal'] = 'signal' + data.attrs['axes'] = ['time', 'temperature'] + data.attrs['time_indices'] = [0] + data.attrs['temperature_indices'] = [1] + ref = sc.DataArray( + data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) + ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') + ref.coords['temperature'] = sc.array(dims=['temperature'], + values=np.arange(4), + unit='K') + data['signal'] = ref.values + data['signal'].attrs['units'] = str(ref.unit) + data['time'] = ref.coords['time'].values + data['time'].attrs['units'] = str(ref.coords['time'].unit) + data['temperature'] = ref.coords['temperature'].values + data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) + obj = snx.Group(data, definitions=snx.base_definitions) + da = obj[()] + assert sc.identical(da, ref) + + +def test_nxdata_positional_indexing_returns_correct_slice(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data.attrs['NX_class'] = 'NXdata' + data.attrs['signal'] = 'signal' + data.attrs['axes'] = ['time', 'temperature'] + data.attrs['time_indices'] = [0] + data.attrs['temperature_indices'] = [1] + ref = sc.DataArray( + data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) + ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') + ref.coords['temperature'] = sc.array(dims=['temperature'], + values=np.arange(4), + unit='K') + data['signal'] = ref.values + data['signal'].attrs['units'] = str(ref.unit) + data['time'] = ref.coords['time'].values + data['time'].attrs['units'] = str(ref.coords['time'].unit) + data['temperature'] = ref.coords['temperature'].values + data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) + obj = snx.Group(data, definitions=snx.base_definitions) + 
da = obj['time', 0:2] + assert sc.identical(da, ref['time', 0:2]) + + +def test_nxdata_with_bin_edges_positional_indexing_returns_correct_slice(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data.attrs['NX_class'] = 'NXdata' + data.attrs['signal'] = 'signal' + data.attrs['axes'] = ['time', 'temperature'] + data.attrs['time_indices'] = [0] + data.attrs['temperature_indices'] = [1] + ref = sc.DataArray( + data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) + ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') + ref.coords['temperature'] = sc.array(dims=['temperature'], + values=np.arange(5), + unit='K') + data['signal'] = ref.values + data['signal'].attrs['units'] = str(ref.unit) + data['time'] = ref.coords['time'].values + data['time'].attrs['units'] = str(ref.coords['time'].unit) + data['temperature'] = ref.coords['temperature'].values + data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) + obj = snx.Group(data, definitions=snx.base_definitions) + da = obj['temperature', 0:2] + assert sc.identical(da, ref['temperature', 0:2]) From c6f81dd90cfd118a5d06a8c58d23243ba7d02db8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 15 Mar 2023 15:03:01 +0100 Subject: [PATCH 02/98] Begin refactoring tests --- src/scippnexus/nx2.py | 42 +++++++++++++++++++++++++++++++++++++++++- tests/nxdata_test.py | 28 +++++++++++++++++++--------- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index ad3e6c52..c9a56421 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -417,6 +417,13 @@ def __init__(self, group: Group): key: tuple(dims[np.array(indices).flatten()]) for key, indices in indices_attrs.items() } + for name, dataset in group._group.items(): + if name not in self._coord_dims: + # TODO handle squeezing + if dataset.shape == (): + self._coord_dims[name] = () + elif name in dims: + self._coord_dims[name] 
= (name, ) @property def sizes(self) -> Dict[str, int]: @@ -449,4 +456,37 @@ def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: base_definitions = {} -base_definitions['NXdata'] = NXdata \ No newline at end of file +base_definitions['NXdata'] = NXdata + + +def create_field(group: H5Group, name: str, data: DimensionedArray, **kwargs): + if not isinstance(data, sc.Variable): + return group.create_dataset(name, data=data, **kwargs) + values = data.values + if data.dtype == sc.DType.string: + values = np.array(data.values, dtype=object) + elif data.dtype == sc.DType.datetime64: + start = sc.epoch(unit=data.unit) + values = (data - start).values + dataset = group.create_dataset(name, data=values, **kwargs) + if data.unit is not None: + dataset.attrs['units'] = str(data.unit) + if data.dtype == sc.DType.datetime64: + dataset.attrs['start'] = str(start.value) + + +def create_group(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Group: + """Create empty HDF5 group with given name and set the NX_class attribute. + + Parameters + ---------- + name: + Group name. + nx_class: + Nexus class, can be a valid string for the NX_class attribute, or a + subclass of NXobject, such as NXdata or NXlog. 
+ """ + group = group.create_group(name) + attr = nx_class if isinstance(nx_class, str) else nx_class.__name__ + group.attrs['NX_class'] = attr + return group diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 924476dc..4fd6c349 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -3,9 +3,17 @@ import pytest import scipp as sc +import scippnexus.nx2 as snx from scippnexus import Field, NXdata, NXentry, NXlog, NXroot +@pytest.fixture() +def h5root(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield f + + @pytest.fixture() def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" @@ -15,25 +23,27 @@ def nxroot(request): yield root -def test_without_coords(nxroot): +def test_without_coords(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2], [3.3, 4.4]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_group(h5root, 'data1', snx.NXdata) + snx.create_field(data, 'signal', signal) data.attrs['axes'] = signal.dims data.attrs['signal'] = 'signal' - assert sc.identical(data[...], sc.DataArray(signal)) + obj = snx.Group(data, definitions=snx.base_definitions) + assert sc.identical(obj[...], sc.DataArray(signal)) -def test_with_coords_matching_axis_names(nxroot): +def test_with_coords_matching_axis_names(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_group(h5root, 'data1', snx.NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx', da.coords['xx']) - assert sc.identical(data[...], da) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + group = snx.Group(data, 
definitions=snx.base_definitions) + assert sc.identical(group[...], da) def test_guessed_dim_for_coord_not_matching_axis_name(nxroot): From ebf3584bbb2921d172d0096f0aedaee63784395d Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 15 Mar 2023 15:21:37 +0100 Subject: [PATCH 03/98] Refactor more tests and restore old behavior --- src/scippnexus/nx2.py | 11 +++++--- tests/nxdata_test.py | 61 +++++++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index c9a56421..43996207 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -58,7 +58,7 @@ def is_dataset(obj: Union[H5Group, H5Dataset]) -> bool: def _is_time(obj): if (unit := obj.unit) is None: return False - return unit.to_dict()['powers'] == {'time': 1} + return unit.to_dict().get('powers') == {'time': 1} def _as_datetime(obj: Any): @@ -424,11 +424,16 @@ def __init__(self, group: Group): self._coord_dims[name] = () elif name in dims: self._coord_dims[name] = (name, ) + elif dataset.shape == group._group[self._signal].shape: + self._coord_dims[name] = self._dims + elif len(dataset.shape) == 1: + self._coord_dims[name] = (dims[list(self.sizes.values()).index( + dataset.shape[0])], ) @property def sizes(self) -> Dict[str, int]: # TODO We should only do this if we know that assembly into DataArray is possible. - return dict(zip(self._dims, self._group[self._signal].shape)) + return dict(zip(self._dims, self._group._group[self._signal].shape)) def _bin_edge_dim(self, coord: Field) -> Union[None, str]: sizes = self.sizes @@ -475,7 +480,7 @@ def create_field(group: H5Group, name: str, data: DimensionedArray, **kwargs): dataset.attrs['start'] = str(start.value) -def create_group(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Group: +def create_class(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Group: """Create empty HDF5 group with given name and set the NX_class attribute. 
Parameters diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 4fd6c349..88432e55 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -25,7 +25,7 @@ def nxroot(request): def test_without_coords(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2], [3.3, 4.4]]) - data = snx.create_group(h5root, 'data1', snx.NXdata) + data = snx.create_class(h5root, 'data1', snx.NXdata) snx.create_field(data, 'signal', signal) data.attrs['axes'] = signal.dims data.attrs['signal'] = 'signal' @@ -37,7 +37,7 @@ def test_with_coords_matching_axis_names(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] - data = snx.create_group(h5root, 'data1', snx.NXdata) + data = snx.create_class(h5root, 'data1', snx.NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' snx.create_field(data, 'signal', da.data) @@ -46,75 +46,80 @@ def test_with_coords_matching_axis_names(h5root): assert sc.identical(group[...], da) -def test_guessed_dim_for_coord_not_matching_axis_name(nxroot): +def test_guessed_dim_for_coord_not_matching_axis_name(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx2'] = da.data['yy', 1] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx2', da.coords['xx2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx2', da.coords['xx2']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) -def test_multiple_coords(nxroot): +def test_multiple_coords(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] da.coords['xx2'] = da.data['yy', 1] da.coords['yy'] = da.data['xx', 0] - data = 
nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('yy', da.coords['yy']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'yy', da.coords['yy']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) -def test_slice_of_1d(nxroot): +def test_slice_of_1d(h5root): da = sc.DataArray(sc.array(dims=['xx'], unit='m', values=[1, 2, 3])) da.coords['xx'] = da.data da.coords['xx2'] = da.data da.coords['scalar'] = sc.scalar(1.2) - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('scalar', da.coords['scalar']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'scalar', da.coords['scalar']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data['xx', :2], da['xx', :2]) assert sc.identical(data[:2], da['xx', :2]) -def test_slice_of_multiple_coords(nxroot): +def test_slice_of_multiple_coords(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] da.coords['xx2'] = da.data['yy', 1] da.coords['yy'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - 
data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('yy', da.coords['yy']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'yy', da.coords['yy']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data['xx', :2], da['xx', :2]) -def test_guessed_dim_for_2d_coord_not_matching_axis_name(nxroot): +def test_guessed_dim_for_2d_coord_not_matching_axis_name(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx2'] = da.data - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx2', da.coords['xx2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx2', da.coords['xx2']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) From ccbd6ed3522386051507d6365977f88309b03df1 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 15 Mar 2023 15:30:54 +0100 Subject: [PATCH 04/98] Use old field dims guessing code --- src/scippnexus/nx2.py | 47 ++++++++++++++++++++++++++++++++++--------- tests/nxdata_test.py | 9 +++++---- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 43996207..d6d1b01f 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -388,7 +388,7 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: # TODO assemble geometry/transforms/events try: return self._nexus.assemble(dg) - except NexusStructureError as e: + except (sc.DimensionError, NexusStructureError) as e: return dg @cached_property @@ -422,18 +422,43 @@ def __init__(self, group: Group): # TODO handle squeezing if dataset.shape == (): 
self._coord_dims[name] = () - elif name in dims: - self._coord_dims[name] = (name, ) - elif dataset.shape == group._group[self._signal].shape: - self._coord_dims[name] = self._dims - elif len(dataset.shape) == 1: - self._coord_dims[name] = (dims[list(self.sizes.values()).index( - dataset.shape[0])], ) + elif (dims := self._guess_dims(name, dataset)) is not None: + self._coord_dims[name] = dims + #elif name in dims: + # self._coord_dims[name] = (name, ) + #elif dataset.shape == group._group[self._signal].shape: + # self._coord_dims[name] = self._dims + #elif len(dataset.shape) == 1: + # self._coord_dims[name] = (dims[list(self.sizes.values()).index( + # dataset.shape[0])], ) + + def _guess_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: + """Guess dims of non-signal dataset based on shape. + + Does not check for potential bin-edge coord. + """ + shape = dataset.shape + if self.shape == shape: + return self._dims + lut = {} + if self._signal is not None: + for d, s in self.sizes.items(): + if self.shape.count(s) == 1: + lut[s] = d + try: + dims = tuple(lut[s] for s in shape) + except KeyError: + return None + return dims + + @property + def shape(self) -> Tuple[int, ...]: + return self._group._group[self._signal].shape @property def sizes(self) -> Dict[str, int]: # TODO We should only do this if we know that assembly into DataArray is possible. 
- return dict(zip(self._dims, self._group._group[self._signal].shape)) + return dict(zip(self._dims, self.shape)) def _bin_edge_dim(self, coord: Field) -> Union[None, str]: sizes = self.sizes @@ -452,7 +477,9 @@ def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: if name == self._signal: return self._dims - return self._coord_dims[name] + if (dims := self._coord_dims.get(name)) is not None: + return dims + return super().field_dims(name, dataset) def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: coords = sc.DataGroup(dg) diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 88432e55..8a7237a9 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -123,15 +123,16 @@ def test_guessed_dim_for_2d_coord_not_matching_axis_name(h5root): assert sc.identical(data[...], da) -def test_skips_axis_if_dim_guessing_finds_ambiguous_shape(nxroot): +def test_skips_axis_if_dim_guessing_finds_ambiguous_shape(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6], [7, 8, 9]])) da.coords['yy2'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('yy2', da.coords['yy2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'yy2', da.coords['yy2']) + data = snx.Group(data, definitions=snx.base_definitions) dg = data[...] 
assert isinstance(dg, sc.DataGroup) assert 'yy2' in dg From 8dcc9997099062cc5eb4732ff90062782e5b3ce5 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 16 Mar 2023 09:19:33 +0100 Subject: [PATCH 05/98] Pass more tests --- src/scippnexus/nx2.py | 49 ++++++++++++----- tests/nxdata_test.py | 120 +++++++++++++++++++++++------------------- 2 files changed, 103 insertions(+), 66 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index d6d1b01f..9f5cc44f 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -58,7 +58,7 @@ def is_dataset(obj: Union[H5Group, H5Dataset]) -> bool: def _is_time(obj): if (unit := obj.unit) is None: return False - return unit.to_dict().get('powers') == {'time': 1} + return unit.to_dict().get('powers') == {'s': 1} def _as_datetime(obj: Any): @@ -350,14 +350,14 @@ def _children(self) -> Dict[str, Union[Field, Group]]: Field(obj) if is_dataset(obj) else Group(obj, definitions=self._definitions) for name, obj in self._group.items() } - suffix = '_errors' - field_with_errors = [name for name in items if f'{name}{suffix}' in items] - for name in field_with_errors: - values = items[name] - errors = items[f'{name}{suffix}'] - if values.unit == errors.unit and values.shape == errors.shape: - values.errors = errors.dataset - del items[f'{name}{suffix}'] + for suffix in ('_errors', '_error'): + field_with_errors = [name for name in items if f'{name}{suffix}' in items] + for name in field_with_errors: + values = items[name] + errors = items[f'{name}{suffix}'] + if values.unit == errors.unit and values.dataset.shape == errors.dataset.shape: + values.errors = errors.dataset + del items[f'{name}{suffix}'] return items @cached_property @@ -404,8 +404,18 @@ class NXdata(NXobject): def __init__(self, group: Group): super().__init__(group) + # Must do full consistency check here, to define self.sizes: + # - squeeze correctly + # - check if coord dims are compatible with signal dims + # - check if there is a signal + # If not 
the case, fall back do DataGroup.sizes + # Can we just set field dims here? self._signal = group.attrs['signal'] - self._dims = tuple(group.attrs['axes']) + if (axes := group.attrs.get('axes')) is not None: + self._dims = tuple(axes) + else: + self._dims = tuple(super().field_sizes(self._signal, + group._children[self._signal])) indices_suffix = '_indices' indices_attrs = { key[:-len(indices_suffix)]: attr @@ -417,13 +427,26 @@ def __init__(self, group: Group): key: tuple(dims[np.array(indices).flatten()]) for key, indices in indices_attrs.items() } + self._valid = True for name, dataset in group._group.items(): if name not in self._coord_dims: # TODO handle squeezing if dataset.shape == (): self._coord_dims[name] = () - elif (dims := self._guess_dims(name, dataset)) is not None: - self._coord_dims[name] = dims + elif name in self._dims: + # If there are named axes then items of same name are "dimension + # coordinates", i.e., have a dim matching their name. + # However, if the item is not 1-D we need more labels. Try to use labels of + # signal if dimensionality matches. + if dataset.ndim == len(dims): + self._coord_dims[name] = self._dims + else: + self._coord_dims[name] = (name, ) + elif (field_dims := self._guess_dims(name, dataset)) is not None: + self._coord_dims[name] = field_dims + else: + self._valid = False + #elif name in dims: # self._coord_dims[name] = (name, ) #elif dataset.shape == group._group[self._signal].shape: @@ -458,7 +481,7 @@ def shape(self) -> Tuple[int, ...]: @property def sizes(self) -> Dict[str, int]: # TODO We should only do this if we know that assembly into DataArray is possible. 
- return dict(zip(self._dims, self.shape)) + return dict(zip(self._dims, self.shape)) if self._valid else super().sizes def _bin_edge_dim(self, coord: Field) -> Union[None, str]: sizes = self.sizes diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 8a7237a9..0f945d9a 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -133,59 +133,64 @@ def test_skips_axis_if_dim_guessing_finds_ambiguous_shape(h5root): snx.create_field(data, 'signal', da.data) snx.create_field(data, 'yy2', da.coords['yy2']) data = snx.Group(data, definitions=snx.base_definitions) + assert set(data.dims) == {'dim_0', 'xx', 'yy'} dg = data[...] assert isinstance(dg, sc.DataGroup) assert 'yy2' in dg assert set(dg.dims) == {'dim_0', 'xx', 'yy'} -def test_guesses_transposed_dims_for_2d_coord(nxroot): +def test_guesses_transposed_dims_for_2d_coord(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx2'] = sc.transpose(da.data) - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx2', da.coords['xx2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx2', da.coords['xx2']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) @pytest.mark.parametrize("indices", [1, [1]], ids=['int', 'list-of-int']) -def test_indices_attribute_for_coord(nxroot, indices): +def test_indices_attribute_for_coord(h5root, indices): da = sc.DataArray(sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2], [4, 5]])) da.coords['yy2'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' data.attrs['yy2_indices'] = indices - data.create_field('signal', da.data) - data.create_field('yy2', da.coords['yy2']) + 
snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'yy2', da.coords['yy2']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) @pytest.mark.parametrize("indices", [1, [1]], ids=['int', 'list-of-int']) -def test_indices_attribute_for_coord_with_nontrivial_slice(nxroot, indices): +def test_indices_attribute_for_coord_with_nontrivial_slice(h5root, indices): da = sc.DataArray(sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2], [4, 5]])) da.coords['yy2'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' data.attrs['yy2_indices'] = indices - data.create_field('signal', da.data) - data.create_field('yy2', da.coords['yy2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'yy2', da.coords['yy2']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data['yy', :1], da['yy', :1]) -def test_transpose_indices_attribute_for_coord(nxroot): +def test_transpose_indices_attribute_for_coord(h5root): da = sc.DataArray(sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2], [4, 5]])) da.coords['xx2'] = sc.transpose(da.data) - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' data.attrs['xx2_indices'] = [1, 0] - data.create_field('signal', da.data) - data.create_field('xx2', da.coords['xx2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx2', da.coords['xx2']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) @@ -205,19 +210,20 @@ def test_auxiliary_signal_is_not_loaded_as_coord(nxroot): assert sc.identical(data[...], da) -def test_field_dims_match_NXdata_dims(nxroot): +def test_field_dims_match_NXdata_dims(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 
3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] da.coords['xx2'] = da.data['yy', 1] da.coords['yy'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal1' - data.create_field('signal1', da.data) - data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('yy', da.coords['yy']) + snx.create_field(data, 'signal1', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'yy', da.coords['yy']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data['xx', :2].data, data['signal1']['xx', :2]) assert sc.identical(data['xx', :2].coords['xx'], data['xx']['xx', :2]) assert sc.identical(data['xx', :2].coords['xx2'], data['xx2']['xx', :2]) @@ -244,34 +250,37 @@ def test_field_dims_match_NXdata_dims_when_selected_via_class_name(nxroot): assert fields['yy'].dims == ('yy', ) -def test_uses_default_field_dims_if_inference_fails(nxroot): +def test_uses_default_field_dims_if_inference_fails(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['yy2'] = sc.arange('yy', 4) - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('yy2', da.coords['yy2']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'yy2', da.coords['yy2']) + data = snx.Group(data, definitions=snx.base_definitions) dg = data[()] assert sc.identical(dg['yy2'], da.coords['yy2'].rename(yy='dim_0')) assert sc.identical(data['yy2'][()], da.coords['yy2'].rename(yy='dim_0')) @pytest.mark.parametrize("unit", ['m', 's', None]) -def test_create_field_from_variable(nxroot, unit): +def test_create_field_from_variable(h5root, 
unit): var = sc.array(dims=['xx'], unit=unit, values=[3, 4]) - nxroot.create_field('field', var) - loaded = nxroot['field'][...] + snx.create_field(h5root, 'field', var) + group = snx.Group(h5root, definitions=snx.base_definitions) + loaded = group['field'][...] # Nexus does not support storing dim labels assert sc.identical(loaded, var.rename(xx=loaded.dim)) -def test_create_datetime_field_from_variable(nxroot): +def test_create_datetime_field_from_variable(h5root): var = sc.datetime(np.datetime64('now'), unit='ns') + sc.arange( 'time', 1, 4, dtype='int64', unit='ns') - nxroot.create_field('field', var) - loaded = nxroot['field'][...] + snx.create_field(h5root, 'field', var) + group = snx.Group(h5root, definitions=snx.base_definitions) + loaded = group['field'][...] # Nexus does not support storing dim labels assert sc.identical(loaded, var.rename(time=loaded.dim)) @@ -284,20 +293,21 @@ def test_create_class(nxroot, nx_class): @pytest.mark.parametrize("errors_suffix", ['_error', '_errors']) def test_field_matching_errors_regex_is_loaded_if_no_corresponding_value_field( - nxroot, errors_suffix): + h5root, errors_suffix): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords[f'xx{errors_suffix}'] = da.data['yy', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field(f'xx{errors_suffix}', da.coords[f'xx{errors_suffix}']) + snx.create_field(data,'signal', da.data) + snx.create_field(data,f'xx{errors_suffix}', da.coords[f'xx{errors_suffix}']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) @pytest.mark.parametrize("errors_suffix", ['_error', '_errors']) -def test_uncertainties_of_coords_are_loaded(nxroot, errors_suffix): +def test_uncertainties_of_coords_are_loaded(h5root, errors_suffix): da = sc.DataArray( 
sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = sc.array(dims=['xx'], @@ -311,28 +321,31 @@ def test_uncertainties_of_coords_are_loaded(nxroot, errors_suffix): variances=[4, 9], dtype='float64') da.coords['scalar'] = sc.scalar(value=1.2, variance=4.0, unit='K') - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' data.attrs['xx2_indices'] = 0 - data.create_field('signal', da.data) - data.create_field('xx', sc.values(da.coords['xx'])) - data.create_field(f'xx{errors_suffix}', sc.stddevs(da.coords['xx'])) - data.create_field('xx2', sc.values(da.coords['xx2'])) - data.create_field(f'xx2{errors_suffix}', sc.stddevs(da.coords['xx2'])) - data.create_field('scalar', sc.values(da.coords['scalar'])) - data.create_field(f'scalar{errors_suffix}', sc.stddevs(da.coords['scalar'])) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', sc.values(da.coords['xx'])) + snx.create_field(data, f'xx{errors_suffix}', sc.stddevs(da.coords['xx'])) + snx.create_field(data, 'xx2', sc.values(da.coords['xx2'])) + snx.create_field(data, f'xx2{errors_suffix}', sc.stddevs(da.coords['xx2'])) + snx.create_field(data, 'scalar', sc.values(da.coords['scalar'])) + snx.create_field(data, f'scalar{errors_suffix}', sc.stddevs(da.coords['scalar'])) + data = snx.Group(data, definitions=snx.base_definitions) + print(data[...], da) assert sc.identical(data[...], da) -def test_unnamed_extra_dims_of_coords_are_squeezed(nxroot): +def test_unnamed_extra_dims_of_coords_are_squeezed(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2], [3.3, 4.4]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['axes'] = signal.dims data.attrs['signal'] = 'signal' # shape=[1] - data.create_field('scalar', 
sc.array(dims=['ignored'], values=[1.2])) + snx.create_field(data, 'scalar', sc.array(dims=['ignored'], values=[1.2])) + data = snx.Group(data, definitions=snx.base_definitions) loaded = data[...] assert sc.identical(loaded.coords['scalar'], sc.scalar(1.2)) assert data['scalar'].ndim == 0 @@ -340,15 +353,16 @@ def test_unnamed_extra_dims_of_coords_are_squeezed(nxroot): assert sc.identical(data['scalar'][...], sc.scalar(1.2)) -def test_unnamed_extra_dims_of_multidim_coords_are_squeezed(nxroot): +def test_unnamed_extra_dims_of_multidim_coords_are_squeezed(h5root): signal = sc.array(dims=['xx'], unit='m', values=[1.1, 2.2]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['axes'] = signal.dims data.attrs['signal'] = 'signal' # shape=[2,1] xx = sc.array(dims=['xx', 'ignored'], values=[[1.1], [2.2]]) - data.create_field('xx', xx) + snx.create_field(data, 'xx', xx) + data = snx.Group(data, definitions=snx.base_definitions) loaded = data[...] 
assert sc.identical(loaded.coords['xx'], xx['ignored', 0]) assert data['xx'].ndim == 1 From d9e8739a570eca2e46ad22d09b406609470b9309 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 16 Mar 2023 15:17:55 +0100 Subject: [PATCH 06/98] Refactor to init fields on NXobject creation --- src/scippnexus/nx2.py | 219 +++++++++++++++++++++++++------- src/scippnexus/nxevent_data2.py | 3 +- tests/nxdata_test.py | 25 ++-- 3 files changed, 190 insertions(+), 57 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 9f5cc44f..72a994f0 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -116,7 +116,7 @@ def _dtype_fromdataset(dataset: H5Dataset) -> sc.DType: @dataclass class Field: dataset: H5Dataset - dims: Optional[Tuple[str, ...]] = None + sizes: Optional[Dict[str, int]] = None dtype: Optional[sc.DType] = None errors: Optional[H5Dataset] = None _is_time: Optional[bool] = None @@ -165,12 +165,12 @@ def attrs(self) -> Dict[str, Any]: # self._dims = tuple(f'dim_{i}' for i in range(self.ndim)) @property - def shape(self) -> Tuple[int, ...]: - return self.dataset.shape + def dims(self) -> Tuple[str]: + return tuple(self.sizes.keys()) @property - def sizes(self) -> Dict[str, int]: - return {dim: size for dim, size in zip(self.dims, self.shape)} + def shape(self) -> Tuple[int, ...]: + return tuple(self.sizes.values()) def _load_variances(self, var, index): stddevs = sc.empty(dims=var.dims, @@ -289,21 +289,31 @@ def unit(self) -> Union[sc.Unit, None]: return None +def _squeezed_field_sizes(dataset: H5Dataset) -> Dict[str, int]: + shape = tuple(size for size in dataset.shape if size != 1) + return {f'dim_{i}': size for i, size in enumerate(shape)} + + class NXobject: def __init__(self, group: Group): self._group = group + for field in group._children.values(): + if isinstance(field, Field): + field.sizes = _squeezed_field_sizes(field.dataset) + field.dtype = _dtype_fromdataset(field.dataset) @property def sizes(self) -> Dict[str, int]: # 
exclude geometry/tansform groups? return sc.DataGroup(self._group).sizes - def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: - return tuple(f'dim_{i}' for i in range(len(dataset.shape))) + #def field_sizes(self, name: str, field: Field) -> Dict[str, int]: + # shape = tuple(size for size in field.dataset.shape if size != 1) + # return {f'dim_{i}': size for i, size in enumerate(shape)} - def field_dtype(self, name: str, dataset: H5Dataset) -> sc.dtype: - return _dtype_fromdataset(dataset) + #def field_dtype(self, name: str, dataset: H5Dataset) -> sc.dtype: + # return _dtype_fromdataset(dataset) def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: # Note that this will be similar in NXdata, but there we need to handle @@ -338,6 +348,10 @@ def attrs(self) -> Dict[str, Any]: # with 1000 subgroups. return dict(self._group.attrs) if self._group.attrs else dict() + # TODO + # should this by Dict[str, Union[H5Group, H5Dataset]]? + # then we can recreate Group on every access (in principle more repeated init, + # but maybe better since it "clears" the cache)? @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: # split off special children here? 
@@ -364,10 +378,13 @@ def _children(self) -> Dict[str, Union[Field, Group]]: def _nexus(self) -> NXobject: return self._definitions.get(self.attrs.get('NX_class'), NXobject)(self) + def _populate_fields(self) -> None: + _ = self._nexus + def _populate_field(self, name: str, field: Field) -> None: - if field.dims is not None: + if field.sizes is not None: return - field.dims = self._nexus.field_dims(name, field.dataset) + field.sizes = self._nexus.field_sizes(name, field) field.dtype = self._nexus.field_dtype(name, field.dataset) def __len__(self) -> int: @@ -380,7 +397,8 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: if isinstance(sel, str): child = self._children[sel] if isinstance(child, Field): - self._populate_field(sel, child) + self._populate_fields() + #self._populate_field(sel, child) return child # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. # (not scipp.DataArray, as that does not support lazy data) @@ -400,22 +418,141 @@ def dims(self) -> Tuple[str, ...]: return tuple(self.sizes) +def _guess_dims(dims, shape, dataset: H5Dataset): + """Guess dims of non-signal dataset based on shape. + + Does not check for potential bin-edge coord. + """ + if shape == dataset.shape: + return dims + lut = {} + for d, s in zip(dims, shape): + if shape.count(s) == 1: + lut[s] = d + try: + return [lut[s] for s in dataset.shape] + except KeyError: + return None + + class NXdata(NXobject): def __init__(self, group: Group): super().__init__(group) + self._valid = True # Must do full consistency check here, to define self.sizes: # - squeeze correctly # - check if coord dims are compatible with signal dims # - check if there is a signal # If not the case, fall back do DataGroup.sizes # Can we just set field dims here? 
- self._signal = group.attrs['signal'] - if (axes := group.attrs.get('axes')) is not None: - self._dims = tuple(axes) + self._signal_name = None + self._signal = None + if (name := group.attrs.get('signal')) is not None: + self._signal_name = name + self._signal = group._children[name] + else: + # Legacy NXdata defines signal not as group attribute, but attr on dataset + for name, field in group._children.items(): + # What is the meaning of the attribute value? It is undocumented, we simply + # ignore it. + if 'signal' in field.attrs: + self._signal_name = name + self._signal = group._children[name] + break + + axes = group.attrs.get('axes') + signal_axes = None if self._signal is None else self._signal.attrs.get('axes') + + axis_index = {} + for name, field in group._children.items(): + if (axis := field.attrs.get('axis')) is not None: + axis_index[name] = axis + + # Apparently it is not possible to define dim labels unless there are + # corresponding coords. Special case of '.' entries means "no coord". + def _get_group_dims(): + if axes is not None: + return [f'dim_{i}' if a == '.' else a for i, a in enumerate(axes)] + if signal_axes is not None: + return tuple(signal_axes.split(',')) + if axis_index: + return [ + k for k, _ in sorted(axis_index.items(), key=lambda item: item[1]) + ] + return None + + group_dims = _get_group_dims() + + if self._signal is not None: + if group_dims is not None: + shape = self._signal.dataset.shape + shape = _squeeze_trailing(group_dims, shape) + self._signal.sizes = dict(zip(group_dims, shape)) + + # if group_dims is None: + # group_dims = fallback_dims + + if axes is not None: + # Unlike self.dims we *drop* entries that are '.' + named_axes = [a for a in axes if a != '.'] + elif signal_axes is not None: + named_axes = signal_axes.split(',') + # elif fallback_dims is not None: + # named_axes = fallback_dims else: - self._dims = tuple(super().field_sizes(self._signal, - group._children[self._signal])) + named_axes = [] + + # 3. 
Find field dims + indices_suffix = '_indices' + indices_attrs = { + key[:-len(indices_suffix)]: attr + for key, attr in group.attrs.items() if key.endswith(indices_suffix) + } + + dims = np.array(group_dims) + dims_from_indices = { + key: tuple(dims[np.array(indices).flatten()]) + for key, indices in indices_attrs.items() + } + + def get_dims(name, field): + # Newly written files should always contain indices attributes, but the + # standard recommends that readers should also make "best effort" guess + # since legacy files do not set this attribute. + # TODO signal and errors? + # TODO aux + if name in (self._signal_name, ): + return group_dims + # if name in [self._signal_name, self._errors_name]: + # return self._get_group_dims() # if None, field determines dims itself + # if name in list(self.attrs.get('auxiliary_signals', [])): + # return self._try_guess_dims(name) + if (dims := dims_from_indices.get(name)) is not None: + return dims + if (axis := axis_index.get(name)) is not None: + return (group_dims[axis - 1], ) + if name in named_axes: + # If there are named axes then items of same name are "dimension + # coordinates", i.e., have a dim matching their name. + # However, if the item is not 1-D we need more labels. Try to use labels of + # signal if dimensionality matches. 
+ if self._signal is not None and len(field.dataset.shape) == len( + self._signal.dataset.shape): + return group_dims + return [name] + if self._signal is not None and group_dims is not None: + return _guess_dims(group_dims, self._signal.dataset.shape, + field.dataset) + self._valid = False + + for name, field in group._children.items(): + if (dims := get_dims(name, field)) is not None: + field.sizes = dict(zip(dims, field.dataset.shape)) + + return + ################ + indices_suffix = '_indices' indices_attrs = { key[:-len(indices_suffix)]: attr @@ -455,33 +592,16 @@ def __init__(self, group: Group): # self._coord_dims[name] = (dims[list(self.sizes.values()).index( # dataset.shape[0])], ) - def _guess_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: - """Guess dims of non-signal dataset based on shape. - - Does not check for potential bin-edge coord. - """ - shape = dataset.shape - if self.shape == shape: - return self._dims - lut = {} - if self._signal is not None: - for d, s in self.sizes.items(): - if self.shape.count(s) == 1: - lut[s] = d - try: - dims = tuple(lut[s] for s in shape) - except KeyError: - return None - return dims - @property def shape(self) -> Tuple[int, ...]: - return self._group._group[self._signal].shape + return self._signal.shape @property def sizes(self) -> Dict[str, int]: + return self._signal.sizes # TODO We should only do this if we know that assembly into DataArray is possible. 
- return dict(zip(self._dims, self.shape)) if self._valid else super().sizes + return dict(zip(self._field_dims[self._signal_name], + self.shape)) if self._valid else super().sizes def _bin_edge_dim(self, coord: Field) -> Union[None, str]: sizes = self.sizes @@ -497,19 +617,28 @@ def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex bin_edge_dim=self._bin_edge_dim(child)) return child[child_sel] - def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: - if name == self._signal: - return self._dims - if (dims := self._coord_dims.get(name)) is not None: - return dims - return super().field_dims(name, dataset) + def field_sizes(self, name: str, field: Field) -> Dict[str, int]: + dims = self._field_dims[name] + if dims is None: + dims = super().field_sizes(name, field) + shape = field.dataset.shape + if len(dims) < len(shape): + # The convention here is that the given dimensions apply to the shapes + # starting from the left. So we only squeeze dimensions that are after + # len(dims). 
+ shape = _squeeze_trailing(dims, shape) + return dict(zip(dims, shape)) def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: coords = sc.DataGroup(dg) - signal = coords.pop(self._signal) + signal = coords.pop(self._signal_name) return sc.DataArray(data=signal, coords=coords) +def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[int, ...]: + return shape[:len(dims)] + tuple(size for size in shape[len(dims):] if size != 1) + + base_definitions = {} base_definitions['NXdata'] = NXdata diff --git a/src/scippnexus/nxevent_data2.py b/src/scippnexus/nxevent_data2.py index e7494d98..70941775 100644 --- a/src/scippnexus/nxevent_data2.py +++ b/src/scippnexus/nxevent_data2.py @@ -8,6 +8,7 @@ from ._common import to_plain_index from .nx2 import ( + Field, Group, H5Dataset, NexusStructureError, @@ -65,7 +66,7 @@ def dims(self) -> List[str]: def sizes(self) -> Dict[str, int]: return dict(zip(self.dims, self.shape)) - def field_dims(self, name: str, dataset: H5Dataset) -> Tuple[str, ...]: + def field_dims(self, name: str, field: Field) -> Tuple[str, ...]: if name in ['event_time_zero', 'event_index']: return (_pulse_dimension, ) if name in ['event_time_offset', 'event_id']: diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 0f945d9a..04912abb 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -300,8 +300,8 @@ def test_field_matching_errors_regex_is_loaded_if_no_corresponding_value_field( data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - snx.create_field(data,'signal', da.data) - snx.create_field(data,f'xx{errors_suffix}', da.coords[f'xx{errors_suffix}']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, f'xx{errors_suffix}', da.coords[f'xx{errors_suffix}']) data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) @@ -370,34 +370,37 @@ def 
test_unnamed_extra_dims_of_multidim_coords_are_squeezed(h5root): assert sc.identical(data['xx'][...], xx['ignored', 0]) -def test_dims_of_length_1_are_kept_when_axes_specified(nxroot): +def test_dims_of_length_1_are_kept_when_axes_specified(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['axes'] = ['xx', 'yy'] data.attrs['signal'] = 'signal' + data = snx.Group(data, definitions=snx.base_definitions) loaded = data[...] assert sc.identical(loaded.data, signal) assert data['signal'].ndim == 2 assert data['signal'].shape == (1, 1) -def test_dims_of_length_1_are_squeezed_when_no_axes_specified(nxroot): +def test_dims_of_length_1_are_squeezed_when_no_axes_specified(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['signal'] = 'signal' + data = snx.Group(data, definitions=snx.base_definitions) loaded = data[...] assert sc.identical(loaded.data, sc.scalar(1.1, unit='m')) assert data['signal'].ndim == 0 assert data['signal'].shape == () -def test_one_dim_of_length_1_is_squeezed_when_no_axes_specified(nxroot): +def test_one_dim_of_length_1_is_squeezed_when_no_axes_specified(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['signal'] = 'signal' + data = snx.Group(data, definitions=snx.base_definitions) loaded = data[...] 
# Note that dimension gets renamed to `dim_0` since no axes are specified assert sc.identical(loaded.data, From 3e1b48796144516d8a514d4d8ecab7e09ba5c28f Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 16 Mar 2023 15:25:28 +0100 Subject: [PATCH 07/98] Cleanup --- src/scippnexus/nx2.py | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 72a994f0..c13db42d 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -308,13 +308,6 @@ def sizes(self) -> Dict[str, int]: # exclude geometry/tansform groups? return sc.DataGroup(self._group).sizes - #def field_sizes(self, name: str, field: Field) -> Dict[str, int]: - # shape = tuple(size for size in field.dataset.shape if size != 1) - # return {f'dim_{i}': size for i, size in enumerate(shape)} - - #def field_dtype(self, name: str, dataset: H5Dataset) -> sc.dtype: - # return _dtype_fromdataset(dataset) - def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: # Note that this will be similar in NXdata, but there we need to handle # bin edges as well. @@ -544,11 +537,17 @@ def get_dims(name, field): if self._signal is not None and group_dims is not None: return _guess_dims(group_dims, self._signal.dataset.shape, field.dataset) - self._valid = False for name, field in group._children.items(): if (dims := get_dims(name, field)) is not None: - field.sizes = dict(zip(dims, field.dataset.shape)) + # The convention here is that the given dimensions apply to the shapes + # starting from the left. So we only squeeze dimensions that are after + # len(dims). 
+ print(f'get_dims({name}) = {dims}') + shape = _squeeze_trailing(dims, field.dataset.shape) + field.sizes = dict(zip(dims, shape)) + else: + self._valid = False return ################ @@ -592,16 +591,9 @@ def get_dims(name, field): # self._coord_dims[name] = (dims[list(self.sizes.values()).index( # dataset.shape[0])], ) - @property - def shape(self) -> Tuple[int, ...]: - return self._signal.shape - @property def sizes(self) -> Dict[str, int]: - return self._signal.sizes - # TODO We should only do this if we know that assembly into DataArray is possible. - return dict(zip(self._field_dims[self._signal_name], - self.shape)) if self._valid else super().sizes + return self._signal.sizes if self._valid else super().sizes def _bin_edge_dim(self, coord: Field) -> Union[None, str]: sizes = self.sizes @@ -617,18 +609,6 @@ def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex bin_edge_dim=self._bin_edge_dim(child)) return child[child_sel] - def field_sizes(self, name: str, field: Field) -> Dict[str, int]: - dims = self._field_dims[name] - if dims is None: - dims = super().field_sizes(name, field) - shape = field.dataset.shape - if len(dims) < len(shape): - # The convention here is that the given dimensions apply to the shapes - # starting from the left. So we only squeeze dimensions that are after - # len(dims). 
- shape = _squeeze_trailing(dims, shape) - return dict(zip(dims, shape)) - def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: coords = sc.DataGroup(dg) signal = coords.pop(self._signal_name) From 0b22bf9f273213ed504df5aa3bd80355eb044ac2 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 16 Mar 2023 15:35:17 +0100 Subject: [PATCH 08/98] Restore coord to attr conversion --- src/scippnexus/nx2.py | 28 ++++++++++++++++++++++++++-- tests/nxdata_test.py | 31 +++++++++++++++++-------------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index c13db42d..01e85e9a 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -543,7 +543,6 @@ def get_dims(name, field): # The convention here is that the given dimensions apply to the shapes # starting from the left. So we only squeeze dimensions that are after # len(dims). - print(f'get_dims({name}) = {dims}') shape = _squeeze_trailing(dims, field.dataset.shape) field.sizes = dict(zip(dims, shape)) else: @@ -612,7 +611,32 @@ def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: coords = sc.DataGroup(dg) signal = coords.pop(self._signal_name) - return sc.DataArray(data=signal, coords=coords) + da = sc.DataArray(data=signal) + return self._add_coords(da, coords) + + def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: + if len(coord.dims) == 1: + return coord.dims[0] + if name in coord.dims and name in self.dims: + return name + return self._bin_edge_dim(coord) + + def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> bool: + dim_of_coord = self._dim_of_coord(name, coord) + if dim_of_coord is None: + return False + if dim_of_coord not in da.dims: + return True + return False + + def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: + da.coords.update(coords) + for 
name, coord in coords.items(): + #if name not in self: + # continue + if self._coord_to_attr(da, name, coord): + da.attrs[name] = da.coords.pop(name) + return da def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[int, ...]: diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 04912abb..1207c52d 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -410,45 +410,48 @@ def test_one_dim_of_length_1_is_squeezed_when_no_axes_specified(h5root): assert data['signal'].dims == ('dim_0', ) -def test_only_one_axis_specified_for_2d_field(nxroot): +def test_only_one_axis_specified_for_2d_field(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['axes'] = ['zz'] data.attrs['signal'] = 'signal' + data = snx.Group(data, definitions=snx.base_definitions) loaded = data[...] 
assert sc.identical(loaded.data, sc.array(dims=['zz'], unit='m', values=[1.1])) -def test_fields_with_datetime_attribute_are_loaded_as_datetime(nxroot): +def test_fields_with_datetime_attribute_are_loaded_as_datetime(h5root): da = sc.DataArray( sc.epoch(unit='s') + sc.array(dims=['xx', 'yy'], unit='s', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] da.coords['xx2'] = da.data['yy', 1] da.coords['yy'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_field('signal', da.data) - data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('yy', da.coords['yy']) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'yy', da.coords['yy']) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) -def test_slicing_with_bin_edge_coord_returns_bin_edges(nxroot): +def test_slicing_with_bin_edge_coord_returns_bin_edges(h5root): da = sc.DataArray(sc.array(dims=['xx'], unit='K', values=[1.1, 2.2, 3.3])) da.coords['xx'] = sc.array(dims=['xx'], unit='m', values=[0.1, 0.2, 0.3, 0.4]) da.coords['xx2'] = sc.array(dims=['xx'], unit='m', values=[0.3, 0.4, 0.5, 0.6]) - data = nxroot.create_class('data', NXdata) - data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('data', da.data) + data = snx.create_class(h5root, 'data', NXdata) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'data', da.data) data.attrs['signal'] = 'data' data.attrs['axes'] = ['xx'] data.attrs['xx_indices'] = [0] data.attrs['xx2_indices'] = [0] + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) assert 
sc.identical(data['xx', 0], da['xx', 0]) assert sc.identical(data['xx', 1], da['xx', 1]) From c77c68b4c29f62661cc3d4551795ca6abce81ef3 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 16 Mar 2023 15:38:43 +0100 Subject: [PATCH 09/98] Legacy signal handling --- src/scippnexus/nx2.py | 6 ++++-- tests/nxdata_test.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 01e85e9a..3e26bb6d 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -441,7 +441,7 @@ def __init__(self, group: Group): # Can we just set field dims here? self._signal_name = None self._signal = None - if (name := group.attrs.get('signal')) is not None: + if (name := group.attrs.get('signal')) is not None and name in group._children: self._signal_name = name self._signal = group._children[name] else: @@ -647,7 +647,8 @@ def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[in base_definitions['NXdata'] = NXdata -def create_field(group: H5Group, name: str, data: DimensionedArray, **kwargs): +def create_field(group: H5Group, name: str, data: DimensionedArray, + **kwargs) -> H5Dataset: if not isinstance(data, sc.Variable): return group.create_dataset(name, data=data, **kwargs) values = data.values @@ -661,6 +662,7 @@ def create_field(group: H5Group, name: str, data: DimensionedArray, **kwargs): dataset.attrs['units'] = str(data.unit) if data.dtype == sc.DType.datetime64: dataset.attrs['start'] = str(start.value) + return dataset def create_class(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Group: diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 1207c52d..8c033723 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -460,22 +460,24 @@ def test_slicing_with_bin_edge_coord_returns_bin_edges(h5root): assert sc.identical(data['xx', 1:1], da['xx', 1:1]) # empty slice -def test_legacy_signal_attr_is_used(nxroot): +def 
test_legacy_signal_attr_is_used(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2], [3.3, 4.4]]) - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = signal.dims - field = data.create_field('mysig', signal) + field = snx.create_field(data, 'mysig', signal) field.attrs['signal'] = 1 # legacy way of defining signal + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], sc.DataArray(signal)) -def test_invalid_group_signal_attribute_is_ignored(nxroot): +def test_invalid_group_signal_attribute_is_ignored(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2], [3.3, 4.4]]) - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = signal.dims data.attrs['signal'] = 'signal' - field = data.create_field('mysig', signal) + field = snx.create_field(data, 'mysig', signal) field.attrs['signal'] = 1 # legacy way of defining signal + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], sc.DataArray(signal)) From cc02a4bd0199316e42daa967e6001ab315df2911 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 17 Mar 2023 10:47:20 +0100 Subject: [PATCH 10/98] Update most NXdata tests --- src/scippnexus/nx2.py | 21 +++++++++++++++++---- tests/nxdata_test.py | 27 +++++++++++++++------------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 3e26bb6d..e71c62fe 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -477,7 +477,9 @@ def _get_group_dims(): group_dims = _get_group_dims() - if self._signal is not None: + if self._signal is None: + self._valid = False + else: if group_dims is not None: shape = self._signal.dataset.shape shape = _squeeze_trailing(group_dims, shape) @@ -539,14 +541,21 @@ def get_dims(name, field): field.dataset) for name, field in 
group._children.items(): - if (dims := get_dims(name, field)) is not None: + if not isinstance(field, Field): + self._valid = False + elif (dims := get_dims(name, field)) is not None: # The convention here is that the given dimensions apply to the shapes # starting from the left. So we only squeeze dimensions that are after # len(dims). shape = _squeeze_trailing(dims, field.dataset.shape) field.sizes = dict(zip(dims, shape)) - else: - self._valid = False + elif self._valid: + s1 = self._signal.sizes + s2 = field.sizes + if not set(s2.keys()).issubset(set(s1.keys())): + self._valid = False + elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): + self._valid = False return ################ @@ -595,6 +604,8 @@ def sizes(self) -> Dict[str, int]: return self._signal.sizes if self._valid else super().sizes def _bin_edge_dim(self, coord: Field) -> Union[None, str]: + if not isinstance(coord, Field): + return None sizes = self.sizes for dim, size in zip(coord.dims, coord.shape): if (sz := sizes.get(dim)) is not None and sz + 1 == size: @@ -609,6 +620,8 @@ def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex return child[child_sel] def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: + if not self._valid: + return super().assemble(dg) coords = sc.DataGroup(dg) signal = coords.pop(self._signal_name) da = sc.DataArray(data=signal) diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 8c033723..198abc5e 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -481,36 +481,39 @@ def test_invalid_group_signal_attribute_is_ignored(h5root): assert sc.identical(data[...], sc.DataArray(signal)) -def test_legacy_axis_attrs_define_dim_names(nxroot): +def test_legacy_axis_attrs_define_dim_names(h5root): da = sc.DataArray(sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2], [4, 5]])) da.coords['xx'] = da.data['yy', 0] da.coords['yy'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) - signal = 
data.create_field('signal', da.data) - xx = data.create_field('xx', da.coords['xx']) - yy = data.create_field('yy', da.coords['yy']) + data = snx.create_class(h5root, 'data1', NXdata) + signal = snx.create_field(data, 'signal', da.data) + xx = snx.create_field(data, 'xx', da.coords['xx']) + yy = snx.create_field(data, 'yy', da.coords['yy']) signal.attrs['signal'] = 1 xx.attrs['axis'] = 1 yy.attrs['axis'] = 2 + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], da) -def test_nested_groups_trigger_fallback_to_load_as_data_group(nxroot): +def test_nested_groups_trigger_fallback_to_load_as_data_group(h5root): da = sc.DataArray(sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2], [4, 5]])) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', da.data) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', da.data) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal' - data.create_class('nested', NXdata) + snx.create_class(data, 'nested', NXdata) + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], sc.DataGroup(signal=da.data, nested=sc.DataGroup())) -def test_slicing_raises_given_invalid_index(nxroot): +def test_slicing_raises_given_invalid_index(h5root): signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1, 2.2], [3.3, 4.4]]) - data = nxroot.create_class('data1', NXdata) - data.create_field('signal', signal) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) data.attrs['axes'] = signal.dims data.attrs['signal'] = 'signal' + data = snx.Group(data, definitions=snx.base_definitions) assert sc.identical(data[...], sc.DataArray(signal)) with pytest.raises(IndexError): data['xx', 2] From 05debb97ddd5eb7b9fee686212e7276537db34c8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 17 Mar 2023 13:31:58 +0100 Subject: [PATCH 11/98] Support NXlog --- src/scippnexus/nx2.py | 30 
+++++++++++++++++++++++++----- tests/nexus_test.py | 19 ++++++++++++++----- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index e71c62fe..f70b33b6 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -40,6 +40,10 @@ # - Non-legacy mode would make dim parsing simpler and faster? +def asarray(obj: Union[Any, sc.Variable]) -> sc.Variable: + return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) + + class NexusStructureError(Exception): """Invalid or unsupported class and field structure in Nexus. """ @@ -365,6 +369,7 @@ def _children(self) -> Dict[str, Union[Field, Group]]: if values.unit == errors.unit and values.dataset.shape == errors.dataset.shape: values.errors = errors.dataset del items[f'{name}{suffix}'] + items = {k: v for k, v in items.items() if not k.startswith('cue_')} return items @cached_property @@ -430,7 +435,10 @@ def _guess_dims(dims, shape, dataset: H5Dataset): class NXdata(NXobject): - def __init__(self, group: Group): + def __init__(self, + group: Group, + fallback_dims: Optional[Tuple[str, ...]] = None, + fallback_signal_name: Optional[str] = None): super().__init__(group) self._valid = True # Must do full consistency check here, to define self.sizes: @@ -441,7 +449,9 @@ def __init__(self, group: Group): # Can we just set field dims here? 
self._signal_name = None self._signal = None - if (name := group.attrs.get('signal')) is not None and name in group._children: + if (name := group.attrs.get( + 'signal', + fallback_signal_name)) is not None and name in group._children: self._signal_name = name self._signal = group._children[name] else: @@ -477,6 +487,9 @@ def _get_group_dims(): group_dims = _get_group_dims() + if group_dims is None: + group_dims = fallback_dims + if self._signal is None: self._valid = False else: @@ -485,9 +498,6 @@ def _get_group_dims(): shape = _squeeze_trailing(group_dims, shape) self._signal.sizes = dict(zip(group_dims, shape)) - # if group_dims is None: - # group_dims = fallback_dims - if axes is not None: # Unlike self.dims we *drop* entries that are '.' named_axes = [a for a in axes if a != '.'] @@ -625,6 +635,7 @@ def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: coords = sc.DataGroup(dg) signal = coords.pop(self._signal_name) da = sc.DataArray(data=signal) + coords = {name: asarray(coord) for name, coord in coords.items()} return self._add_coords(da, coords) def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: @@ -656,8 +667,17 @@ def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[in return shape[:len(dims)] + tuple(size for size in shape[len(dims):] if size != 1) +class NXlog(NXdata): + + def __init__(self, group: Group): + super().__init__(group, fallback_dims=('time', ), fallback_signal_name='value') + if (time := self._group._children.get('time')) is not None: + time._is_time = True + + base_definitions = {} base_definitions['NXdata'] = NXdata +base_definitions['NXlog'] = NXlog def create_field(group: H5Group, name: str, data: DimensionedArray, diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 5ae97431..f2c5e04b 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -6,6 +6,7 @@ import pytest import scipp as sc +import scippnexus.nx2 as snx from scippnexus import ( Field, 
NexusStructureError, @@ -26,6 +27,13 @@ ) +@pytest.fixture() +def h5root(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield f + + @pytest.fixture() def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" @@ -82,7 +90,7 @@ def test_nxobject_entry(nxroot): assert set(entry.keys()) == {'events_0', 'events_1', 'log'} -def test_nxobject_log(nxroot): +def test_nxobject_log(h5root): da = sc.DataArray(sc.array(dims=['time'], values=[1.1, 2.2, 3.3]), coords={ 'time': @@ -90,10 +98,11 @@ def test_nxobject_log(nxroot): sc.array(dims=['time'], unit='s', values=[4.4, 5.5, 6.6]).to( unit='ns', dtype='int64') }) - log = nxroot['entry'].create_class('log', NXlog) - log['value'] = da.data - log['time'] = da.coords['time'] - sc.epoch(unit='ns') - assert log.nx_class == NXlog + log = snx.create_class(h5root, 'log', NXlog) + snx.create_field(log, 'value', da.data) + snx.create_field(log, 'time', da.coords['time'] - sc.epoch(unit='ns')) + log = snx.Group(log, definitions=snx.base_definitions) + #assert log.nx_class == NXlog assert sc.identical(log[...], da) From 97ffb0cc7e85644d6808a3862b63cf88affe5628 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 17 Mar 2023 13:49:37 +0100 Subject: [PATCH 12/98] Cache sizes for significant speedup --- src/scippnexus/nx2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index f70b33b6..6ec19e56 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -307,7 +307,7 @@ def __init__(self, group: Group): field.sizes = _squeezed_field_sizes(field.dataset) field.dtype = _dtype_fromdataset(field.dataset) - @property + @cached_property def sizes(self) -> Dict[str, int]: # exclude geometry/tansform groups? 
return sc.DataGroup(self._group).sizes @@ -609,7 +609,7 @@ def get_dims(name, field): # self._coord_dims[name] = (dims[list(self.sizes.values()).index( # dataset.shape[0])], ) - @property + @cached_property def sizes(self) -> Dict[str, int]: return self._signal.sizes if self._valid else super().sizes From 26cb1e238d115733a25173d7de95c61383ab0c27 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 06:48:52 +0100 Subject: [PATCH 13/98] Fixes and NXoff_geometry support --- src/scippnexus/nx2.py | 11 +++- src/scippnexus/nxoff_geometry.py | 13 ++++- tests/nxdetector_test.py | 94 +++++++++++++++++++------------- tests/nxoff_geometry_test.py | 40 +++++++------- 4 files changed, 97 insertions(+), 61 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 6ec19e56..6b472626 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -655,10 +655,10 @@ def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> boo def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: da.coords.update(coords) - for name, coord in coords.items(): + for name in coords: #if name not in self: # continue - if self._coord_to_attr(da, name, coord): + if self._coord_to_attr(da, name, self._group[name]): da.attrs[name] = da.coords.pop(name) return da @@ -675,9 +675,16 @@ def __init__(self, group: Group): time._is_time = True +class NXdetector(NXdata): + + def __init__(self, group: Group): + super().__init__(group, fallback_signal_name='data') + + base_definitions = {} base_definitions['NXdata'] = NXdata base_definitions['NXlog'] = NXlog +base_definitions['NXdetector'] = NXdetector def create_field(group: H5Group, name: str, data: DimensionedArray, diff --git a/src/scippnexus/nxoff_geometry.py b/src/scippnexus/nxoff_geometry.py index 7f2223ae..d08ca137 100644 --- a/src/scippnexus/nxoff_geometry.py +++ b/src/scippnexus/nxoff_geometry.py @@ -5,7 +5,7 @@ import scipp as sc -from .nxobject import NexusStructureError, 
NXobject +from .nx2 import Field, Group, NexusStructureError, NXobject, base_definitions def off_to_shape(*, @@ -55,6 +55,14 @@ class NXoff_geometry(NXobject): 'faces': ('face', ) } + def __init__(self, group: Group): + super().__init__(group) + for name, field in group._children.items(): + if isinstance(field, Field): + field.sizes = dict(zip(self._get_field_dims(name), field.dataset.shape)) + if (dtype := self._get_field_dtype(name)) is not None: + field.dtype = dtype + def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: return self._dims.get(name) @@ -66,3 +74,6 @@ def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: def load_as_array(self, detector_number: Optional[sc.Variable] = None) -> sc.Variable: return off_to_shape(**self[()], detector_number=detector_number) + + +base_definitions['NXoff_geometry'] = NXoff_geometry \ No newline at end of file diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index a667f59c..dced9bd3 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -4,6 +4,7 @@ import scipp as sc import scippnexus as snx +import scippnexus.nx2 as snx2 from scippnexus import ( NexusStructureError, NXdetector, @@ -15,6 +16,17 @@ ) +def make_group(group: h5py.Group) -> snx2.Group: + return snx2.Group(group, definitions=snx2.base_definitions) + + +@pytest.fixture() +def h5root(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield f + + @pytest.fixture() def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" @@ -33,21 +45,23 @@ def test_warns_if_no_data_found(nxroot): assert isinstance(dg, sc.DataGroup) -def test_can_load_fields_if_no_data_found(nxroot): +def test_can_load_fields_if_no_data_found(h5root): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) - detector = nxroot.create_class('detector0', NXdetector) - 
detector.create_field('detector_numbers', detector_numbers) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, 'detector_numbers', detector_numbers) detector['detector_numbers'][...] -def test_finds_data_from_group_attr(nxroot): +def test_finds_data_from_group_attr(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field('detector_numbers', da.coords['detector_numbers']) - detector.create_field('custom', da.data) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx2.create_field(detector, 'custom', da.data) detector.attrs['signal'] = 'custom' + detector = make_group(detector) + print(detector[...]) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) @@ -72,23 +86,25 @@ def detector_numbers_xx_yy_1234(): return sc.array(dims=['xx', 'yy'], unit=None, values=np.array([[1, 2], [3, 4]])) -def test_loads_data_without_coords(nxroot): +def test_loads_data_without_coords(h5root): da = sc.DataArray(sc.array(dims=['xx', 'yy'], values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field('detector_numbers', da.coords['detector_numbers']) - detector.create_field('data', da.data) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx2.create_field(detector, 'data', da.data) + detector = make_group(detector) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) @pytest.mark.parametrize('detector_number_key', ['detector_number', 'pixel_id', 'spectrum_index']) -def 
test_detector_number_key_alias(nxroot, detector_number_key): +def test_detector_number_key_alias(h5root, detector_number_key): da = sc.DataArray(sc.array(dims=['xx', 'yy'], values=[[1.1, 2.2], [3.3, 4.4]])) da.coords[detector_number_key] = detector_numbers_xx_yy_1234() - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field(detector_number_key, da.coords[detector_number_key]) - detector.create_field('data', da.data) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, detector_number_key, da.coords[detector_number_key]) + snx2.create_field(detector, 'data', da.data) + detector = make_group(detector) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) @@ -102,20 +118,21 @@ def test_select_events_raises_if_detector_contains_data(nxroot): detector.select_events -def test_loads_data_with_coords(nxroot): +def test_loads_data_with_coords(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() da.coords['xx'] = sc.array(dims=['xx'], unit='m', values=[0.1, 0.2]) - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field('detector_numbers', da.coords['detector_numbers']) - detector.create_field('xx', da.coords['xx']) - detector.create_field('data', da.data) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx2.create_field(detector, 'xx', da.coords['xx']) + snx2.create_field(detector, 'data', da.data) detector.attrs['axes'] = ['xx', '.'] + detector = make_group(detector) assert sc.identical(detector[...], da.rename_dims({'yy': 'dim_1'})) -def test_slicing_works_as_in_scipp(nxroot): +def test_slicing_works_as_in_scipp(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2, 3.3], [3.3, 4.4, 5.5]])) @@ -128,15 +145,16 @@ def 
test_slicing_works_as_in_scipp(nxroot): da.coords['2d_edges'] = sc.array(dims=['yy', 'xx'], unit='m', values=[[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field('detector_numbers', da.coords['detector_numbers']) - detector.create_field('xx', da.coords['xx']) - detector.create_field('xx2', da.coords['xx2']) - detector.create_field('yy', da.coords['yy']) - detector.create_field('2d_edges', da.coords['2d_edges']) - detector.create_field('data', da.data) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx2.create_field(detector, 'xx', da.coords['xx']) + snx2.create_field(detector, 'xx2', da.coords['xx2']) + snx2.create_field(detector, 'yy', da.coords['yy']) + snx2.create_field(detector, '2d_edges', da.coords['2d_edges']) + snx2.create_field(detector, 'data', da.data) detector.attrs['axes'] = ['xx', 'yy'] detector.attrs['2d_edges_indices'] = [1, 0] + detector = make_group(detector) assert sc.identical(detector[...], da) assert sc.identical(detector['xx', 0], da['xx', 0]) assert sc.identical(detector['xx', 1], da['xx', 1]) @@ -315,7 +333,7 @@ def __getitem__(self, select=...): assert np.array_equal(Load()['pulse', :-2], [3, 0]) -def create_off_geometry_detector_numbers_1234(group: NXobject, +def create_off_geometry_detector_numbers_1234(group: snx2.Group, name: str, detector_faces: bool = True): off = group.create_class(name, NXoff_geometry) @@ -335,20 +353,22 @@ def create_off_geometry_detector_numbers_1234(group: NXobject, @pytest.mark.parametrize('detid_name', ['detector_number', 'pixel_id', 'spectrum_index']) -def test_loads_data_with_coords_and_off_geometry(nxroot, detid_name): +def test_loads_data_with_coords_and_off_geometry(h5root, detid_name): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_number'] = detector_numbers_xx_yy_1234() 
da.coords['xx'] = sc.array(dims=['xx'], unit='m', values=[0.1, 0.2]) - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field(detid_name, da.coords['detector_number']) - detector.create_field('xx', da.coords['xx']) - detector.create_field('data', da.data) + detector = snx2.create_class(h5root, 'detector0', NXdetector) + snx2.create_field(detector, detid_name, da.coords['detector_number']) + snx2.create_field(detector, 'xx', da.coords['xx']) + snx2.create_field(detector, 'data', da.data) detector.attrs['axes'] = ['xx', 'yy'] - create_off_geometry_detector_numbers_1234(detector, name='shape') - loaded = detector[...] + det = make_group(detector) + create_off_geometry_detector_numbers_1234(det, name='shape') + det = make_group(detector) + loaded = det[...] expected = snx.nxoff_geometry.off_to_shape( - **detector['shape'][()], detector_number=da.coords['detector_number']) + **det['shape'][()], detector_number=da.coords['detector_number']) assert sc.identical(loaded.coords['shape'].bins.size(), sc.array(dims=da.dims, values=[[1, 1], [1, 1]], unit=None)) assert sc.identical(loaded.coords['shape'], expected) diff --git a/tests/nxoff_geometry_test.py b/tests/nxoff_geometry_test.py index 435c4b83..1b8f3283 100644 --- a/tests/nxoff_geometry_test.py +++ b/tests/nxoff_geometry_test.py @@ -3,21 +3,19 @@ import pytest import scipp as sc -from scippnexus import NexusStructureError, NXentry, NXoff_geometry, NXroot -from scippnexus.nxoff_geometry import off_to_shape +import scippnexus.nx2 as snx +from scippnexus.nxoff_geometry import NXoff_geometry, off_to_shape @pytest.fixture() -def nxroot(request): +def group(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = NXroot(f) - root.create_class('entry', NXentry) - yield root + yield snx.Group(f, definitions=snx.base_definitions) -def test_vertices_loaded_as_vector3(nxroot): - shape = 
nxroot['entry'].create_class('shape', NXoff_geometry) +def test_vertices_loaded_as_vector3(group): + shape = group.create_class('shape', NXoff_geometry) values = [[1, 2, 3], [4, 5, 6]] shape['vertices'] = sc.array(dims=['ignored', 'comp'], values=values, unit='mm') loaded = shape[()] @@ -25,8 +23,8 @@ def test_vertices_loaded_as_vector3(nxroot): sc.vectors(dims=['vertex'], values=values, unit='mm')) -def test_field_properties(nxroot): - shape = nxroot['entry'].create_class('shape', NXoff_geometry) +def test_field_properties(group): + shape = group.create_class('shape', NXoff_geometry) values = [[1, 2, 3], [4, 5, 6]] shape['vertices'] = sc.array(dims=['ignored', 'comp'], values=values, unit='m') shape['winding_order'] = sc.array(dims=['ignored'], values=[], unit=None) @@ -39,8 +37,8 @@ def test_field_properties(nxroot): assert loaded['faces'].unit is None -def test_off_to_shape_without_detector_faces_yields_scalar_shape_with_all_faces(nxroot): - off = nxroot['entry'].create_class('off', NXoff_geometry) +def test_off_to_shape_without_detector_faces_yields_scalar_shape_with_all_faces(group): + off = group.create_class('off', NXoff_geometry) values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] off['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') off['winding_order'] = sc.array(dims=['_'], values=[0, 1, 2, 0, 2, 1], unit=None) @@ -51,8 +49,8 @@ def test_off_to_shape_without_detector_faces_yields_scalar_shape_with_all_faces( assert sc.identical(shape.bins.size(), sc.index(2)) -def test_off_to_shape_raises_if_detector_faces_but_not_detector_numbers_given(nxroot): - off = nxroot['entry'].create_class('off', NXoff_geometry) +def test_off_to_shape_raises_if_detector_faces_but_not_detector_numbers_given(group): + off = group.create_class('off', NXoff_geometry) values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) off['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') off['winding_order'] = sc.array(dims=['_'], values=[0, 1, 2, 0, 2, 1], unit=None) @@ 
-63,12 +61,12 @@ def test_off_to_shape_raises_if_detector_faces_but_not_detector_numbers_given(nx values=[[0, det_num2], [1, det_num1]], unit=None) loaded = off[()] - with pytest.raises(NexusStructureError): + with pytest.raises(snx.NexusStructureError): off_to_shape(**loaded) -def test_off_to_shape_with_single_detector_yields_1d_shape(nxroot): - off = nxroot['entry'].create_class('off', NXoff_geometry) +def test_off_to_shape_with_single_detector_yields_1d_shape(group): + off = group.create_class('off', NXoff_geometry) values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] off['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') off['winding_order'] = sc.array(dims=['_'], values=[0, 1, 2, 0, 2, 1], unit=None) @@ -93,8 +91,8 @@ def test_off_to_shape_with_single_detector_yields_1d_shape(nxroot): sc.array(dims=['detector_number'], values=[2], unit=None)) -def test_off_to_shape_with_two_detectors_yields_1d_shape(nxroot): - off = nxroot['entry'].create_class('off', NXoff_geometry) +def test_off_to_shape_with_two_detectors_yields_1d_shape(group): + off = group.create_class('off', NXoff_geometry) values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) off['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') off['winding_order'] = sc.array(dims=['_'], values=[0, 1, 2, 0, 2, 1], unit=None) @@ -119,8 +117,8 @@ def test_off_to_shape_with_two_detectors_yields_1d_shape(nxroot): sc.vectors(dims=['face', 'vertex'], values=[values], unit='m')) -def test_off_to_shape_uses_order_of_provided_detector_number_param(nxroot): - off = nxroot['entry'].create_class('off', NXoff_geometry) +def test_off_to_shape_uses_order_of_provided_detector_number_param(group): + off = group.create_class('off', NXoff_geometry) values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) off['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') off['winding_order'] = sc.array(dims=['_'], values=[0, 1, 2, 0, 2, 1], unit=None) From dbdc9f53e4db5253afd393361b06d4290c609b79 Mon Sep 17 
00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 07:21:33 +0100 Subject: [PATCH 14/98] Implement special field handling --- src/scippnexus/nx2.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 6b472626..9f125236 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -302,10 +302,15 @@ class NXobject: def __init__(self, group: Group): self._group = group - for field in group._children.values(): + self._special_fields = [] + for name, field in group._children.items(): if isinstance(field, Field): field.sizes = _squeezed_field_sizes(field.dataset) field.dtype = _dtype_fromdataset(field.dataset) + elif (nx_class := field.attrs.get('NX_class')) in [ + 'NXoff_geometry', + ]: + self._special_fields.append(name) @cached_property def sizes(self) -> Dict[str, int]: @@ -323,6 +328,18 @@ def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: {name: self.index_child(child, sel) for name, child in obj.items()}) + @property + def detector_number(self) -> Optional[str]: + return None + + def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: + for name in self._special_fields: + from .nxoff_geometry import off_to_shape + detector_number = dg.get(self.detector_number) + dg[name] = off_to_shape(**dg[name], detector_number=detector_number) + #print(list(dg.items())) + return dg + def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: return dg @@ -379,12 +396,6 @@ def _nexus(self) -> NXobject: def _populate_fields(self) -> None: _ = self._nexus - def _populate_field(self, name: str, field: Field) -> None: - if field.sizes is not None: - return - field.sizes = self._nexus.field_sizes(name, field) - field.dtype = self._nexus.field_dtype(name, field.dataset) - def __len__(self) -> int: return len(self._children) @@ -396,12 +407,11 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: child = 
self._children[sel] if isinstance(child, Field): self._populate_fields() - #self._populate_field(sel, child) return child # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. # (not scipp.DataArray, as that does not support lazy data) dg = self._nexus.read_children(self, sel) - # TODO assemble geometry/transforms/events + dg = self._nexus.pre_assemble(dg) try: return self._nexus.assemble(dg) except (sc.DimensionError, NexusStructureError) as e: @@ -552,7 +562,8 @@ def get_dims(name, field): for name, field in group._children.items(): if not isinstance(field, Field): - self._valid = False + if name not in self._special_fields: + self._valid = False elif (dims := get_dims(name, field)) is not None: # The convention here is that the given dimensions apply to the shapes # starting from the left. So we only squeeze dimensions that are after @@ -680,6 +691,12 @@ class NXdetector(NXdata): def __init__(self, group: Group): super().__init__(group, fallback_signal_name='data') + @property + def detector_number(self) -> Optional[str]: + for name in ['detector_number', 'pixel_id', 'spectrum_index']: + if name in self._group: + return name + base_definitions = {} base_definitions['NXdata'] = NXdata From 60d6499373d2bf116ce36b407d1c9342a3151fe7 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 08:06:38 +0100 Subject: [PATCH 15/98] Improve support for special fields --- src/scippnexus/nx2.py | 29 +++++++++++++++++++++-------- src/scippnexus/nxoff_geometry.py | 5 +++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 9f125236..7796bb7f 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -302,15 +302,16 @@ class NXobject: def __init__(self, group: Group): self._group = group - self._special_fields = [] + self._special_fields = {} for name, field in group._children.items(): if isinstance(field, Field): field.sizes = _squeezed_field_sizes(field.dataset) 
field.dtype = _dtype_fromdataset(field.dataset) - elif (nx_class := field.attrs.get('NX_class')) in [ + elif field.attrs.get('NX_class') in [ 'NXoff_geometry', + 'NXgeometry', ]: - self._special_fields.append(name) + self._special_fields[name] = field @cached_property def sizes(self) -> Dict[str, int]: @@ -333,11 +334,11 @@ def detector_number(self) -> Optional[str]: return None def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: - for name in self._special_fields: - from .nxoff_geometry import off_to_shape - detector_number = dg.get(self.detector_number) - dg[name] = off_to_shape(**dg[name], detector_number=detector_number) - #print(list(dg.items())) + for name, field in self._special_fields.items(): + det_num = self.detector_number + if det_num is not None: + det_num = dg[det_num] + dg[name] = field._nexus.assemble_as_child(dg[name], detector_number=det_num) return dg def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: @@ -698,10 +699,22 @@ def detector_number(self) -> Optional[str]: return name +class NXgeometry(NXobject): + + def __init__(self, group: Group): + super().__init__(group) + + @staticmethod + def assemble_as_child(children: sc.DataGroup, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + return sc.scalar(children) + + base_definitions = {} base_definitions['NXdata'] = NXdata base_definitions['NXlog'] = NXlog base_definitions['NXdetector'] = NXdetector +base_definitions['NXgeometry'] = NXgeometry def create_field(group: H5Group, name: str, data: DimensionedArray, diff --git a/src/scippnexus/nxoff_geometry.py b/src/scippnexus/nxoff_geometry.py index d08ca137..f04cc4ab 100644 --- a/src/scippnexus/nxoff_geometry.py +++ b/src/scippnexus/nxoff_geometry.py @@ -75,5 +75,10 @@ def load_as_array(self, detector_number: Optional[sc.Variable] = None) -> sc.Variable: return off_to_shape(**self[()], detector_number=detector_number) + @staticmethod + def assemble_as_child(children: sc.DataGroup, + detector_number: 
Optional[sc.Variable] = None) -> sc.Variable: + return off_to_shape(**children, detector_number=detector_number) + base_definitions['NXoff_geometry'] = NXoff_geometry \ No newline at end of file From cef6a7df55bc8423c81a6eb4d06f31b660cbffe6 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 08:13:15 +0100 Subject: [PATCH 16/98] Cleanup and pass pre-commit --- src/scippnexus/nx2.py | 116 +++++++++++------------------------------- 1 file changed, 30 insertions(+), 86 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 7796bb7f..899009a9 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -9,7 +9,7 @@ from collections.abc import Mapping from dataclasses import dataclass from functools import cached_property -from typing import Any, Dict, Iterable, Iterator, Optional, Tuple, Union +from typing import Any, Dict, Iterator, Optional, Protocol, Tuple, Union import dateutil.parser import numpy as np @@ -44,6 +44,27 @@ def asarray(obj: Union[Any, sc.Variable]) -> sc.Variable: return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) +# TODO move into scipp +class DimensionedArray(Protocol): + """ + A multi-dimensional array with a unit and dimension labels. + + Could be, e.g., a scipp.Variable or a dimple dataclass wrapping a numpy array. + """ + + @property + def values(self): + """Multi-dimensional array of values""" + + @property + def unit(self): + """Physical unit of the values""" + + @property + def dims(self) -> Tuple[str]: + """Dimension labels for the values""" + + class NexusStructureError(Exception): """Invalid or unsupported class and field structure in Nexus. 
""" @@ -133,41 +154,6 @@ class Field: def attrs(self) -> Dict[str, Any]: return dict(self.dataset.attrs) if self.dataset.attrs else dict() - #def __init__(self, - # dataset: H5Dataset, - # errors: Optional[H5Dataset] = None, - # *, - # ancestor, - # dims=None, - # dtype: Optional[sc.DType] = None, - # is_time=None): - # self._ancestor = ancestor # Usually the parent, but may be grandparent, etc. - # self.dataset = dataset - # self._errors = errors - # self._dtype = _dtype_fromdataset(dataset) if dtype is None else dtype - # self._shape = self.dataset.shape - # if self._dtype == sc.DType.vector3: - # self._shape = self._shape[:-1] - # self._is_time = is_time - # # NeXus treats [] and [1] interchangeably. In general this is ill-defined, but - # # the best we can do appears to be squeezing unless the file provides names for - # # dimensions. The shape property of this class does thus not necessarily return - # # the same as the shape of the underlying dataset. - # # TODO Should this logic be in FieldInfo? Or in NXdataInfo? - # if dims is not None: - # self._dims = tuple(dims) - # if len(self._dims) < len(self._shape): - # # The convention here is that the given dimensions apply to the shapes - # # starting from the left. So we only squeeze dimensions that are after - # # len(dims). 
- # self._shape = self._shape[:len(self._dims)] + tuple( - # size for size in self._shape[len(self._dims):] if size != 1) - # elif (axes := self.attrs.get('axes')) is not None: - # self._dims = tuple(axes.split(',')) - # else: - # self._shape = tuple(size for size in self._shape if size != 1) - # self._dims = tuple(f'dim_{i}' for i in range(self.ndim)) - @property def dims(self) -> Tuple[str]: return tuple(self.sizes.keys()) @@ -384,7 +370,8 @@ def _children(self) -> Dict[str, Union[Field, Group]]: for name in field_with_errors: values = items[name] errors = items[f'{name}{suffix}'] - if values.unit == errors.unit and values.dataset.shape == errors.dataset.shape: + if (values.unit == errors.unit + and values.dataset.shape == errors.dataset.shape): values.errors = errors.dataset del items[f'{name}{suffix}'] items = {k: v for k, v in items.items() if not k.startswith('cue_')} @@ -415,7 +402,8 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: dg = self._nexus.pre_assemble(dg) try: return self._nexus.assemble(dg) - except (sc.DimensionError, NexusStructureError) as e: + except (sc.DimensionError, NexusStructureError): + # TODO log warning return dg @cached_property @@ -468,8 +456,8 @@ def __init__(self, else: # Legacy NXdata defines signal not as group attribute, but attr on dataset for name, field in group._children.items(): - # What is the meaning of the attribute value? It is undocumented, we simply - # ignore it. + # What is the meaning of the attribute value? It is undocumented, + # we simply ignore it. if 'signal' in field.attrs: self._signal_name = name self._signal = group._children[name] @@ -551,8 +539,8 @@ def get_dims(name, field): if name in named_axes: # If there are named axes then items of same name are "dimension # coordinates", i.e., have a dim matching their name. - # However, if the item is not 1-D we need more labels. Try to use labels of - # signal if dimensionality matches. + # However, if the item is not 1-D we need more labels. 
Try to use labels + # of signal if dimensionality matches. if self._signal is not None and len(field.dataset.shape) == len( self._signal.dataset.shape): return group_dims @@ -579,48 +567,6 @@ def get_dims(name, field): elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): self._valid = False - return - ################ - - indices_suffix = '_indices' - indices_attrs = { - key[:-len(indices_suffix)]: attr - for key, attr in group.attrs.items() if key.endswith(indices_suffix) - } - - dims = np.array(self._dims) - self._coord_dims = { - key: tuple(dims[np.array(indices).flatten()]) - for key, indices in indices_attrs.items() - } - self._valid = True - for name, dataset in group._group.items(): - if name not in self._coord_dims: - # TODO handle squeezing - if dataset.shape == (): - self._coord_dims[name] = () - elif name in self._dims: - # If there are named axes then items of same name are "dimension - # coordinates", i.e., have a dim matching their name. - # However, if the item is not 1-D we need more labels. Try to use labels of - # signal if dimensionality matches. 
- if dataset.ndim == len(dims): - self._coord_dims[name] = self._dims - else: - self._coord_dims[name] = (name, ) - elif (field_dims := self._guess_dims(name, dataset)) is not None: - self._coord_dims[name] = field_dims - else: - self._valid = False - - #elif name in dims: - # self._coord_dims[name] = (name, ) - #elif dataset.shape == group._group[self._signal].shape: - # self._coord_dims[name] = self._dims - #elif len(dataset.shape) == 1: - # self._coord_dims[name] = (dims[list(self.sizes.values()).index( - # dataset.shape[0])], ) - @cached_property def sizes(self) -> Dict[str, int]: return self._signal.sizes if self._valid else super().sizes @@ -668,8 +614,6 @@ def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> boo def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: da.coords.update(coords) for name in coords: - #if name not in self: - # continue if self._coord_to_attr(da, name, self._group[name]): da.attrs[name] = da.coords.pop(name) return da From f6c9b91f2f23f41e9ab06d44e915e0b0c7720f37 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 08:31:25 +0100 Subject: [PATCH 17/98] NXcylindrical_geometry --- src/scippnexus/nx2.py | 12 +++++++++++ src/scippnexus/nxcylindrical_geometry.py | 26 ++++++++++++++++-------- src/scippnexus/nxoff_geometry.py | 18 +++++----------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 899009a9..a869737d 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -295,6 +295,7 @@ def __init__(self, group: Group): field.dtype = _dtype_fromdataset(field.dataset) elif field.attrs.get('NX_class') in [ 'NXoff_geometry', + 'NXcylindrical_geometry', 'NXgeometry', ]: self._special_fields[name] = field @@ -406,6 +407,17 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: # TODO log warning return dg + # TODO It is not clear if we want to support these convenience methods + def 
__setitem__(self, key, value): + return create_field(self._group, key, value) + + def create_class(self, name, class_name: str) -> Group: + return Group(create_class(self._group, name, class_name), + definitions=self._definitions) + + def rebuild(self) -> Group: + return Group(self._group, definitions=self._definitions) + @cached_property def sizes(self) -> Dict[str, int]: return self._nexus.sizes diff --git a/src/scippnexus/nxcylindrical_geometry.py b/src/scippnexus/nxcylindrical_geometry.py index 5baef6f3..538789c2 100644 --- a/src/scippnexus/nxcylindrical_geometry.py +++ b/src/scippnexus/nxcylindrical_geometry.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional, Tuple, Union +from typing import Optional import scipp as sc -from .nxobject import NexusStructureError, NXobject +from .nx2 import Field, Group, NexusStructureError, NXobject, base_definitions def _parse(*, @@ -52,14 +52,22 @@ class NXcylindrical_geometry(NXobject): 'cylinders': ('cylinder', 'vertex_index') } - def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: - return self._dims.get(name) - - def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: - if name == 'vertices': - return sc.DType.vector3 - return None + def __init__(self, group: Group): + super().__init__(group) + for name, field in group._children.items(): + if isinstance(field, Field): + field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) + if name == 'vertices': + field.dtype = sc.DType.vector3 def load_as_array(self, detector_number: Optional[sc.Variable] = None) -> sc.Variable: return _parse(**self[()], parent_detector_number=detector_number) + + @staticmethod + def assemble_as_child(children: sc.DataGroup, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + return _parse(**children, parent_detector_number=detector_number) + + 
+base_definitions['NXcylindrical_geometry'] = NXcylindrical_geometry diff --git a/src/scippnexus/nxoff_geometry.py b/src/scippnexus/nxoff_geometry.py index f04cc4ab..f68206c8 100644 --- a/src/scippnexus/nxoff_geometry.py +++ b/src/scippnexus/nxoff_geometry.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional, Tuple, Union +from typing import Optional import scipp as sc @@ -59,17 +59,9 @@ def __init__(self, group: Group): super().__init__(group) for name, field in group._children.items(): if isinstance(field, Field): - field.sizes = dict(zip(self._get_field_dims(name), field.dataset.shape)) - if (dtype := self._get_field_dtype(name)) is not None: - field.dtype = dtype - - def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: - return self._dims.get(name) - - def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: - if name == 'vertices': - return sc.DType.vector3 - return None + field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) + if name == 'vertices': + field.dtype = sc.DType.vector3 def load_as_array(self, detector_number: Optional[sc.Variable] = None) -> sc.Variable: @@ -81,4 +73,4 @@ def assemble_as_child(children: sc.DataGroup, return off_to_shape(**children, detector_number=detector_number) -base_definitions['NXoff_geometry'] = NXoff_geometry \ No newline at end of file +base_definitions['NXoff_geometry'] = NXoff_geometry From 1c3e9302f54fba2e249908756159085bcc3328bc Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 09:08:46 +0100 Subject: [PATCH 18/98] Fallback from pre_assemble errors --- src/scippnexus/nx2.py | 5 +++- tests/nxdetector_test.py | 52 +++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index a869737d..a0326be3 100644 --- a/src/scippnexus/nx2.py +++ 
b/src/scippnexus/nx2.py @@ -400,8 +400,8 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. # (not scipp.DataArray, as that does not support lazy data) dg = self._nexus.read_children(self, sel) - dg = self._nexus.pre_assemble(dg) try: + dg = self._nexus.pre_assemble(dg) return self._nexus.assemble(dg) except (sc.DimensionError, NexusStructureError): # TODO log warning @@ -411,6 +411,9 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: def __setitem__(self, key, value): return create_field(self._group, key, value) + def create_field(self, key: str, value: sc.Variable) -> H5Dataset: + return create_field(self._group, key, value) + def create_class(self, name, class_name: str) -> Group: return Group(create_class(self._group, name, class_name), definitions=self._definitions) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index dced9bd3..51b18380 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -10,7 +10,6 @@ NXdetector, NXentry, NXevent_data, - NXobject, NXoff_geometry, NXroot, ) @@ -27,6 +26,13 @@ def h5root(request): yield f +@pytest.fixture() +def group(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield make_group(f) + + @pytest.fixture() def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" @@ -353,31 +359,29 @@ def create_off_geometry_detector_numbers_1234(group: snx2.Group, @pytest.mark.parametrize('detid_name', ['detector_number', 'pixel_id', 'spectrum_index']) -def test_loads_data_with_coords_and_off_geometry(h5root, detid_name): +def test_loads_data_with_coords_and_off_geometry(group, detid_name): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_number'] = detector_numbers_xx_yy_1234() da.coords['xx'] = 
sc.array(dims=['xx'], unit='m', values=[0.1, 0.2]) - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, detid_name, da.coords['detector_number']) - snx2.create_field(detector, 'xx', da.coords['xx']) - snx2.create_field(detector, 'data', da.data) + detector = group.create_class('detector0', NXdetector) + detector.create_field(detid_name, da.coords['detector_number']) + detector.create_field('xx', da.coords['xx']) + detector.create_field('data', da.data) detector.attrs['axes'] = ['xx', 'yy'] - det = make_group(detector) - create_off_geometry_detector_numbers_1234(det, name='shape') - det = make_group(detector) - loaded = det[...] + create_off_geometry_detector_numbers_1234(detector, name='shape') + loaded = detector[...] expected = snx.nxoff_geometry.off_to_shape( - **det['shape'][()], detector_number=da.coords['detector_number']) + **detector['shape'][()], detector_number=da.coords['detector_number']) assert sc.identical(loaded.coords['shape'].bins.size(), sc.array(dims=da.dims, values=[[1, 1], [1, 1]], unit=None)) assert sc.identical(loaded.coords['shape'], expected) def test_missing_detector_numbers_triggers_fallback_given_off_geometry_with_det_faces( - nxroot): + group): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, name='shape') @@ -386,9 +390,9 @@ def test_missing_detector_numbers_triggers_fallback_given_off_geometry_with_det_ assert sc.identical(loaded['shape'], detector['shape'][()]) -def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(nxroot): +def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(group): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = 
nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, @@ -399,7 +403,7 @@ def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(nx assert sc.identical(loaded.coords['shape'].bins.size(), sc.index(4)) -def create_cylindrical_geometry_detector_numbers_1234(group: snx.NXobject, +def create_cylindrical_geometry_detector_numbers_1234(group: snx2.Group, name: str, detector_numbers: bool = True): shape = group.create_class(name, snx.NXcylindrical_geometry) @@ -412,9 +416,9 @@ def create_cylindrical_geometry_detector_numbers_1234(group: snx.NXobject, shape['detector_number'] = sc.array(dims=['_'], values=[0, 1, 1, 0], unit=None) -def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): +def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(group): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_cylindrical_geometry_detector_numbers_1234(detector, @@ -437,9 +441,9 @@ def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): def test_cylindrical_geometry_with_missing_parent_detector_numbers_triggers_fallback( - nxroot): + group): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_cylindrical_geometry_detector_numbers_1234(detector, @@ -451,9 +455,9 @@ def test_cylindrical_geometry_with_missing_parent_detector_numbers_triggers_fall def 
test_cylindrical_geometry_with_inconsistent_detector_numbers_triggers_fallback( - nxroot): + group): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1], [3.3]]) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] detector.create_field('detector_numbers', @@ -466,9 +470,9 @@ def test_cylindrical_geometry_with_inconsistent_detector_numbers_triggers_fallba assert isinstance(loaded['shape'], sc.DataGroup) -def test_cylindrical_geometry_with_detector_numbers(nxroot): +def test_cylindrical_geometry_with_detector_numbers(group): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] detector_number = sc.array(dims=var.dims, values=[[1, 2], [3, 4]], unit=None) From 764daab60b5ffeb75614a1c81d6b2a0a56663340 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 09:42:38 +0100 Subject: [PATCH 19/98] NXevent_data --- src/scippnexus/nx2.py | 3 ++- src/scippnexus/nxevent_data2.py | 41 ++++++++++------------------- tests/nxdetector_test.py | 46 ++++++++++++++------------------- 3 files changed, 36 insertions(+), 54 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index a0326be3..ed69e46b 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -403,7 +403,8 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: try: dg = self._nexus.pre_assemble(dg) return self._nexus.assemble(dg) - except (sc.DimensionError, NexusStructureError): + except (sc.DimensionError, NexusStructureError) as e: + print(e) # TODO log warning return dg diff --git a/src/scippnexus/nxevent_data2.py b/src/scippnexus/nxevent_data2.py index 70941775..264cf4c1 100644 --- a/src/scippnexus/nxevent_data2.py +++ 
b/src/scippnexus/nxevent_data2.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Tuple import numpy as np import scipp as sc @@ -10,7 +10,6 @@ from .nx2 import ( Field, Group, - H5Dataset, NexusStructureError, NXobject, ScippIndex, @@ -28,30 +27,16 @@ def _check_for_missing_fields(fields): f"Required field {field} not found in NXevent_data") -class _NXevent_data(Group): - _field_names = [ - 'event_time_zero', 'event_index', 'event_time_offset', 'event_id', - 'cue_timestamp_zero', 'cue_index', 'pulse_height' - ] - - #@staticmethod - #def _make_class_info(info: GroupContentInfo) -> NXobjectInfo: - # """Create info object for this NeXus class.""" - # children = {} - # for name in NXevent_data._field_names: - # if (di := info.datasets.pop(name, None)) is not None: - # children[name] = FieldInfo(values=di.value, - # dims=NXevent_data._get_field_dims(name)) - # return NXobjectInfo(children=children) - - @property - def unit(self) -> None: - # Binned data, bins do not have a unit - return None - - class NXevent_data(NXobject): + def __init__(self, group: Group): + super().__init__(group) + for name, field in group._children.items(): + if name in ['event_time_zero', 'event_index']: + field.sizes = {_pulse_dimension: field.dataset.shape[0]} + elif name in ['event_time_offset', 'event_id']: + field.sizes = {_event_dimension: field.dataset.shape[0]} + @property def shape(self) -> Tuple[int]: if (event_index := self._group.get('event_index')) is not None: @@ -93,6 +78,8 @@ def read_children(self, obj: Group, select: ScippIndex) -> sc.DataGroup: start, stop, stride = index.indices(max_index) if stop + stride > max_index: last_loaded = False + elif start == stop: + last_loaded = True else: stop += stride last_loaded = True @@ -120,7 +107,7 @@ def read_children(self, obj: Group, select: ScippIndex) -> 
sc.DataGroup: event_time_offset = children['event_time_offset'][event_select] event_index = sc.array(dims=[_pulse_dimension], - values=event_index, + values=event_index[:-1] if last_loaded else event_index, dtype=sc.DType.int64, unit=None) @@ -133,7 +120,7 @@ def read_children(self, obj: Group, select: ScippIndex) -> sc.DataGroup: dg['event_id'] = event_id return dg - def assemble(self, children: sc.DataGroup) -> sc.DataGroup: + def assemble(self, children: sc.DataGroup) -> sc.DataArray: _check_for_missing_fields(children) event_time_offset = children['event_time_offset'] event_time_zero = children['event_time_zero'] @@ -168,4 +155,4 @@ def assemble(self, children: sc.DataGroup) -> sc.DataGroup: return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) -base_definitions['NXevent_data'] = NXevent_data \ No newline at end of file +base_definitions['NXevent_data'] = NXevent_data diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 51b18380..15bbc8c3 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -5,14 +5,8 @@ import scippnexus as snx import scippnexus.nx2 as snx2 -from scippnexus import ( - NexusStructureError, - NXdetector, - NXentry, - NXevent_data, - NXoff_geometry, - NXroot, -) +from scippnexus import NexusStructureError, NXdetector, NXentry, NXoff_geometry, NXroot +from scippnexus.nxevent_data2 import NXevent_data def make_group(group: h5py.Group) -> snx2.Group: @@ -310,8 +304,8 @@ def test_event_data_field_dims_labels(nxroot): assert detector['detector_number'].dims == ('detector_number', ) -def test_nxevent_data_selection_yields_correct_pulses(nxroot): - detector = nxroot.create_class('detector0', NXdetector) +def test_nxevent_data_selection_yields_correct_pulses(group): + detector = group.create_class('detector0', NXdetector) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) class Load: @@ -321,22 +315,22 @@ def __getitem__(self, select=...): return da.bins.size().values 
assert np.array_equal(Load()[...], [3, 0, 2, 1]) - assert np.array_equal(Load()['pulse', 0], 3) - assert np.array_equal(Load()['pulse', 1], 0) - assert np.array_equal(Load()['pulse', 3], 1) - assert np.array_equal(Load()['pulse', -1], 1) - assert np.array_equal(Load()['pulse', -2], 2) - assert np.array_equal(Load()['pulse', 0:0], []) - assert np.array_equal(Load()['pulse', 1:1], []) - assert np.array_equal(Load()['pulse', 1:-3], []) - assert np.array_equal(Load()['pulse', 3:3], []) - assert np.array_equal(Load()['pulse', -1:-1], []) - assert np.array_equal(Load()['pulse', 0:1], [3]) - assert np.array_equal(Load()['pulse', 0:-3], [3]) - assert np.array_equal(Load()['pulse', -1:], [1]) - assert np.array_equal(Load()['pulse', -2:-1], [2]) - assert np.array_equal(Load()['pulse', -2:], [2, 1]) - assert np.array_equal(Load()['pulse', :-2], [3, 0]) + assert np.array_equal(Load()['event_time_zero', 0], 3) + assert np.array_equal(Load()['event_time_zero', 1], 0) + assert np.array_equal(Load()['event_time_zero', 3], 1) + assert np.array_equal(Load()['event_time_zero', -1], 1) + assert np.array_equal(Load()['event_time_zero', -2], 2) + assert np.array_equal(Load()['event_time_zero', 0:0], []) + assert np.array_equal(Load()['event_time_zero', 1:1], []) + assert np.array_equal(Load()['event_time_zero', 1:-3], []) + assert np.array_equal(Load()['event_time_zero', 3:3], []) + assert np.array_equal(Load()['event_time_zero', -1:-1], []) + assert np.array_equal(Load()['event_time_zero', 0:1], [3]) + assert np.array_equal(Load()['event_time_zero', 0:-3], [3]) + assert np.array_equal(Load()['event_time_zero', -1:], [1]) + assert np.array_equal(Load()['event_time_zero', -2:-1], [2]) + assert np.array_equal(Load()['event_time_zero', -2:], [2, 1]) + assert np.array_equal(Load()['event_time_zero', :-2], [3, 0]) def create_off_geometry_detector_numbers_1234(group: snx2.Group, From b2327b9a75ace2e6ee0086d1677a13168bac1c38 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 
11:27:36 +0100 Subject: [PATCH 20/98] Do not auto-group event data by detector --- src/scippnexus/nx2.py | 66 ++++++++++++++++++++++++++++++--- src/scippnexus/nxevent_data2.py | 32 +++++++++++++++- tests/nexus_test.py | 8 ++-- tests/nxdetector_test.py | 31 +++++++--------- 4 files changed, 110 insertions(+), 27 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index ed69e46b..d65624af 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -9,7 +9,7 @@ from collections.abc import Mapping from dataclasses import dataclass from functools import cached_property -from typing import Any, Dict, Iterator, Optional, Protocol, Tuple, Union +from typing import Any, Dict, Iterator, List, Optional, Protocol, Tuple, Union import dateutil.parser import numpy as np @@ -430,6 +430,10 @@ def sizes(self) -> Dict[str, int]: def dims(self) -> Tuple[str, ...]: return tuple(self.sizes) + @property + def shape(self) -> Tuple[int, ...]: + return tuple(self.sizes.values()) + def _guess_dims(dims, shape, dataset: H5Dataset): """Guess dims of non-signal dataset based on shape. 
@@ -518,8 +522,8 @@ def _get_group_dims(): named_axes = [a for a in axes if a != '.'] elif signal_axes is not None: named_axes = signal_axes.split(',') - # elif fallback_dims is not None: - # named_axes = fallback_dims + elif fallback_dims is not None: + named_axes = fallback_dims else: named_axes = [] @@ -648,14 +652,15 @@ def __init__(self, group: Group): class NXdetector(NXdata): + _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] def __init__(self, group: Group): super().__init__(group, fallback_signal_name='data') @property def detector_number(self) -> Optional[str]: - for name in ['detector_number', 'pixel_id', 'spectrum_index']: - if name in self._group: + for name in self._detector_number_fields: + if name in self._group._children: return name @@ -710,3 +715,54 @@ def create_class(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Gro attr = nx_class if isinstance(nx_class, str) else nx_class.__name__ group.attrs['NX_class'] = attr return group + + +def _group_events(*, + event_data: sc.DataArray, + grouping: Optional[sc.Variable] = None) -> sc.DataArray: + if isinstance(event_data, sc.DataGroup): + raise NexusStructureError("Invalid NXevent_data in NXdetector.") + if grouping is None: + event_id = 'event_id' + else: + # copy since sc.bin cannot deal with a non-contiguous view + event_id = grouping.flatten(to='event_id').copy() + event_data.bins.coords['event_time_zero'] = sc.bins_like( + event_data, fill_value=event_data.coords['event_time_zero']) + # After loading raw NXevent_data it is guaranteed that the event table + # is contiguous and that there is no masking. We can therefore use the + # more efficient approach of binning from scratch instead of erasing the + # 'event_time_zero' binning defined by NXevent_data. 
+ event_data = event_data.bins.constituents['data'].group(event_id) + # if self._grouping is None: + # event_data.coords[self._grouping_key] = event_data.coords.pop('event_id') + # else: + # del event_data.coords['event_id'] + if grouping is None: + return event_data + return event_data.fold(dim='event_id', sizes=grouping.sizes) + + +def _find_event_entries(dg: sc.DataGroup) -> List[str]: + event_entries = [] + for name, value in dg.items(): + if isinstance( + value, sc.DataArray + ) and 'event_time_zero' in value.coords and value.bins is not None: + event_entries.append(name) + return event_entries + + +def group_events_by_detector_number(dg: sc.DataGroup) -> sc.DataArray: + event_entry = _find_event_entries(dg)[0] + events = dg.pop(event_entry) + grouping_key = None + for key in NXdetector._detector_number_fields: + if (grouping := dg.get(key)) is not None: + grouping_key = key + break + grouping = None if grouping_key is None else asarray(dg.pop(grouping_key)) + da = _group_events(event_data=events, grouping=grouping) + # TODO What about _coord_to_attr mapping as NXdata? 
+ da.coords.update(dg) + return da diff --git a/src/scippnexus/nxevent_data2.py b/src/scippnexus/nxevent_data2.py index 264cf4c1..a589bc10 100644 --- a/src/scippnexus/nxevent_data2.py +++ b/src/scippnexus/nxevent_data2.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Tuple import numpy as np import scipp as sc @@ -13,6 +13,7 @@ NexusStructureError, NXobject, ScippIndex, + asarray, base_definitions, ) @@ -154,5 +155,34 @@ def assemble(self, children: sc.DataGroup) -> sc.DataArray: return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) + # TODO now unused + @staticmethod + def assemble_as_child( + event_data: sc.DataArray, + detector_number: Optional[sc.Variable] = None) -> sc.DataArray: + grouping = asarray(detector_number) + + if isinstance(event_data, sc.DataGroup): + raise NexusStructureError("Invalid NXevent_data in NXdetector.") + if grouping is None: + event_id = 'event_id' + else: + # copy since sc.bin cannot deal with a non-contiguous view + event_id = grouping.flatten(to='event_id').copy() + event_data.bins.coords['event_time_zero'] = sc.bins_like( + event_data, fill_value=event_data.coords['event_time_zero']) + # After loading raw NXevent_data it is guaranteed that the event table + # is contiguous and that there is no masking. We can therefore use the + # more efficient approach of binning from scratch instead of erasing the + # 'pulse' binning defined by NXevent_data. 
+ event_data = event_data.bins.constituents['data'].group(event_id) + # if self._grouping is None: + # event_data.coords[self._grouping_key] = event_data.coords.pop('event_id') + # else: + # del event_data.coords['event_id'] + if grouping is None: + return event_data + return event_data.fold(dim='event_id', sizes=grouping.sizes) + base_definitions['NXevent_data'] = NXevent_data diff --git a/tests/nexus_test.py b/tests/nexus_test.py index f2c5e04b..59254267 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -102,7 +102,6 @@ def test_nxobject_log(h5root): snx.create_field(log, 'value', da.data) snx.create_field(log, 'time', da.coords['time'] - sc.epoch(unit='ns')) log = snx.Group(log, definitions=snx.base_definitions) - #assert log.nx_class == NXlog assert sc.identical(log[...], da) @@ -115,7 +114,8 @@ def test_nxlog_with_missing_value_triggers_fallback(nxroot): assert sc.identical(loaded, sc.DataGroup(time=time.rename(time='dim_0'))) -def test_nxlog_length_1(nxroot): +def test_nxlog_length_1(h5root): + nxroot = snx.Group(h5root, definitions=snx.base_definitions) da = sc.DataArray( sc.array(dims=['time'], values=[1.1]), coords={ @@ -123,10 +123,10 @@ def test_nxlog_length_1(nxroot): sc.epoch(unit='ns') + sc.array(dims=['time'], unit='s', values=[4.4]).to(unit='ns', dtype='int64') }) - log = nxroot['entry'].create_class('log', NXlog) + log = nxroot.create_class('log', NXlog) log['value'] = da.data log['time'] = da.coords['time'] - sc.epoch(unit='ns') - assert log.nx_class == NXlog + log = log.rebuild() assert sc.identical(log[...], da) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 15bbc8c3..ec458530 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -176,14 +176,15 @@ def create_event_data_ids_1234(group): sc.array(dims=[''], unit='None', values=[0, 3, 3, 5])) -def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(nxroot): +def 
test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(group): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) assert detector.dims == ('detector_number', ) assert detector.shape == (4, ) loaded = detector[...] + print(loaded) assert sc.identical( loaded.bins.size().data, sc.array(dims=['detector_number'], @@ -194,8 +195,8 @@ def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(nxr assert 'event_time_zero' in loaded.bins.coords -def test_loads_event_data_with_0d_detector_numbers(nxroot): - detector = nxroot.create_class('detector0', NXdetector) +def test_loads_event_data_with_0d_detector_numbers(group): + detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', sc.index(1, dtype='int64')) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) assert detector.dims == () @@ -204,8 +205,8 @@ def test_loads_event_data_with_0d_detector_numbers(nxroot): assert sc.identical(loaded.bins.size().data, sc.index(2, dtype='int64')) -def test_loads_event_data_with_2d_detector_numbers(nxroot): - detector = nxroot.create_class('detector0', NXdetector) +def test_loads_event_data_with_2d_detector_numbers(group): + detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) assert detector.dims == ('dim_0', 'dim_1') @@ -263,19 +264,15 @@ def test_select_events_slice_does_not_affect_original_detector(nxroot): def test_loading_event_data_creates_automatic_detector_numbers_if_not_present_in_file( - nxroot): - detector = nxroot.create_class('detector0', NXdetector) + group): 
+ detector = group.create_class('detector0', NXdetector) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - assert detector.dims == ['detector_number'] - with pytest.raises(NexusStructureError): - detector.shape + assert detector.dims == ('event_time_zero', ) loaded = detector[...] + loaded = snx2.group_events_by_detector_number(loaded) assert sc.identical( loaded.bins.size().data, - sc.array(dims=['detector_number'], - unit=None, - dtype='int64', - values=[2, 3, 0, 1])) + sc.array(dims=['event_id'], unit=None, dtype='int64', values=[2, 3, 1])) def test_loading_event_data_with_selection_and_automatic_detector_numbers_raises( @@ -296,9 +293,9 @@ def test_loading_event_data_with_full_selection_and_automatic_detector_numbers_w assert tuple(detector[()].shape) == (4, ) -def test_event_data_field_dims_labels(nxroot): +def test_event_data_field_dims_labels(group): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) - detector = nxroot.create_class('detector0', NXdetector) + detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) assert detector['detector_number'].dims == ('detector_number', ) From 66d6067bfe9e797aec18c9eecee2cfad37e156a5 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 20 Mar 2023 11:53:12 +0100 Subject: [PATCH 21/98] Continue work on event data --- src/scippnexus/nx2.py | 13 +++++++++++-- tests/nxdetector_test.py | 40 +++++++++++++++------------------------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index d65624af..b5e05088 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -506,6 +506,13 @@ def _get_group_dims(): group_dims = _get_group_dims() + # Reject fallback dims if they are not compatible with group dims + if fallback_dims is not None: + for field in 
group._children.values(): + if len(fallback_dims) < len(field.shape): + fallback_dims = None + break + if group_dims is None: group_dims = fallback_dims @@ -564,7 +571,7 @@ def get_dims(name, field): if self._signal is not None and len(field.dataset.shape) == len( self._signal.dataset.shape): return group_dims - return [name] + return (name, ) if self._signal is not None and group_dims is not None: return _guess_dims(group_dims, self._signal.dataset.shape, field.dataset) @@ -655,7 +662,9 @@ class NXdetector(NXdata): _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] def __init__(self, group: Group): - super().__init__(group, fallback_signal_name='data') + super().__init__(group, + fallback_dims=('detector_number', ), + fallback_signal_name='data') @property def detector_number(self) -> Optional[str]: diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index ec458530..f8a20a9e 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -181,18 +181,17 @@ def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(gro detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - assert detector.dims == ('detector_number', ) - assert detector.shape == (4, ) + assert detector.sizes == {'detector_number': 4, 'event_time_zero': 4} loaded = detector[...] 
- print(loaded) + da = snx2.group_events_by_detector_number(loaded) assert sc.identical( - loaded.bins.size().data, + da.bins.size().data, sc.array(dims=['detector_number'], unit=None, dtype='int64', values=[2, 3, 0, 1])) - assert 'event_time_offset' in loaded.bins.coords - assert 'event_time_zero' in loaded.bins.coords + assert 'event_time_offset' in da.bins.coords + assert 'event_time_zero' in da.bins.coords def test_loads_event_data_with_0d_detector_numbers(group): @@ -220,43 +219,34 @@ def test_loads_event_data_with_2d_detector_numbers(group): values=[[2, 3], [0, 1]])) -def test_select_events_slices_underlying_event_data(nxroot): - detector = nxroot.create_class('detector0', NXdetector) +def test_select_events_slices_underlying_event_data(group): + detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + da = snx2.group_events_by_detector_number(detector['event_time_zero', :2]) assert sc.identical( - detector.select_events['pulse', :2][...].bins.size().data, + da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', values=[[1, 1], [0, 1]])) + da = snx2.group_events_by_detector_number(detector['event_time_zero', :3]) assert sc.identical( - detector.select_events['pulse', :3][...].bins.size().data, + da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', values=[[2, 2], [0, 1]])) + da = snx2.group_events_by_detector_number(detector['event_time_zero', 3]) assert sc.identical( - detector.select_events['pulse', 3][...].bins.size().data, + da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', values=[[0, 1], [0, 0]])) + da = snx2.group_events_by_detector_number(detector[()]) assert sc.identical( - detector.select_events[...][...].bins.size().data, - sc.array(dims=['dim_0', 'dim_1'], - unit=None, - dtype='int64', - values=[[2, 3], [0, 1]])) - - 
-def test_select_events_slice_does_not_affect_original_detector(nxroot): - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field('detector_number', detector_numbers_xx_yy_1234()) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - detector.select_events['pulse', 0][...] - assert sc.identical( - detector[...].bins.size().data, + da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', From d6582504fbc123a5b677951a62196719ff9f5d35 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 21 Mar 2023 09:05:37 +0100 Subject: [PATCH 22/98] Update monitor tests --- src/scippnexus/nx2.py | 8 +++++++ tests/nxmonitor_test.py | 49 ++++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index b5e05088..322a4259 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -673,6 +673,13 @@ def detector_number(self) -> Optional[str]: return name +class NXmonitor(NXdata): + + # TODO should read axes of fallback signal? 
+ def __init__(self, group: Group): + super().__init__(group, fallback_signal_name='data') + + class NXgeometry(NXobject): def __init__(self, group: Group): @@ -689,6 +696,7 @@ def assemble_as_child(children: sc.DataGroup, base_definitions['NXlog'] = NXlog base_definitions['NXdetector'] = NXdetector base_definitions['NXgeometry'] = NXgeometry +base_definitions['NXmonitor'] = NXmonitor def create_field(group: H5Group, name: str, data: DimensionedArray, diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index 8df167ca..de968081 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -2,27 +2,37 @@ import pytest import scipp as sc -from scippnexus import NXentry, NXevent_data, NXmonitor, NXroot +import scippnexus.nx2 as snx +from scippnexus.nxevent_data2 import NXevent_data @pytest.fixture() -def nxroot(request): +def h5root(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = NXroot(f) - root.create_class('entry', NXentry) - yield root + yield f -def test_dense_monitor(nxroot): - monitor = nxroot['entry'].create_class('monitor', NXmonitor) - assert monitor.nx_class == NXmonitor +def make_group(group: h5py.Group) -> snx.Group: + return snx.Group(group, definitions=snx.base_definitions) + + +@pytest.fixture() +def group(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield snx.Group(f, definitions=snx.base_definitions) + + +def test_dense_monitor(h5root): + monitor = snx.create_class(h5root, 'monitor', snx.NXmonitor) da = sc.DataArray( sc.array(dims=['time_of_flight'], values=[1.0]), coords={'time_of_flight': sc.array(dims=['time_of_flight'], values=[1.0])}) - monitor['data'] = da.data - monitor['data'].attrs['axes'] = 'time_of_flight' - monitor['time_of_flight'] = da.coords['time_of_flight'] + data = snx.create_field(monitor, 
'data', da.data) + data.attrs['axes'] = 'time_of_flight' + snx.create_field(monitor, 'time_of_flight', da.coords['time_of_flight']) + monitor = make_group(monitor) assert sc.identical(monitor[...], da) @@ -36,8 +46,8 @@ def create_event_data_no_ids(group): values=[0, 3, 3, 5])) -def test_loads_event_data_in_current_group(nxroot): - monitor = nxroot.create_class('monitor1', NXmonitor) +def test_loads_event_data_in_current_group(group): + monitor = group.create_class('monitor1', snx.NXmonitor) create_event_data_no_ids(monitor) assert monitor.dims == ('pulse', ) assert monitor.shape == (4, ) @@ -47,12 +57,15 @@ def test_loads_event_data_in_current_group(nxroot): sc.array(dims=['pulse'], unit=None, dtype='int64', values=[3, 0, 2, 1])) -def test_loads_event_data_in_child_group(nxroot): - monitor = nxroot.create_class('monitor1', NXmonitor) +def test_loads_event_data_in_child_group(group): + monitor = group.create_class('monitor1', snx.NXmonitor) create_event_data_no_ids(monitor.create_class('events', NXevent_data)) - assert monitor.dims == ('pulse', ) + assert monitor.dims == ('event_time_zero', ) assert monitor.shape == (4, ) loaded = monitor[...] 
assert sc.identical( - loaded.bins.size().data, - sc.array(dims=['pulse'], unit=None, dtype='int64', values=[3, 0, 2, 1])) + loaded['events'].bins.size().data, + sc.array(dims=['event_time_zero'], + unit=None, + dtype='int64', + values=[3, 0, 2, 1])) From 2ab8015c1219d69180582e96b154c2df55cb6fa4 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 21 Mar 2023 09:24:08 +0100 Subject: [PATCH 23/98] Fix remaining NXdetector tests --- tests/nxdetector_test.py | 47 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index f8a20a9e..80ff3037 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -2,6 +2,7 @@ import numpy as np import pytest import scipp as sc +from scipp.testing import assert_identical import scippnexus as snx import scippnexus.nx2 as snx2 @@ -65,21 +66,20 @@ def test_finds_data_from_group_attr(h5root): assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) -def test_loads_events_when_data_and_events_found(nxroot): +def test_loads_signal_and_events_when_both_found(group): detector_number = sc.array(dims=[''], unit=None, values=np.array([1, 2])) - data = sc.ones(dims=['xx'], shape=[2]) - detector = nxroot.create_class('detector0', NXdetector) + data = sc.ones(dims=['detector_number'], shape=[2]) + detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_number) detector.create_field('data', data) - assert detector[...].bins is None - detector.create_field('event_id', sc.array(dims=[''], unit=None, values=[1])) - detector.create_field('event_time_offset', sc.array(dims=[''], unit='s', - values=[1])) - detector.create_field('event_time_zero', sc.array(dims=[''], unit='s', values=[1])) - detector.create_field('event_index', sc.array(dims=[''], unit='None', values=[0])) + events = detector.create_class('events', NXevent_data) + events.create_field('event_id', 
sc.array(dims=[''], unit=None, values=[1])) + events.create_field('event_time_offset', sc.array(dims=[''], unit='s', values=[1])) + events.create_field('event_time_zero', sc.array(dims=[''], unit='s', values=[1])) + events.create_field('event_index', sc.array(dims=[''], unit='None', values=[0])) loaded = detector[...] - assert loaded.bins is not None - assert loaded.values[0].variances is None + assert_identical(loaded['data'], data) + assert loaded['events'].bins is not None def detector_numbers_xx_yy_1234(): @@ -198,9 +198,9 @@ def test_loads_event_data_with_0d_detector_numbers(group): detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', sc.index(1, dtype='int64')) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - assert detector.dims == () - assert detector.shape == () - loaded = detector[...] + assert detector.dims == ('event_time_zero', ) + assert detector.shape == (4, ) + loaded = snx2.group_events_by_detector_number(detector[...]) assert sc.identical(loaded.bins.size().data, sc.index(2, dtype='int64')) @@ -208,9 +208,8 @@ def test_loads_event_data_with_2d_detector_numbers(group): detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - assert detector.dims == ('dim_0', 'dim_1') - assert detector.shape == (2, 2) - loaded = detector[...] 
+ assert detector.sizes == {'dim_0': 2, 'dim_1': 2, 'event_time_zero': 4} + loaded = snx2.group_events_by_detector_number(detector[...]) assert sc.identical( loaded.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], @@ -265,20 +264,20 @@ def test_loading_event_data_creates_automatic_detector_numbers_if_not_present_in sc.array(dims=['event_id'], unit=None, dtype='int64', values=[2, 3, 1])) -def test_loading_event_data_with_selection_and_automatic_detector_numbers_raises( - nxroot): - detector = nxroot.create_class('detector0', NXdetector) +def test_loading_event_data_with_det_selection_and_automatic_detector_numbers_raises( + group): + detector = group.create_class('detector0', NXdetector) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - assert detector.dims == ['detector_number'] + assert detector.dims == ('event_time_zero', ) with pytest.raises(sc.DimensionError): detector['detector_number', 0] def test_loading_event_data_with_full_selection_and_automatic_detector_numbers_works( - nxroot): - detector = nxroot.create_class('detector0', NXdetector) + group): + detector = group.create_class('detector0', NXdetector) create_event_data_ids_1234(detector.create_class('events', NXevent_data)) - assert detector.dims == ['detector_number'] + assert detector.dims == ('event_time_zero', ) assert tuple(detector[...].shape) == (4, ) assert tuple(detector[()].shape) == (4, ) From aa7fb1234045da41c2d7cff6b1e903035557dfbe Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 21 Mar 2023 10:08:32 +0100 Subject: [PATCH 24/98] Pass some NXtransformations tests --- src/scippnexus/nxtransformations.py | 19 +++++++++- tests/nxtransformations_test.py | 57 +++++++++++++---------------- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/src/scippnexus/nxtransformations.py b/src/scippnexus/nxtransformations.py index e6f666d9..d51bc494 100644 --- a/src/scippnexus/nxtransformations.py +++ b/src/scippnexus/nxtransformations.py @@ -9,7 +9,15 @@ import 
scipp as sc from scipp.scipy import interpolate -from .nxobject import Field, NexusStructureError, NXobject, ScippIndex +from ._common import to_child_select +from .nx2 import ( + Field, + Group, + NexusStructureError, + NXobject, + ScippIndex, + base_definitions, +) class TransformationError(NexusStructureError): @@ -34,6 +42,12 @@ def _getitem(self, index: ScippIndex) -> sc.DataGroup: for name, child in self.items() }) + def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + # Note that this will be similar in NXdata, but there we need to handle + # bin edges as well. + child_sel = to_child_select(self.sizes.keys(), child.dims, sel) + return Transformation(child)[child_sel] + class Transformation: @@ -180,3 +194,6 @@ def _get_transformations(transform: Transformation, *, # to deal with changing beamline components (e.g. pixel positions) during a # live data stream (see https://github.com/scipp/scippneutron/issues/76). return transformations + + +base_definitions['NXtransformations'] = NXtransformations \ No newline at end of file diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 0fcdaba1..850b6e51 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -3,23 +3,19 @@ import pytest import scipp as sc -from scippnexus import ( - NXdetector, - NXentry, - NXlog, - NXroot, - NXtransformations, - nxtransformations, -) +import scippnexus.nx2 as snx +from scippnexus.nxtransformations import NXtransformations + + +def make_group(group: h5py.Group) -> snx.Group: + return snx.Group(group, definitions=snx.base_definitions) @pytest.fixture() -def nxroot(): +def h5root(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = NXroot(f) - root.create_class('entry', NXentry) - yield root + yield f def create_detector(group): @@ -211,14 +207,14 @@ def 
test_broken_time_dependent_transformation_returns_path_and_transformations(n def write_translation(group, name: str, value: sc.Variable, offset: sc.Variable, vector: sc.Variable) -> None: - dset = group.create_field(name, value) + dset = snx.create_field(group, name, value) dset.attrs['transformation_type'] = 'translation' dset.attrs['offset'] = offset.values dset.attrs['offset_units'] = str(offset.unit) dset.attrs['vector'] = vector.value -def test_nxtransformations_group_single_item(nxroot): +def test_nxtransformations_group_single_item(h5root): value = sc.scalar(2.4, unit='mm') offset = sc.spatial.translation(value=[6, 2, 6], unit='mm') vector = sc.vector(value=[0, 1, 1]) @@ -226,16 +222,16 @@ def test_nxtransformations_group_single_item(nxroot): expected = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) - transformations = nxroot.create_class('transformations', NXtransformations) + transformations = snx.create_class(h5root, 'transformations', NXtransformations) write_translation(transformations, 't1', value, offset, vector) - loaded = nxroot['transformations'][()] + loaded = make_group(h5root)['transformations'][()] assert set(loaded.keys()) == {'t1'} assert sc.identical(loaded['t1'], expected) -def test_nxtransformations_group_two_independent_items(nxroot): - transformations = nxroot.create_class('transformations', NXtransformations) +def test_nxtransformations_group_two_independent_items(h5root): + transformations = snx.create_class(h5root, 'transformations', NXtransformations) value = sc.scalar(2.4, unit='mm') offset = sc.spatial.translation(value=[6, 2, 6], unit='mm') @@ -251,14 +247,14 @@ def test_nxtransformations_group_two_independent_items(nxroot): expected2 = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) - loaded = nxroot['transformations'][()] + loaded = 
make_group(h5root)['transformations'][()] assert set(loaded.keys()) == {'t1', 't2'} assert sc.identical(loaded['t1'], expected1) assert sc.identical(loaded['t2'], expected2) -def test_nxtransformations_group_single_chain(nxroot): - transformations = nxroot.create_class('transformations', NXtransformations) +def test_nxtransformations_group_single_chain(h5root): + transformations = snx.create_class(h5root, 'transformations', NXtransformations) value = sc.scalar(2.4, unit='mm') offset = sc.spatial.translation(value=[6, 2, 6], unit='mm') @@ -272,18 +268,17 @@ def test_nxtransformations_group_single_chain(nxroot): t = value.to(unit='m') * vector write_translation(transformations, 't2', value, offset, vector) transformations['t2'].attrs['depends_on'] = 't1' - expected2 = (expected1 * - sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * + expected2 = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) - loaded = nxroot['transformations'][()] + loaded = make_group(h5root)['transformations'][()] assert set(loaded.keys()) == {'t1', 't2'} assert sc.identical(loaded['t1'], expected1) assert sc.allclose(loaded['t2'], expected2) -def test_slice_transformations(nxroot): - transformations = nxroot.create_class('transformations', NXtransformations) +def test_slice_transformations(h5root): + transformations = snx.create_class(h5root, 'transformations', NXtransformations) log = sc.DataArray( sc.array(dims=['time'], values=[1.1, 2.2, 3.3], unit='m'), coords={'time': sc.array(dims=['time'], values=[11, 22, 33], unit='s')}) @@ -292,9 +287,9 @@ def test_slice_transformations(nxroot): vector = sc.vector(value=[0, 0, 1]) t = log * vector t.data = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) - value1 = transformations.create_class('t1', NXlog) - value1['time'] = log.coords['time'] - sc.epoch(unit='ns') - value1['value'] = log.data + value1 = 
snx.create_class(transformations, 't1', snx.NXlog) + snx.create_field(value1, 'time', log.coords['time'] - sc.epoch(unit='ns')) + snx.create_field(value1, 'value', log.data) value1.attrs['transformation_type'] = 'translation' value1.attrs['offset'] = offset.values value1.attrs['offset_units'] = str(offset.unit) @@ -302,5 +297,5 @@ def test_slice_transformations(nxroot): expected = t * offset - assert sc.identical(nxroot['transformations']['time', 1:3]['t1'], expected['time', - 1:3]) + assert sc.identical( + make_group(h5root)['transformations']['time', 1:3]['t1'], expected['time', 1:3]) From e4f1900396401364a97fcbd35dc7ff214a98d4c3 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 21 Mar 2023 10:13:48 +0100 Subject: [PATCH 25/98] Do not convert all translations to meter --- src/scippnexus/nxtransformations.py | 3 ++- tests/nxtransformations_test.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/scippnexus/nxtransformations.py b/src/scippnexus/nxtransformations.py index d51bc494..8066c71f 100644 --- a/src/scippnexus/nxtransformations.py +++ b/src/scippnexus/nxtransformations.py @@ -95,7 +95,6 @@ def __getitem__(self, select: ScippIndex): t = value * self.vector v = t if isinstance(t, sc.Variable) else t.data if transformation_type == 'translation': - v = v.to(unit='m', copy=False) v = sc.spatial.translations(dims=v.dims, values=v.values, unit=v.unit) elif transformation_type == 'rotation': v = sc.spatial.rotations_from_rotvecs(v) @@ -111,6 +110,8 @@ def __getitem__(self, select: ScippIndex): return t offset = sc.vector(value=offset.values, unit=offset.unit).to(unit='m') offset = sc.spatial.translation(value=offset.value, unit=offset.unit) + if transformation_type == 'translation': + offset = offset.to(unit=t.unit, copy=False) return t * offset except (sc.DimensionError, sc.UnitError) as e: raise NexusStructureError( diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 
850b6e51..f91599ef 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -218,9 +218,9 @@ def test_nxtransformations_group_single_item(h5root): value = sc.scalar(2.4, unit='mm') offset = sc.spatial.translation(value=[6, 2, 6], unit='mm') vector = sc.vector(value=[0, 1, 1]) - t = value.to(unit='m') * vector + t = value * vector expected = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * - sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) + offset) transformations = snx.create_class(h5root, 'transformations', NXtransformations) write_translation(transformations, 't1', value, offset, vector) @@ -236,16 +236,16 @@ def test_nxtransformations_group_two_independent_items(h5root): value = sc.scalar(2.4, unit='mm') offset = sc.spatial.translation(value=[6, 2, 6], unit='mm') vector = sc.vector(value=[0, 1, 1]) - t = value.to(unit='m') * vector + t = value * vector write_translation(transformations, 't1', value, offset, vector) expected1 = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * - sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) + offset) value = value * 0.1 - t = value.to(unit='m') * vector + t = value * vector write_translation(transformations, 't2', value, offset, vector) expected2 = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * - sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) + offset) loaded = make_group(h5root)['transformations'][()] assert set(loaded.keys()) == {'t1', 't2'} @@ -259,17 +259,17 @@ def test_nxtransformations_group_single_chain(h5root): value = sc.scalar(2.4, unit='mm') offset = sc.spatial.translation(value=[6, 2, 6], unit='mm') vector = sc.vector(value=[0, 1, 1]) - t = value.to(unit='m') * vector + t = value * vector write_translation(transformations, 't1', value, offset, vector) expected1 = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * - sc.spatial.translation(value=[0.006, 0.002, 
0.006], unit='m')) + offset) value = value * 0.1 - t = value.to(unit='m') * vector + t = value * vector write_translation(transformations, 't2', value, offset, vector) transformations['t2'].attrs['depends_on'] = 't1' expected2 = (sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * - sc.spatial.translation(value=[0.006, 0.002, 0.006], unit='m')) + offset) loaded = make_group(h5root)['transformations'][()] assert set(loaded.keys()) == {'t1', 't2'} From 97af919923737477ff7041d5b2fc7c172cbcb341 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 21 Mar 2023 14:06:11 +0100 Subject: [PATCH 26/98] Set depends_on as attr instead of executing transform chains --- src/scippnexus/nx2.py | 67 +++++++++--- src/scippnexus/nxtransformations.py | 61 +++++------ tests/nxtransformations_test.py | 158 ++++++++++++++++------------ 3 files changed, 169 insertions(+), 117 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index 322a4259..ab32d08f 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -162,6 +162,11 @@ def dims(self) -> Tuple[str]: def shape(self) -> Tuple[int, ...]: return tuple(self.sizes.values()) + @property + def parent(self) -> H5Group: + # TODO Get corrected definitions + return Group(self.dataset.parent, definitions=base_definitions) + def _load_variances(self, var, index): stddevs = sc.empty(dims=var.dims, shape=var.shape, @@ -289,16 +294,21 @@ class NXobject: def __init__(self, group: Group): self._group = group self._special_fields = {} + self._transformations = {} for name, field in group._children.items(): if isinstance(field, Field): field.sizes = _squeezed_field_sizes(field.dataset) field.dtype = _dtype_fromdataset(field.dataset) - elif field.attrs.get('NX_class') in [ - 'NXoff_geometry', - 'NXcylindrical_geometry', - 'NXgeometry', - ]: - self._special_fields[name] = field + elif (nx_class := field.attrs.get('NX_class')) is not None: + if nx_class in [ + 'NXoff_geometry', + 'NXcylindrical_geometry', + 
'NXgeometry', + ]: + self._special_fields[name] = field + elif nx_class == 'NXtransformations': + self._special_fields[name] = field + self._transformations[name] = field @cached_property def sizes(self) -> Dict[str, int]: @@ -322,18 +332,27 @@ def detector_number(self) -> Optional[str]: def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: for name, field in self._special_fields.items(): + if name in self._transformations: + continue det_num = self.detector_number if det_num is not None: det_num = dg[det_num] dg[name] = field._nexus.assemble_as_child(dg[name], detector_number=det_num) + if (depends_on := dg.get('depends_on')) is not None: + transform = self._group[depends_on] + # Avoid loading transform twice if it is a child of the same group + for name, transformations in self._transformations.items(): + if transform.name.startswith(transformations.name): + dg['depends_on'] = dg[name][depends_on.split('/')[-1]] + break + else: + dg['depends_on'] = transform[()] return dg def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: return dg -# Group adds children/dims caching, removes __setitem__? -# class Group(WriteableGroup): class Group(Mapping): def __init__(self, @@ -350,17 +369,17 @@ def attrs(self) -> Dict[str, Any]: # with 1000 subgroups. return dict(self._group.attrs) if self._group.attrs else dict() - # TODO - # should this by Dict[str, Union[H5Group, H5Dataset]]? - # then we can recreate Group on every access (in principle more repeated init, - # but maybe better since it "clears" the cache)? + @property + def name(self) -> str: + return self._group.name + + @property + def parent(self) -> Optional[Group]: + return Group(self._group.parent, + definitions=self._definitions) if self._group.parent else None + @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: - # split off special children here? 
- # - depends_on - # - NXoff_geometry and NXcylindrical_geometry - # - legacy NXgeometry - # - NXtransformations items = { name: Field(obj) if is_dataset(obj) else Group(obj, definitions=self._definitions) @@ -391,12 +410,26 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[str]: return self._children.__iter__() + def _is_nxtransformations(self) -> bool: + return self.attrs.get('NX_class') == 'NXtransformations' + def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: if isinstance(sel, str): + if '/' in sel: + if sel.startswith('/'): + return Group(self._group.file, + definitions=self._definitions)[sel[1:]] + else: + return self[sel.split('/')[0]][sel[sel.index('/') + 1:]] child = self._children[sel] if isinstance(child, Field): self._populate_fields() + if self._is_nxtransformations(): + from .nxtransformations import Transformation + return Transformation(child) return child + # Get child again, as it may have ben replaced by call to _populate_fields + return self._children[sel] # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. 
# (not scipp.DataArray, as that does not support lazy data) dg = self._nexus.read_children(self, sel) diff --git a/src/scippnexus/nxtransformations.py b/src/scippnexus/nxtransformations.py index 8066c71f..5cbc680b 100644 --- a/src/scippnexus/nxtransformations.py +++ b/src/scippnexus/nxtransformations.py @@ -3,21 +3,13 @@ # @author Simon Heybrock from __future__ import annotations -from typing import List, Optional, Union +from typing import List, Optional, Tuple, Union import numpy as np import scipp as sc from scipp.scipy import interpolate -from ._common import to_child_select -from .nx2 import ( - Field, - Group, - NexusStructureError, - NXobject, - ScippIndex, - base_definitions, -) +from .nx2 import Field, NexusStructureError, NXobject, ScippIndex, base_definitions class TransformationError(NexusStructureError): @@ -26,34 +18,33 @@ class TransformationError(NexusStructureError): def make_transformation(obj, /, path) -> Optional[Transformation]: if path.startswith('/'): - return Transformation(obj.file[path]) + return obj.file[path] elif path != '.': - return Transformation(obj.parent[path]) + return obj.parent[path] return None # end of chain class NXtransformations(NXobject): """Group of transformations.""" - def _getitem(self, index: ScippIndex) -> sc.DataGroup: - return sc.DataGroup({ - name: get_full_transformation_starting_at(Transformation(child), - index=index) - for name, child in self.items() - }) - - def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: - # Note that this will be similar in NXdata, but there we need to handle - # bin edges as well. 
- child_sel = to_child_select(self.sizes.keys(), child.dims, sel) - return Transformation(child)[child_sel] - class Transformation: def __init__(self, obj: Union[Field, NXobject]): # could be an NXlog self._obj = obj + @property + def sizes(self) -> dict: + return self._obj.sizes + + @property + def dims(self) -> Tuple[str, ...]: + return self._obj.dims + + @property + def shape(self) -> Tuple[int, ...]: + return self._obj.shape + @property def attrs(self): return self._obj.attrs @@ -107,12 +98,18 @@ def __getitem__(self, select: ScippIndex): else: t.data = v if (offset := self.offset) is None: - return t - offset = sc.vector(value=offset.values, unit=offset.unit).to(unit='m') - offset = sc.spatial.translation(value=offset.value, unit=offset.unit) - if transformation_type == 'translation': - offset = offset.to(unit=t.unit, copy=False) - return t * offset + transform = t + else: + offset = sc.vector(value=offset.values, unit=offset.unit) + offset = sc.spatial.translation(value=offset.value, unit=offset.unit) + if transformation_type == 'translation': + offset = offset.to(unit=t.unit, copy=False) + transform = t * offset + if (depends_on := self.depends_on) is not None: + if not isinstance(transform, sc.DataArray): + transform = sc.DataArray(transform) + transform.attrs['depends_on'] = sc.scalar(depends_on[select]) + return transform except (sc.DimensionError, sc.UnitError) as e: raise NexusStructureError( f"Invalid transformation in NXtransformations: {e}") from e @@ -197,4 +194,4 @@ def _get_transformations(transform: Transformation, *, return transformations -base_definitions['NXtransformations'] = NXtransformations \ No newline at end of file +base_definitions['NXtransformations'] = NXtransformations diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index f91599ef..de1402ae 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -2,6 +2,7 @@ import numpy as np import pytest import scipp as sc +from 
scipp.testing import assert_identical import scippnexus.nx2 as snx from scippnexus.nxtransformations import NXtransformations @@ -23,65 +24,72 @@ def create_detector(group): detector_numbers = sc.array(dims=['xx', 'yy'], unit=None, values=np.array([[1, 2], [3, 4]])) - detector = group.create_class('detector_0', NXdetector) - detector.create_field('detector_number', detector_numbers) - detector.create_field('data', data) + detector = snx.create_class(group, 'detector_0', snx.NXdetector) + snx.create_field(detector, 'detector_number', detector_numbers) + snx.create_field(detector, 'data', data) return detector -def test_Transformation_with_single_value(nxroot): - detector = create_detector(nxroot) - detector.create_field('depends_on', sc.scalar('/detector_0/transformations/t1')) - transformations = detector.create_class('transformations', NXtransformations) +def test_Transformation_with_single_value(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) value = sc.scalar(6.5, unit='mm') offset = sc.spatial.translation(value=[1, 2, 3], unit='mm') vector = sc.vector(value=[0, 0, 1]) - t = value.to(unit='m') * vector + t = value * vector expected = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) - expected = expected * sc.spatial.translation(value=[0.001, 0.002, 0.003], unit='m') - value = transformations.create_field('t1', value) + expected = expected * offset + value = snx.create_field(transformations, 't1', value) value.attrs['depends_on'] = '.' 
value.attrs['transformation_type'] = 'translation' value.attrs['offset'] = offset.values value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value - depends_on = detector['depends_on'][()] - t = nxtransformations.Transformation(nxroot[depends_on]) + depends_on = make_group(detector)['depends_on'][()] + t = make_group(h5root)[depends_on] assert t.depends_on is None assert sc.identical(t.offset, offset) assert sc.identical(t.vector, vector) - assert sc.identical(t[()], expected) + assert_identical(t[()], expected) -def test_chain_with_single_values_and_different_unit(nxroot): - detector = create_detector(nxroot) - detector.create_field('depends_on', sc.scalar('/detector_0/transformations/t1')) - transformations = detector.create_class('transformations', NXtransformations) +def test_chain_with_single_values_and_different_unit(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) value = sc.scalar(6.5, unit='mm') offset = sc.spatial.translation(value=[1, 2, 3], unit='mm') vector = sc.vector(value=[0, 0, 1]) - t = value.to(unit='m') * vector - value1 = transformations.create_field('t1', value) + t = value * vector + value1 = snx.create_field(transformations, 't1', value) value1.attrs['depends_on'] = 't2' value1.attrs['transformation_type'] = 'translation' value1.attrs['offset'] = offset.values value1.attrs['offset_units'] = str(offset.unit) value1.attrs['vector'] = vector.value - value2 = transformations.create_field('t2', value.to(unit='cm')) + value2 = snx.create_field(transformations, 't2', value.to(unit='cm')) value2.attrs['depends_on'] = '.' 
value2.attrs['transformation_type'] = 'translation' value2.attrs['vector'] = vector.value - expected = (sc.spatial.translations(dims=t.dims, values=2 * t.values, unit=t.unit) * - sc.spatial.translation(value=[0.001, 0.002, 0.003], unit='m')) - assert sc.identical(detector[...].coords['depends_on'], expected) + t1 = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) * offset + t2 = sc.spatial.translations(dims=t.dims, values=t.values, + unit=t.unit).to(unit='cm') + detector = make_group(h5root['detector_0']) + depends_on = detector[...].coords['depends_on'] + assert_identical(depends_on.value.data, t1) + assert_identical(depends_on.value.attrs['depends_on'].value, t2) -def test_Transformation_with_multiple_values(nxroot): - detector = create_detector(nxroot) - detector.create_field('depends_on', sc.scalar('/detector_0/transformations/t1')) - transformations = detector.create_class('transformations', NXtransformations) +def test_Transformation_with_multiple_values(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) log = sc.DataArray( sc.array(dims=['time'], values=[1.1, 2.2], unit='m'), coords={'time': sc.array(dims=['time'], values=[11, 22], unit='s')}) @@ -91,27 +99,28 @@ def test_Transformation_with_multiple_values(nxroot): t = log * vector t.data = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) expected = t * offset - value = transformations.create_class('t1', NXlog) - value['time'] = log.coords['time'] - sc.epoch(unit='ns') - value['value'] = log.data + value = snx.create_class(transformations, 't1', snx.NXlog) + snx.create_field(value, 'time', log.coords['time'] - sc.epoch(unit='ns')) + snx.create_field(value, 'value', log.data) value.attrs['depends_on'] = '.' 
value.attrs['transformation_type'] = 'translation' value.attrs['offset'] = offset.values value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value - depends_on = detector['depends_on'][()] - t = nxtransformations.Transformation(nxroot[depends_on]) + depends_on = make_group(detector)['depends_on'][()] + t = make_group(h5root)[depends_on] assert t.depends_on is None assert sc.identical(t.offset, offset) assert sc.identical(t.vector, vector) - assert sc.identical(t[()], expected) + assert_identical(t[()], expected) -def test_chain_with_multiple_values(nxroot): - detector = create_detector(nxroot) - detector.create_field('depends_on', sc.scalar('/detector_0/transformations/t1')) - transformations = detector.create_class('transformations', NXtransformations) +def test_chain_with_multiple_values(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) log = sc.DataArray( sc.array(dims=['time'], values=[1.1, 2.2], unit='m'), coords={'time': sc.array(dims=['time'], values=[11, 22], unit='s')}) @@ -120,29 +129,33 @@ def test_chain_with_multiple_values(nxroot): vector = sc.vector(value=[0, 0, 1]) t = log * vector t.data = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) - value1 = transformations.create_class('t1', NXlog) - value1['time'] = log.coords['time'] - sc.epoch(unit='ns') - value1['value'] = log.data + value1 = snx.create_class(transformations, 't1', snx.NXlog) + snx.create_field(value1, 'time', log.coords['time'] - sc.epoch(unit='ns')) + snx.create_field(value1, 'value', log.data) value1.attrs['depends_on'] = 't2' value1.attrs['transformation_type'] = 'translation' value1.attrs['offset'] = offset.values value1.attrs['offset_units'] = str(offset.unit) value1.attrs['vector'] = vector.value - value2 = transformations.create_class('t2', NXlog) - value2['time'] = 
log.coords['time'] - sc.epoch(unit='ns') - value2['value'] = log.data + value2 = snx.create_class(transformations, 't2', snx.NXlog) + snx.create_field(value2, 'time', log.coords['time'] - sc.epoch(unit='ns')) + snx.create_field(value2, 'value', log.data) value2.attrs['depends_on'] = '.' value2.attrs['transformation_type'] = 'translation' value2.attrs['vector'] = vector.value - expected = t * (t * offset) - assert sc.identical(detector[...].coords['depends_on'].value, expected) + expected = t * offset + expected.attrs['depends_on'] = sc.scalar(t) + detector = make_group(detector) + depends_on = detector[...].coords['depends_on'] + assert sc.identical(depends_on.value, expected) -def test_chain_with_multiple_values_and_different_time_unit(nxroot): - detector = create_detector(nxroot) - detector.create_field('depends_on', sc.scalar('/detector_0/transformations/t1')) - transformations = detector.create_class('transformations', NXtransformations) +def test_chain_with_multiple_values_and_different_time_unit(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) # Making sure to not use nanoseconds since that is used internally and may thus # mask bugs. 
log = sc.DataArray( @@ -153,29 +166,36 @@ def test_chain_with_multiple_values_and_different_time_unit(nxroot): vector = sc.vector(value=[0, 0, 1]) t = log * vector t.data = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) - value1 = transformations.create_class('t1', NXlog) - value1['time'] = log.coords['time'] - sc.epoch(unit='us') - value1['value'] = log.data + value1 = snx.create_class(transformations, 't1', snx.NXlog) + snx.create_field(value1, 'time', log.coords['time'] - sc.epoch(unit='us')) + snx.create_field(value1, 'value', log.data) value1.attrs['depends_on'] = 't2' value1.attrs['transformation_type'] = 'translation' value1.attrs['offset'] = offset.values value1.attrs['offset_units'] = str(offset.unit) value1.attrs['vector'] = vector.value - value2 = transformations.create_class('t2', NXlog) - value2['time'] = log.coords['time'].to(unit='ms') - sc.epoch(unit='ms') - value2['value'] = log.data + value2 = snx.create_class(transformations, 't2', snx.NXlog) + snx.create_field(value2, 'time', + log.coords['time'].to(unit='ms') - sc.epoch(unit='ms')) + snx.create_field(value2, 'value', log.data) value2.attrs['depends_on'] = '.' 
value2.attrs['transformation_type'] = 'translation' value2.attrs['vector'] = vector.value - expected = t * (t * offset) - assert sc.identical(detector[...].coords['depends_on'].value, expected) - - -def test_broken_time_dependent_transformation_returns_path_and_transformations(nxroot): - detector = create_detector(nxroot) - detector.create_field('depends_on', sc.scalar('/detector_0/transformations/t1')) - transformations = detector.create_class('transformations', NXtransformations) + expected = t * offset + t2 = t.copy() + t2.coords['time'] = t2.coords['time'].to(unit='ms') + expected.attrs['depends_on'] = sc.scalar(t2) + detector = make_group(detector) + depends_on = detector[...].coords['depends_on'] + assert sc.identical(depends_on.value, expected) + + +def test_broken_time_dependent_transformation_returns_path_and_transformations(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) log = sc.DataArray( sc.array(dims=['time'], values=[1.1, 2.2], unit='m'), coords={'time': sc.array(dims=['time'], values=[11, 22], unit='s')}) @@ -184,16 +204,17 @@ def test_broken_time_dependent_transformation_returns_path_and_transformations(n vector = sc.vector(value=[0, 0, 1]) t = log * vector t.data = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) - value = transformations.create_class('t1', NXlog) - value['time'] = log.coords['time'] - sc.epoch(unit='ns') + value = snx.create_class(transformations, 't1', snx.NXlog) + snx.create_field(value, 'time', log.coords['time'] - sc.epoch(unit='ns')) # This makes the transform "broken" since "time" has length 2 but data has length 0. - value['value'] = log.data[0:0] + snx.create_field(value, 'value', log.data[0:0]) value.attrs['depends_on'] = '.' 
value.attrs['transformation_type'] = 'translation' value.attrs['offset'] = offset.values value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value + detector = make_group(detector) loaded = detector[()] assert sc.identical(loaded.coords['depends_on'], sc.scalar('/detector_0/transformations/t1')) @@ -273,8 +294,9 @@ def test_nxtransformations_group_single_chain(h5root): loaded = make_group(h5root)['transformations'][()] assert set(loaded.keys()) == {'t1', 't2'} - assert sc.identical(loaded['t1'], expected1) - assert sc.allclose(loaded['t2'], expected2) + assert_identical(loaded['t1'], expected1) + assert_identical(loaded['t2'].data, expected2) + assert_identical(loaded['t2'].attrs['depends_on'].value, expected1) def test_slice_transformations(h5root): From 4053873590dd1f6190ed944614bf551b9ab3363c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 21 Mar 2023 14:18:48 +0100 Subject: [PATCH 27/98] cleanup --- src/scippnexus/nx2.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/src/scippnexus/nx2.py b/src/scippnexus/nx2.py index ab32d08f..2b043bc0 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/nx2.py @@ -19,26 +19,6 @@ from ._hdf5_nexus import _warn_latin1_decode from .typing import H5Dataset, H5Group, ScippIndex -# What we know: -# 1. We must not do a recursive read, or we will get in trouble for files with many -# entries. User may just want to access subgroup recursively. -# 2. Some child access needs info from parent: -# - Field dims -# - NXevent_data -# - NXoff_geometry -# Maybe not... parent can modify dims/customize assembly -# 3. Unless we read shape, attrs, children only once, we will suffer too much overhead. -# This includes dims/sizes computation. -# 4. 
Must be able to load coord before loading rest, for label-based indexing - -# Desired behaviors: -# - Field should encapsulate "errors" handling -# - NXtransformations should load depends_on as chain (scalar variable with next) -# - NXobject.__setitem__ to set `axes` and `name_indices` attributes? - -# Consider: -# - Non-legacy mode would make dim parsing simpler and faster? - def asarray(obj: Union[Any, sc.Variable]) -> sc.Variable: return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) @@ -415,6 +395,9 @@ def _is_nxtransformations(self) -> bool: def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: if isinstance(sel, str): + # We cannot get the child directly from the HDF5 group, since we need to + # create the parent group, to ensure that fields get the correct properties + # such as sizes and dtype. if '/' in sel: if sel.startswith('/'): return Group(self._group.file, @@ -428,8 +411,6 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: from .nxtransformations import Transformation return Transformation(child) return child - # Get child again, as it may have ben replaced by call to _populate_fields - return self._children[sel] # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. 
# (not scipp.DataArray, as that does not support lazy data) dg = self._nexus.read_children(self, sel) From 47cca4c963a169bfabc803dda47b54600e26b08b Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 07:29:47 +0100 Subject: [PATCH 28/98] Move new API to folder 'v2' --- src/scippnexus/nxcylindrical_geometry.py | 26 +-- src/scippnexus/nxoff_geometry.py | 26 +-- src/scippnexus/nxtransformations.py | 47 ++--- src/scippnexus/v2/__init__.py | 29 +++ src/scippnexus/{nx2.py => v2/base.py} | 6 +- src/scippnexus/v2/nexus_classes.py | 189 +++++++++++++++++ src/scippnexus/v2/nxcylindrical_geometry.py | 73 +++++++ .../{nxevent_data2.py => v2/nxevent_data.py} | 4 +- src/scippnexus/v2/nxoff_geometry.py | 76 +++++++ src/scippnexus/v2/nxtransformations.py | 197 ++++++++++++++++++ tests/nxdetector_test.py | 23 +- 11 files changed, 614 insertions(+), 82 deletions(-) create mode 100644 src/scippnexus/v2/__init__.py rename src/scippnexus/{nx2.py => v2/base.py} (99%) create mode 100644 src/scippnexus/v2/nexus_classes.py create mode 100644 src/scippnexus/v2/nxcylindrical_geometry.py rename src/scippnexus/{nxevent_data2.py => v2/nxevent_data.py} (99%) create mode 100644 src/scippnexus/v2/nxoff_geometry.py create mode 100644 src/scippnexus/v2/nxtransformations.py diff --git a/src/scippnexus/nxcylindrical_geometry.py b/src/scippnexus/nxcylindrical_geometry.py index 538789c2..5baef6f3 100644 --- a/src/scippnexus/nxcylindrical_geometry.py +++ b/src/scippnexus/nxcylindrical_geometry.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional +from typing import Optional, Tuple, Union import scipp as sc -from .nx2 import Field, Group, NexusStructureError, NXobject, base_definitions +from .nxobject import NexusStructureError, NXobject def _parse(*, @@ -52,22 +52,14 @@ class NXcylindrical_geometry(NXobject): 'cylinders': ('cylinder', 'vertex_index') } - def 
__init__(self, group: Group): - super().__init__(group) - for name, field in group._children.items(): - if isinstance(field, Field): - field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) - if name == 'vertices': - field.dtype = sc.DType.vector3 + def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: + return self._dims.get(name) + + def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: + if name == 'vertices': + return sc.DType.vector3 + return None def load_as_array(self, detector_number: Optional[sc.Variable] = None) -> sc.Variable: return _parse(**self[()], parent_detector_number=detector_number) - - @staticmethod - def assemble_as_child(children: sc.DataGroup, - detector_number: Optional[sc.Variable] = None) -> sc.Variable: - return _parse(**children, parent_detector_number=detector_number) - - -base_definitions['NXcylindrical_geometry'] = NXcylindrical_geometry diff --git a/src/scippnexus/nxoff_geometry.py b/src/scippnexus/nxoff_geometry.py index f68206c8..7f2223ae 100644 --- a/src/scippnexus/nxoff_geometry.py +++ b/src/scippnexus/nxoff_geometry.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional +from typing import Optional, Tuple, Union import scipp as sc -from .nx2 import Field, Group, NexusStructureError, NXobject, base_definitions +from .nxobject import NexusStructureError, NXobject def off_to_shape(*, @@ -55,22 +55,14 @@ class NXoff_geometry(NXobject): 'faces': ('face', ) } - def __init__(self, group: Group): - super().__init__(group) - for name, field in group._children.items(): - if isinstance(field, Field): - field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) - if name == 'vertices': - field.dtype = sc.DType.vector3 + def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: + return self._dims.get(name) + + def _get_field_dtype(self, name: str) -> Union[None, 
sc.DType]: + if name == 'vertices': + return sc.DType.vector3 + return None def load_as_array(self, detector_number: Optional[sc.Variable] = None) -> sc.Variable: return off_to_shape(**self[()], detector_number=detector_number) - - @staticmethod - def assemble_as_child(children: sc.DataGroup, - detector_number: Optional[sc.Variable] = None) -> sc.Variable: - return off_to_shape(**children, detector_number=detector_number) - - -base_definitions['NXoff_geometry'] = NXoff_geometry diff --git a/src/scippnexus/nxtransformations.py b/src/scippnexus/nxtransformations.py index 5cbc680b..e6f666d9 100644 --- a/src/scippnexus/nxtransformations.py +++ b/src/scippnexus/nxtransformations.py @@ -3,13 +3,13 @@ # @author Simon Heybrock from __future__ import annotations -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Union import numpy as np import scipp as sc from scipp.scipy import interpolate -from .nx2 import Field, NexusStructureError, NXobject, ScippIndex, base_definitions +from .nxobject import Field, NexusStructureError, NXobject, ScippIndex class TransformationError(NexusStructureError): @@ -18,33 +18,28 @@ class TransformationError(NexusStructureError): def make_transformation(obj, /, path) -> Optional[Transformation]: if path.startswith('/'): - return obj.file[path] + return Transformation(obj.file[path]) elif path != '.': - return obj.parent[path] + return Transformation(obj.parent[path]) return None # end of chain class NXtransformations(NXobject): """Group of transformations.""" + def _getitem(self, index: ScippIndex) -> sc.DataGroup: + return sc.DataGroup({ + name: get_full_transformation_starting_at(Transformation(child), + index=index) + for name, child in self.items() + }) + class Transformation: def __init__(self, obj: Union[Field, NXobject]): # could be an NXlog self._obj = obj - @property - def sizes(self) -> dict: - return self._obj.sizes - - @property - def dims(self) -> Tuple[str, ...]: - return self._obj.dims - - 
@property - def shape(self) -> Tuple[int, ...]: - return self._obj.shape - @property def attrs(self): return self._obj.attrs @@ -86,6 +81,7 @@ def __getitem__(self, select: ScippIndex): t = value * self.vector v = t if isinstance(t, sc.Variable) else t.data if transformation_type == 'translation': + v = v.to(unit='m', copy=False) v = sc.spatial.translations(dims=v.dims, values=v.values, unit=v.unit) elif transformation_type == 'rotation': v = sc.spatial.rotations_from_rotvecs(v) @@ -98,18 +94,10 @@ def __getitem__(self, select: ScippIndex): else: t.data = v if (offset := self.offset) is None: - transform = t - else: - offset = sc.vector(value=offset.values, unit=offset.unit) - offset = sc.spatial.translation(value=offset.value, unit=offset.unit) - if transformation_type == 'translation': - offset = offset.to(unit=t.unit, copy=False) - transform = t * offset - if (depends_on := self.depends_on) is not None: - if not isinstance(transform, sc.DataArray): - transform = sc.DataArray(transform) - transform.attrs['depends_on'] = sc.scalar(depends_on[select]) - return transform + return t + offset = sc.vector(value=offset.values, unit=offset.unit).to(unit='m') + offset = sc.spatial.translation(value=offset.value, unit=offset.unit) + return t * offset except (sc.DimensionError, sc.UnitError) as e: raise NexusStructureError( f"Invalid transformation in NXtransformations: {e}") from e @@ -192,6 +180,3 @@ def _get_transformations(transform: Transformation, *, # to deal with changing beamline components (e.g. pixel positions) during a # live data stream (see https://github.com/scipp/scippneutron/issues/76). 
return transformations - - -base_definitions['NXtransformations'] = NXtransformations diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v2/__init__.py new file mode 100644 index 00000000..8dcd6f36 --- /dev/null +++ b/src/scippnexus/v2/__init__.py @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock + +# flake8: noqa +import importlib.metadata + +try: + __version__ = importlib.metadata.version(__package__ or __name__) +except importlib.metadata.PackageNotFoundError: + __version__ = "0.0.0" + +from .. import typing +#from .nxdata import NXdataStrategy +#from .nxdetector import NXdetectorStrategy +#from .nxlog import NXlogStrategy +from .base import ( + Field, + Group, + NexusStructureError, + NXobject, + base_definitions, + create_class, + create_field, + group_events_by_detector_number, +) +#from .definition import ApplicationDefinition, make_definition +#from .file import File +from .nexus_classes import * diff --git a/src/scippnexus/nx2.py b/src/scippnexus/v2/base.py similarity index 99% rename from src/scippnexus/nx2.py rename to src/scippnexus/v2/base.py index 2b043bc0..fe0e3d6d 100644 --- a/src/scippnexus/nx2.py +++ b/src/scippnexus/v2/base.py @@ -15,9 +15,9 @@ import numpy as np import scipp as sc -from ._common import convert_time_to_datetime64, to_child_select, to_plain_index -from ._hdf5_nexus import _warn_latin1_decode -from .typing import H5Dataset, H5Group, ScippIndex +from .._common import convert_time_to_datetime64, to_child_select, to_plain_index +from .._hdf5_nexus import _warn_latin1_decode +from ..typing import H5Dataset, H5Group, ScippIndex def asarray(obj: Union[Any, sc.Variable]) -> sc.Variable: diff --git a/src/scippnexus/v2/nexus_classes.py b/src/scippnexus/v2/nexus_classes.py new file mode 100644 index 00000000..a6faa567 --- /dev/null +++ b/src/scippnexus/v2/nexus_classes.py @@ -0,0 +1,189 @@ +# SPDX-License-Identifier: BSD-3-Clause 
+# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +#from .nxfermi_chopper import NXfermi_chopper # noqa F401 +#from .nxlog import NXlog # noqa F401 +#from .nxmonitor import NXmonitor # noqa F401 +from .base import NXobject # noqa F401 +from .base import NXdata, NXdetector, NXgeometry, NXlog, NXmonitor # noqa F401 +from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 +#from .nxdata import NXdata # noqa F401 +#from .nxdetector import NXdetector # noqa F401 +#from .nxdisk_chopper import NXdisk_chopper # noqa F401 +from .nxevent_data import NXevent_data # noqa F401 +from .nxoff_geometry import NXoff_geometry # noqa F401 +#from .nxsample import NXsample # noqa F401 +#from .nxsource import NXsource # noqa F401 +from .nxtransformations import NXtransformations # noqa F401 + + +class NXentry(NXobject): + """Entry in a NeXus file.""" + + +class NXinstrument(NXobject): + """Group of instrument-related information.""" + + +class NXaperture(NXobject): + """NXaperture""" + + +class NXattenuator(NXobject): + """NXattenuator""" + + +class NXbeam(NXobject): + """NXbeam""" + + +class NXbeam_stop(NXobject): + """NXbeam_stop""" + + +class NXbending_magnet(NXobject): + """NXbending_magnet""" + + +class NXcapillary(NXobject): + """NXcapillary""" + + +class NXcite(NXobject): + """NXcite""" + + +class NXcollection(NXobject): + """NXcollection""" + + +class NXcollimator(NXobject): + """NXcollimator""" + + +class NXcrystal(NXobject): + """NXcrystal""" + + +class NXdetector_group(NXobject): + """NXdetector_group""" + + +class NXdetector_module(NXobject): + """NXdetector_module""" + + +class NXenvironment(NXobject): + """NXenvironment""" + + +class NXfilter(NXobject): + """NXfilter""" + + +class NXflipper(NXobject): + """NXflipper""" + + +class NXfresnel_zone_plate(NXobject): + """NXfresnel_zone_plate""" + + +class NXgrating(NXobject): + """NXgrating""" + + +class NXguide(NXobject): + """NXguide""" + + +class 
NXinsertion_device(NXobject): + """NXinsertion_device""" + + +class NXmirror(NXobject): + """NXmirror""" + + +class NXmoderator(NXobject): + """NXmoderator""" + + +class NXmonochromator(NXobject): + """NXmonochromator""" + + +class NXnote(NXobject): + """NXnote""" + + +class NXorientation(NXobject): + """NXorientation""" + + +class NXparameters(NXobject): + """NXparameters""" + + +class NXpdb(NXobject): + """NXpdb""" + + +class NXpinhole(NXobject): + """NXpinhole""" + + +class NXpolarizer(NXobject): + """NXpolarizer""" + + +class NXpositioner(NXobject): + """NXpositioner""" + + +class NXprocess(NXobject): + """NXprocess""" + + +class NXreflections(NXobject): + """NXreflections""" + + +class NXsample_component(NXobject): + """NXsample_component""" + + +class NXsensor(NXobject): + """NXsensor""" + + +class NXshape(NXobject): + """NXshape""" + + +class NXslit(NXobject): + """NXslit""" + + +class NXsubentry(NXobject): + """NXsubentry""" + + +class NXtranslation(NXobject): + """NXtranslation""" + + +class NXuser(NXobject): + """NXuser""" + + +class NXvelocity_selector(NXobject): + """NXvelocity_selector""" + + +class NXxraylens(NXobject): + """NXxraylens""" + + +# Not included in list of NeXus classes since this is the "base" of all others +del NXobject diff --git a/src/scippnexus/v2/nxcylindrical_geometry.py b/src/scippnexus/v2/nxcylindrical_geometry.py new file mode 100644 index 00000000..bbe89279 --- /dev/null +++ b/src/scippnexus/v2/nxcylindrical_geometry.py @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import Optional + +import scipp as sc + +from .base import Field, Group, NexusStructureError, NXobject, base_definitions + + +def _parse(*, + vertices: sc.Variable, + cylinders: sc.Variable, + detector_number: Optional[sc.Variable] = None, + parent_detector_number: Optional[sc.Variable] = None) -> sc.Variable: + face1_center = 
cylinders['vertex_index', 0] + face1_edge = cylinders['vertex_index', 1] + face2_center = cylinders['vertex_index', 2] + ds = sc.Dataset() + ds['face1_center'] = vertices[face1_center.values] + ds['face1_edge'] = vertices[face1_edge.values] + ds['face2_center'] = vertices[face2_center.values] + ds = ds.rename(**{vertices.dim: 'cylinder'}) + if detector_number is None: + # All cylinders belong to the same shape + return sc.bins(begin=sc.index(0), dim='cylinder', data=ds) + if parent_detector_number is None: + raise NexusStructureError( + "`detector_number` not given, but " + "NXcylindrical_geometry contains mapping to `detector_number`.") + # detector_number gives indices into cylinders, the naming in the NeXus + # standard appears to be misleading + if parent_detector_number.values.size != detector_number.values.size: + raise NexusStructureError( + "Number of detector numbers in NXcylindrical_geometry " + "does not match the one given by the parent.") + detecting_cylinders = ds['cylinder', detector_number.values] + # One cylinder per detector + begin = sc.arange('dummy', + parent_detector_number.values.size, + unit=None, + dtype='int64') + end = begin + sc.index(1) + shape = sc.bins(begin=begin, end=end, dim='cylinder', data=detecting_cylinders) + return shape.fold(dim='dummy', sizes=parent_detector_number.sizes) + + +class NXcylindrical_geometry(NXobject): + _dims = { + 'vertices': ('vertex', ), + 'detector_number': ('detector_number', ), + 'cylinders': ('cylinder', 'vertex_index') + } + + def __init__(self, group: Group): + super().__init__(group) + for name, field in group._children.items(): + if isinstance(field, Field): + field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) + if name == 'vertices': + field.dtype = sc.DType.vector3 + + def load_as_array(self, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + return _parse(**self[()], parent_detector_number=detector_number) + + @staticmethod + def assemble_as_child(children: 
sc.DataGroup, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + return _parse(**children, parent_detector_number=detector_number) + + +base_definitions['NXcylindrical_geometry'] = NXcylindrical_geometry diff --git a/src/scippnexus/nxevent_data2.py b/src/scippnexus/v2/nxevent_data.py similarity index 99% rename from src/scippnexus/nxevent_data2.py rename to src/scippnexus/v2/nxevent_data.py index a589bc10..f17e7909 100644 --- a/src/scippnexus/nxevent_data2.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -6,8 +6,8 @@ import numpy as np import scipp as sc -from ._common import to_plain_index -from .nx2 import ( +from .._common import to_plain_index +from .base import ( Field, Group, NexusStructureError, diff --git a/src/scippnexus/v2/nxoff_geometry.py b/src/scippnexus/v2/nxoff_geometry.py new file mode 100644 index 00000000..b6ffb74b --- /dev/null +++ b/src/scippnexus/v2/nxoff_geometry.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import Optional + +import scipp as sc + +from .base import Field, Group, NexusStructureError, NXobject, base_definitions + + +def off_to_shape(*, + vertices: sc.Variable, + winding_order: sc.Variable, + faces: sc.Variable, + detector_faces: Optional[sc.Variable] = None, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + """ + Convert OFF shape description to simpler shape representation. + """ + # Vertices in winding order. This duplicates vertices if they are part of more than + # one faces. + vw = vertices[winding_order.values] + # Same as above, grouped by face. + fvw = sc.bins(begin=faces, data=vw, dim=vw.dim) + low = fvw.bins.size().min().value + high = fvw.bins.size().max().value + if low == high: + # Vertices in winding order, groupbed by face. Unlike `fvw` above we now know + # that each face has the same number of vertices, so we can fold instead of + # using binned data. 
+ shapes = vw.fold(dim=vertices.dim, sizes={faces.dim: -1, vertices.dim: low}) + else: + raise NotImplementedError("Conversion from OFF to shape not implemented for " + "inconsistent number of vertices in faces.") + if detector_faces is None: # if detector_number is not None, all have same shape + return sc.bins(begin=sc.index(0), dim=faces.dim, data=shapes) + if detector_number is None: + raise NexusStructureError("`detector_number` not given but NXoff_geometry " + "contains `detector_faces`.") + shape_index = detector_faces['column', 0].copy() + detid = detector_faces['column', 1].copy() + da = sc.DataArray(shape_index, coords={ + 'detector_number': detid + }).group(detector_number.flatten(to='detector_number')) + comps = da.bins.constituents + comps['data'] = shapes[faces.dim, comps['data'].values] + return sc.bins(**comps).fold(dim='detector_number', sizes=detector_number.sizes) + + +class NXoff_geometry(NXobject): + _dims = { + 'detector_faces': ('face', 'column'), + 'vertices': ('vertex', ), + 'winding_order': ('winding_order', ), + 'faces': ('face', ) + } + + def __init__(self, group: Group): + super().__init__(group) + for name, field in group._children.items(): + if isinstance(field, Field): + field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) + if name == 'vertices': + field.dtype = sc.DType.vector3 + + def load_as_array(self, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + return off_to_shape(**self[()], detector_number=detector_number) + + @staticmethod + def assemble_as_child(children: sc.DataGroup, + detector_number: Optional[sc.Variable] = None) -> sc.Variable: + return off_to_shape(**children, detector_number=detector_number) + + +base_definitions['NXoff_geometry'] = NXoff_geometry diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py new file mode 100644 index 00000000..0f5b7518 --- /dev/null +++ b/src/scippnexus/v2/nxtransformations.py @@ -0,0 +1,197 @@ +# 
SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from __future__ import annotations + +from typing import List, Optional, Tuple, Union + +import numpy as np +import scipp as sc +from scipp.scipy import interpolate + +from .base import Field, NexusStructureError, NXobject, ScippIndex, base_definitions + + +class TransformationError(NexusStructureError): + pass + + +def make_transformation(obj, /, path) -> Optional[Transformation]: + if path.startswith('/'): + return obj.file[path] + elif path != '.': + return obj.parent[path] + return None # end of chain + + +class NXtransformations(NXobject): + """Group of transformations.""" + + +class Transformation: + + def __init__(self, obj: Union[Field, NXobject]): # could be an NXlog + self._obj = obj + + @property + def sizes(self) -> dict: + return self._obj.sizes + + @property + def dims(self) -> Tuple[str, ...]: + return self._obj.dims + + @property + def shape(self) -> Tuple[int, ...]: + return self._obj.shape + + @property + def attrs(self): + return self._obj.attrs + + @property + def name(self): + return self._obj.name + + @property + def depends_on(self): + if (path := self.attrs.get('depends_on')) is not None: + return make_transformation(self._obj, path) + return None + + @property + def offset(self): + if (offset := self.attrs.get('offset')) is None: + return None + if (offset_units := self.attrs.get('offset_units')) is None: + raise TransformationError( + f"Found {offset=} but no corresponding 'offset_units' " + f"attribute at {self.name}") + return sc.spatial.translation(value=offset, unit=offset_units) + + @property + def vector(self) -> sc.Variable: + return sc.vector(value=self.attrs.get('vector')) + + def __getitem__(self, select: ScippIndex): + transformation_type = self.attrs.get('transformation_type') + # According to private communication with Tobias Richter, NeXus allows 0-D or + # shape=[1] for single values. 
It is unclear how and if this could be + # distinguished from a scan of length 1. + value = self._obj[select] + try: + if isinstance(value, sc.DataGroup): + raise TransformationError( + f"Failed to load transformation at {self.name}.") + t = value * self.vector + v = t if isinstance(t, sc.Variable) else t.data + if transformation_type == 'translation': + v = sc.spatial.translations(dims=v.dims, values=v.values, unit=v.unit) + elif transformation_type == 'rotation': + v = sc.spatial.rotations_from_rotvecs(v) + else: + raise TransformationError( + f"{transformation_type=} attribute at {self.name}," + " expected 'translation' or 'rotation'.") + if isinstance(t, sc.Variable): + t = v + else: + t.data = v + if (offset := self.offset) is None: + transform = t + else: + offset = sc.vector(value=offset.values, unit=offset.unit) + offset = sc.spatial.translation(value=offset.value, unit=offset.unit) + if transformation_type == 'translation': + offset = offset.to(unit=t.unit, copy=False) + transform = t * offset + if (depends_on := self.depends_on) is not None: + if not isinstance(transform, sc.DataArray): + transform = sc.DataArray(transform) + transform.attrs['depends_on'] = sc.scalar(depends_on[select]) + return transform + except (sc.DimensionError, sc.UnitError) as e: + raise NexusStructureError( + f"Invalid transformation in NXtransformations: {e}") from e + + +def _interpolate_transform(transform, xnew): + # scipy can't interpolate with a single value + if transform.sizes["time"] == 1: + transform = sc.concat([transform, transform], dim="time") + return interpolate.interp1d(transform, + "time", + kind="previous", + fill_value="extrapolate")(xnew=xnew) + + +def _smaller_unit(a, b): + if a.unit == b.unit: + return a.unit + ratio = sc.scalar(1.0, unit=a.unit).to(unit=b.unit) + if ratio.value < 1.0: + return a.unit + else: + return b.unit + + +def get_full_transformation( + depends_on: Field) -> Union[None, sc.DataArray, sc.Variable]: + """ + Get the 4x4 transformation 
matrix for a component, resulting + from the full chain of transformations linked by "depends_on" + attributes + """ + if (t0 := make_transformation(depends_on, depends_on[()])) is None: + return None + return get_full_transformation_starting_at(t0) + + +def get_full_transformation_starting_at( + t0: Transformation, + *, + index: ScippIndex = None) -> Union[None, sc.DataArray, sc.Variable]: + transformations = _get_transformations(t0, index=() if index is None else index) + + total_transform = None + for transform in transformations: + if total_transform is None: + total_transform = transform + elif isinstance(total_transform, sc.DataArray) and isinstance( + transform, sc.DataArray): + unit = _smaller_unit(transform.coords['time'], + total_transform.coords['time']) + total_transform.coords['time'] = total_transform.coords['time'].to( + unit=unit, copy=False) + transform.coords['time'] = transform.coords['time'].to(unit=unit, + copy=False) + time = sc.concat([total_transform.coords["time"], transform.coords["time"]], + dim="time") + time = sc.datetimes(values=np.unique(time.values), dims=["time"], unit=unit) + total_transform = _interpolate_transform(transform, time) \ + * _interpolate_transform(total_transform, time) + else: + total_transform = transform * total_transform + if isinstance(total_transform, sc.DataArray): + time_dependent = [t for t in transformations if isinstance(t, sc.DataArray)] + times = [da.coords['time'][0] for da in time_dependent] + latest_log_start = sc.reduce(times).max() + return total_transform['time', latest_log_start:].copy() + return total_transform + + +def _get_transformations(transform: Transformation, *, + index: ScippIndex) -> List[Union[sc.DataArray, sc.Variable]]: + """Get all transformations in the depends_on chain.""" + transformations = [] + t = transform + while t is not None: + transformations.append(t[index]) + t = t.depends_on + # TODO: this list of transformation should probably be cached in the future + # to deal with 
changing beamline components (e.g. pixel positions) during a + # live data stream (see https://github.com/scipp/scippneutron/issues/76). + return transformations + + +base_definitions['NXtransformations'] = NXtransformations diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 80ff3037..6d5cde1e 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -5,9 +5,8 @@ from scipp.testing import assert_identical import scippnexus as snx -import scippnexus.nx2 as snx2 +import scippnexus.v2 as snx2 from scippnexus import NexusStructureError, NXdetector, NXentry, NXoff_geometry, NXroot -from scippnexus.nxevent_data2 import NXevent_data def make_group(group: h5py.Group) -> snx2.Group: @@ -72,7 +71,7 @@ def test_loads_signal_and_events_when_both_found(group): detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_number) detector.create_field('data', data) - events = detector.create_class('events', NXevent_data) + events = detector.create_class('events', snx2.NXevent_data) events.create_field('event_id', sc.array(dims=[''], unit=None, values=[1])) events.create_field('event_time_offset', sc.array(dims=[''], unit='s', values=[1])) events.create_field('event_time_zero', sc.array(dims=[''], unit='s', values=[1])) @@ -180,7 +179,7 @@ def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(gro detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector.sizes == {'detector_number': 4, 'event_time_zero': 4} loaded = detector[...] 
da = snx2.group_events_by_detector_number(loaded) @@ -197,7 +196,7 @@ def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(gro def test_loads_event_data_with_0d_detector_numbers(group): detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', sc.index(1, dtype='int64')) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector.dims == ('event_time_zero', ) assert detector.shape == (4, ) loaded = snx2.group_events_by_detector_number(detector[...]) @@ -207,7 +206,7 @@ def test_loads_event_data_with_0d_detector_numbers(group): def test_loads_event_data_with_2d_detector_numbers(group): detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector.sizes == {'dim_0': 2, 'dim_1': 2, 'event_time_zero': 4} loaded = snx2.group_events_by_detector_number(detector[...]) assert sc.identical( @@ -221,7 +220,7 @@ def test_loads_event_data_with_2d_detector_numbers(group): def test_select_events_slices_underlying_event_data(group): detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) da = snx2.group_events_by_detector_number(detector['event_time_zero', :2]) assert sc.identical( da.bins.size().data, @@ -255,7 +254,7 @@ def test_select_events_slices_underlying_event_data(group): def test_loading_event_data_creates_automatic_detector_numbers_if_not_present_in_file( group): detector = group.create_class('detector0', NXdetector) - 
create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector.dims == ('event_time_zero', ) loaded = detector[...] loaded = snx2.group_events_by_detector_number(loaded) @@ -267,7 +266,7 @@ def test_loading_event_data_creates_automatic_detector_numbers_if_not_present_in def test_loading_event_data_with_det_selection_and_automatic_detector_numbers_raises( group): detector = group.create_class('detector0', NXdetector) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector.dims == ('event_time_zero', ) with pytest.raises(sc.DimensionError): detector['detector_number', 0] @@ -276,7 +275,7 @@ def test_loading_event_data_with_det_selection_and_automatic_detector_numbers_ra def test_loading_event_data_with_full_selection_and_automatic_detector_numbers_works( group): detector = group.create_class('detector0', NXdetector) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector.dims == ('event_time_zero', ) assert tuple(detector[...].shape) == (4, ) assert tuple(detector[()].shape) == (4, ) @@ -286,13 +285,13 @@ def test_event_data_field_dims_labels(group): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) detector = group.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers) - create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) assert detector['detector_number'].dims == ('detector_number', ) def test_nxevent_data_selection_yields_correct_pulses(group): detector = group.create_class('detector0', NXdetector) - 
create_event_data_ids_1234(detector.create_class('events', NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) class Load: From f3f5df489ab1b6de406825c66496d7691a3d0854 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 07:45:57 +0100 Subject: [PATCH 29/98] nx_class property --- src/scippnexus/v2/base.py | 26 +++++++++++++++++++++++++- src/scippnexus/v2/nexus_classes.py | 2 +- tests/nexus_test.py | 8 ++++---- tests/nxdata_test.py | 4 ++-- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index fe0e3d6d..991b051e 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -4,11 +4,12 @@ from __future__ import annotations import datetime +import inspect import re import warnings from collections.abc import Mapping from dataclasses import dataclass -from functools import cached_property +from functools import cached_property, lru_cache from typing import Any, Dict, Iterator, List, Optional, Protocol, Tuple, Union import dateutil.parser @@ -333,6 +334,10 @@ def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: return dg +class NXroot(NXobject): + pass + + class Group(Mapping): def __init__(self, @@ -341,6 +346,19 @@ def __init__(self, self._group = group self._definitions = {} if definitions is None else definitions + @property + def nx_class(self) -> Optional[type]: + """The value of the NX_class attribute of the group. + + In case of the subclass NXroot this returns 'NXroot' even if the attribute + is not actually set. This is to support the majority of all legacy files, which + do not have this attribute. + """ + if (nxclass := self.attrs.get('NX_class')) is not None: + return _nx_class_registry().get(nxclass) + if self.name == '/': + return NXroot + @cached_property def attrs(self) -> Dict[str, Any]: # Attrs are not read until needed, to avoid reading all attrs for all subgroups. 
@lru_cache()
def _nx_class_registry():
    """Mapping from NX_class attribute strings to the corresponding classes.

    Built lazily and cached: importing nexus_classes at call time avoids a
    circular import between this module and the class definitions.
    """
    from . import nexus_classes
    return {
        name: cls
        for name, cls in inspect.getmembers(nexus_classes, inspect.isclass)
    }
b/tests/nxdata_test.py index 198abc5e..fcfbec63 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -3,8 +3,8 @@ import pytest import scipp as sc -import scippnexus.nx2 as snx -from scippnexus import Field, NXdata, NXentry, NXlog, NXroot +import scippnexus.v2 as snx +from scippnexus.v2 import Field, NXdata, NXentry, NXlog, NXroot @pytest.fixture() From ea702800b7ec71dd7eda767f496ced16e9f6270c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 07:47:29 +0100 Subject: [PATCH 30/98] Add conftest.py for scipp's assert_identical --- tests/conftest.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..d446b30e --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) + +import os +from pathlib import Path +from typing import Any, List + +import pytest +import scipp as sc + +# Load the config file in the test dir instead of the user's. 
+os.environ['SCIPPDIR'] = os.fspath(Path(__file__).resolve().parent) +# Silence warning from Jupyter +os.environ['JUPYTER_PLATFORM_DIRS'] = '1' + +pytest.register_assert_rewrite('scipp.testing.assertions') + + +def pytest_assertrepr_compare(op: str, left: Any, right: Any) -> List[str]: + if isinstance(left, sc.Unit) and isinstance(right, sc.Unit): + return [f'Unit({left}) {op} Unit({right})'] + if isinstance(left, sc.DType) or isinstance(right, sc.DType): + return [f'{left!r} {op} {right!r}'] From bf6466b3bb5b00fa0881e8cd62795f30d8a24837 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 08:42:34 +0100 Subject: [PATCH 31/98] Simplify and improve shape/squeeze handling --- src/scippnexus/v2/base.py | 36 +++++++++++++++++++++--------------- tests/nexus_test.py | 26 +++++++++++--------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 991b051e..bed15d7b 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -266,7 +266,8 @@ def unit(self) -> Union[sc.Unit, None]: def _squeezed_field_sizes(dataset: H5Dataset) -> Dict[str, int]: - shape = tuple(size for size in dataset.shape if size != 1) + if (shape := dataset.shape) == (1, ): + return {} return {f'dim_{i}': size for i, size in enumerate(shape)} @@ -538,16 +539,6 @@ def _get_group_dims(): group_dims = _get_group_dims() - # Reject fallback dims if they are not compatible with group dims - if fallback_dims is not None: - for field in group._children.values(): - if len(fallback_dims) < len(field.shape): - fallback_dims = None - break - - if group_dims is None: - group_dims = fallback_dims - if self._signal is None: self._valid = False else: @@ -555,6 +546,13 @@ def _get_group_dims(): shape = self._signal.dataset.shape shape = _squeeze_trailing(group_dims, shape) self._signal.sizes = dict(zip(group_dims, shape)) + elif fallback_dims is not None: + shape = self._signal.dataset.shape + group_dims = [ + 
class NXdetector(NXdata):
    """NXdata-like detector group with special handling of detector numbers."""

    # Field names that may hold the detector-number (pixel id) information.
    _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index']

    @staticmethod
    def _detector_number(group: Group) -> Optional[str]:
        """Return the name of the first detector-number field present, or None."""
        for name in NXdetector._detector_number_fields:
            if name in group._children:
                return name

    def __init__(self, group: Group):
        # Only use 'detector_number' as the fallback dim when the field is 1-D;
        # multi-dimensional detector numbers keep the default dim names.
        fallback_dims = None
        if (det_num_name := NXdetector._detector_number(group)) is not None:
            if group._children[det_num_name].dataset.ndim == 1:
                fallback_dims = ('detector_number', )
        super().__init__(group,
                         fallback_dims=fallback_dims,
                         fallback_signal_name='data')

    @property
    def detector_number(self) -> Optional[str]:
        """Name of the field holding detector numbers, if any."""
        return self._detector_number(self._group)
da = sc.DataArray( - sc.array(dims=['time', 'ignored'], values=[[1.1]]), - coords={ - 'time': - sc.epoch(unit='ns') + - sc.array(dims=['time'], unit='s', values=[4.4]).to(unit='ns', dtype='int64') - }) +def test_nxlog_length_1_two_dims_no_time_defaults_inner_dim_name(nxroot): + var = sc.array(dims=['time', 'ignored'], values=[[1.1]]) log = nxroot['entry'].create_class('log', NXlog) - log['value'] = da.data - assert sc.identical(log[...], sc.DataArray(sc.scalar(1.1))) + log['value'] = var + assert_identical(log[...], sc.DataArray(var.rename(ignored='dim_1'))) -def test_nxlog_length_1_two_dims_with_time_squeezes_inner_dim(nxroot): +def test_nxlog_length_1_two_dims_with_time_defaults_inner_dim_name(nxroot): da = sc.DataArray( sc.array(dims=['time', 'ignored'], values=[[1.1]]), coords={ @@ -154,7 +150,7 @@ def test_nxlog_length_1_two_dims_with_time_squeezes_inner_dim(nxroot): log = nxroot['entry'].create_class('log', NXlog) log['value'] = da.data log['time'] = da.coords['time'] - sc.epoch(unit='ns') - assert sc.identical(log[...], da['ignored', 0]) + assert sc.identical(log[...], da.rename(ignored='dim_1')) def test_nxlog_axes_replaces_time_dim(nxroot): @@ -186,7 +182,7 @@ def test_nxlog_three_dims_with_time_of_length_1(nxroot): log['value'] = da.data log['time'] = da.coords['time'] - sc.epoch(unit='ns') loaded = log[...] 
- assert sc.identical( + assert_identical( loaded.data, sc.array(dims=['time', 'dim_1', 'dim_2'], values=np.arange(9.).reshape(1, 3, 3))) @@ -198,7 +194,7 @@ def test_nxlog_with_shape_0(nxroot): log = nxroot['entry'].create_class('log', NXlog) log['value'] = da.data log['time'] = da.coords['time'] - assert sc.identical(log[...], da['ignored', 0]) + assert sc.identical(log[...], da.rename(ignored='dim_1')) def test_nxobject_event_data(nxroot): From 681fcd0e2e9ccaed6df947abfdaf5cbc79336ae2 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 08:47:28 +0100 Subject: [PATCH 32/98] Make all tests run again --- tests/nx2_test.py | 9 ++++----- tests/nxmonitor_test.py | 8 ++++---- tests/nxoff_geometry_test.py | 4 ++-- tests/nxtransformations_test.py | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/nx2_test.py b/tests/nx2_test.py index 203d3271..db533641 100644 --- a/tests/nx2_test.py +++ b/tests/nx2_test.py @@ -3,8 +3,7 @@ import pytest import scipp as sc -import scippnexus.nx2 as snx -from scippnexus.nxevent_data2 import NXevent_data +import scippnexus.v2 as snx @pytest.fixture() @@ -17,8 +16,8 @@ def h5root(request): def test_does_not_see_changes(h5root): entry = h5root.create_group('entry') data = entry.create_group('data') - signal = data['signal'] = np.arange(4) - coord = data['time'] = np.arange(4) + data['signal'] = np.arange(4) + data['time'] = np.arange(4) obj = snx.Group(entry) dg = obj[()] print(list(dg.items())) @@ -77,7 +76,7 @@ def test_nx_class_attribute_sets_NXobject_subclass(h5root): events = entry.create_group('events') events.attrs['NX_class'] = 'NXevent_data' root = snx.Group(entry) - assert isinstance(root['events'], NXevent_data) + assert isinstance(root['events'], snx.NXevent_data) def test_read_empty_nxevent_data(h5root): diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index de968081..fefdeb28 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -1,9 +1,9 @@ import h5py 
import pytest import scipp as sc +from scipp.testing import assert_identical -import scippnexus.nx2 as snx -from scippnexus.nxevent_data2 import NXevent_data +import scippnexus.v2 as snx @pytest.fixture() @@ -52,14 +52,14 @@ def test_loads_event_data_in_current_group(group): assert monitor.dims == ('pulse', ) assert monitor.shape == (4, ) loaded = monitor[...] - assert sc.identical( + assert_identical( loaded.bins.size().data, sc.array(dims=['pulse'], unit=None, dtype='int64', values=[3, 0, 2, 1])) def test_loads_event_data_in_child_group(group): monitor = group.create_class('monitor1', snx.NXmonitor) - create_event_data_no_ids(monitor.create_class('events', NXevent_data)) + create_event_data_no_ids(monitor.create_class('events', snx.NXevent_data)) assert monitor.dims == ('event_time_zero', ) assert monitor.shape == (4, ) loaded = monitor[...] diff --git a/tests/nxoff_geometry_test.py b/tests/nxoff_geometry_test.py index 1b8f3283..59e2672b 100644 --- a/tests/nxoff_geometry_test.py +++ b/tests/nxoff_geometry_test.py @@ -3,8 +3,8 @@ import pytest import scipp as sc -import scippnexus.nx2 as snx -from scippnexus.nxoff_geometry import NXoff_geometry, off_to_shape +import scippnexus.v2 as snx +from scippnexus.v2.nxoff_geometry import NXoff_geometry, off_to_shape @pytest.fixture() diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index de1402ae..c62637fc 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -4,8 +4,8 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.nx2 as snx -from scippnexus.nxtransformations import NXtransformations +import scippnexus.v2 as snx +from scippnexus.v2.nxtransformations import NXtransformations def make_group(group: h5py.Group) -> snx.Group: From b1da422710ffa9c90864758c01f3f71d8a624dcc Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 08:59:07 +0100 Subject: [PATCH 33/98] Pass more tests --- src/scippnexus/v2/base.py | 13 
++++++++++++- src/scippnexus/v2/nxevent_data.py | 3 ++- tests/nexus_test.py | 9 ++++----- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index bed15d7b..c91f3f3a 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -292,6 +292,10 @@ def __init__(self, group: Group): self._special_fields[name] = field self._transformations[name] = field + @property + def unit(self) -> Union[None, sc.Unit]: + raise ValueError(f"Group-like {self._group.nx_class} has no well-defined unit") + @cached_property def sizes(self) -> Dict[str, int]: # exclude geometry/tansform groups? @@ -372,6 +376,10 @@ def attrs(self) -> Dict[str, Any]: def name(self) -> str: return self._group.name + @property + def unit(self) -> Optional[sc.Unit]: + return self._nexus.unit + @property def parent(self) -> Optional[Group]: return Group(self._group.parent, @@ -628,6 +636,10 @@ def get_dims(name, field): def sizes(self) -> Dict[str, int]: return self._signal.sizes if self._valid else super().sizes + @property + def unit(self) -> Union[None, sc.Unit]: + return self._signal.unit if self._valid else super().unit + def _bin_edge_dim(self, coord: Field) -> Union[None, str]: if not isinstance(coord, Field): return None @@ -713,7 +725,6 @@ def detector_number(self) -> Optional[str]: class NXmonitor(NXdata): - # TODO should read axes of fallback signal? 
def __init__(self, group: Group): super().__init__(group, fallback_signal_name='data') diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index f17e7909..9793192a 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -151,7 +151,8 @@ def assemble(self, children: sc.DataGroup) -> sc.DataArray: binned = sc.bins(data=events, dim=_event_dimension, begin=begins) except IndexError as e: raise NexusStructureError( - f"Invalid index in NXevent_data at {self.name}/event_index:\n{e}.") + f"Invalid index in NXevent_data at {self._group.name}/event_index:\n{e}" + ) return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index b65e6bef..57dd806a 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -281,8 +281,8 @@ def test_field_dim_labels(nxroot): events['event_id'] = sc.arange('ignored', 2) event_data = nxroot['entry/events_0'] assert event_data['event_time_offset'].dims == ('event', ) - assert event_data['event_time_zero'].dims == ('pulse', ) - assert event_data['event_index'].dims == ('pulse', ) + assert event_data['event_time_zero'].dims == ('event_time_zero', ) + assert event_data['event_index'].dims == ('event_time_zero', ) assert event_data['event_id'].dims == ('event', ) log = nxroot['entry'].create_class('log', NXlog) log['value'] = sc.arange('ignored', 2) @@ -346,7 +346,7 @@ def test_field_of_extended_ascii_in_ascii_encoded_dataset_is_loaded_correctly(): string = b"run at rot=90" + bytes([0xb0]) with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: f['title'] = np.array([string, string + b'x']) - title = NXroot(f)['title'] + title = snx.Group(f)['title'] assert sc.identical( title[...], sc.array(dims=['dim_0'], values=["run at rot=90°", "run at rot=90°x"])) @@ -456,8 +456,7 @@ def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): monitor = 
def assemble(self,
             dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]:
    """Assemble loaded children into a DataArray, or a Dataset when the group
    declares auxiliary signals.

    Falls back to the base-class assembly (a plain DataGroup) when the group
    structure was found to be invalid.
    """
    if not self._valid:
        return super().assemble(dg)
    # Auxiliary signals become extra Dataset entries, not coords.
    aux = {name: dg.pop(name) for name in self._aux_signals}
    remainder = sc.DataGroup(dg)
    signal = remainder.pop(self._signal_name)
    da = sc.DataArray(data=signal)
    da = self._add_coords(da, {k: asarray(v) for k, v in remainder.items()})
    if not aux:
        return da
    return sc.Dataset({self._signal_name: da, **aux})
data.attrs['auxiliary_signals'] = ['xx'] - data.create_field('signal', da.data) - data.create_field('xx', da.coords['xx']) - del da.coords['xx'] - assert sc.identical(data[...], da) + snx.create_field(data, 'signal', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + data = snx.Group(data, definitions=snx.base_definitions) + assert_identical(data[...], sc.Dataset({'signal': da.data, 'xx': da.coords['xx']})) def test_field_dims_match_NXdata_dims(h5root): From 6fa4c9b90b5665f7e47ebf0a11b9cbe76e85604b Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 09:26:20 +0100 Subject: [PATCH 35/98] Update squeeze behavior tests --- tests/nxdata_test.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index d509fae6..acd9b9d5 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -384,8 +384,8 @@ def test_dims_of_length_1_are_kept_when_axes_specified(h5root): assert data['signal'].shape == (1, 1) -def test_dims_of_length_1_are_squeezed_when_no_axes_specified(h5root): - signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1]]) +def test_only_dim_of_length_1_is_squeezed_when_no_axes_specified(h5root): + signal = sc.array(dims=['xx'], unit='m', values=[1.1]) data = snx.create_class(h5root, 'data1', NXdata) snx.create_field(data, 'signal', signal) data.attrs['signal'] = 'signal' @@ -396,7 +396,20 @@ def test_dims_of_length_1_are_squeezed_when_no_axes_specified(h5root): assert data['signal'].shape == () -def test_one_dim_of_length_1_is_squeezed_when_no_axes_specified(h5root): +def test_multi_dims_of_length_1_are_kept_when_no_axes_specified(h5root): + signal = sc.array(dims=['xx', 'yy'], unit='m', values=[[1.1]]) + data = snx.create_class(h5root, 'data1', NXdata) + snx.create_field(data, 'signal', signal) + data.attrs['signal'] = 'signal' + data = snx.Group(data, definitions=snx.base_definitions) + loaded = data[...] 
def _get_children_by_nx_class(
        self, select: Union[type, List[type]]) -> Dict[str, Union[NXobject, Field]]:
    """Return all children that are instances of the given NX class(es).

    Datasets are matched via the Field class; groups are matched via their
    nx_class.  Children are returned through __getitem__ so any special
    wrapping applied there takes effect.
    """
    children = {}
    # issubclass accepts a tuple of classes, so normalize a list selection.
    select = tuple(select) if isinstance(select, list) else select
    for key, child in self._children.items():
        nx_class = Field if isinstance(child, Field) else child.nx_class
        # NOTE(review): nx_class can be None for unrecognized NX_class values,
        # in which case issubclass would raise TypeError — confirm callers
        # only pass groups with known classes.
        if issubclass(nx_class, select):
            children[key] = self[key]
    return children
- # TODO aux if name in (self._signal_name, ): return group_dims # if name in [self._signal_name, self._errors_name]: diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index acd9b9d5..9f8e553b 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -5,7 +5,7 @@ from scipp.testing import assert_identical import scippnexus.v2 as snx -from scippnexus.v2 import Field, NXdata, NXentry, NXlog +from scippnexus.v2 import NXdata, NXlog @pytest.fixture() @@ -20,7 +20,7 @@ def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: root = snx.Group(f) - root.create_class('entry', NXentry) + root.create_class('entry', snx.NXentry) yield root @@ -244,7 +244,7 @@ def test_field_dims_match_NXdata_dims_when_selected_via_class_name(nxroot): data.create_field('xx', da.coords['xx']) data.create_field('xx2', da.coords['xx2']) data.create_field('yy', da.coords['yy']) - fields = data[Field] + fields = data[snx.Field] assert fields['signal1'].dims == ('xx', 'yy') assert fields['xx'].dims == ('xx', ) assert fields['xx2'].dims == ('xx', ) From 99167b4c6b4aa6250698e6d5d6daaadc34268ffb Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 22 Mar 2023 14:22:43 +0100 Subject: [PATCH 37/98] Try to avoid reinit of nexus tree from transformation chains --- src/scippnexus/v2/base.py | 40 +++++++++++++++++--------- src/scippnexus/v2/nxtransformations.py | 2 ++ tests/nxdata_test.py | 13 +++++---- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index d433e114..25219382 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -122,6 +122,7 @@ def _dtype_fromdataset(dataset: H5Dataset) -> sc.DType: @dataclass class Field: dataset: H5Dataset + parent: Group sizes: Optional[Dict[str, int]] = None dtype: Optional[sc.DType] = None errors: Optional[H5Dataset] = None @@ -143,10 +144,9 @@ def dims(self) -> 
Tuple[str]: def shape(self) -> Tuple[int, ...]: return tuple(self.sizes.values()) - @property - def parent(self) -> H5Group: - # TODO Get corrected definitions - return Group(self.dataset.parent, definitions=base_definitions) + @cached_property + def file(self) -> Group: + return self.parent.file def _load_variances(self, var, index): stddevs = sc.empty(dims=var.dims, @@ -347,9 +347,17 @@ class Group(Mapping): def __init__(self, group: H5Group, - definitions: Optional[Dict[str, NXobject]] = None): + definitions: Optional[Dict[str, type]] = None, + parent: Optional[Group] = None): self._group = group self._definitions = {} if definitions is None else definitions + if parent is None: + if group == group.parent: + self._parent = self + else: + self._parent = Group(group.parent, definitions=definitions) + else: + self._parent = parent @property def nx_class(self) -> Optional[type]: @@ -382,14 +390,17 @@ def unit(self) -> Optional[sc.Unit]: @property def parent(self) -> Optional[Group]: - return Group(self._group.parent, - definitions=self._definitions) if self._group.parent else None + return self._parent + + @cached_property + def file(self) -> Optional[Group]: + return self if self == self.parent else self.parent.file @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: items = { - name: - Field(obj) if is_dataset(obj) else Group(obj, definitions=self._definitions) + name: Field(obj, parent=self) if is_dataset(obj) else Group( + obj, parent=self, definitions=self._definitions) for name, obj in self._group.items() } for suffix in ('_errors', '_error'): @@ -417,6 +428,7 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[str]: return self._children.__iter__() + @cached_property def _is_nxtransformations(self) -> bool: return self.attrs.get('NX_class') == 'NXtransformations' @@ -437,14 +449,13 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: # such as sizes and dtype. 
if '/' in sel: if sel.startswith('/'): - return Group(self._group.file, - definitions=self._definitions)[sel[1:]] + return self.file[sel[1:]] else: return self[sel.split('/')[0]][sel[sel.index('/') + 1:]] child = self._children[sel] if isinstance(child, Field): self._populate_fields() - if self._is_nxtransformations(): + if self._is_nxtransformations: from .nxtransformations import Transformation return Transformation(child) return child @@ -475,10 +486,11 @@ def create_field(self, key: str, value: sc.Variable) -> H5Dataset: def create_class(self, name, class_name: str) -> Group: return Group(create_class(self._group, name, class_name), - definitions=self._definitions) + definitions=self._definitions, + parent=self) def rebuild(self) -> Group: - return Group(self._group, definitions=self._definitions) + return Group(self._group, definitions=self._definitions, parent=self.parent) @cached_property def sizes(self) -> Dict[str, int]: diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index 0f5b7518..b42f3db3 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -81,6 +81,7 @@ def __getitem__(self, select: ScippIndex): value = self._obj[select] try: if isinstance(value, sc.DataGroup): + return value raise TransformationError( f"Failed to load transformation at {self.name}.") t = value * self.vector @@ -111,6 +112,7 @@ def __getitem__(self, select: ScippIndex): transform.attrs['depends_on'] = sc.scalar(depends_on[select]) return transform except (sc.DimensionError, sc.UnitError) as e: + return value raise NexusStructureError( f"Invalid transformation in NXtransformations: {e}") from e diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 9f8e553b..8d0b83cd 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -231,19 +231,20 @@ def test_field_dims_match_NXdata_dims(h5root): assert sc.identical(data['xx', :2].coords['yy'], data['yy'][:]) -def 
test_field_dims_match_NXdata_dims_when_selected_via_class_name(nxroot): +def test_field_dims_match_NXdata_dims_when_selected_via_class_name(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='m', values=[[1, 2, 3], [4, 5, 6]])) da.coords['xx'] = da.data['yy', 0] da.coords['xx2'] = da.data['yy', 1] da.coords['yy'] = da.data['xx', 0] - data = nxroot.create_class('data1', NXdata) + data = snx.create_class(h5root, 'data1', NXdata) data.attrs['axes'] = da.dims data.attrs['signal'] = 'signal1' - data.create_field('signal1', da.data) - data.create_field('xx', da.coords['xx']) - data.create_field('xx2', da.coords['xx2']) - data.create_field('yy', da.coords['yy']) + snx.create_field(data, 'signal1', da.data) + snx.create_field(data, 'xx', da.coords['xx']) + snx.create_field(data, 'xx2', da.coords['xx2']) + snx.create_field(data, 'yy', da.coords['yy']) + data = snx.Group(data, definitions=snx.base_definitions) fields = data[snx.Field] assert fields['signal1'].dims == ('xx', 'yy') assert fields['xx'].dims == ('xx', ) From 405a4a0e9140ad1c95a816cfb4a9fdde486cece2 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 08:17:00 +0100 Subject: [PATCH 38/98] Fix coord to attr code --- src/scippnexus/v2/base.py | 14 +++++++++----- tests/nxdetector_test.py | 16 ++++++++-------- tests/nxtransformations_test.py | 10 ++++++---- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 25219382..8296126b 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -694,7 +694,6 @@ def assemble(self, coords = sc.DataGroup(dg) signal = coords.pop(self._signal_name) da = sc.DataArray(data=signal) - coords = {name: asarray(coord) for name, coord in coords.items()} da = self._add_coords(da, coords) if aux: signals = {self._signal_name: da} @@ -718,10 +717,15 @@ def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> boo return False def _add_coords(self, da: 
sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: - da.coords.update(coords) - for name in coords: - if self._coord_to_attr(da, name, self._group[name]): - da.attrs[name] = da.coords.pop(name) + for name, coord in coords.items(): + if not isinstance(coord, sc.Variable): + da.coords[name] = sc.scalar(coord) + # We need the shape *before* slicing to determine dims, so we get the + # field from the group for the conditional. + elif self._coord_to_attr(da, name, self._group[name]): + da.attrs[name] = coord + else: + da.coords[name] = coord return da diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 6d5cde1e..21e1b42d 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -154,14 +154,14 @@ def test_slicing_works_as_in_scipp(h5root): detector.attrs['axes'] = ['xx', 'yy'] detector.attrs['2d_edges_indices'] = [1, 0] detector = make_group(detector) - assert sc.identical(detector[...], da) - assert sc.identical(detector['xx', 0], da['xx', 0]) - assert sc.identical(detector['xx', 1], da['xx', 1]) - assert sc.identical(detector['xx', 0:1], da['xx', 0:1]) - assert sc.identical(detector['yy', 0], da['yy', 0]) - assert sc.identical(detector['yy', 1], da['yy', 1]) - assert sc.identical(detector['yy', 0:1], da['yy', 0:1]) - assert sc.identical(detector['yy', 1:1], da['yy', 1:1]) # empty slice + assert_identical(detector[...], da) + assert_identical(detector['xx', 0], da['xx', 0]) + assert_identical(detector['xx', 1], da['xx', 1]) + assert_identical(detector['xx', 0:1], da['xx', 0:1]) + assert_identical(detector['yy', 0], da['yy', 0]) + assert_identical(detector['yy', 1], da['yy', 1]) + assert_identical(detector['yy', 0:1], da['yy', 0:1]) + assert_identical(detector['yy', 1:1], da['yy', 1:1]) # empty slice def create_event_data_ids_1234(group): diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index c62637fc..0c9c5963 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -191,7 
+191,8 @@ def test_chain_with_multiple_values_and_different_time_unit(h5root): assert sc.identical(depends_on.value, expected) -def test_broken_time_dependent_transformation_returns_path_and_transformations(h5root): +def test_broken_time_dependent_transformation_returns_datagroup_but_sets_up_depends_on( + h5root): detector = create_detector(h5root) snx.create_field(detector, 'depends_on', sc.scalar('/detector_0/transformations/t1')) @@ -216,14 +217,15 @@ def test_broken_time_dependent_transformation_returns_path_and_transformations(h detector = make_group(detector) loaded = detector[()] - assert sc.identical(loaded.coords['depends_on'], - sc.scalar('/detector_0/transformations/t1')) t = loaded.coords['transformations'].value assert isinstance(t, sc.DataGroup) # Due to the way NXtransformations works, vital information is stored in the # attributes. DataGroup does currently not support attributes, so this information # is mostly useless until that is addressed. - assert 't1' in t + t1 = t['t1'] + assert isinstance(t1, sc.DataGroup) + assert t1.keys() == {'time', 'value'} + assert_identical(loaded.coords['depends_on'].value, t1) def write_translation(group, name: str, value: sc.Variable, offset: sc.Variable, From de5b8a4a46caffa3fc3060cf1f85b39a4dbc0c78 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 08:42:46 +0100 Subject: [PATCH 39/98] Fix or disable remaining tests --- src/scippnexus/v2/__init__.py | 2 +- tests/nexus_test.py | 2 + tests/nx2_test.py | 8 -- tests/nxdata_test.py | 2 +- tests/nxdetector_test.py | 183 +++++++++++++++------------------- tests/nxmonitor_test.py | 8 +- 6 files changed, 93 insertions(+), 112 deletions(-) diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v2/__init__.py index 8dcd6f36..2e6a8d07 100644 --- a/src/scippnexus/v2/__init__.py +++ b/src/scippnexus/v2/__init__.py @@ -25,5 +25,5 @@ group_events_by_detector_number, ) #from .definition import ApplicationDefinition, make_definition -#from .file import File 
+from .file import File from .nexus_classes import * diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 57dd806a..2216ea3d 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -459,6 +459,7 @@ def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): assert 'event_time_offset' in da +@pytest.mark.skip(reason='Special attributes disabled for now. Do we keep them?') def test___getattr__for_unique_child_groups(nxroot): entry = nxroot['entry'] with pytest.raises(NexusStructureError): @@ -473,6 +474,7 @@ def test___getattr__for_unique_child_groups(nxroot): entry.log +@pytest.mark.skip(reason='Special attributes disabled for now. Do we keep them?') def test___dir__(nxroot): entry = nxroot['entry'] assert 'log' not in entry.__dir__() diff --git a/tests/nx2_test.py b/tests/nx2_test.py index db533641..1f9ae422 100644 --- a/tests/nx2_test.py +++ b/tests/nx2_test.py @@ -71,14 +71,6 @@ def test_read_field(h5root): assert sc.identical(var, sc.array(dims=['dim_0'], values=np.arange(4), unit='m')) -def test_nx_class_attribute_sets_NXobject_subclass(h5root): - entry = h5root.create_group('entry') - events = entry.create_group('events') - events.attrs['NX_class'] = 'NXevent_data' - root = snx.Group(entry) - assert isinstance(root['events'], snx.NXevent_data) - - def test_read_empty_nxevent_data(h5root): entry = h5root.create_group('entry') events = entry.create_group('events') diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 8d0b83cd..edc9e882 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -19,7 +19,7 @@ def h5root(request): def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = snx.Group(f) + root = snx.Group(f, definitions=snx.base_definitions) root.create_class('entry', snx.NXentry) yield root diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 21e1b42d..1b333b4f 100644 --- 
a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -4,38 +4,31 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus as snx -import scippnexus.v2 as snx2 -from scippnexus import NexusStructureError, NXdetector, NXentry, NXoff_geometry, NXroot +import scippnexus.v2 as snx +from scippnexus.v2 import NXdetector, NXentry, NXoff_geometry -def make_group(group: h5py.Group) -> snx2.Group: - return snx2.Group(group, definitions=snx2.base_definitions) +def make_group(group: h5py.Group) -> snx.Group: + return snx.Group(group, definitions=snx.base_definitions) @pytest.fixture() def h5root(request): - """Yield NXroot containing a single NXentry named 'entry'""" + """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f -@pytest.fixture() -def group(request): - """Yield NXroot containing a single NXentry named 'entry'""" - with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - yield make_group(f) - - @pytest.fixture() def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = NXroot(f) + root = make_group(f) root.create_class('entry', NXentry) yield root +@pytest.mark.skip(reason="Warnings system not implemented yet.") def test_warns_if_no_data_found(nxroot): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) detector = nxroot.create_class('detector0', NXdetector) @@ -47,8 +40,8 @@ def test_warns_if_no_data_found(nxroot): def test_can_load_fields_if_no_data_found(h5root): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, 'detector_numbers', detector_numbers) + detector = snx.create_class(h5root, 'detector0', NXdetector) + snx.create_field(detector, 'detector_numbers', detector_numbers) 
detector['detector_numbers'][...] @@ -56,22 +49,22 @@ def test_finds_data_from_group_attr(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) - snx2.create_field(detector, 'custom', da.data) + detector = snx.create_class(h5root, 'detector0', NXdetector) + snx.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx.create_field(detector, 'custom', da.data) detector.attrs['signal'] = 'custom' detector = make_group(detector) print(detector[...]) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) -def test_loads_signal_and_events_when_both_found(group): +def test_loads_signal_and_events_when_both_found(nxroot): detector_number = sc.array(dims=[''], unit=None, values=np.array([1, 2])) data = sc.ones(dims=['detector_number'], shape=[2]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_number) detector.create_field('data', data) - events = detector.create_class('events', snx2.NXevent_data) + events = detector.create_class('events', snx.NXevent_data) events.create_field('event_id', sc.array(dims=[''], unit=None, values=[1])) events.create_field('event_time_offset', sc.array(dims=[''], unit='s', values=[1])) events.create_field('event_time_zero', sc.array(dims=[''], unit='s', values=[1])) @@ -88,9 +81,9 @@ def detector_numbers_xx_yy_1234(): def test_loads_data_without_coords(h5root): da = sc.DataArray(sc.array(dims=['xx', 'yy'], values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, 'detector_numbers', 
da.coords['detector_numbers']) - snx2.create_field(detector, 'data', da.data) + detector = snx.create_class(h5root, 'detector0', NXdetector) + snx.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx.create_field(detector, 'data', da.data) detector = make_group(detector) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) @@ -100,32 +93,22 @@ def test_loads_data_without_coords(h5root): def test_detector_number_key_alias(h5root, detector_number_key): da = sc.DataArray(sc.array(dims=['xx', 'yy'], values=[[1.1, 2.2], [3.3, 4.4]])) da.coords[detector_number_key] = detector_numbers_xx_yy_1234() - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, detector_number_key, da.coords[detector_number_key]) - snx2.create_field(detector, 'data', da.data) + detector = snx.create_class(h5root, 'detector0', NXdetector) + snx.create_field(detector, detector_number_key, da.coords[detector_number_key]) + snx.create_field(detector, 'data', da.data) detector = make_group(detector) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) -def test_select_events_raises_if_detector_contains_data(nxroot): - da = sc.DataArray(sc.array(dims=['xx', 'yy'], values=[[1.1, 2.2], [3.3, 4.4]])) - da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() - detector = nxroot.create_class('detector0', NXdetector) - detector.create_field('detector_numbers', da.coords['detector_numbers']) - detector.create_field('data', da.data) - with pytest.raises(NexusStructureError): - detector.select_events - - def test_loads_data_with_coords(h5root): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_numbers'] = detector_numbers_xx_yy_1234() da.coords['xx'] = sc.array(dims=['xx'], unit='m', values=[0.1, 0.2]) - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, 'detector_numbers', 
da.coords['detector_numbers']) - snx2.create_field(detector, 'xx', da.coords['xx']) - snx2.create_field(detector, 'data', da.data) + detector = snx.create_class(h5root, 'detector0', NXdetector) + snx.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx.create_field(detector, 'xx', da.coords['xx']) + snx.create_field(detector, 'data', da.data) detector.attrs['axes'] = ['xx', '.'] detector = make_group(detector) assert sc.identical(detector[...], da.rename_dims({'yy': 'dim_1'})) @@ -144,13 +127,13 @@ def test_slicing_works_as_in_scipp(h5root): da.coords['2d_edges'] = sc.array(dims=['yy', 'xx'], unit='m', values=[[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - detector = snx2.create_class(h5root, 'detector0', NXdetector) - snx2.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) - snx2.create_field(detector, 'xx', da.coords['xx']) - snx2.create_field(detector, 'xx2', da.coords['xx2']) - snx2.create_field(detector, 'yy', da.coords['yy']) - snx2.create_field(detector, '2d_edges', da.coords['2d_edges']) - snx2.create_field(detector, 'data', da.data) + detector = snx.create_class(h5root, 'detector0', NXdetector) + snx.create_field(detector, 'detector_numbers', da.coords['detector_numbers']) + snx.create_field(detector, 'xx', da.coords['xx']) + snx.create_field(detector, 'xx2', da.coords['xx2']) + snx.create_field(detector, 'yy', da.coords['yy']) + snx.create_field(detector, '2d_edges', da.coords['2d_edges']) + snx.create_field(detector, 'data', da.data) detector.attrs['axes'] = ['xx', 'yy'] detector.attrs['2d_edges_indices'] = [1, 0] detector = make_group(detector) @@ -175,14 +158,14 @@ def create_event_data_ids_1234(group): sc.array(dims=[''], unit='None', values=[0, 3, 3, 5])) -def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(group): +def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(nxroot): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) - 
detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector.sizes == {'detector_number': 4, 'event_time_zero': 4} loaded = detector[...] - da = snx2.group_events_by_detector_number(loaded) + da = snx.group_events_by_detector_number(loaded) assert sc.identical( da.bins.size().data, sc.array(dims=['detector_number'], @@ -193,22 +176,22 @@ def test_loads_event_data_mapped_to_detector_numbers_based_on_their_event_id(gro assert 'event_time_zero' in da.bins.coords -def test_loads_event_data_with_0d_detector_numbers(group): - detector = group.create_class('detector0', NXdetector) +def test_loads_event_data_with_0d_detector_numbers(nxroot): + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_number', sc.index(1, dtype='int64')) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector.dims == ('event_time_zero', ) assert detector.shape == (4, ) - loaded = snx2.group_events_by_detector_number(detector[...]) + loaded = snx.group_events_by_detector_number(detector[...]) assert sc.identical(loaded.bins.size().data, sc.index(2, dtype='int64')) -def test_loads_event_data_with_2d_detector_numbers(group): - detector = group.create_class('detector0', NXdetector) +def test_loads_event_data_with_2d_detector_numbers(nxroot): + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector.sizes == {'dim_0': 2, 
'dim_1': 2, 'event_time_zero': 4} - loaded = snx2.group_events_by_detector_number(detector[...]) + loaded = snx.group_events_by_detector_number(detector[...]) assert sc.identical( loaded.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], @@ -217,32 +200,32 @@ def test_loads_event_data_with_2d_detector_numbers(group): values=[[2, 3], [0, 1]])) -def test_select_events_slices_underlying_event_data(group): - detector = group.create_class('detector0', NXdetector) +def test_select_events_slices_underlying_event_data(nxroot): + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_numbers_xx_yy_1234()) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) - da = snx2.group_events_by_detector_number(detector['event_time_zero', :2]) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) + da = snx.group_events_by_detector_number(detector['event_time_zero', :2]) assert sc.identical( da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', values=[[1, 1], [0, 1]])) - da = snx2.group_events_by_detector_number(detector['event_time_zero', :3]) + da = snx.group_events_by_detector_number(detector['event_time_zero', :3]) assert sc.identical( da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', values=[[2, 2], [0, 1]])) - da = snx2.group_events_by_detector_number(detector['event_time_zero', 3]) + da = snx.group_events_by_detector_number(detector['event_time_zero', 3]) assert sc.identical( da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], unit=None, dtype='int64', values=[[0, 1], [0, 0]])) - da = snx2.group_events_by_detector_number(detector[()]) + da = snx.group_events_by_detector_number(detector[()]) assert sc.identical( da.bins.size().data, sc.array(dims=['dim_0', 'dim_1'], @@ -252,46 +235,46 @@ def test_select_events_slices_underlying_event_data(group): def 
test_loading_event_data_creates_automatic_detector_numbers_if_not_present_in_file( - group): - detector = group.create_class('detector0', NXdetector) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + nxroot): + detector = nxroot.create_class('detector0', NXdetector) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector.dims == ('event_time_zero', ) loaded = detector[...] - loaded = snx2.group_events_by_detector_number(loaded) + loaded = snx.group_events_by_detector_number(loaded) assert sc.identical( loaded.bins.size().data, sc.array(dims=['event_id'], unit=None, dtype='int64', values=[2, 3, 1])) def test_loading_event_data_with_det_selection_and_automatic_detector_numbers_raises( - group): - detector = group.create_class('detector0', NXdetector) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + nxroot): + detector = nxroot.create_class('detector0', NXdetector) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector.dims == ('event_time_zero', ) with pytest.raises(sc.DimensionError): detector['detector_number', 0] def test_loading_event_data_with_full_selection_and_automatic_detector_numbers_works( - group): - detector = group.create_class('detector0', NXdetector) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + nxroot): + detector = nxroot.create_class('detector0', NXdetector) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector.dims == ('event_time_zero', ) assert tuple(detector[...].shape) == (4, ) assert tuple(detector[()].shape) == (4, ) -def test_event_data_field_dims_labels(group): +def test_event_data_field_dims_labels(nxroot): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) 
detector.create_field('detector_number', detector_numbers) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) assert detector['detector_number'].dims == ('detector_number', ) -def test_nxevent_data_selection_yields_correct_pulses(group): - detector = group.create_class('detector0', NXdetector) - create_event_data_ids_1234(detector.create_class('events', snx2.NXevent_data)) +def test_nxevent_data_selection_yields_correct_pulses(nxroot): + detector = nxroot.create_class('detector0', NXdetector) + create_event_data_ids_1234(detector.create_class('events', snx.NXevent_data)) class Load: @@ -318,7 +301,7 @@ def __getitem__(self, select=...): assert np.array_equal(Load()['event_time_zero', :-2], [3, 0]) -def create_off_geometry_detector_numbers_1234(group: snx2.Group, +def create_off_geometry_detector_numbers_1234(group: snx.Group, name: str, detector_faces: bool = True): off = group.create_class(name, NXoff_geometry) @@ -338,12 +321,12 @@ def create_off_geometry_detector_numbers_1234(group: snx2.Group, @pytest.mark.parametrize('detid_name', ['detector_number', 'pixel_id', 'spectrum_index']) -def test_loads_data_with_coords_and_off_geometry(group, detid_name): +def test_loads_data_with_coords_and_off_geometry(nxroot, detid_name): da = sc.DataArray( sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]])) da.coords['detector_number'] = detector_numbers_xx_yy_1234() da.coords['xx'] = sc.array(dims=['xx'], unit='m', values=[0.1, 0.2]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field(detid_name, da.coords['detector_number']) detector.create_field('xx', da.coords['xx']) detector.create_field('data', da.data) @@ -358,9 +341,9 @@ def test_loads_data_with_coords_and_off_geometry(group, detid_name): def 
test_missing_detector_numbers_triggers_fallback_given_off_geometry_with_det_faces( - group): + nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, name='shape') @@ -369,9 +352,9 @@ def test_missing_detector_numbers_triggers_fallback_given_off_geometry_with_det_ assert sc.identical(loaded['shape'], detector['shape'][()]) -def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(group): +def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, @@ -382,7 +365,7 @@ def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(gr assert sc.identical(loaded.coords['shape'].bins.size(), sc.index(4)) -def create_cylindrical_geometry_detector_numbers_1234(group: snx2.Group, +def create_cylindrical_geometry_detector_numbers_1234(group: snx.Group, name: str, detector_numbers: bool = True): shape = group.create_class(name, snx.NXcylindrical_geometry) @@ -395,9 +378,9 @@ def create_cylindrical_geometry_detector_numbers_1234(group: snx2.Group, shape['detector_number'] = sc.array(dims=['_'], values=[0, 1, 1, 0], unit=None) -def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(group): +def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = group.create_class('detector0', NXdetector) + detector = 
nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_cylindrical_geometry_detector_numbers_1234(detector, @@ -420,9 +403,9 @@ def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(group): def test_cylindrical_geometry_with_missing_parent_detector_numbers_triggers_fallback( - group): + nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] create_cylindrical_geometry_detector_numbers_1234(detector, @@ -434,9 +417,9 @@ def test_cylindrical_geometry_with_missing_parent_detector_numbers_triggers_fall def test_cylindrical_geometry_with_inconsistent_detector_numbers_triggers_fallback( - group): + nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1], [3.3]]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] detector.create_field('detector_numbers', @@ -449,9 +432,9 @@ def test_cylindrical_geometry_with_inconsistent_detector_numbers_triggers_fallba assert isinstance(loaded['shape'], sc.DataGroup) -def test_cylindrical_geometry_with_detector_numbers(group): +def test_cylindrical_geometry_with_detector_numbers(nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) - detector = group.create_class('detector0', NXdetector) + detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector.attrs['axes'] = ['xx', 'yy'] detector_number = sc.array(dims=var.dims, values=[[1, 2], [3, 4]], unit=None) diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index fefdeb28..b12ef6ac 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -46,15 +46,19 @@ 
def create_event_data_no_ids(group): values=[0, 3, 3, 5])) +@pytest.mark.skip(reason="For now we do not support this broken type of NXmonitor") def test_loads_event_data_in_current_group(group): monitor = group.create_class('monitor1', snx.NXmonitor) create_event_data_no_ids(monitor) - assert monitor.dims == ('pulse', ) + assert monitor.dims == ('event_time_zero', ) assert monitor.shape == (4, ) loaded = monitor[...] assert_identical( loaded.bins.size().data, - sc.array(dims=['pulse'], unit=None, dtype='int64', values=[3, 0, 2, 1])) + sc.array(dims=['event_time_zero'], + unit=None, + dtype='int64', + values=[3, 0, 2, 1])) def test_loads_event_data_in_child_group(group): From 881b269721dc37c2ef2fbe9e59b290f5cac35b22 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 09:26:11 +0100 Subject: [PATCH 40/98] Update file load test and avoid slow scipp integer-array indexing --- src/scippnexus/v2/nxoff_geometry.py | 8 +++++++- src/scippnexus/v2/nxtransformations.py | 2 +- tests/load_files_test.py | 2 +- tests/nxcylindrical_geometry_test.py | 10 +++++----- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/v2/nxoff_geometry.py b/src/scippnexus/v2/nxoff_geometry.py index b6ffb74b..1f8127db 100644 --- a/src/scippnexus/v2/nxoff_geometry.py +++ b/src/scippnexus/v2/nxoff_geometry.py @@ -19,7 +19,13 @@ def off_to_shape(*, """ # Vertices in winding order. This duplicates vertices if they are part of more than # one faces. - vw = vertices[winding_order.values] + # TODO Should use this: + # vw = vertices[winding_order.values] + # but NumPy is currently much faster. + # See https://github.com/scipp/scipp/issues/3044 + vw = sc.vectors(dims=vertices.dims, + values=vertices.values[winding_order.values], + unit=vertices.unit) # Same as above, grouped by face. 
fvw = sc.bins(begin=faces, data=vw, dim=vw.dim) low = fvw.bins.size().min().value diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index b42f3db3..f0fbb001 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -111,7 +111,7 @@ def __getitem__(self, select: ScippIndex): transform = sc.DataArray(transform) transform.attrs['depends_on'] = sc.scalar(depends_on[select]) return transform - except (sc.DimensionError, sc.UnitError) as e: + except (sc.DimensionError, sc.UnitError, TransformationError) as e: return value raise NexusStructureError( f"Invalid transformation in NXtransformations: {e}") from e diff --git a/tests/load_files_test.py b/tests/load_files_test.py index 419956ae..106675cb 100644 --- a/tests/load_files_test.py +++ b/tests/load_files_test.py @@ -3,7 +3,7 @@ import pytest import scipp as sc -import scippnexus as snx +import scippnexus.v2 as snx externalfile = pytest.importorskip('externalfile') diff --git a/tests/nxcylindrical_geometry_test.py b/tests/nxcylindrical_geometry_test.py index 402ce9a7..89c87134 100644 --- a/tests/nxcylindrical_geometry_test.py +++ b/tests/nxcylindrical_geometry_test.py @@ -2,20 +2,20 @@ import pytest import scipp as sc -from scippnexus import NXcylindrical_geometry, NXentry, NXroot +import scippnexus.v2 as snx @pytest.fixture() def nxroot(request): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = NXroot(f) - root.create_class('entry', NXentry) + root = snx.Group(f, snx.base_definitions) + root.create_class('entry', snx.NXentry) yield root def test_vertices_loaded_as_vector3(nxroot): - shape = nxroot['entry'].create_class('shape', NXcylindrical_geometry) + shape = nxroot['entry'].create_class('shape', snx.NXcylindrical_geometry) values = [[1, 2, 3], [4, 5, 6]] shape['vertices'] = sc.array(dims=['ignored', 'comp'], values=values, unit='mm') 
loaded = shape[()] @@ -24,7 +24,7 @@ def test_vertices_loaded_as_vector3(nxroot): def test_field_properties(nxroot): - shape = nxroot['entry'].create_class('shape', NXcylindrical_geometry) + shape = nxroot['entry'].create_class('shape', snx.NXcylindrical_geometry) values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] shape['vertices'] = sc.array(dims=['ignored', 'comp'], values=values, unit='m') shape['cylinders'] = sc.array(dims=['ignored', 'index'], From b62af2ee8effb802138c9f8f8776cf2a384bc046 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 09:28:22 +0100 Subject: [PATCH 41/98] Add missing file file --- src/scippnexus/v2/file.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/scippnexus/v2/file.py diff --git a/src/scippnexus/v2/file.py b/src/scippnexus/v2/file.py new file mode 100644 index 00000000..c351650c --- /dev/null +++ b/src/scippnexus/v2/file.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from contextlib import AbstractContextManager + +import h5py + +from .base import Group, base_definitions + + +class File(AbstractContextManager, Group): + + def __init__(self, *args, definitions=base_definitions, **kwargs): + self._file = h5py.File(*args, **kwargs) + super().__init__(self._file, definitions=definitions) + + def __enter__(self): + self._file.__enter__() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._file.close() + + def close(self): + self._file.close() From 1992be257e960f66c8ba6ef5772da88ab6c2fde9 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 10:06:09 +0100 Subject: [PATCH 42/98] Add write support for appl-defs --- src/scippnexus/v2/base.py | 6 +++++- tests/application_definition_test.py | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 8296126b..dc160afc 
100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -479,7 +479,11 @@ def isclass(x): # TODO It is not clear if we want to support these convenience methods def __setitem__(self, key, value): - return create_field(self._group, key, value) + if hasattr(value, '__write_to_nexus_group__'): + group = create_class(self._group, key, nx_class=value.nx_class) + value.__write_to_nexus_group__(group) + else: + create_field(self._group, key, value) def create_field(self, key: str, value: sc.Variable) -> H5Dataset: return create_field(self._group, key, value) diff --git a/tests/application_definition_test.py b/tests/application_definition_test.py index fe80e0f2..8c9d391b 100644 --- a/tests/application_definition_test.py +++ b/tests/application_definition_test.py @@ -2,16 +2,16 @@ import pytest import scipp as sc -from scippnexus import NXentry, NXroot -from scippnexus.definitions.nxcansas import NXcanSAS, SASdata, SASentry +import scippnexus.v2 as snx +from scippnexus.v2.definitions.nxcansas import NXcanSAS, SASdata, SASentry @pytest.fixture() def nxroot(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - root = NXroot(f) - root.create_class('entry', NXentry) + root = snx.Group(f, definitions=snx.base_definitions) + root.create_class('entry', snx.NXentry) yield root From 9e8af94b95f6703bcb5ada1e03d6de48f39122ad Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 13:26:46 +0100 Subject: [PATCH 43/98] Bring back NXcanSAS support --- src/scippnexus/v2/base.py | 18 +++- src/scippnexus/v2/definitions/nxcansas.py | 117 ++++++++++++++++++++++ tests/application_definition_test.py | 50 +++++++-- 3 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 src/scippnexus/v2/definitions/nxcansas.py diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index dc160afc..43dc00c8 100644 --- a/src/scippnexus/v2/base.py +++ 
b/src/scippnexus/v2/base.py @@ -350,7 +350,7 @@ def __init__(self, definitions: Optional[Dict[str, type]] = None, parent: Optional[Group] = None): self._group = group - self._definitions = {} if definitions is None else definitions + self._definitions = DefinitionsDict() if definitions is None else definitions if parent is None: if group == group.parent: self._parent = self @@ -417,7 +417,7 @@ def _children(self) -> Dict[str, Union[Field, Group]]: @cached_property def _nexus(self) -> NXobject: - return self._definitions.get(self.attrs.get('NX_class'), NXobject)(self) + return self._definitions.get(self.attrs.get('NX_class'), group=self)(self) def _populate_fields(self) -> None: _ = self._nexus @@ -785,7 +785,19 @@ def assemble_as_child(children: sc.DataGroup, return sc.scalar(children) -base_definitions = {} +class DefinitionsDict: + + def __init__(self): + self._definitions = {} + + def __setitem__(self, nx_class: str, definition: type): + self._definitions[nx_class] = definition + + def get(self, nx_class: str, group: Group) -> type: + return self._definitions.get(nx_class, NXobject) + + +base_definitions = DefinitionsDict() base_definitions['NXdata'] = NXdata base_definitions['NXlog'] = NXlog base_definitions['NXdetector'] = NXdetector diff --git a/src/scippnexus/v2/definitions/nxcansas.py b/src/scippnexus/v2/definitions/nxcansas.py new file mode 100644 index 00000000..b767cdff --- /dev/null +++ b/src/scippnexus/v2/definitions/nxcansas.py @@ -0,0 +1,117 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import Literal, Optional, Union + +import scipp as sc + +from ...typing import H5Group +from ..base import Group, NXdata, NXobject, base_definitions, create_field + + +class SASdata: + nx_class = 'NXdata' + + def __init__(self, + data: sc.DataArray, + Q_variances: Optional[Literal['uncertainties', 'resolutions']] = None): + self.data = data + valid = 
('uncertainties', 'resolutions') + if Q_variances not in (None, ) + valid: + raise ValueError(f"Q_variances must be in {valid}") + self._variances = Q_variances + + def __write_to_nexus_group__(self, group: H5Group): + da = self.data + group.attrs['canSAS_class'] = 'SASdata' + group.attrs['signal'] = 'I' + group.attrs['axes'] = da.dims # for NeXus compliance, same as I_axes + group.attrs['I_axes'] = da.dims + group.attrs['Q_indices'] = tuple(da.dims.index(d) for d in da.coords['Q'].dims) + # TODO writing Field should deal with variances + signal = create_field(group, 'I', sc.values(da.data)) + # We use the _errors suffix for NeXus compliance, unlike the examples given in + # NXcanSAS. + if da.variances is not None: + signal.attrs['uncertainties'] = 'I_errors' + create_field(group, 'I_errors', sc.stddevs(da.data)) + if da.coords.is_edges('Q'): + raise ValueError( + "Q is given as bin-edges, but NXcanSAS requires Q points (such as " + "bin centers).") + coord = create_field(group, 'Q', da.coords['Q']) + if da.coords['Q'].variances is not None: + if self._variances is None: + raise ValueError( + "Q has variances, must specify whether these represent " + "'uncertainties' or 'resolutions' using the 'Q_variances' option'") + + coord.attrs[self._variances] = 'Q_errors' + create_field(group, 'Q_errors', sc.stddevs(da.coords['Q'])) + + +class _SASdata(NXdata): + + def __init__(self, group: Group): + fallback_dims = group.attrs.get('I_axes') + if fallback_dims is not None: + fallback_dims = (fallback_dims, ) + super().__init__(group, fallback_dims=fallback_dims, fallback_signal_name='I') + + # TODO Mechanism for custom error names + @staticmethod + def signal_errors(group: NXobject) -> Optional[str]: + signal_name = group.attrs.get('signal', 'I') + signal = group._group[signal_name] + return signal.attrs.get('uncertainties') + + def coord_errors(group: NXobject, name: str) -> Optional[str]: + if name != 'Q': + return None + # TODO This naively stores this as Scipp errors, 
which are just Gaussian. + # This is probably not correct in all cases. + uncertainties = group[name].attrs.get('uncertainties') + resolutions = group[name].attrs.get('resolutions') + if uncertainties is None: + return resolutions + elif resolutions is None: + return uncertainties + raise RuntimeError("Cannot handle both uncertainties and resolutions for Q") + + +class _SAStransmission_spectrum(NXdata): + + def __init__(self, group: Group): + # TODO A valid file should have T_axes, do we need to fallback? + super().__init__(group, + fallback_dims=(group.attrs.get('T_axes', 'lambda'), ), + fallback_signal_name='T') + + +class SASentry: + nx_class = 'NXentry' + + def __init__(self, *, title: str, run: Union[str, int]): + self.title = title + self.run = run + + def __write_to_nexus_group__(self, group: H5Group): + group.attrs['canSAS_class'] = 'SASentry' + group.attrs['version'] = '1.0' + group.attrs['definition'] = 'NXcanSAS' + create_field(group, 'title', self.title) + create_field(group, 'run', self.run) + + +class NXcanSAS: + + def get(self, key: type, group: Group) -> type: + if (cls := group.attrs.get('canSAS_class')) is not None: + if cls == 'SASdata': + return _SASdata + if cls == 'SAStransmission_spectrum': + return _SAStransmission_spectrum + return base_definitions.get(key, group) + + +definitions = NXcanSAS() diff --git a/tests/application_definition_test.py b/tests/application_definition_test.py index 8c9d391b..29582af5 100644 --- a/tests/application_definition_test.py +++ b/tests/application_definition_test.py @@ -1,9 +1,10 @@ import h5py import pytest import scipp as sc +from scipp.testing import assert_identical import scippnexus.v2 as snx -from scippnexus.v2.definitions.nxcansas import NXcanSAS, SASdata, SASentry +from scippnexus.v2.definitions import nxcansas @pytest.fixture() @@ -16,7 +17,7 @@ def nxroot(): def test_setitem_SASentry(nxroot): - nxroot['sasentry'] = SASentry(title='A test', run=12345) + nxroot['sasentry'] = nxcansas.SASentry(title='A 
test', run=12345) assert 'sasentry' in nxroot entry = nxroot['sasentry'] assert entry.attrs['definition'] == 'NXcanSAS' @@ -39,13 +40,12 @@ def I_of_Q(): def test_setitem_SASdata_raises_ValueError_when_given_bin_edges(nxroot, I_of_Q): with pytest.raises(ValueError): - nxroot['sasdata'] = SASdata(I_of_Q, Q_variances='resolutions') + nxroot['sasdata'] = nxcansas.SASdata(I_of_Q, Q_variances='resolutions') def test_setitem_SASdata(nxroot, I_of_Q): I_of_Q.coords['Q'] = I_of_Q.coords['Q'][1:] - nxroot['sasdata'] = SASdata(I_of_Q, Q_variances='resolutions') - nxroot._definition = NXcanSAS + nxroot['sasdata'] = nxcansas.SASdata(I_of_Q, Q_variances='resolutions') data = nxroot['sasdata'] assert sc.identical(data[...], I_of_Q) @@ -60,4 +60,42 @@ def test_setitem_SASdata_raises_if_interpretation_of_variances_not_specified(nxr da.coords['Q'] = sc.linspace('Q', 0, 1, num=5, unit='1/angstrom') da.coords['Q'].variances = sc.array(dims=['Q'], values=[1, 1, 4, 4, 1]).values with pytest.raises(ValueError): - nxroot['sasdata'] = SASdata(da) + nxroot['sasdata'] = nxcansas.SASdata(da) + + +def test_load_SASdata(nxroot): + nxroot['sasentry'] = nxcansas.SASentry(title='A test', run=12345) + entry = nxroot['sasentry'] + da = sc.DataArray( + sc.array(dims=['Q'], values=[1, 2, 3], unit=''), + coords={'Q': sc.array(dims=['Q'], values=[1, 2, 3, 4], unit='1/angstrom')}) + group = entry.create_class('sasdata', snx.NXdata) + group._group.attrs['canSAS_class'] = 'SASdata' + group._group.attrs['signal'] = 'I' + group._group.attrs['I_axes'] = 'Q' + group['I'] = da.data + group['Q'] = da.coords['Q'] + sasroot = snx.Group(nxroot._group, definitions=nxcansas.definitions) + loaded = sasroot['sasentry/sasdata'][()] + assert_identical(loaded, da) + + +def test_load_SAStransmission_spectrum(nxroot): + nxroot['sasentry'] = nxcansas.SASentry(title='A test', run=12345) + entry = nxroot['sasentry'] + spectrum = sc.DataArray(sc.array(dims=['lambda'], values=[1, 2, 3], unit='counts'), + coords={ + 'lambda': + 
sc.array(dims=['lambda'], + values=[1, 2, 3, 4], + unit='angstrom') + }) + group = entry.create_class('sastransmission_spectrum', snx.NXdata) + group._group.attrs['canSAS_class'] = 'SAStransmission_spectrum' + group._group.attrs['signal'] = 'T' + group._group.attrs['T_axes'] = 'lambda' + group['T'] = spectrum.data + group['lambda'] = spectrum.coords['lambda'] + sasroot = snx.Group(nxroot._group, definitions=nxcansas.definitions) + loaded = sasroot['sasentry/sastransmission_spectrum'][()] + assert_identical(loaded, spectrum) From d1260b95359e5bbfb45afdbcd0a182549e2532f1 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 13:28:56 +0100 Subject: [PATCH 44/98] Change folder name --- .../v2/{definitions => application_definitions}/nxcansas.py | 0 tests/application_definition_test.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/scippnexus/v2/{definitions => application_definitions}/nxcansas.py (100%) diff --git a/src/scippnexus/v2/definitions/nxcansas.py b/src/scippnexus/v2/application_definitions/nxcansas.py similarity index 100% rename from src/scippnexus/v2/definitions/nxcansas.py rename to src/scippnexus/v2/application_definitions/nxcansas.py diff --git a/tests/application_definition_test.py b/tests/application_definition_test.py index 29582af5..b8c48d52 100644 --- a/tests/application_definition_test.py +++ b/tests/application_definition_test.py @@ -4,7 +4,7 @@ from scipp.testing import assert_identical import scippnexus.v2 as snx -from scippnexus.v2.definitions import nxcansas +from scippnexus.v2.application_definitions import nxcansas @pytest.fixture() From a99bb94c39811803ec355d8f248829403b21bb2e Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 13:37:51 +0100 Subject: [PATCH 45/98] Proper submodule --- .../nxcansas/__init__.py | 9 +++++ .../{ => nxcansas}/nxcansas.py | 34 +++++++++---------- 2 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 
src/scippnexus/v2/application_definitions/nxcansas/__init__.py rename src/scippnexus/v2/application_definitions/{ => nxcansas}/nxcansas.py (97%) diff --git a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py new file mode 100644 index 00000000..35761501 --- /dev/null +++ b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock + +# flake8: noqa + +from .nxcansas import SASdata, SASentry, definitions + +__all__ = [definitions, SASentry, SASdata] diff --git a/src/scippnexus/v2/application_definitions/nxcansas.py b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py similarity index 97% rename from src/scippnexus/v2/application_definitions/nxcansas.py rename to src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py index b767cdff..67b26ccc 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas.py +++ b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py @@ -5,8 +5,23 @@ import scipp as sc -from ...typing import H5Group -from ..base import Group, NXdata, NXobject, base_definitions, create_field +from ....typing import H5Group +from ...base import Group, NXdata, NXobject, base_definitions, create_field + + +class SASentry: + nx_class = 'NXentry' + + def __init__(self, *, title: str, run: Union[str, int]): + self.title = title + self.run = run + + def __write_to_nexus_group__(self, group: H5Group): + group.attrs['canSAS_class'] = 'SASentry' + group.attrs['version'] = '1.0' + group.attrs['definition'] = 'NXcanSAS' + create_field(group, 'title', self.title) + create_field(group, 'run', self.run) class SASdata: @@ -88,21 +103,6 @@ def __init__(self, group: Group): fallback_signal_name='T') -class SASentry: - nx_class = 'NXentry' - - def __init__(self, *, title: str, run: Union[str, int]): - 
self.title = title - self.run = run - - def __write_to_nexus_group__(self, group: H5Group): - group.attrs['canSAS_class'] = 'SASentry' - group.attrs['version'] = '1.0' - group.attrs['definition'] = 'NXcanSAS' - create_field(group, 'title', self.title) - create_field(group, 'run', self.run) - - class NXcanSAS: def get(self, key: type, group: Group) -> type: From 2aec1f29bca9a487c86de2f22c6a717fb6519d6c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 14:03:23 +0100 Subject: [PATCH 46/98] Handle and test NXsample --- src/scippnexus/v2/base.py | 6 ++- src/scippnexus/v2/nexus_classes.py | 21 +++++++---- src/scippnexus/v2/nxtransformations.py | 8 ++-- tests/nxsample_test.py | 51 ++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 tests/nxsample_test.py diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 43dc00c8..2b06e16f 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -229,7 +229,7 @@ def __getitem__(self, select) -> Union[Any, sc.Variable]: variable, start=starts[0], scaling_factor=self.attrs.get('scaling_factor')) - if variable.ndim == 0 and variable.unit is None: + if variable.ndim == 0 and variable.unit is None and variable.fields is None: # Work around scipp/scipp#2815, and avoid returning NumPy bool if isinstance(variable.values, np.ndarray) and variable.dtype != 'bool': return variable.values[()] @@ -301,7 +301,9 @@ def sizes(self) -> Dict[str, int]: # exclude geometry/tansform groups? return sc.DataGroup(self._group).sizes - def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + def index_child( + self, child: Union[Field, Group], sel: ScippIndex + ) -> Union[sc.Variable, sc.DataArray, sc.Dataset, sc.DataGroup]: # Note that this will be similar in NXdata, but there we need to handle # bin edges as well. 
child_sel = to_child_select(self.sizes.keys(), child.dims, sel) diff --git a/src/scippnexus/v2/nexus_classes.py b/src/scippnexus/v2/nexus_classes.py index ef209389..98497f2a 100644 --- a/src/scippnexus/v2/nexus_classes.py +++ b/src/scippnexus/v2/nexus_classes.py @@ -1,19 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -#from .nxfermi_chopper import NXfermi_chopper # noqa F401 -#from .nxlog import NXlog # noqa F401 -#from .nxmonitor import NXmonitor # noqa F401 from .base import NXobject # noqa F401 from .base import NXdata, NXdetector, NXgeometry, NXlog, NXmonitor, NXroot # noqa F401 from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 -#from .nxdata import NXdata # noqa F401 -#from .nxdetector import NXdetector # noqa F401 -#from .nxdisk_chopper import NXdisk_chopper # noqa F401 from .nxevent_data import NXevent_data # noqa F401 from .nxoff_geometry import NXoff_geometry # noqa F401 -#from .nxsample import NXsample # noqa F401 -#from .nxsource import NXsource # noqa F401 +from .nxsample import NXsample # noqa F401 from .nxtransformations import NXtransformations # noqa F401 @@ -73,10 +66,18 @@ class NXdetector_module(NXobject): """NXdetector_module""" +class NXdisk_chopper(NXobject): + """NXdisk_chopper""" + + class NXenvironment(NXobject): """NXenvironment""" +class NXfermi_chopper(NXobject): + """NXfermi_chopper""" + + class NXfilter(NXobject): """NXfilter""" @@ -161,6 +162,10 @@ class NXshape(NXobject): """NXshape""" +class NXsource(NXobject): + """NXsource""" + + class NXslit(NXobject): """NXslit""" diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index f0fbb001..0ec659fa 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -82,8 +82,6 @@ def __getitem__(self, select: ScippIndex): try: if isinstance(value, sc.DataGroup): return value - raise 
TransformationError( - f"Failed to load transformation at {self.name}.") t = value * self.vector v = t if isinstance(t, sc.Variable) else t.data if transformation_type == 'translation': @@ -111,10 +109,10 @@ def __getitem__(self, select: ScippIndex): transform = sc.DataArray(transform) transform.attrs['depends_on'] = sc.scalar(depends_on[select]) return transform - except (sc.DimensionError, sc.UnitError, TransformationError) as e: + except (sc.DimensionError, sc.UnitError, TransformationError): + # TODO We should probably try to return some other data structure and + # also insert offset and other attributes. return value - raise NexusStructureError( - f"Invalid transformation in NXtransformations: {e}") from e def _interpolate_transform(transform, xnew): diff --git a/tests/nxsample_test.py b/tests/nxsample_test.py new file mode 100644 index 00000000..e8738f35 --- /dev/null +++ b/tests/nxsample_test.py @@ -0,0 +1,51 @@ +import h5py +import numpy as np +import pytest +import scipp as sc +from scipp import spatial +from scipp.testing import assert_identical + +import scippnexus.v2 as snx + + +@pytest.fixture() +def nxroot(request): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + root = snx.Group(f, definitions=snx.base_definitions) + root.create_class('entry', snx.NXentry) + yield root + + +def test_ub_matrix_loaded_as_linear_transform_with_inverse_angstrom_unit(nxroot): + sample = nxroot.create_class('data1', snx.NXsample) + matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + sample['ub_matrix'] = matrix + loaded = sample[()] + assert_identical( + loaded, + sc.DataGroup( + ub_matrix=spatial.linear_transform(value=matrix, unit='1/angstrom'))) + + +def test_ub_matrix_array_can_be_loaded(nxroot): + sample = nxroot.create_class('data1', snx.NXsample) + matrices = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[2, 3, 4], [5, 6, 7], [8, 9, 10]]]) + sample['ub_matrix'] = 
matrices + loaded = sample[()] + assert_identical( + loaded, + sc.DataGroup(ub_matrix=spatial.linear_transforms( + dims=('dim_0', ), values=matrices, unit='1/angstrom'))) + + +def test_orientation_matrix_loaded_as_linear_transform_with_dimensionless_unit(nxroot): + sample = nxroot.create_class('data1', snx.NXsample) + matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + sample['orientation_matrix'] = matrix + loaded = sample[()] + assert_identical( + loaded, + sc.DataGroup( + orientation_matrix=spatial.linear_transform(value=matrix, unit=''))) From 2b2bf524c9f525bb0bc3884712b5b21d6b0b3051 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 14:12:43 +0100 Subject: [PATCH 47/98] Add YMIR file --- tests/externalfile.py | 1 + tests/load_files_test.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/externalfile.py b/tests/externalfile.py index 3e9cd39b..57f77fac 100644 --- a/tests/externalfile.py +++ b/tests/externalfile.py @@ -19,6 +19,7 @@ def _make_pooch(): '2023/LOKI_mcstas_nexus_geometry.nxs': 'md5:f431d9775a53caffeebe9b879189b17c', '2023/NMX_2e11-rechunk.h5': 'md5:1174c208614b2e7a5faddc284b41d2c9', + '2023/YMIR_038243_00010244.hdf': 'md5:cefb04b6d4d36f16e7f329a6045ad129', }) diff --git a/tests/load_files_test.py b/tests/load_files_test.py index 106675cb..162b441f 100644 --- a/tests/load_files_test.py +++ b/tests/load_files_test.py @@ -15,6 +15,7 @@ '2023/DREAM_mccode.h5', '2023/LOKI_mcstas_nexus_geometry.nxs', '2023/NMX_2e11-rechunk.h5', + '2023/YMIR_038243_00010244.hdf', ]) def test_files_load_as_data_groups(name): with snx.File(externalfile.get_path(name)) as f: From 0874dac72f74c1da17249f6463d746ab8c63ef39 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 23 Mar 2023 14:25:27 +0100 Subject: [PATCH 48/98] Split large file --- src/scippnexus/v2/__init__.py | 6 +- .../nxcansas/nxcansas.py | 3 +- src/scippnexus/v2/base.py | 352 +----------------- src/scippnexus/v2/nexus_classes.py | 4 +- src/scippnexus/v2/nxdata.py | 336 
+++++++++++++++++ 5 files changed, 357 insertions(+), 344 deletions(-) create mode 100644 src/scippnexus/v2/nxdata.py diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v2/__init__.py index 2e6a8d07..732293dd 100644 --- a/src/scippnexus/v2/__init__.py +++ b/src/scippnexus/v2/__init__.py @@ -11,9 +11,6 @@ __version__ = "0.0.0" from .. import typing -#from .nxdata import NXdataStrategy -#from .nxdetector import NXdetectorStrategy -#from .nxlog import NXlogStrategy from .base import ( Field, Group, @@ -22,8 +19,7 @@ base_definitions, create_class, create_field, - group_events_by_detector_number, ) -#from .definition import ApplicationDefinition, make_definition from .file import File from .nexus_classes import * +from .nxdata import group_events_by_detector_number diff --git a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py index 67b26ccc..dbb9e2b9 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py +++ b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py @@ -6,7 +6,8 @@ import scipp as sc from ....typing import H5Group -from ...base import Group, NXdata, NXobject, base_definitions, create_field +from ...base import Group, NXobject, base_definitions, create_field +from ...nxdata import NXdata class SASentry: diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 2b06e16f..8f050971 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -511,271 +511,6 @@ def shape(self) -> Tuple[int, ...]: return tuple(self.sizes.values()) -def _guess_dims(dims, shape, dataset: H5Dataset): - """Guess dims of non-signal dataset based on shape. - - Does not check for potential bin-edge coord. 
- """ - if shape == dataset.shape: - return dims - lut = {} - for d, s in zip(dims, shape): - if shape.count(s) == 1: - lut[s] = d - try: - return [lut[s] for s in dataset.shape] - except KeyError: - return None - - -class NXdata(NXobject): - - def __init__(self, - group: Group, - fallback_dims: Optional[Tuple[str, ...]] = None, - fallback_signal_name: Optional[str] = None): - super().__init__(group) - self._valid = True - # Must do full consistency check here, to define self.sizes: - # - squeeze correctly - # - check if coord dims are compatible with signal dims - # - check if there is a signal - # If not the case, fall back do DataGroup.sizes - # Can we just set field dims here? - self._signal_name = None - self._signal = None - self._aux_signals = group.attrs.get('auxiliary_signals', []) - if (name := group.attrs.get( - 'signal', - fallback_signal_name)) is not None and name in group._children: - self._signal_name = name - self._signal = group._children[name] - else: - # Legacy NXdata defines signal not as group attribute, but attr on dataset - for name, field in group._children.items(): - # What is the meaning of the attribute value? It is undocumented, - # we simply ignore it. - if 'signal' in field.attrs: - self._signal_name = name - self._signal = group._children[name] - break - - axes = group.attrs.get('axes') - signal_axes = None if self._signal is None else self._signal.attrs.get('axes') - - axis_index = {} - for name, field in group._children.items(): - if (axis := field.attrs.get('axis')) is not None: - axis_index[name] = axis - - # Apparently it is not possible to define dim labels unless there are - # corresponding coords. Special case of '.' entries means "no coord". - def _get_group_dims(): - if axes is not None: - return [f'dim_{i}' if a == '.' 
else a for i, a in enumerate(axes)] - if signal_axes is not None: - return tuple(signal_axes.split(',')) - if axis_index: - return [ - k for k, _ in sorted(axis_index.items(), key=lambda item: item[1]) - ] - return None - - group_dims = _get_group_dims() - - if self._signal is None: - self._valid = False - else: - if group_dims is not None: - shape = self._signal.dataset.shape - shape = _squeeze_trailing(group_dims, shape) - self._signal.sizes = dict(zip(group_dims, shape)) - elif fallback_dims is not None: - shape = self._signal.dataset.shape - group_dims = [ - fallback_dims[i] if i < len(fallback_dims) else f'dim_{i}' - for i in range(len(shape)) - ] - self._signal.sizes = dict(zip(group_dims, shape)) - - if axes is not None: - # Unlike self.dims we *drop* entries that are '.' - named_axes = [a for a in axes if a != '.'] - elif signal_axes is not None: - named_axes = signal_axes.split(',') - elif fallback_dims is not None: - named_axes = fallback_dims - else: - named_axes = [] - - # 3. Find field dims - indices_suffix = '_indices' - indices_attrs = { - key[:-len(indices_suffix)]: attr - for key, attr in group.attrs.items() if key.endswith(indices_suffix) - } - - dims = np.array(group_dims) - dims_from_indices = { - key: tuple(dims[np.array(indices).flatten()]) - for key, indices in indices_attrs.items() - } - - def get_dims(name, field): - # Newly written files should always contain indices attributes, but the - # standard recommends that readers should also make "best effort" guess - # since legacy files do not set this attribute. - # TODO signal and errors? 
- if name in (self._signal_name, ): - return group_dims - # if name in [self._signal_name, self._errors_name]: - # return self._get_group_dims() # if None, field determines dims itself - if name in self._aux_signals: - return _guess_dims(group_dims, self._signal.dataset.shape, - field.dataset) - if (dims := dims_from_indices.get(name)) is not None: - return dims - if (axis := axis_index.get(name)) is not None: - return (group_dims[axis - 1], ) - if name in named_axes: - # If there are named axes then items of same name are "dimension - # coordinates", i.e., have a dim matching their name. - # However, if the item is not 1-D we need more labels. Try to use labels - # of signal if dimensionality matches. - if self._signal is not None and len(field.dataset.shape) == len( - self._signal.dataset.shape): - return group_dims - return (name, ) - if self._signal is not None and group_dims is not None: - return _guess_dims(group_dims, self._signal.dataset.shape, - field.dataset) - - for name, field in group._children.items(): - if not isinstance(field, Field): - if name not in self._special_fields: - self._valid = False - elif (dims := get_dims(name, field)) is not None: - # The convention here is that the given dimensions apply to the shapes - # starting from the left. So we only squeeze dimensions that are after - # len(dims). 
- shape = _squeeze_trailing(dims, field.dataset.shape) - field.sizes = dict(zip(dims, shape)) - elif self._valid: - s1 = self._signal.sizes - s2 = field.sizes - if not set(s2.keys()).issubset(set(s1.keys())): - self._valid = False - elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): - self._valid = False - - @cached_property - def sizes(self) -> Dict[str, int]: - return self._signal.sizes if self._valid else super().sizes - - @property - def unit(self) -> Union[None, sc.Unit]: - return self._signal.unit if self._valid else super().unit - - def _bin_edge_dim(self, coord: Field) -> Union[None, str]: - if not isinstance(coord, Field): - return None - sizes = self.sizes - for dim, size in zip(coord.dims, coord.shape): - if (sz := sizes.get(dim)) is not None and sz + 1 == size: - return dim - return None - - def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: - child_sel = to_child_select(self._group.dims, - child.dims, - sel, - bin_edge_dim=self._bin_edge_dim(child)) - return child[child_sel] - - def assemble(self, - dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: - if not self._valid: - return super().assemble(dg) - aux = {name: dg.pop(name) for name in self._aux_signals} - coords = sc.DataGroup(dg) - signal = coords.pop(self._signal_name) - da = sc.DataArray(data=signal) - da = self._add_coords(da, coords) - if aux: - signals = {self._signal_name: da} - signals.update(aux) - return sc.Dataset(signals) - return da - - def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: - if len(coord.dims) == 1: - return coord.dims[0] - if name in coord.dims and name in self.dims: - return name - return self._bin_edge_dim(coord) - - def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> bool: - dim_of_coord = self._dim_of_coord(name, coord) - if dim_of_coord is None: - return False - if dim_of_coord not in da.dims: - return True - return False - - def _add_coords(self, da: 
sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: - for name, coord in coords.items(): - if not isinstance(coord, sc.Variable): - da.coords[name] = sc.scalar(coord) - # We need the shape *before* slicing to determine dims, so we get the - # field from the group for the conditional. - elif self._coord_to_attr(da, name, self._group[name]): - da.attrs[name] = coord - else: - da.coords[name] = coord - return da - - -def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[int, ...]: - return shape[:len(dims)] + tuple(size for size in shape[len(dims):] if size != 1) - - -class NXlog(NXdata): - - def __init__(self, group: Group): - super().__init__(group, fallback_dims=('time', ), fallback_signal_name='value') - if (time := self._group._children.get('time')) is not None: - time._is_time = True - - -class NXdetector(NXdata): - _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] - - @staticmethod - def _detector_number(group: Group) -> Optional[str]: - for name in NXdetector._detector_number_fields: - if name in group._children: - return name - - def __init__(self, group: Group): - fallback_dims = None - if (det_num_name := NXdetector._detector_number(group)) is not None: - if group._children[det_num_name].dataset.ndim == 1: - fallback_dims = ('detector_number', ) - super().__init__(group, - fallback_dims=fallback_dims, - fallback_signal_name='data') - - @property - def detector_number(self) -> Optional[str]: - return self._detector_number(self._group) - - -class NXmonitor(NXdata): - - def __init__(self, group: Group): - super().__init__(group, fallback_signal_name='data') - - class NXgeometry(NXobject): def __init__(self, group: Group): @@ -787,26 +522,6 @@ def assemble_as_child(children: sc.DataGroup, return sc.scalar(children) -class DefinitionsDict: - - def __init__(self): - self._definitions = {} - - def __setitem__(self, nx_class: str, definition: type): - self._definitions[nx_class] = definition - - def get(self, 
nx_class: str, group: Group) -> type: - return self._definitions.get(nx_class, NXobject) - - -base_definitions = DefinitionsDict() -base_definitions['NXdata'] = NXdata -base_definitions['NXlog'] = NXlog -base_definitions['NXdetector'] = NXdetector -base_definitions['NXgeometry'] = NXgeometry -base_definitions['NXmonitor'] = NXmonitor - - def create_field(group: H5Group, name: str, data: DimensionedArray, **kwargs) -> H5Dataset: if not isinstance(data, sc.Variable): @@ -842,58 +557,23 @@ def create_class(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Gro return group -def _group_events(*, - event_data: sc.DataArray, - grouping: Optional[sc.Variable] = None) -> sc.DataArray: - if isinstance(event_data, sc.DataGroup): - raise NexusStructureError("Invalid NXevent_data in NXdetector.") - if grouping is None: - event_id = 'event_id' - else: - # copy since sc.bin cannot deal with a non-contiguous view - event_id = grouping.flatten(to='event_id').copy() - event_data.bins.coords['event_time_zero'] = sc.bins_like( - event_data, fill_value=event_data.coords['event_time_zero']) - # After loading raw NXevent_data it is guaranteed that the event table - # is contiguous and that there is no masking. We can therefore use the - # more efficient approach of binning from scratch instead of erasing the - # 'event_time_zero' binning defined by NXevent_data. 
- event_data = event_data.bins.constituents['data'].group(event_id) - # if self._grouping is None: - # event_data.coords[self._grouping_key] = event_data.coords.pop('event_id') - # else: - # del event_data.coords['event_id'] - if grouping is None: - return event_data - return event_data.fold(dim='event_id', sizes=grouping.sizes) - - -def _find_event_entries(dg: sc.DataGroup) -> List[str]: - event_entries = [] - for name, value in dg.items(): - if isinstance( - value, sc.DataArray - ) and 'event_time_zero' in value.coords and value.bins is not None: - event_entries.append(name) - return event_entries - - -def group_events_by_detector_number(dg: sc.DataGroup) -> sc.DataArray: - event_entry = _find_event_entries(dg)[0] - events = dg.pop(event_entry) - grouping_key = None - for key in NXdetector._detector_number_fields: - if (grouping := dg.get(key)) is not None: - grouping_key = key - break - grouping = None if grouping_key is None else asarray(dg.pop(grouping_key)) - da = _group_events(event_data=events, grouping=grouping) - # TODO What about _coord_to_attr mapping as NXdata? - da.coords.update(dg) - return da - - @lru_cache() def _nx_class_registry(): from . 
import nexus_classes return dict(inspect.getmembers(nexus_classes, inspect.isclass)) + + +class DefinitionsDict: + + def __init__(self): + self._definitions = {} + + def __setitem__(self, nx_class: str, definition: type): + self._definitions[nx_class] = definition + + def get(self, nx_class: str, group: Group) -> type: + return self._definitions.get(nx_class, NXobject) + + +base_definitions = DefinitionsDict() +base_definitions['NXgeometry'] = NXgeometry diff --git a/src/scippnexus/v2/nexus_classes.py b/src/scippnexus/v2/nexus_classes.py index 98497f2a..06a0b5de 100644 --- a/src/scippnexus/v2/nexus_classes.py +++ b/src/scippnexus/v2/nexus_classes.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from .base import NXobject # noqa F401 -from .base import NXdata, NXdetector, NXgeometry, NXlog, NXmonitor, NXroot # noqa F401 +from .base import NXgeometry, NXobject, NXroot # noqa F401 from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 +from .nxdata import NXdata, NXdetector, NXlog, NXmonitor # noqa F401 from .nxevent_data import NXevent_data # noqa F401 from .nxoff_geometry import NXoff_geometry # noqa F401 from .nxsample import NXsample # noqa F401 diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py new file mode 100644 index 00000000..141743ea --- /dev/null +++ b/src/scippnexus/v2/nxdata.py @@ -0,0 +1,336 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from __future__ import annotations + +from functools import cached_property +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import scipp as sc + +from .._common import to_child_select +from ..typing import H5Dataset, ScippIndex +from .base import Field, Group, NexusStructureError, NXobject, asarray, base_definitions + + +def _guess_dims(dims, shape, dataset: 
H5Dataset): + """Guess dims of non-signal dataset based on shape. + + Does not check for potential bin-edge coord. + """ + if shape == dataset.shape: + return dims + lut = {} + for d, s in zip(dims, shape): + if shape.count(s) == 1: + lut[s] = d + try: + return [lut[s] for s in dataset.shape] + except KeyError: + return None + + +class NXdata(NXobject): + + def __init__(self, + group: Group, + fallback_dims: Optional[Tuple[str, ...]] = None, + fallback_signal_name: Optional[str] = None): + super().__init__(group) + self._valid = True + # Must do full consistency check here, to define self.sizes: + # - squeeze correctly + # - check if coord dims are compatible with signal dims + # - check if there is a signal + # If not the case, fall back do DataGroup.sizes + # Can we just set field dims here? + self._signal_name = None + self._signal = None + self._aux_signals = group.attrs.get('auxiliary_signals', []) + if (name := group.attrs.get( + 'signal', + fallback_signal_name)) is not None and name in group._children: + self._signal_name = name + self._signal = group._children[name] + else: + # Legacy NXdata defines signal not as group attribute, but attr on dataset + for name, field in group._children.items(): + # What is the meaning of the attribute value? It is undocumented, + # we simply ignore it. + if 'signal' in field.attrs: + self._signal_name = name + self._signal = group._children[name] + break + + axes = group.attrs.get('axes') + signal_axes = None if self._signal is None else self._signal.attrs.get('axes') + + axis_index = {} + for name, field in group._children.items(): + if (axis := field.attrs.get('axis')) is not None: + axis_index[name] = axis + + # Apparently it is not possible to define dim labels unless there are + # corresponding coords. Special case of '.' entries means "no coord". + def _get_group_dims(): + if axes is not None: + return [f'dim_{i}' if a == '.' 
else a for i, a in enumerate(axes)] + if signal_axes is not None: + return tuple(signal_axes.split(',')) + if axis_index: + return [ + k for k, _ in sorted(axis_index.items(), key=lambda item: item[1]) + ] + return None + + group_dims = _get_group_dims() + + if self._signal is None: + self._valid = False + else: + if group_dims is not None: + shape = self._signal.dataset.shape + shape = _squeeze_trailing(group_dims, shape) + self._signal.sizes = dict(zip(group_dims, shape)) + elif fallback_dims is not None: + shape = self._signal.dataset.shape + group_dims = [ + fallback_dims[i] if i < len(fallback_dims) else f'dim_{i}' + for i in range(len(shape)) + ] + self._signal.sizes = dict(zip(group_dims, shape)) + + if axes is not None: + # Unlike self.dims we *drop* entries that are '.' + named_axes = [a for a in axes if a != '.'] + elif signal_axes is not None: + named_axes = signal_axes.split(',') + elif fallback_dims is not None: + named_axes = fallback_dims + else: + named_axes = [] + + # 3. Find field dims + indices_suffix = '_indices' + indices_attrs = { + key[:-len(indices_suffix)]: attr + for key, attr in group.attrs.items() if key.endswith(indices_suffix) + } + + dims = np.array(group_dims) + dims_from_indices = { + key: tuple(dims[np.array(indices).flatten()]) + for key, indices in indices_attrs.items() + } + + def get_dims(name, field): + # Newly written files should always contain indices attributes, but the + # standard recommends that readers should also make "best effort" guess + # since legacy files do not set this attribute. + # TODO signal and errors? 
+ if name in (self._signal_name, ): + return group_dims + # if name in [self._signal_name, self._errors_name]: + # return self._get_group_dims() # if None, field determines dims itself + if name in self._aux_signals: + return _guess_dims(group_dims, self._signal.dataset.shape, + field.dataset) + if (dims := dims_from_indices.get(name)) is not None: + return dims + if (axis := axis_index.get(name)) is not None: + return (group_dims[axis - 1], ) + if name in named_axes: + # If there are named axes then items of same name are "dimension + # coordinates", i.e., have a dim matching their name. + # However, if the item is not 1-D we need more labels. Try to use labels + # of signal if dimensionality matches. + if self._signal is not None and len(field.dataset.shape) == len( + self._signal.dataset.shape): + return group_dims + return (name, ) + if self._signal is not None and group_dims is not None: + return _guess_dims(group_dims, self._signal.dataset.shape, + field.dataset) + + for name, field in group._children.items(): + if not isinstance(field, Field): + if name not in self._special_fields: + self._valid = False + elif (dims := get_dims(name, field)) is not None: + # The convention here is that the given dimensions apply to the shapes + # starting from the left. So we only squeeze dimensions that are after + # len(dims). 
+ shape = _squeeze_trailing(dims, field.dataset.shape) + field.sizes = dict(zip(dims, shape)) + elif self._valid: + s1 = self._signal.sizes + s2 = field.sizes + if not set(s2.keys()).issubset(set(s1.keys())): + self._valid = False + elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): + self._valid = False + + @cached_property + def sizes(self) -> Dict[str, int]: + return self._signal.sizes if self._valid else super().sizes + + @property + def unit(self) -> Union[None, sc.Unit]: + return self._signal.unit if self._valid else super().unit + + def _bin_edge_dim(self, coord: Field) -> Union[None, str]: + if not isinstance(coord, Field): + return None + sizes = self.sizes + for dim, size in zip(coord.dims, coord.shape): + if (sz := sizes.get(dim)) is not None and sz + 1 == size: + return dim + return None + + def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + child_sel = to_child_select(self._group.dims, + child.dims, + sel, + bin_edge_dim=self._bin_edge_dim(child)) + return child[child_sel] + + def assemble(self, + dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: + if not self._valid: + return super().assemble(dg) + aux = {name: dg.pop(name) for name in self._aux_signals} + coords = sc.DataGroup(dg) + signal = coords.pop(self._signal_name) + da = sc.DataArray(data=signal) + da = self._add_coords(da, coords) + if aux: + signals = {self._signal_name: da} + signals.update(aux) + return sc.Dataset(signals) + return da + + def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: + if len(coord.dims) == 1: + return coord.dims[0] + if name in coord.dims and name in self.dims: + return name + return self._bin_edge_dim(coord) + + def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> bool: + dim_of_coord = self._dim_of_coord(name, coord) + if dim_of_coord is None: + return False + if dim_of_coord not in da.dims: + return True + return False + + def _add_coords(self, da: 
sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: + for name, coord in coords.items(): + if not isinstance(coord, sc.Variable): + da.coords[name] = sc.scalar(coord) + # We need the shape *before* slicing to determine dims, so we get the + # field from the group for the conditional. + elif self._coord_to_attr(da, name, self._group[name]): + da.attrs[name] = coord + else: + da.coords[name] = coord + return da + + +def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[int, ...]: + return shape[:len(dims)] + tuple(size for size in shape[len(dims):] if size != 1) + + +class NXlog(NXdata): + + def __init__(self, group: Group): + super().__init__(group, fallback_dims=('time', ), fallback_signal_name='value') + if (time := self._group._children.get('time')) is not None: + time._is_time = True + + +class NXdetector(NXdata): + _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] + + @staticmethod + def _detector_number(group: Group) -> Optional[str]: + for name in NXdetector._detector_number_fields: + if name in group._children: + return name + + def __init__(self, group: Group): + fallback_dims = None + if (det_num_name := NXdetector._detector_number(group)) is not None: + if group._children[det_num_name].dataset.ndim == 1: + fallback_dims = ('detector_number', ) + super().__init__(group, + fallback_dims=fallback_dims, + fallback_signal_name='data') + + @property + def detector_number(self) -> Optional[str]: + return self._detector_number(self._group) + + +class NXmonitor(NXdata): + + def __init__(self, group: Group): + super().__init__(group, fallback_signal_name='data') + + +def _group_events(*, + event_data: sc.DataArray, + grouping: Optional[sc.Variable] = None) -> sc.DataArray: + if isinstance(event_data, sc.DataGroup): + raise NexusStructureError("Invalid NXevent_data in NXdetector.") + if grouping is None: + event_id = 'event_id' + else: + # copy since sc.bin cannot deal with a non-contiguous view + event_id = 
grouping.flatten(to='event_id').copy() + event_data.bins.coords['event_time_zero'] = sc.bins_like( + event_data, fill_value=event_data.coords['event_time_zero']) + # After loading raw NXevent_data it is guaranteed that the event table + # is contiguous and that there is no masking. We can therefore use the + # more efficient approach of binning from scratch instead of erasing the + # 'event_time_zero' binning defined by NXevent_data. + event_data = event_data.bins.constituents['data'].group(event_id) + # if self._grouping is None: + # event_data.coords[self._grouping_key] = event_data.coords.pop('event_id') + # else: + # del event_data.coords['event_id'] + if grouping is None: + return event_data + return event_data.fold(dim='event_id', sizes=grouping.sizes) + + +def _find_event_entries(dg: sc.DataGroup) -> List[str]: + event_entries = [] + for name, value in dg.items(): + if isinstance( + value, sc.DataArray + ) and 'event_time_zero' in value.coords and value.bins is not None: + event_entries.append(name) + return event_entries + + +def group_events_by_detector_number(dg: sc.DataGroup) -> sc.DataArray: + event_entry = _find_event_entries(dg)[0] + events = dg.pop(event_entry) + grouping_key = None + for key in NXdetector._detector_number_fields: + if (grouping := dg.get(key)) is not None: + grouping_key = key + break + grouping = None if grouping_key is None else asarray(dg.pop(grouping_key)) + da = _group_events(event_data=events, grouping=grouping) + # TODO What about _coord_to_attr mapping as NXdata? 
+ da.coords.update(dg) + return da + + +base_definitions['NXdata'] = NXdata +base_definitions['NXlog'] = NXlog +base_definitions['NXdetector'] = NXdetector +base_definitions['NXmonitor'] = NXmonitor From 126b6fd3129aae9a8ba7b86b26d89ac3f5ad877d Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 24 Mar 2023 10:23:04 +0100 Subject: [PATCH 49/98] Try to avoid NXobject dependency on Group --- .../nxcansas/nxcansas.py | 20 +++-- src/scippnexus/v2/base.py | 80 +++++++++++++------ src/scippnexus/v2/nxcylindrical_geometry.py | 8 +- src/scippnexus/v2/nxdata.py | 62 +++++++------- src/scippnexus/v2/nxevent_data.py | 15 ++-- src/scippnexus/v2/nxoff_geometry.py | 8 +- tests/nxtransformations_test.py | 6 +- 7 files changed, 120 insertions(+), 79 deletions(-) diff --git a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py index dbb9e2b9..74adbf05 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py +++ b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Literal, Optional, Union +from typing import Any, Dict, Literal, Optional, Union import scipp as sc from ....typing import H5Group -from ...base import Group, NXobject, base_definitions, create_field +from ...base import Field, Group, NXobject, base_definitions, create_field from ...nxdata import NXdata @@ -68,11 +68,14 @@ def __write_to_nexus_group__(self, group: H5Group): class _SASdata(NXdata): - def __init__(self, group: Group): - fallback_dims = group.attrs.get('I_axes') + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + fallback_dims = attrs.get('I_axes') if fallback_dims is not None: fallback_dims = (fallback_dims, ) - super().__init__(group, fallback_dims=fallback_dims, fallback_signal_name='I') + 
super().__init__(attrs=attrs, + children=children, + fallback_dims=fallback_dims, + fallback_signal_name='I') # TODO Mechanism for custom error names @staticmethod @@ -97,10 +100,11 @@ def coord_errors(group: NXobject, name: str) -> Optional[str]: class _SAStransmission_spectrum(NXdata): - def __init__(self, group: Group): + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): # TODO A valid file should have T_axes, do we need to fallback? - super().__init__(group, - fallback_dims=(group.attrs.get('T_axes', 'lambda'), ), + super().__init__(attrs=attrs, + children=children, + fallback_dims=(attrs.get('T_axes', 'lambda'), ), fallback_signal_name='T') diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 8f050971..b2a2d5fc 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -273,14 +273,24 @@ def _squeezed_field_sizes(dataset: H5Dataset) -> Dict[str, int]: class NXobject: - def __init__(self, group: Group): - self._group = group + def _init_field(self, field: Field): + field.sizes = _squeezed_field_sizes(field.dataset) + field.dtype = _dtype_fromdataset(field.dataset) + + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + from .nxtransformations import Transformation + self._attrs = attrs + self._children = children self._special_fields = {} self._transformations = {} - for name, field in group._children.items(): + for name, field in children.items(): + if name == 'depends_on': + self._special_fields[name] = field if isinstance(field, Field): - field.sizes = _squeezed_field_sizes(field.dataset) - field.dtype = _dtype_fromdataset(field.dataset) + self._init_field(field) + elif isinstance(field, Transformation): + if isinstance(field._obj, Field): + self._init_field(field._obj) elif (nx_class := field.attrs.get('NX_class')) is not None: if nx_class in [ 'NXoff_geometry', @@ -294,12 +304,13 @@ def __init__(self, group: Group): @property def unit(self) -> 
Union[None, sc.Unit]: - raise ValueError(f"Group-like {self._group.nx_class} has no well-defined unit") + raise ValueError( + f"Group-like {self._attrs.get('NX_class')} has no well-defined unit") @cached_property def sizes(self) -> Dict[str, int]: # exclude geometry/tansform groups? - return sc.DataGroup(self._group).sizes + return sc.DataGroup(self._children).sizes def index_child( self, child: Union[Field, Group], sel: ScippIndex @@ -320,6 +331,8 @@ def detector_number(self) -> Optional[str]: def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: for name, field in self._special_fields.items(): + if name == 'depends_on': + continue if name in self._transformations: continue det_num = self.detector_number @@ -327,14 +340,15 @@ def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: det_num = dg[det_num] dg[name] = field._nexus.assemble_as_child(dg[name], detector_number=det_num) if (depends_on := dg.get('depends_on')) is not None: - transform = self._group[depends_on] - # Avoid loading transform twice if it is a child of the same group - for name, transformations in self._transformations.items(): - if transform.name.startswith(transformations.name): - dg['depends_on'] = dg[name][depends_on.split('/')[-1]] - break - else: - dg['depends_on'] = transform[()] + dg['depends_on'] = sc.scalar(depends_on) + # transform = self._children[depends_on] + # # Avoid loading transform twice if it is a child of the same group + # for name, transformations in self._transformations.items(): + # if transform.name.startswith(transformations.name): + # dg['depends_on'] = dg[name][depends_on.split('/')[-1]] + # break + # else: + # dg['depends_on'] = transform[()] return dg def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: @@ -400,11 +414,29 @@ def file(self) -> Optional[Group]: @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: - items = { - name: Field(obj, parent=self) if is_dataset(obj) else Group( - obj, parent=self, 
definitions=self._definitions) - for name, obj in self._group.items() - } + # Transformations should be stored in NXtransformations, which is cumbersome + # to handle, since we need to check the parent of a transform to tell whether + # it is a transform. However, we can avoid this by simply treating everything + # referenced by a 'depends_on' field or attribute as a transform. + from .nxtransformations import Transformation + + def _make_child( + name: str, obj: Union[H5Dataset, + H5Group]) -> Union[Transformation, Field, Group]: + if name == 'depends_on': + target = obj[()] + obj = obj.parent[target] + # TODO Bad, we are recreating the group + parent = Group(obj.parent, definitions=self._definitions) + else: + parent = self + if is_dataset(obj): + child = Field(obj, parent=parent) + else: + child = Group(obj, parent=parent, definitions=self._definitions) + return Transformation(child) if name == 'depends_on' else child + + items = {name: _make_child(name, obj) for name, obj in self._group.items()} for suffix in ('_errors', '_error'): field_with_errors = [name for name in items if f'{name}{suffix}' in items] for name in field_with_errors: @@ -419,7 +451,9 @@ def _children(self) -> Dict[str, Union[Field, Group]]: @cached_property def _nexus(self) -> NXobject: - return self._definitions.get(self.attrs.get('NX_class'), group=self)(self) + return self._definitions.get(self.attrs.get('NX_class'), + group=self)(attrs=self.attrs, + children=self._children) def _populate_fields(self) -> None: _ = self._nexus @@ -455,10 +489,10 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: else: return self[sel.split('/')[0]][sel[sel.index('/') + 1:]] child = self._children[sel] - if isinstance(child, Field): + from .nxtransformations import Transformation + if isinstance(child, (Field, Transformation)): self._populate_fields() if self._is_nxtransformations: - from .nxtransformations import Transformation return Transformation(child) return child diff --git 
a/src/scippnexus/v2/nxcylindrical_geometry.py b/src/scippnexus/v2/nxcylindrical_geometry.py index bbe89279..a2716c01 100644 --- a/src/scippnexus/v2/nxcylindrical_geometry.py +++ b/src/scippnexus/v2/nxcylindrical_geometry.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional +from typing import Any, Dict, Optional, Union import scipp as sc @@ -52,9 +52,9 @@ class NXcylindrical_geometry(NXobject): 'cylinders': ('cylinder', 'vertex_index') } - def __init__(self, group: Group): - super().__init__(group) - for name, field in group._children.items(): + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for name, field in children.items(): if isinstance(field, Field): field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) if name == 'vertices': diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 141743ea..4d724f07 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -4,7 +4,7 @@ from __future__ import annotations from functools import cached_property -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import numpy as np import scipp as sc @@ -34,10 +34,11 @@ def _guess_dims(dims, shape, dataset: H5Dataset): class NXdata(NXobject): def __init__(self, - group: Group, + attrs: Dict[str, Any], + children: Dict[str, Union[Field, Group]], fallback_dims: Optional[Tuple[str, ...]] = None, fallback_signal_name: Optional[str] = None): - super().__init__(group) + super().__init__(attrs=attrs, children=children) self._valid = True # Must do full consistency check here, to define self.sizes: # - squeeze correctly @@ -47,27 +48,26 @@ def __init__(self, # Can we just set field dims here? 
self._signal_name = None self._signal = None - self._aux_signals = group.attrs.get('auxiliary_signals', []) - if (name := group.attrs.get( - 'signal', - fallback_signal_name)) is not None and name in group._children: + self._aux_signals = attrs.get('auxiliary_signals', []) + if (name := attrs.get('signal', + fallback_signal_name)) is not None and name in children: self._signal_name = name - self._signal = group._children[name] + self._signal = children[name] else: # Legacy NXdata defines signal not as group attribute, but attr on dataset - for name, field in group._children.items(): + for name, field in children.items(): # What is the meaning of the attribute value? It is undocumented, # we simply ignore it. if 'signal' in field.attrs: self._signal_name = name - self._signal = group._children[name] + self._signal = children[name] break - axes = group.attrs.get('axes') + axes = attrs.get('axes') signal_axes = None if self._signal is None else self._signal.attrs.get('axes') axis_index = {} - for name, field in group._children.items(): + for name, field in children.items(): if (axis := field.attrs.get('axis')) is not None: axis_index[name] = axis @@ -115,7 +115,7 @@ def _get_group_dims(): indices_suffix = '_indices' indices_attrs = { key[:-len(indices_suffix)]: attr - for key, attr in group.attrs.items() if key.endswith(indices_suffix) + for key, attr in attrs.items() if key.endswith(indices_suffix) } dims = np.array(group_dims) @@ -153,7 +153,7 @@ def get_dims(name, field): return _guess_dims(group_dims, self._signal.dataset.shape, field.dataset) - for name, field in group._children.items(): + for name, field in children.items(): if not isinstance(field, Field): if name not in self._special_fields: self._valid = False @@ -189,7 +189,7 @@ def _bin_edge_dim(self, coord: Field) -> Union[None, str]: return None def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: - child_sel = to_child_select(self._group.dims, + child_sel = 
to_child_select(tuple(self.sizes), child.dims, sel, bin_edge_dim=self._bin_edge_dim(child)) @@ -218,6 +218,8 @@ def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: return self._bin_edge_dim(coord) def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> bool: + if name == 'depends_on': + return False dim_of_coord = self._dim_of_coord(name, coord) if dim_of_coord is None: return False @@ -231,7 +233,7 @@ def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: da.coords[name] = sc.scalar(coord) # We need the shape *before* slicing to determine dims, so we get the # field from the group for the conditional. - elif self._coord_to_attr(da, name, self._group[name]): + elif self._coord_to_attr(da, name, self._children[name]): da.attrs[name] = coord else: da.coords[name] = coord @@ -244,9 +246,12 @@ def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[in class NXlog(NXdata): - def __init__(self, group: Group): - super().__init__(group, fallback_dims=('time', ), fallback_signal_name='value') - if (time := self._group._children.get('time')) is not None: + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, + children=children, + fallback_dims=('time', ), + fallback_signal_name='value') + if (time := children.get('time')) is not None: time._is_time = True @@ -254,29 +259,30 @@ class NXdetector(NXdata): _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] @staticmethod - def _detector_number(group: Group) -> Optional[str]: + def _detector_number(children: Iterable[str]) -> Optional[str]: for name in NXdetector._detector_number_fields: - if name in group._children: + if name in children: return name - def __init__(self, group: Group): + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): fallback_dims = None - if (det_num_name := NXdetector._detector_number(group)) is 
not None: - if group._children[det_num_name].dataset.ndim == 1: + if (det_num_name := NXdetector._detector_number(children)) is not None: + if children[det_num_name].dataset.ndim == 1: fallback_dims = ('detector_number', ) - super().__init__(group, + super().__init__(attrs=attrs, + children=children, fallback_dims=fallback_dims, fallback_signal_name='data') @property def detector_number(self) -> Optional[str]: - return self._detector_number(self._group) + return self._detector_number(self._children) class NXmonitor(NXdata): - def __init__(self, group: Group): - super().__init__(group, fallback_signal_name='data') + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children, fallback_signal_name='data') def _group_events(*, diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index 9793192a..7bb20325 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np import scipp as sc @@ -30,9 +30,9 @@ def _check_for_missing_fields(fields): class NXevent_data(NXobject): - def __init__(self, group: Group): - super().__init__(group) - for name, field in group._children.items(): + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for name, field in children.items(): if name in ['event_time_zero', 'event_index']: field.sizes = {_pulse_dimension: field.dataset.shape[0]} elif name in ['event_time_offset', 'event_id']: @@ -40,7 +40,7 @@ def __init__(self, group: Group): @property def shape(self) -> Tuple[int]: - if (event_index := self._group.get('event_index')) is not None: + if 
(event_index := self._children.get('event_index')) is not None: return event_index.shape return () @@ -150,9 +150,8 @@ def assemble(self, children: sc.DataGroup) -> sc.DataArray: try: binned = sc.bins(data=events, dim=_event_dimension, begin=begins) except IndexError as e: - raise NexusStructureError( - f"Invalid index in NXevent_data at {self._group.name}/event_index:\n{e}" - ) + path = self._children['event_index'].name + raise NexusStructureError(f"Invalid index in NXevent_data at {path}:\n{e}") return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) diff --git a/src/scippnexus/v2/nxoff_geometry.py b/src/scippnexus/v2/nxoff_geometry.py index 1f8127db..eaaa0fd3 100644 --- a/src/scippnexus/v2/nxoff_geometry.py +++ b/src/scippnexus/v2/nxoff_geometry.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional +from typing import Any, Dict, Optional, Union import scipp as sc @@ -61,9 +61,9 @@ class NXoff_geometry(NXobject): 'faces': ('face', ) } - def __init__(self, group: Group): - super().__init__(group) - for name, field in group._children.items(): + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for name, field in children.items(): if isinstance(field, Field): field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) if name == 'vertices': diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 0c9c5963..eef90ff6 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -48,8 +48,7 @@ def test_Transformation_with_single_value(h5root): value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value - depends_on = make_group(detector)['depends_on'][()] - t = make_group(h5root)[depends_on] + t = make_group(detector)['depends_on'] assert t.depends_on 
is None assert sc.identical(t.offset, offset) assert sc.identical(t.vector, vector) @@ -108,8 +107,7 @@ def test_Transformation_with_multiple_values(h5root): value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value - depends_on = make_group(detector)['depends_on'][()] - t = make_group(h5root)[depends_on] + t = make_group(detector)['depends_on'] assert t.depends_on is None assert sc.identical(t.offset, offset) assert sc.identical(t.vector, vector) From 635758e45665187cb46a6570a2a1c619233dba7b Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 24 Mar 2023 10:44:38 +0100 Subject: [PATCH 50/98] Fix NXgeometry --- src/scippnexus/v2/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index b2a2d5fc..a50f6a7b 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -547,8 +547,8 @@ def shape(self) -> Tuple[int, ...]: class NXgeometry(NXobject): - def __init__(self, group: Group): - super().__init__(group) + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) @staticmethod def assemble_as_child(children: sc.DataGroup, From d7702adf616f5c21547dc652c49a15012ac8ef5c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 24 Mar 2023 10:56:07 +0100 Subject: [PATCH 51/98] Docstrings --- tests/nexus_test.py | 2 +- tests/nx2_test.py | 2 +- tests/nxdata_test.py | 2 +- tests/nxmonitor_test.py | 2 +- tests/nxtransformations_test.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 2216ea3d..d14661e3 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -30,7 +30,7 @@ @pytest.fixture() def h5root(request): - """Yield NXroot containing a single NXentry named 'entry'""" + """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f diff 
--git a/tests/nx2_test.py b/tests/nx2_test.py index 1f9ae422..dd4d1041 100644 --- a/tests/nx2_test.py +++ b/tests/nx2_test.py @@ -8,7 +8,7 @@ @pytest.fixture() def h5root(request): - """Yield NXroot containing a single NXentry named 'entry'""" + """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index edc9e882..8c669019 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -10,7 +10,7 @@ @pytest.fixture() def h5root(request): - """Yield NXroot containing a single NXentry named 'entry'""" + """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index b12ef6ac..7e7ee318 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -8,7 +8,7 @@ @pytest.fixture() def h5root(request): - """Yield NXroot containing a single NXentry named 'entry'""" + """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index eef90ff6..1c5edac4 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -14,7 +14,7 @@ def make_group(group: h5py.Group) -> snx.Group: @pytest.fixture() def h5root(request): - """Yield NXroot containing a single NXentry named 'entry'""" + """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f From 3a37a6524b7bf074bb03983c3bab39acd80d28e3 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 24 Mar 2023 10:57:28 +0100 Subject: [PATCH 52/98] Remove prints --- tests/nx2_test.py | 2 -- tests/nxdata_test.py | 1 - tests/nxdetector_test.py | 1 - 3 files changed, 4 deletions(-) diff --git a/tests/nx2_test.py b/tests/nx2_test.py index dd4d1041..2764d605 
100644 --- a/tests/nx2_test.py +++ b/tests/nx2_test.py @@ -20,7 +20,6 @@ def test_does_not_see_changes(h5root): data['time'] = np.arange(4) obj = snx.Group(entry) dg = obj[()] - print(list(dg.items())) assert obj.sizes == {'dim_0': 4} assert 'data' in dg entry.create_group('data2') @@ -36,7 +35,6 @@ def test_read_recursive(h5root): data['time'].attrs['units'] = 's' obj = snx.Group(entry) dg = obj[()] - print(list(dg.items())) assert obj.sizes == {'dim_0': None} assert 'data' in dg diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 8c669019..1edd6ec0 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -335,7 +335,6 @@ def test_uncertainties_of_coords_are_loaded(h5root, errors_suffix): snx.create_field(data, 'scalar', sc.values(da.coords['scalar'])) snx.create_field(data, f'scalar{errors_suffix}', sc.stddevs(da.coords['scalar'])) data = snx.Group(data, definitions=snx.base_definitions) - print(data[...], da) assert sc.identical(data[...], da) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 1b333b4f..3abd4acc 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -54,7 +54,6 @@ def test_finds_data_from_group_attr(h5root): snx.create_field(detector, 'custom', da.data) detector.attrs['signal'] = 'custom' detector = make_group(detector) - print(detector[...]) assert sc.identical(detector[...], da.rename_dims({'xx': 'dim_0', 'yy': 'dim_1'})) From c82127fd38a26351b0c2079426e6f2e8013a5be0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 24 Mar 2023 11:00:08 +0100 Subject: [PATCH 53/98] Missing file --- src/scippnexus/v2/nxsample.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/scippnexus/v2/nxsample.py diff --git a/src/scippnexus/v2/nxsample.py b/src/scippnexus/v2/nxsample.py new file mode 100644 index 00000000..2bc29555 --- /dev/null +++ b/src/scippnexus/v2/nxsample.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 
Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import Any, Dict, Union + +import scipp as sc + +from .base import Field, Group, NXobject, ScippIndex, base_definitions + +_matrix_units = dict(zip(['orientation_matrix', 'ub_matrix'], ['one', '1/Angstrom'])) + + +def _fix_unit(name, value): + if (unit := _matrix_units.get(name)) is not None: + value.unit = unit + return value + + +class NXsample(NXobject): + """NXsample""" + + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for key in _matrix_units: + if (field := children.get(key)) is not None: + field.sizes = {k: field.sizes[k] for k in field.dims[:-2]} + field.dtype = sc.DType.linear_transform3 + + def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: + return sc.DataGroup({ + name: _fix_unit(name, self.index_child(child, sel)) + for name, child in obj.items() + }) + + +base_definitions['NXsample'] = NXsample From 1cc8954f6964b06a2f417e088e6ea3f8da39ad06 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 24 Mar 2023 12:29:24 +0100 Subject: [PATCH 54/98] Prevent writing to cached attrs --- src/scippnexus/v2/base.py | 7 +++++-- tests/nexus_test.py | 16 ++++++++-------- tests/nxdetector_test.py | 14 +++++++------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index a50f6a7b..4aa8294d 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -10,6 +10,7 @@ from collections.abc import Mapping from dataclasses import dataclass from functools import cached_property, lru_cache +from types import MappingProxyType from typing import Any, Dict, Iterator, List, Optional, Protocol, Tuple, Union import dateutil.parser @@ -134,7 +135,8 @@ class Field: @cached_property def attrs(self) -> Dict[str, Any]: - return dict(self.dataset.attrs) if self.dataset.attrs else dict() + return 
MappingProxyType( + dict(self.dataset.attrs) if self.dataset.attrs else dict()) @property def dims(self) -> Tuple[str]: @@ -394,7 +396,8 @@ def attrs(self) -> Dict[str, Any]: # We may expected a per-subgroup overhead of 1 ms for reading attributes, so if # all we want is access one attribute, we may save, e.g., a second for a group # with 1000 subgroups. - return dict(self._group.attrs) if self._group.attrs else dict() + return MappingProxyType( + dict(self._group.attrs) if self._group.attrs else dict()) @property def name(self) -> str: diff --git a/tests/nexus_test.py b/tests/nexus_test.py index d14661e3..6def9faa 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -162,7 +162,7 @@ def test_nxlog_axes_replaces_time_dim(nxroot): sc.array(dims=['time'], unit='s', values=[4.4]).to(unit='ns', dtype='int64') }) log = nxroot['entry'].create_class('log', NXlog) - log.attrs['axes'] = ['yy', 'xx'] + log._group.attrs['axes'] = ['yy', 'xx'] log['value'] = da.data log['time'] = da.coords['time'] - sc.epoch(unit='ns') expected = sc.DataArray(sc.array(dims=['yy', 'xx'], values=[[1.1]]), @@ -354,7 +354,7 @@ def test_field_of_extended_ascii_in_ascii_encoded_dataset_is_loaded_correctly(): def test_ms_field_with_second_datetime_attribute_loaded_as_ms_datetime(nxroot): nxroot['mytime'] = sc.arange('ignored', 2, unit='ms') - nxroot['mytime'].attrs['start_time'] = '2022-12-12T12:13:14' + nxroot['mytime'].dataset.attrs['start_time'] = '2022-12-12T12:13:14' assert sc.identical( nxroot['mytime'][...], sc.datetimes(dims=['dim_0'], @@ -364,7 +364,7 @@ def test_ms_field_with_second_datetime_attribute_loaded_as_ms_datetime(nxroot): def test_ns_field_with_second_datetime_attribute_loaded_as_ns_datetime(nxroot): nxroot['mytime'] = sc.arange('ignored', 2, unit='ns') - nxroot['mytime'].attrs['start_time'] = '1970-01-01T00:00:00' + nxroot['mytime'].dataset.attrs['start_time'] = '1970-01-01T00:00:00' assert sc.identical( nxroot['mytime'][...], sc.datetimes( @@ -375,7 +375,7 @@ def 
test_ns_field_with_second_datetime_attribute_loaded_as_ns_datetime(nxroot): def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): nxroot['mytime'] = sc.arange('ignored', 2, unit='s') - nxroot['mytime'].attrs['start_time'] = '1984-01-01T00:00:00.000000000' + nxroot['mytime'].dataset.attrs['start_time'] = '1984-01-01T00:00:00.000000000' assert sc.identical( nxroot['mytime'][...], sc.datetimes(dims=['dim_0'], @@ -389,14 +389,14 @@ def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): ('+11:30', '00:30'), ('-09:30', '21:30')]) def test_timezone_information_in_datetime_attribute_is_applied(nxroot, timezone, hhmm): nxroot['mytime'] = sc.scalar(value=3, unit='s') - nxroot['mytime'].attrs['start_time'] = f'1984-01-01T12:00:00{timezone}' + nxroot['mytime'].dataset.attrs['start_time'] = f'1984-01-01T12:00:00{timezone}' assert sc.identical(nxroot['mytime'][...], sc.datetime(unit='s', value=f'1984-01-01T{hhmm}:03')) def test_timezone_information_in_datetime_attribute_preserves_ns_precision(nxroot): nxroot['mytime'] = sc.scalar(value=3, unit='s') - nxroot['mytime'].attrs['start_time'] = '1984-01-01T12:00:00.123456789+0200' + nxroot['mytime'].dataset.attrs['start_time'] = '1984-01-01T12:00:00.123456789+0200' assert sc.identical(nxroot['mytime'][...], sc.datetime(unit='ns', value='1984-01-01T10:00:03.123456789')) @@ -404,8 +404,8 @@ def test_timezone_information_in_datetime_attribute_preserves_ns_precision(nxroo def test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxroot): offsets = sc.arange('ignored', 2, unit='ms') nxroot['mytime'] = offsets - nxroot['mytime'].attrs['offset'] = '2022-12-12T12:13:14' - nxroot['mytime'].attrs['start_time'] = '2022-12-12T12:13:15' + nxroot['mytime'].dataset.attrs['offset'] = '2022-12-12T12:13:14' + nxroot['mytime'].dataset.attrs['start_time'] = '2022-12-12T12:13:15' assert sc.identical(nxroot['mytime'][...], offsets.rename(ignored='dim_0')) diff --git 
a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 3abd4acc..16b28f5c 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -329,7 +329,7 @@ def test_loads_data_with_coords_and_off_geometry(nxroot, detid_name): detector.create_field(detid_name, da.coords['detector_number']) detector.create_field('xx', da.coords['xx']) detector.create_field('data', da.data) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, name='shape') loaded = detector[...] expected = snx.nxoff_geometry.off_to_shape( @@ -344,7 +344,7 @@ def test_missing_detector_numbers_triggers_fallback_given_off_geometry_with_det_ var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, name='shape') loaded = detector[...] 
assert isinstance(loaded, sc.DataGroup) @@ -355,7 +355,7 @@ def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(nx var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] create_off_geometry_detector_numbers_1234(detector, name='shape', detector_faces=False) @@ -381,7 +381,7 @@ def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] create_cylindrical_geometry_detector_numbers_1234(detector, name='shape', detector_numbers=False) @@ -406,7 +406,7 @@ def test_cylindrical_geometry_with_missing_parent_detector_numbers_triggers_fall var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] create_cylindrical_geometry_detector_numbers_1234(detector, name='shape', detector_numbers=True) @@ -420,7 +420,7 @@ def test_cylindrical_geometry_with_inconsistent_detector_numbers_triggers_fallba var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1], [3.3]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] detector.create_field('detector_numbers', sc.array(dims=var.dims, values=[[1], [2]], unit=None)) create_cylindrical_geometry_detector_numbers_1234(detector, @@ -435,7 +435,7 @@ def test_cylindrical_geometry_with_detector_numbers(nxroot): var = sc.array(dims=['xx', 
'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) - detector.attrs['axes'] = ['xx', 'yy'] + detector._group.attrs['axes'] = ['xx', 'yy'] detector_number = sc.array(dims=var.dims, values=[[1, 2], [3, 4]], unit=None) detector.create_field('detector_number', detector_number) create_cylindrical_geometry_detector_numbers_1234(detector, From ac519b74f9dbe72cf2b577a1da16b47c143b8114 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 27 Mar 2023 09:33:27 +0200 Subject: [PATCH 55/98] Some docstrings --- src/scippnexus/v2/base.py | 48 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 4aa8294d..65a1f2bf 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -306,7 +306,7 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group @property def unit(self) -> Union[None, sc.Unit]: - raise ValueError( + raise AttributeError( f"Group-like {self._attrs.get('NX_class')} has no well-defined unit") @cached_property @@ -317,12 +317,33 @@ def sizes(self) -> Dict[str, int]: def index_child( self, child: Union[Field, Group], sel: ScippIndex ) -> Union[sc.Variable, sc.DataArray, sc.Dataset, sc.DataGroup]: + """ + When a Group is indexed, this method is called to index each child. + + The main purpose of this is to translate the Group index to the child index. + Since the group dimensions (usually given by the signal) may be a superset of + the child dimensions, we need to translate the group index to a child index. + + The default implementation assumes that the child shape is identical to the + group shape, for all child dims. Subclasses of NXobject, in particular NXdata, + override this method to handle bin edges. + """ # Note that this will be similar in NXdata, but there we need to handle # bin edges as well. 
child_sel = to_child_select(self.sizes.keys(), child.dims, sel) return child[child_sel] def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: + """ + When a Group is indexed, this method is called to read all children. + + The default implementation simply calls index_child on each child and returns + the result as a DataGroup. + + Subclasses of NXobject, in particular NXevent_data, override this method to + to implement special logic for reading children with interdependencies, i.e., + where reading each child in isolation is not possible. + """ return sc.DataGroup( {name: self.index_child(child, sel) for name, child in obj.items()}) @@ -341,6 +362,10 @@ def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: if det_num is not None: det_num = dg[det_num] dg[name] = field._nexus.assemble_as_child(dg[name], detector_number=det_num) + # TODO Should we remove the NXtransformations group (if there is a depends_on)? + # For now it gets inserted as a DataGroup, or wrapped in a scalar coord in case + # of NXdata + # Would it be better to dereference the depends_on links only after loading? if (depends_on := dg.get('depends_on')) is not None: dg['depends_on'] = sc.scalar(depends_on) # transform = self._children[depends_on] @@ -362,6 +387,27 @@ class NXroot(NXobject): class Group(Mapping): + """ + A group in a NeXus file. + + This class is a wrapper around an h5py.Group object. It provides a dict-like + interface to the children of the group, and provides access to the attributes + of the group. The children are either Field or Group objects, depending on + whether the child is a dataset or a group, respectively. + + The implementation of this class is unfortunately very complex, for several reasons: + 1. NeXus requires "nonlocal" information for interpreting a field. For example, + NXdata attributes define which fields are the signal, and the names of the axes. 
+ A field cannot be read without this information, in particular since we want to + support reading slices, using the Scipp dimension-label syntax. + 2. The depend_on field and depends_on attributes in fields within NXtransformations + link to arbitrary other fields or groups in the same file. This interacts with + item 1.) and further complicates the logic. + 3. HDF5 or h5py performance is not great, and we want to avoid reading the same + attrs or datasets multiple times. We can therefore not rely on "on-the-fly" + interpretation of the file, but need to cache information. An earlier version + of ScippNexus used such a mechanism without caching, which was very slow. + """ def __init__(self, group: H5Group, From fd46585d6b140d85f5312eac60c0aa6c050729b0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 27 Mar 2023 10:36:10 +0200 Subject: [PATCH 56/98] Avoid messy mechanism for the sole purpose of supporting NXcanSAS --- .../nxcansas/nxcansas.py | 21 +++++++++++------- src/scippnexus/v2/base.py | 22 +++++-------------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py index 74adbf05..9e3a868b 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py +++ b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Any, Dict, Literal, Optional, Union +from typing import Any, Callable, Dict, Literal, Optional, Union import scipp as sc @@ -110,13 +110,18 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group class NXcanSAS: - def get(self, key: type, group: Group) -> type: - if (cls := group.attrs.get('canSAS_class')) is not None: - if cls == 'SASdata': - return _SASdata - if cls == 
'SAStransmission_spectrum': - return _SAStransmission_spectrum - return base_definitions.get(key, group) + def get(self, key: type, default: Callable) -> Callable: + + def _definition_factory(attrs: Dict[str, Any], + children: Dict[str, Union[Field, Group]]) -> NXobject: + if (cls := attrs.get('canSAS_class')) is not None: + if cls == 'SASdata': + return _SASdata(attrs, children) + if cls == 'SAStransmission_spectrum': + return _SAStransmission_spectrum(attrs, children) + return base_definitions.get(key, default)(attrs, children) + + return _definition_factory definitions = NXcanSAS() diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 65a1f2bf..be5f436f 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -330,7 +330,7 @@ def index_child( """ # Note that this will be similar in NXdata, but there we need to handle # bin edges as well. - child_sel = to_child_select(self.sizes.keys(), child.dims, sel) + child_sel = to_child_select(tuple(self.sizes), child.dims, sel) return child[child_sel] def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: @@ -414,7 +414,7 @@ def __init__(self, definitions: Optional[Dict[str, type]] = None, parent: Optional[Group] = None): self._group = group - self._definitions = DefinitionsDict() if definitions is None else definitions + self._definitions = {} if definitions is None else definitions if parent is None: if group == group.parent: self._parent = self @@ -501,8 +501,8 @@ def _make_child( @cached_property def _nexus(self) -> NXobject: return self._definitions.get(self.attrs.get('NX_class'), - group=self)(attrs=self.attrs, - children=self._children) + NXobject)(attrs=self.attrs, + children=self._children) def _populate_fields(self) -> None: _ = self._nexus @@ -646,17 +646,5 @@ def _nx_class_registry(): return dict(inspect.getmembers(nexus_classes, inspect.isclass)) -class DefinitionsDict: - - def __init__(self): - self._definitions = {} - - def __setitem__(self, nx_class: str, 
definition: type): - self._definitions[nx_class] = definition - - def get(self, nx_class: str, group: Group) -> type: - return self._definitions.get(nx_class, NXobject) - - -base_definitions = DefinitionsDict() +base_definitions = {} base_definitions['NXgeometry'] = NXgeometry From 73fd6025c6f2f803fb446d95392ab51e4c51d623 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 27 Mar 2023 10:42:18 +0200 Subject: [PATCH 57/98] Restore ability to load with no definitions --- src/scippnexus/v2/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index be5f436f..c3ed907b 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -352,6 +352,12 @@ def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: def detector_number(self) -> Optional[str]: return None + def assemble_as_child( + self, + obj: sc.DataGroup, + detector_number: Optional[sc.Variable] = None) -> sc.DataGroup: + return obj + def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: for name, field in self._special_fields.items(): if name == 'depends_on': From d004d4636a213e7945362e8e75ae3c8a695a1603 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 27 Mar 2023 12:21:03 +0200 Subject: [PATCH 58/98] Detect transformations via transform_type attribute --- src/scippnexus/v2/base.py | 15 +++++-------- src/scippnexus/v2/nxtransformations.py | 30 +++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index c3ed907b..4737e4be 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -237,7 +237,8 @@ def __getitem__(self, select) -> Union[Any, sc.Variable]: return variable.values[()] else: return variable.value - return variable + from .nxtransformations import maybe_transformation + return maybe_transformation(self, value=variable, sel=select) def __repr__(self) -> str: return f'' @@ -328,8 +329,6 @@ def 
index_child( group shape, for all child dims. Subclasses of NXobject, in particular NXdata, override this method to handle bin edges. """ - # Note that this will be similar in NXdata, but there we need to handle - # bin edges as well. child_sel = to_child_select(tuple(self.sizes), child.dims, sel) return child[child_sel] @@ -519,10 +518,6 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[str]: return self._children.__iter__() - @cached_property - def _is_nxtransformations(self) -> bool: - return self.attrs.get('NX_class') == 'NXtransformations' - def _get_children_by_nx_class( self, select: Union[type, List[type]]) -> Dict[str, Union[NXobject, Field]]: children = {} @@ -547,8 +542,6 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: from .nxtransformations import Transformation if isinstance(child, (Field, Transformation)): self._populate_fields() - if self._is_nxtransformations: - return Transformation(child) return child def isclass(x): @@ -561,8 +554,10 @@ def isclass(x): # (not scipp.DataArray, as that does not support lazy data) dg = self._nexus.read_children(self, sel) try: + from .nxtransformations import maybe_transformation dg = self._nexus.pre_assemble(dg) - return self._nexus.assemble(dg) + dg = self._nexus.assemble(dg) + return maybe_transformation(self, value=dg, sel=sel) except (sc.DimensionError, NexusStructureError) as e: print(e) # TODO log warning diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index 0ec659fa..4dd131a0 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -9,7 +9,14 @@ import scipp as sc from scipp.scipy import interpolate -from .base import Field, NexusStructureError, NXobject, ScippIndex, base_definitions +from .base import ( + Field, + Group, + NexusStructureError, + NXobject, + ScippIndex, + base_definitions, +) class TransformationError(NexusStructureError): @@ -79,6 +86,10 @@ def __getitem__(self, select: 
ScippIndex): # shape=[1] for single values. It is unclear how and if this could be # distinguished from a scan of length 1. value = self._obj[select] + return self.make_transformation(value, transformation_type, select) + + def make_transformation(self, value: Union[sc.Variable, sc.DataArray], + transformation_type: str, select: ScippIndex): try: if isinstance(value, sc.DataGroup): return value @@ -194,4 +205,21 @@ def _get_transformations(transform: Transformation, *, return transformations +def maybe_transformation( + obj: Union[Field, Group], value: Union[sc.Variable, sc.DataArray, sc.DataGroup], + sel: ScippIndex) -> Union[sc.Variable, sc.DataArray, sc.DataGroup]: + """ + Return a loaded field, possibly modified if it is a transformation. + + Transformations are stored in NXtransformations groups. However, identifying + transformation fields in this way requires inspecting the parent group, which + is cumbersome to implement. Instead we use the presence of the attribute + 'transformation_type' to identify transformation fields. 
+ """ + if (transformation_type := obj.attrs.get('transformation_type')) is not None: + from .nxtransformations import Transformation + return Transformation(obj).make_transformation(value, transformation_type, sel) + return value + + base_definitions['NXtransformations'] = NXtransformations From 33d111c68c6d3de027b9fc1c22b50efc660ac912 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 27 Mar 2023 12:33:11 +0200 Subject: [PATCH 59/98] Minor cleanup --- src/scippnexus/v2/base.py | 8 ++------ src/scippnexus/v2/nxdata.py | 3 +++ src/scippnexus/v2/nxoff_geometry.py | 2 +- tests/nexus_test.py | 1 - 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 4737e4be..8d738ff3 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -464,7 +464,7 @@ def parent(self) -> Optional[Group]: @cached_property def file(self) -> Optional[Group]: - return self if self == self.parent else self.parent.file + return self if self is self.parent else self.parent.file @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: @@ -550,8 +550,7 @@ def isclass(x): if isclass(sel) or (isinstance(sel, list) and len(sel) and all(isclass(x) for x in sel)): return self._get_children_by_nx_class(sel) - # Here this is scipp.DataGroup. Child classes like NXdata may return DataArray. 
- # (not scipp.DataArray, as that does not support lazy data) + dg = self._nexus.read_children(self, sel) try: from .nxtransformations import maybe_transformation @@ -579,9 +578,6 @@ def create_class(self, name, class_name: str) -> Group: definitions=self._definitions, parent=self) - def rebuild(self) -> Group: - return Group(self._group, definitions=self._definitions, parent=self.parent) - @cached_property def sizes(self) -> Dict[str, int]: return self._nexus.sizes diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 4d724f07..92492fee 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -155,6 +155,9 @@ def get_dims(name, field): for name, field in children.items(): if not isinstance(field, Field): + # If the NXdata contains subgroups we can generally not define valid + # sizes... except for some "special fields" that are transformed or + # wrapped into scalars. if name not in self._special_fields: self._valid = False elif (dims := get_dims(name, field)) is not None: diff --git a/src/scippnexus/v2/nxoff_geometry.py b/src/scippnexus/v2/nxoff_geometry.py index eaaa0fd3..23ea4dc7 100644 --- a/src/scippnexus/v2/nxoff_geometry.py +++ b/src/scippnexus/v2/nxoff_geometry.py @@ -31,7 +31,7 @@ def off_to_shape(*, low = fvw.bins.size().min().value high = fvw.bins.size().max().value if low == high: - # Vertices in winding order, groupbed by face. Unlike `fvw` above we now know + # Vertices in winding order, grouped by face. Unlike `fvw` above we now know # that each face has the same number of vertices, so we can fold instead of # using binned data. 
shapes = vw.fold(dim=vertices.dim, sizes={faces.dim: -1, vertices.dim: low}) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 6def9faa..cd3d2921 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -128,7 +128,6 @@ def test_nxlog_length_1(h5root): log = nxroot.create_class('log', NXlog) log['value'] = da.data log['time'] = da.coords['time'] - sc.epoch(unit='ns') - log = log.rebuild() assert sc.identical(log[...], da) From 50a00a07ee287a0b3a6d27aa441baefa60910c35 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 27 Mar 2023 15:02:34 +0200 Subject: [PATCH 60/98] Document some thoughts --- src/scippnexus/v2/base.py | 20 ++++++++++++++++++++ src/scippnexus/v2/nxdata.py | 8 ++++++++ src/scippnexus/v2/nxtransformations.py | 20 ++++++-------------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 8d738ff3..ff560b92 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -291,9 +291,22 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group self._special_fields[name] = field if isinstance(field, Field): self._init_field(field) + # TODO Consider simplifying a bunch of logic by not following depends_on + # chains. Instead provide post-processing function to resolve them. This + # will fail to resolve links that fall outside the loaded group, but that + # may be an acceptable limitation. Furthermore, resolving links may not + # possible in all cases anyway, e.g., when processing a new chunk from a + # Kafka stream. elif isinstance(field, Transformation): if isinstance(field._obj, Field): self._init_field(field._obj) + # TODO Some unfortunate logic that feels backwards: Certain subgroups have + # a special meaning, in that they are either describing a global property + # of the group, or a detector_number-dependent property. To determine valid + # dims and shape we must exclude those special groups. 
It would be nice to + # pass the detector_number from the parent, but at the point of NXobject + # creation for the child it has not been loaded yet. Is loading it twice + # acceptable? Or maybe it can be cached in the parent NXobject? elif (nx_class := field.attrs.get('NX_class')) is not None: if nx_class in [ 'NXoff_geometry', @@ -358,6 +371,8 @@ def assemble_as_child( return obj def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: + # TODO See above regarding special child groups. Maybe there is a better + # mechanism? for name, field in self._special_fields.items(): if name == 'depends_on': continue @@ -420,6 +435,8 @@ def __init__(self, parent: Optional[Group] = None): self._group = group self._definitions = {} if definitions is None else definitions + # TODO The entire 'parent' mechanism exists only for resolving 'depends_on' + # chains. Consider removing it and instead resolving the chain on the fly. if parent is None: if group == group.parent: self._parent = self @@ -553,6 +570,9 @@ def isclass(x): dg = self._nexus.read_children(self, sel) try: + # For a time-dependent transformation in NXtransformations, and NXlog may + # take the place of the `value` field. In this case, we need to read the + # properties of the NXlog group to make the actual transformation. from .nxtransformations import maybe_transformation dg = self._nexus.pre_assemble(dg) dg = self._nexus.assemble(dg) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 92492fee..8d9df9cf 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -158,6 +158,14 @@ def get_dims(name, field): # If the NXdata contains subgroups we can generally not define valid # sizes... except for some "special fields" that are transformed or # wrapped into scalars. + # TODO Another option would be to accept subgroups as they are, but + # ignore their dims, as the will be wrapped in a scalar variable. 
+ # TODO From a usability point of view, wrapping things in scalar is + # quite bad. For example, it removes the useful visualization as a + # nested DataGroup. Would it be worth considering returning the NXdata + # as a datagroup in all cases, but make the signal (and its axes) a + # nested DataArray? However, this is special to NXtransformations, so + # maybe we should not change the overall logic just for this case. if name not in self._special_fields: self._valid = False elif (dims := get_dims(name, field)) is not None: diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index 4dd131a0..effb2d1a 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -9,14 +9,7 @@ import scipp as sc from scipp.scipy import interpolate -from .base import ( - Field, - Group, - NexusStructureError, - NXobject, - ScippIndex, - base_definitions, -) +from .base import Field, Group, NexusStructureError, NXobject, ScippIndex class TransformationError(NexusStructureError): @@ -211,15 +204,14 @@ def maybe_transformation( """ Return a loaded field, possibly modified if it is a transformation. - Transformations are stored in NXtransformations groups. However, identifying + Transformations are usually stored in NXtransformations groups. However, identifying transformation fields in this way requires inspecting the parent group, which - is cumbersome to implement. Instead we use the presence of the attribute - 'transformation_type' to identify transformation fields. + is cumbersome to implement. Furthermore, according to the NXdetector documentation + transformations are not necessarily placed inside NXtransformations. + Instead we use the presence of the attribute 'transformation_type' to identify + transformation fields. 
""" if (transformation_type := obj.attrs.get('transformation_type')) is not None: from .nxtransformations import Transformation return Transformation(obj).make_transformation(value, transformation_type, sel) return value - - -base_definitions['NXtransformations'] = NXtransformations From 8bd855fb0bccc3c0ae951343c310770e63d2cbf8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 28 Mar 2023 09:12:08 +0200 Subject: [PATCH 61/98] Test loading files without definitions --- tests/load_files_test.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/load_files_test.py b/tests/load_files_test.py index 162b441f..58d60e8d 100644 --- a/tests/load_files_test.py +++ b/tests/load_files_test.py @@ -7,17 +7,27 @@ externalfile = pytest.importorskip('externalfile') - -@pytest.mark.externalfile -@pytest.mark.parametrize('name', [ +all_files = [ '2023/DREAM_baseline_all_dets.nxs', '2023/BIFROST_873855_00000015.hdf', '2023/DREAM_mccode.h5', '2023/LOKI_mcstas_nexus_geometry.nxs', '2023/NMX_2e11-rechunk.h5', '2023/YMIR_038243_00010244.hdf', -]) +] + + +@pytest.mark.externalfile +@pytest.mark.parametrize('name', all_files) def test_files_load_as_data_groups(name): with snx.File(externalfile.get_path(name)) as f: dg = f[()] assert isinstance(dg, sc.DataGroup) + + +@pytest.mark.externalfile +@pytest.mark.parametrize('name', all_files) +def test_files_load_as_data_groups_with_no_definitions(name): + with snx.File(externalfile.get_path(name), definitions={}) as f: + dg = f[()] + assert isinstance(dg, sc.DataGroup) From 138a4a32861c4755e627538751ff638ae8e091f7 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 29 Mar 2023 08:34:06 +0200 Subject: [PATCH 62/98] Do not resolve depends_on chain, but replace by relpath --- src/scippnexus/v2/base.py | 58 +++++-------- src/scippnexus/v2/nxtransformations.py | 14 +++- tests/nxtransformations_test.py | 112 +++++++++++++++++++------ 3 files changed, 119 insertions(+), 65 deletions(-) diff --git 
a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index ff560b92..cbd28096 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -5,6 +5,7 @@ import datetime import inspect +import os import re import warnings from collections.abc import Mapping @@ -26,6 +27,16 @@ def asarray(obj: Union[Any, sc.Variable]) -> sc.Variable: return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) +def depends_on_to_relative_path(depends_on: str, parent_path: str) -> str: + """Replace depends_on paths with relative paths. + + After loading we will generally not have the same root so absolute paths + cannot be resolved after loading.""" + if depends_on.startswith('/'): + return os.path.relpath(depends_on, parent_path) + return depends_on + + # TODO move into scipp class DimensionedArray(Protocol): """ @@ -201,6 +212,9 @@ def __getitem__(self, select) -> Union[Any, sc.Variable]: strings = self.dataset.asstr(encoding='latin-1')[index] _warn_latin1_decode(self.dataset, strings, str(e)) variable.values = np.asarray(strings).flatten() + if self.dataset.name.endswith('depends_on') and variable.ndim == 0: + variable.value = depends_on_to_relative_path(variable.value, + self.dataset.parent.name) elif variable.values.flags["C_CONTIGUOUS"]: # On versions of h5py prior to 3.2, a TypeError occurs in some cases # where h5py cannot broadcast data with e.g. shape (20, 1) to a buffer @@ -281,25 +295,13 @@ def _init_field(self, field: Field): field.dtype = _dtype_fromdataset(field.dataset) def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - from .nxtransformations import Transformation self._attrs = attrs self._children = children self._special_fields = {} self._transformations = {} for name, field in children.items(): - if name == 'depends_on': - self._special_fields[name] = field if isinstance(field, Field): self._init_field(field) - # TODO Consider simplifying a bunch of logic by not following depends_on - # chains. 
Instead provide post-processing function to resolve them. This - # will fail to resolve links that fall outside the loaded group, but that - # may be an acceptable limitation. Furthermore, resolving links may not - # possible in all cases anyway, e.g., when processing a new chunk from a - # Kafka stream. - elif isinstance(field, Transformation): - if isinstance(field._obj, Field): - self._init_field(field._obj) # TODO Some unfortunate logic that feels backwards: Certain subgroups have # a special meaning, in that they are either describing a global property # of the group, or a detector_number-dependent property. To determine valid @@ -374,8 +376,6 @@ def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: # TODO See above regarding special child groups. Maybe there is a better # mechanism? for name, field in self._special_fields.items(): - if name == 'depends_on': - continue if name in self._transformations: continue det_num = self.detector_number @@ -386,8 +386,6 @@ def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: # For now it gets inserted as a DataGroup, or wrapped in a scalar coord in case # of NXdata # Would it be better to dereference the depends_on links only after loading? - if (depends_on := dg.get('depends_on')) is not None: - dg['depends_on'] = sc.scalar(depends_on) # transform = self._children[depends_on] # # Avoid loading transform twice if it is a child of the same group # for name, transformations in self._transformations.items(): @@ -485,27 +483,14 @@ def file(self) -> Optional[Group]: @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: - # Transformations should be stored in NXtransformations, which is cumbersome - # to handle, since we need to check the parent of a transform to tell whether - # it is a transform. However, we can avoid this by simply treating everything - # referenced by a 'depends_on' field or attribute as a transform. 
- from .nxtransformations import Transformation - - def _make_child( - name: str, obj: Union[H5Dataset, - H5Group]) -> Union[Transformation, Field, Group]: - if name == 'depends_on': - target = obj[()] - obj = obj.parent[target] - # TODO Bad, we are recreating the group - parent = Group(obj.parent, definitions=self._definitions) - else: - parent = self + + def _make_child(name: str, obj: Union[H5Dataset, + H5Group]) -> Union[Field, Group]: + parent = self if is_dataset(obj): - child = Field(obj, parent=parent) + return Field(obj, parent=parent) else: - child = Group(obj, parent=parent, definitions=self._definitions) - return Transformation(child) if name == 'depends_on' else child + return Group(obj, parent=parent, definitions=self._definitions) items = {name: _make_child(name, obj) for name, obj in self._group.items()} for suffix in ('_errors', '_error'): @@ -556,8 +541,7 @@ def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: else: return self[sel.split('/')[0]][sel[sel.index('/') + 1:]] child = self._children[sel] - from .nxtransformations import Transformation - if isinstance(child, (Field, Transformation)): + if isinstance(child, Field): self._populate_fields() return child diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index effb2d1a..ae381e12 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -9,7 +9,14 @@ import scipp as sc from scipp.scipy import interpolate -from .base import Field, Group, NexusStructureError, NXobject, ScippIndex +from .base import ( + Field, + Group, + NexusStructureError, + NXobject, + ScippIndex, + depends_on_to_relative_path, +) class TransformationError(NexusStructureError): @@ -108,10 +115,11 @@ def make_transformation(self, value: Union[sc.Variable, sc.DataArray], if transformation_type == 'translation': offset = offset.to(unit=t.unit, copy=False) transform = t * offset - if (depends_on := self.depends_on) is not None: + if 
(depends_on := self.attrs.get('depends_on')) is not None: if not isinstance(transform, sc.DataArray): transform = sc.DataArray(transform) - transform.attrs['depends_on'] = sc.scalar(depends_on[select]) + transform.attrs['depends_on'] = sc.scalar( + depends_on_to_relative_path(depends_on, self._obj.parent.name)) return transform except (sc.DimensionError, sc.UnitError, TransformationError): # TODO We should probably try to return some other data structure and diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 1c5edac4..3f88848a 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -48,11 +48,56 @@ def test_Transformation_with_single_value(h5root): value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value - t = make_group(detector)['depends_on'] - assert t.depends_on is None - assert sc.identical(t.offset, offset) - assert sc.identical(t.vector, vector) - assert_identical(t[()], expected) + expected = sc.DataArray(data=expected, attrs={'depends_on': sc.scalar('.')}) + detector = make_group(detector) + depends_on = detector['depends_on'][()] + assert depends_on == 'transformations/t1' + t = detector[depends_on][()] + assert_identical(t, expected) + + +def test_depends_on_absolute_path_to_sibling_group_resolved_to_relative_path(h5root): + det1 = snx.create_class(h5root, 'det1', NXtransformations) + snx.create_field(det1, 'depends_on', sc.scalar('/det2/transformations/t1')) + + depends_on = make_group(det1)['depends_on'][()] + assert depends_on == '../det2/transformations/t1' + + +def test_depends_on_relative_path_unchanged(h5root): + det1 = snx.create_class(h5root, 'det1', NXtransformations) + snx.create_field(det1, 'depends_on', sc.scalar('transformations/t1')) + + depends_on = make_group(det1)['depends_on'][()] + assert depends_on == 'transformations/t1' + + +def test_depends_on_attr_absolute_path_to_sibling_group_resolved_to_relative_path( + h5root): + det1 = 
snx.create_class(h5root, 'det1', NXtransformations) + transformations = snx.create_class(det1, 'transformations', NXtransformations) + t1 = snx.create_field(transformations, 't1', sc.scalar(0.1, unit='cm')) + t1.attrs['depends_on'] = '/det2/transformations/t2' + t1.attrs['transformation_type'] = 'translation' + t1.attrs['vector'] = [0, 0, 1] + + loaded = make_group(det1)['transformations/t1'][()] + assert loaded.attrs['depends_on'].value == '../../det2/transformations/t2' + + +def test_depends_on_attr_relative_path_unchanged(h5root): + det = snx.create_class(h5root, 'det', NXtransformations) + transformations = snx.create_class(det, 'transformations', NXtransformations) + t1 = snx.create_field(transformations, 't1', sc.scalar(0.1, unit='cm')) + t1.attrs['depends_on'] = '.' + t1.attrs['transformation_type'] = 'translation' + t1.attrs['vector'] = [0, 0, 1] + + loaded = make_group(det)['transformations/t1'][()] + assert loaded.attrs['depends_on'].value == '.' + t1.attrs['depends_on'] = 't2' + loaded = make_group(det)['transformations/t1'][()] + assert loaded.attrs['depends_on'].value == 't2' def test_chain_with_single_values_and_different_unit(h5root): @@ -79,9 +124,14 @@ def test_chain_with_single_values_and_different_unit(h5root): t2 = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit).to(unit='cm') detector = make_group(h5root['detector_0']) - depends_on = detector[...].coords['depends_on'] - assert_identical(depends_on.value.data, t1) - assert_identical(depends_on.value.attrs['depends_on'].value, t2) + loaded = detector[()] + depends_on = loaded.coords['depends_on'] + assert depends_on.value == 'transformations/t1' + transforms = loaded.coords['transformations'].value + assert_identical(transforms['t1'].data, t1) + assert transforms['t1'].attrs['depends_on'].value == 't2' + assert_identical(transforms['t2'].data, t2) + assert transforms['t2'].attrs['depends_on'].value == '.' 
def test_Transformation_with_multiple_values(h5root): @@ -97,7 +147,6 @@ def test_Transformation_with_multiple_values(h5root): vector = sc.vector(value=[0, 0, 1]) t = log * vector t.data = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) - expected = t * offset value = snx.create_class(transformations, 't1', snx.NXlog) snx.create_field(value, 'time', log.coords['time'] - sc.epoch(unit='ns')) snx.create_field(value, 'value', log.data) @@ -107,11 +156,12 @@ def test_Transformation_with_multiple_values(h5root): value.attrs['offset_units'] = str(offset.unit) value.attrs['vector'] = vector.value - t = make_group(detector)['depends_on'] - assert t.depends_on is None - assert sc.identical(t.offset, offset) - assert sc.identical(t.vector, vector) - assert_identical(t[()], expected) + expected = t * offset + expected.attrs['depends_on'] = sc.scalar('.') + detector = make_group(detector) + depends_on = detector['depends_on'][()] + assert depends_on == 'transformations/t1' + assert_identical(detector[depends_on][()], expected) def test_chain_with_multiple_values(h5root): @@ -142,11 +192,15 @@ def test_chain_with_multiple_values(h5root): value2.attrs['transformation_type'] = 'translation' value2.attrs['vector'] = vector.value - expected = t * offset - expected.attrs['depends_on'] = sc.scalar(t) - detector = make_group(detector) - depends_on = detector[...].coords['depends_on'] - assert sc.identical(depends_on.value, expected) + expected1 = t * offset + expected1.attrs['depends_on'] = sc.scalar('t2') + expected2 = t + expected2.attrs['depends_on'] = sc.scalar('.') + detector = make_group(detector)[()] + depends_on = detector.coords['depends_on'] + assert depends_on.value == 'transformations/t1' + assert_identical(detector.coords['transformations'].value['t1'], expected1) + assert_identical(detector.coords['transformations'].value['t2'], expected2) def test_chain_with_multiple_values_and_different_time_unit(h5root): @@ -180,13 +234,20 @@ def 
test_chain_with_multiple_values_and_different_time_unit(h5root): value2.attrs['transformation_type'] = 'translation' value2.attrs['vector'] = vector.value - expected = t * offset + expected1 = t * offset + expected1.attrs['depends_on'] = sc.scalar('t2') + t2 = t.copy() t2.coords['time'] = t2.coords['time'].to(unit='ms') - expected.attrs['depends_on'] = sc.scalar(t2) + expected2 = t2 + expected2.attrs['depends_on'] = sc.scalar('.') + detector = make_group(detector) - depends_on = detector[...].coords['depends_on'] - assert sc.identical(depends_on.value, expected) + loaded = detector[...] + depends_on = loaded.coords['depends_on'] + assert depends_on.value == 'transformations/t1' + assert_identical(loaded.coords['transformations'].value['t1'], expected1) + assert_identical(loaded.coords['transformations'].value['t2'], expected2) def test_broken_time_dependent_transformation_returns_datagroup_but_sets_up_depends_on( @@ -223,7 +284,8 @@ def test_broken_time_dependent_transformation_returns_datagroup_but_sets_up_depe t1 = t['t1'] assert isinstance(t1, sc.DataGroup) assert t1.keys() == {'time', 'value'} - assert_identical(loaded.coords['depends_on'].value, t1) + assert loaded.coords['depends_on'].value == 'transformations/t1' + assert_identical(loaded.coords['transformations'].value['t1'], t1) def write_translation(group, name: str, value: sc.Variable, offset: sc.Variable, @@ -296,7 +358,7 @@ def test_nxtransformations_group_single_chain(h5root): assert set(loaded.keys()) == {'t1', 't2'} assert_identical(loaded['t1'], expected1) assert_identical(loaded['t2'].data, expected2) - assert_identical(loaded['t2'].attrs['depends_on'].value, expected1) + assert loaded['t2'].attrs['depends_on'].value == 't1' def test_slice_transformations(h5root): From 3a2a32300d7657cd6376b8ec0c19fdaab7263ffe Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 29 Mar 2023 08:42:20 +0200 Subject: [PATCH 63/98] Remove parent mechanism --- src/scippnexus/v2/base.py | 30 
++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index cbd28096..674811d6 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -427,21 +427,9 @@ class Group(Mapping): of ScippNexus used such a mechanism without caching, which was very slow. """ - def __init__(self, - group: H5Group, - definitions: Optional[Dict[str, type]] = None, - parent: Optional[Group] = None): + def __init__(self, group: H5Group, definitions: Optional[Dict[str, type]] = None): self._group = group self._definitions = {} if definitions is None else definitions - # TODO The entire 'parent' mechanism exists only for resolving 'depends_on' - # chains. Consider removing it and instead resolving the chain on the fly. - if parent is None: - if group == group.parent: - self._parent = self - else: - self._parent = Group(group.parent, definitions=definitions) - else: - self._parent = parent @property def nx_class(self) -> Optional[type]: @@ -474,23 +462,22 @@ def unit(self) -> Optional[sc.Unit]: return self._nexus.unit @property - def parent(self) -> Optional[Group]: - return self._parent + def parent(self) -> Group: + return Group(self._group.parent, definitions=self._definitions) @cached_property - def file(self) -> Optional[Group]: - return self if self is self.parent else self.parent.file + def file(self) -> Group: + return Group(self._group.file, definitions=self._definitions) @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: def _make_child(name: str, obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: - parent = self if is_dataset(obj): - return Field(obj, parent=parent) + return Field(obj, parent=self) else: - return Group(obj, parent=parent, definitions=self._definitions) + return Group(obj, definitions=self._definitions) items = {name: _make_child(name, obj) for name, obj in self._group.items()} for suffix in ('_errors', '_error'): @@ -579,8 +566,7 @@ 
def create_field(self, key: str, value: sc.Variable) -> H5Dataset: def create_class(self, name, class_name: str) -> Group: return Group(create_class(self._group, name, class_name), - definitions=self._definitions, - parent=self) + definitions=self._definitions) @cached_property def sizes(self) -> Dict[str, int]: return self._nexus.sizes From 9ca77596d5a679fad5fedd21afa5c6b5e9ccb83d Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 29 Mar 2023 12:04:19 +0200 Subject: [PATCH 64/98] Do not transform geometry classes --- src/scippnexus/v2/base.py | 45 +---------- src/scippnexus/v2/nxdata.py | 19 ++--- src/scippnexus/v2/nxoff_geometry.py | 6 +- tests/nxdetector_test.py | 118 ++++++++++++++++------------ 4 files changed, 78 insertions(+), 110 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 674811d6..4eca1ccf 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -299,26 +299,9 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group self._children = children self._special_fields = {} self._transformations = {} - for name, field in children.items(): + for field in children.values(): if isinstance(field, Field): self._init_field(field) - # TODO Some unfortunate logic that feels backwards: Certain subgroups have - # a special meaning, in that they are either describing a global property - # of the group, or a detector_number-dependent property. To determine valid - # dims and shape we must exclude those special groups. It would be nice to 
- elif (nx_class := field.attrs.get('NX_class')) is not None: - if nx_class in [ - 'NXoff_geometry', - 'NXcylindrical_geometry', - 'NXgeometry', - ]: - self._special_fields[name] = field - elif nx_class == 'NXtransformations': - self._special_fields[name] = field - self._transformations[name] = field @property def unit(self) -> Union[None, sc.Unit]: @@ -327,7 +310,6 @@ def unit(self) -> Union[None, sc.Unit]: @cached_property def sizes(self) -> Dict[str, int]: - # exclude geometry/tansform groups? return sc.DataGroup(self._children).sizes def index_child( @@ -372,30 +354,6 @@ def assemble_as_child( detector_number: Optional[sc.Variable] = None) -> sc.DataGroup: return obj - def pre_assemble(self, dg: sc.DataGroup) -> sc.DataGroup: - # TODO See above regarding special child groups. Maybe there is a better - # mechanism? - for name, field in self._special_fields.items(): - if name in self._transformations: - continue - det_num = self.detector_number - if det_num is not None: - det_num = dg[det_num] - dg[name] = field._nexus.assemble_as_child(dg[name], detector_number=det_num) - # TODO Should we remove the NXtransformations group (if there is a depends_on)? - # For now it gets inserted as a DataGroup, or wrapped in a scalar coord in case - # of NXdata - # Would it be better to dereference the depends_on links only after loading? - # transform = self._children[depends_on] - # # Avoid loading transform twice if it is a child of the same group - # for name, transformations in self._transformations.items(): - # if transform.name.startswith(transformations.name): - # dg['depends_on'] = dg[name][depends_on.split('/')[-1]] - # break - # else: - # dg['depends_on'] = transform[()] - return dg - def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: return dg @@ -545,7 +503,6 @@ def isclass(x): # take the place of the `value` field. In this case, we need to read the # properties of the NXlog group to make the actual transformation. 
from .nxtransformations import maybe_transformation - dg = self._nexus.pre_assemble(dg) dg = self._nexus.assemble(dg) return maybe_transformation(self, value=dg, sel=sel) except (sc.DimensionError, NexusStructureError) as e: diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 8d9df9cf..f01fe036 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -156,17 +156,14 @@ def get_dims(name, field): for name, field in children.items(): if not isinstance(field, Field): # If the NXdata contains subgroups we can generally not define valid - # sizes... except for some "special fields" that are transformed or - # wrapped into scalars. - # TODO Another option would be to accept subgroups as they are, but - # ignore their dims, as the will be wrapped in a scalar variable. - # TODO From a usability point of view, wrapping things in scalar is - # quite bad. For example, it removes the useful visualization as a - # nested DataGroup. Would it be worth considering returning the NXdata - # as a datagroup in all cases, but make the signal (and its axes) a - # nested DataArray? However, this is special to NXtransformations, so - # maybe we should not change the overall logic just for this case. - if name not in self._special_fields: + # sizes... except for some non-signal "special fields" that return + # a DataGroup that will be wrapped in a scalar Variable. 
+ if field.attrs.get('NX_class') not in [ + 'NXoff_geometry', + 'NXcylindrical_geometry', + 'NXgeometry', + 'NXtransformations', + ]: self._valid = False elif (dims := get_dims(name, field)) is not None: # The convention here is that the given dimensions apply to the shapes diff --git a/src/scippnexus/v2/nxoff_geometry.py b/src/scippnexus/v2/nxoff_geometry.py index 23ea4dc7..712544a5 100644 --- a/src/scippnexus/v2/nxoff_geometry.py +++ b/src/scippnexus/v2/nxoff_geometry.py @@ -43,8 +43,8 @@ def off_to_shape(*, if detector_number is None: raise NexusStructureError("`detector_number` not given but NXoff_geometry " "contains `detector_faces`.") - shape_index = detector_faces['column', 0].copy() - detid = detector_faces['column', 1].copy() + shape_index = detector_faces['face_index|detector_number', 0].copy() + detid = detector_faces['face_index|detector_number', 1].copy() da = sc.DataArray(shape_index, coords={ 'detector_number': detid }).group(detector_number.flatten(to='detector_number')) @@ -55,7 +55,7 @@ def off_to_shape(*, class NXoff_geometry(NXobject): _dims = { - 'detector_faces': ('face', 'column'), + 'detector_faces': ('face', 'face_index|detector_number'), 'vertices': ('vertex', ), 'winding_order': ('winding_order', ), 'faces': ('face', ) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 16b28f5c..f218ef51 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -302,20 +302,26 @@ def __getitem__(self, select=...): def create_off_geometry_detector_numbers_1234(group: snx.Group, name: str, - detector_faces: bool = True): + detector_faces: bool = True + ) -> sc.DataGroup: + dg = sc.DataGroup() off = group.create_class(name, NXoff_geometry) # square with point in center values = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [0.5, 0.5, 0]]) - off['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') + dg['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') # triangles - 
off['winding_order'] = sc.array(dims=['_'], - values=[0, 1, 4, 1, 2, 4, 2, 3, 4, 3, 0, 4], - unit=None) - off['faces'] = sc.array(dims=['_'], values=[0, 3, 6, 9], unit=None) + dg['winding_order'] = sc.array(dims=['winding_order'], + values=[0, 1, 4, 1, 2, 4, 2, 3, 4, 3, 0, 4], + unit=None) + dg['faces'] = sc.array(dims=['face'], values=[0, 3, 6, 9], unit=None) if detector_faces: - off['detector_faces'] = sc.array(dims=['_', 'dummy'], - values=[[0, 1], [1, 2], [2, 3], [3, 4]], - unit=None) + dg['detector_faces'] = sc.array(dims=['face', 'face_index|detector_number'], + values=[[0, 1], [1, 2], [2, 3], [3, 4]], + unit=None) + for name, var in dg.items(): + off[name] = var + dg['vertices'] = sc.vectors(dims=['vertex'], values=values, unit='m') + return dg @pytest.mark.parametrize('detid_name', @@ -330,25 +336,20 @@ def test_loads_data_with_coords_and_off_geometry(nxroot, detid_name): detector.create_field('xx', da.coords['xx']) detector.create_field('data', da.data) detector._group.attrs['axes'] = ['xx', 'yy'] - create_off_geometry_detector_numbers_1234(detector, name='shape') + expected = create_off_geometry_detector_numbers_1234(detector, name='shape') loaded = detector[...] 
- expected = snx.nxoff_geometry.off_to_shape( - **detector['shape'][()], detector_number=da.coords['detector_number']) - assert sc.identical(loaded.coords['shape'].bins.size(), - sc.array(dims=da.dims, values=[[1, 1], [1, 1]], unit=None)) - assert sc.identical(loaded.coords['shape'], expected) + assert_identical(loaded.coords['shape'].value, expected) -def test_missing_detector_numbers_triggers_fallback_given_off_geometry_with_det_faces( +def test_missing_detector_numbers_given_off_geometry_with_det_faces_loads_as_usual( nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector._group.attrs['axes'] = ['xx', 'yy'] - create_off_geometry_detector_numbers_1234(detector, name='shape') + expected = create_off_geometry_detector_numbers_1234(detector, name='shape') loaded = detector[...] - assert isinstance(loaded, sc.DataGroup) - assert sc.identical(loaded['shape'], detector['shape'][()]) + assert_identical(loaded.coords['shape'].value, expected) def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(nxroot): @@ -356,25 +357,34 @@ def test_off_geometry_without_detector_faces_loaded_as_0d_with_multiple_faces(nx detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector._group.attrs['axes'] = ['xx', 'yy'] - create_off_geometry_detector_numbers_1234(detector, - name='shape', - detector_faces=False) + expected = create_off_geometry_detector_numbers_1234(detector, + name='shape', + detector_faces=False) loaded = detector[...] 
- assert loaded.coords['shape'].dims == () - assert sc.identical(loaded.coords['shape'].bins.size(), sc.index(4)) + assert_identical(loaded.coords['shape'].value, expected) + shape = snx.NXoff_geometry.assemble_as_child(loaded.coords['shape'].value) + assert sc.identical(shape.bins.size(), sc.index(4)) def create_cylindrical_geometry_detector_numbers_1234(group: snx.Group, name: str, - detector_numbers: bool = True): + detector_numbers: bool = True + ) -> sc.DataGroup: shape = group.create_class(name, snx.NXcylindrical_geometry) values = np.array([[0, 0, 0], [0, 1, 0], [3, 0, 0]]) - shape['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') - shape['cylinders'] = sc.array(dims=['_', 'vertex'], - values=[[0, 1, 2], [2, 1, 0]], - unit=None) + dg = sc.DataGroup() + dg['vertices'] = sc.array(dims=['_', 'comp'], values=values, unit='m') + dg['cylinders'] = sc.array(dims=['cylinder', 'vertex_index'], + values=[[0, 1, 2], [2, 1, 0]], + unit=None) if detector_numbers: - shape['detector_number'] = sc.array(dims=['_'], values=[0, 1, 1, 0], unit=None) + dg['detector_number'] = sc.array(dims=['detector_number'], + values=[0, 1, 1, 0], + unit=None) + for name, var in dg.items(): + shape[name] = var + dg['vertices'] = sc.vectors(dims=['vertex'], values=values, unit='m') + return dg def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): @@ -382,11 +392,12 @@ def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector._group.attrs['axes'] = ['xx', 'yy'] - create_cylindrical_geometry_detector_numbers_1234(detector, - name='shape', - detector_numbers=False) + expected = create_cylindrical_geometry_detector_numbers_1234(detector, + name='shape', + detector_numbers=False) loaded = detector[...] 
- shape = loaded.coords['shape'] + assert_identical(loaded.coords['shape'].value, expected) + shape = snx.NXcylindrical_geometry.assemble_as_child(loaded.coords['shape'].value) assert shape.dims == () assert sc.identical(shape.bins.size(), sc.index(2)) assert sc.identical( @@ -401,34 +412,35 @@ def test_cylindrical_geometry_without_detector_numbers_loaded_as_0d(nxroot): })) -def test_cylindrical_geometry_with_missing_parent_detector_numbers_triggers_fallback( +def test_cylindrical_geometry_with_missing_parent_detector_numbers_loads_as_usual( nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1, 2.2], [3.3, 4.4]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector._group.attrs['axes'] = ['xx', 'yy'] - create_cylindrical_geometry_detector_numbers_1234(detector, - name='shape', - detector_numbers=True) + expected = create_cylindrical_geometry_detector_numbers_1234(detector, + name='shape', + detector_numbers=True) loaded = detector[...] - assert isinstance(loaded, sc.DataGroup) - assert isinstance(loaded['shape'], sc.DataGroup) + assert_identical(loaded.coords['shape'].value, expected) -def test_cylindrical_geometry_with_inconsistent_detector_numbers_triggers_fallback( - nxroot): +def test_cylindrical_geometry_with_inconsistent_detector_numbers_loads_as_usual(nxroot): var = sc.array(dims=['xx', 'yy'], unit='K', values=[[1.1], [3.3]]) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('data', var) detector._group.attrs['axes'] = ['xx', 'yy'] - detector.create_field('detector_numbers', + detector.create_field('detector_number', sc.array(dims=var.dims, values=[[1], [2]], unit=None)) - create_cylindrical_geometry_detector_numbers_1234(detector, - name='shape', - detector_numbers=True) + expected = create_cylindrical_geometry_detector_numbers_1234(detector, + name='shape', + detector_numbers=True) loaded = detector[...] 
- assert isinstance(loaded, sc.DataGroup) - assert isinstance(loaded['shape'], sc.DataGroup) + assert_identical(loaded.coords['shape'].value, expected) + detector_number = loaded.coords['detector_number'] + with pytest.raises(snx.NexusStructureError): + snx.NXcylindrical_geometry.assemble_as_child(loaded.coords['shape'].value, + detector_number=detector_number) def test_cylindrical_geometry_with_detector_numbers(nxroot): @@ -438,11 +450,13 @@ def test_cylindrical_geometry_with_detector_numbers(nxroot): detector._group.attrs['axes'] = ['xx', 'yy'] detector_number = sc.array(dims=var.dims, values=[[1, 2], [3, 4]], unit=None) detector.create_field('detector_number', detector_number) - create_cylindrical_geometry_detector_numbers_1234(detector, - name='shape', - detector_numbers=True) + expected = create_cylindrical_geometry_detector_numbers_1234(detector, + name='shape', + detector_numbers=True) loaded = detector[...] - shape = loaded.coords['shape'] + assert_identical(loaded.coords['shape'].value, expected) + shape = snx.NXcylindrical_geometry.assemble_as_child( + loaded.coords['shape'].value, detector_number=loaded.coords['detector_number']) assert shape.dims == detector_number.dims for i in [0, 3]: assert sc.identical( From 258a2f5b8e29620125067daddb14d91de9b0aea4 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 29 Mar 2023 12:56:32 +0200 Subject: [PATCH 65/98] Cleanup --- src/scippnexus/v2/base.py | 20 ++++--------------- src/scippnexus/v2/nxevent_data.py | 32 +------------------------------ 2 files changed, 5 insertions(+), 47 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 4eca1ccf..c77d5186 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -297,8 +297,6 @@ def _init_field(self, field: Field): def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): self._attrs = attrs self._children = children - self._special_fields = {} - self._transformations = {} for field in 
children.values(): if isinstance(field, Field): self._init_field(field) @@ -344,17 +342,8 @@ def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: {name: self.index_child(child, sel) for name, child in obj.items()}) - @property - def detector_number(self) -> Optional[str]: - return None - - def assemble_as_child( - self, - obj: sc.DataGroup, - detector_number: Optional[sc.Variable] = None) -> sc.DataGroup: - return obj - - def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray]: + def assemble(self, + dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: return dg @@ -430,14 +419,13 @@ def file(self) -> Group: @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: - def _make_child(name: str, obj: Union[H5Dataset, - H5Group]) -> Union[Field, Group]: + def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: if is_dataset(obj): return Field(obj, parent=self) else: return Group(obj, definitions=self._definitions) - items = {name: _make_child(name, obj) for name, obj in self._group.items()} + items = {name: _make_child(obj) for name, obj in self._group.items()} for suffix in ('_errors', '_error'): field_with_errors = [name for name in items if f'{name}{suffix}' in items] for name in field_with_errors: diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index 7bb20325..f2be1af5 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Tuple, Union import numpy as np import scipp as sc @@ -13,7 +13,6 @@ NexusStructureError, NXobject, ScippIndex, - asarray, base_definitions, ) @@ -155,34 +154,5 @@ def assemble(self, children: sc.DataGroup) -> sc.DataArray: return sc.DataArray(data=binned, 
coords={'event_time_zero': event_time_zero}) - # TODO now unused - @staticmethod - def assemble_as_child( - event_data: sc.DataArray, - detector_number: Optional[sc.Variable] = None) -> sc.DataArray: - grouping = asarray(detector_number) - - if isinstance(event_data, sc.DataGroup): - raise NexusStructureError("Invalid NXevent_data in NXdetector.") - if grouping is None: - event_id = 'event_id' - else: - # copy since sc.bin cannot deal with a non-contiguous view - event_id = grouping.flatten(to='event_id').copy() - event_data.bins.coords['event_time_zero'] = sc.bins_like( - event_data, fill_value=event_data.coords['event_time_zero']) - # After loading raw NXevent_data it is guaranteed that the event table - # is contiguous and that there is no masking. We can therefore use the - # more efficient approach of binning from scratch instead of erasing the - # 'pulse' binning defined by NXevent_data. - event_data = event_data.bins.constituents['data'].group(event_id) - # if self._grouping is None: - # event_data.coords[self._grouping_key] = event_data.coords.pop('event_id') - # else: - # del event_data.coords['event_id'] - if grouping is None: - return event_data - return event_data.fold(dim='event_id', sizes=grouping.sizes) - base_definitions['NXevent_data'] = NXevent_data From 303492689ce2738e25002ae0bc967c6a3850d231 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 30 Mar 2023 08:14:50 +0200 Subject: [PATCH 66/98] Docstrings --- src/scippnexus/v2/base.py | 147 +++++++++++++++++++++++++++++------- src/scippnexus/v2/nxdata.py | 2 +- tests/nxdetector_test.py | 1 - 3 files changed, 120 insertions(+), 30 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index c77d5186..0037b856 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -12,7 +12,7 @@ from dataclasses import dataclass from functools import cached_property, lru_cache from types import MappingProxyType -from typing import Any, Dict, Iterator, List, Optional, 
Protocol, Tuple, Union +from typing import Any, Dict, Iterator, List, Optional, Protocol, Tuple, Union, overload import dateutil.parser import numpy as np @@ -146,6 +146,9 @@ class Field: @cached_property def attrs(self) -> Dict[str, Any]: + """The attributes of the dataset. + + Cannot be used for writing attributes, since they are cached for performance.""" return MappingProxyType( dict(self.dataset.attrs) if self.dataset.attrs else dict()) @@ -324,6 +327,8 @@ def index_child( group shape, for all child dims. Subclasses of NXobject, in particular NXdata, override this method to handle bin edges. """ + # TODO Could avoid determining sizes if sel is trivial. Merge with + # NXdata.index_child? child_sel = to_child_select(tuple(self.sizes), child.dims, sel) return child[child_sel] @@ -344,6 +349,15 @@ def read_children(self, obj: Group, sel: ScippIndex) -> sc.DataGroup: def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: + """ + When a Group is indexed, this method is called to assemble the read children + into the result object. + + The default implementation simply returns the DataGroup. + + Subclasses of NXobject, in particular NXdata, override this method to return + an object with more semantics such as a DataArray or Dataset. + """ return dg @@ -359,21 +373,19 @@ class Group(Mapping): interface to the children of the group, and provides access to the attributes of the group. The children are either Field or Group objects, depending on whether the child is a dataset or a group, respectively. - - The implementation of this class is unfortunately very complex, for several reasons: - 1. NeXus requires "nonlocal" information for interpreting a field. For example, - NXdata attributes define which fields are the signal, and the names of the axes. - A field cannot be read without this information, in particular since we want to - support reading slices, using the Scipp dimension-label syntax. - 2. 
The depend_on field and depends_on attributes in fields within NXtransformations - link to arbitrary other fields or groups in the same file. This interacts with - item 1.) and further complicates the logic. - 3. HDF5 or h5py performance is not great, and we want to avoid reading the same - attrs or datasets multiple times. We can therefore not rely on "on-the-fly" - interpretation of the file, but need to cache information. An earlier version - of ScippNexus used such a mechanism without caching, which was very slow. """ + # The implementation of this class is unfortunately relatively complex: + + # 1. NeXus requires "nonlocal" information for interpreting a field. For example, + # NXdata attributes define which fields are the signal, and the names of axes. + # A field cannot be read without this information, in particular since we want to + # support reading slices, using the Scipp dimension-label syntax. + # 2. HDF5 or h5py performance is not great, and we want to avoid reading the same + # attrs or datasets multiple times. We can therefore not rely on "on-the-fly" + # interpretation of the file, but need to cache information. An earlier version + # of ScippNexus used such a mechanism without caching, which was very slow. + def __init__(self, group: H5Group, definitions: Optional[Dict[str, type]] = None): self._group = group self._definitions = {} if definitions is None else definitions @@ -382,7 +394,7 @@ def __init__(self, group: H5Group, definitions: Optional[Dict[str, type]] = None def nx_class(self) -> Optional[type]: """The value of the NX_class attribute of the group. - In case of the subclass NXroot this returns 'NXroot' even if the attribute + In case of the subclass NXroot this returns :py:class:`NXroot` even if the attr is not actually set. This is to support the majority of all legacy files, which do not have this attribute. 
""" @@ -393,10 +405,13 @@ def nx_class(self) -> Optional[type]: @cached_property def attrs(self) -> Dict[str, Any]: + """The attributes of the group. + + Cannot be used for writing attributes, since they are cached for performance.""" # Attrs are not read until needed, to avoid reading all attrs for all subgroups. # We may expected a per-subgroup overhead of 1 ms for reading attributes, so if - # all we want is access one attribute, we may save, e.g., a second for a group - # with 1000 subgroups. + # all we want is access one subgroup, we may save, e.g., a second for a group + # with 1000 subgroups (or subfields). return MappingProxyType( dict(self._group.attrs) if self._group.attrs else dict()) @@ -418,6 +433,7 @@ def file(self) -> Group: @cached_property def _children(self) -> Dict[str, Union[Field, Group]]: + """Cached children of the group.""" def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: if is_dataset(obj): @@ -440,11 +456,26 @@ def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: @cached_property def _nexus(self) -> NXobject: + """Instance of the NXobject subclass corresponding to the NX_class attribute. + + This is used to determine dims, unit, and other attributes of the group and its + children, as well as defining how children will be read and assembled into the + result object when the group is indexed. + """ return self._definitions.get(self.attrs.get('NX_class'), NXobject)(attrs=self.attrs, children=self._children) def _populate_fields(self) -> None: + """Populate the fields of the group. + + Fields are not populated until needed, to avoid reading field and group + properties for all subgroups. However, when any field is read we must in + general parse all the field and group properties, since for classes such + as NXdata the properties of any field may indirectly depend on the properties + of any other field. 
For example, field attributes may define which fields are + axes, and dim labels of other fields can be defined by the names of the axes. + """ _ = self._nexus def __len__(self) -> int: @@ -463,7 +494,44 @@ def _get_children_by_nx_class( children[key] = self[key] return children - def __getitem__(self, sel) -> Union[Field, Group, sc.DataGroup]: + @overload + def __getitem__(self, sel: str) -> Union[Group, Field]: + ... + + @overload + def __getitem__(self, + sel: ScippIndex) -> Union[sc.DataArray, sc.DataGroup, sc.Dataset]: + ... + + @overload + def __getitem__(self, sel: Union[type, List[type]]) -> Dict[str, NXobject]: + ... + + def __getitem__(self, sel): + """ + Get a child group or child dataset, a selection of child groups, or load and + return the current group. + + Three cases are supported: + + - String name: The child group or child dataset of that name is returned. + - Class such as ``NXdata`` or ``NXlog``: A dict containing all direct children + with a matching ``NX_class`` attribute are returned. Also accepts a tuple of + classes. ``Field`` selects all child fields, i.e., all datasets but not + groups. + - Scipp-style index: Load the specified slice of the current group, returning + a :class:`scipp.DataArray` or :class:`scipp.DataGroup`. + + Parameters + ---------- + name: + Child name, class, or index. + + Returns + ------- + : + Field, group, dict of fields, or loaded data. + """ if isinstance(sel, str): # We cannot get the child directly from the HDF5 group, since we need to # create the parent group, to ensure that fields get the correct properties @@ -487,19 +555,25 @@ def isclass(x): dg = self._nexus.read_children(self, sel) try: - # For a time-dependent transformation in NXtransformations, and NXlog may - # take the place of the `value` field. In this case, we need to read the - # properties of the NXlog group to make the actual transformation. 
- from .nxtransformations import maybe_transformation dg = self._nexus.assemble(dg) - return maybe_transformation(self, value=dg, sel=sel) except (sc.DimensionError, NexusStructureError) as e: - print(e) - # TODO log warning - return dg + self._warn_fallback(e) + # For a time-dependent transformation in NXtransformations, and NXlog may + # take the place of the `value` field. In this case, we need to read the + # properties of the NXlog group to make the actual transformation. + from .nxtransformations import maybe_transformation + return maybe_transformation(self, value=dg, sel=sel) + + def _warn_fallback(self, e: Exception) -> None: + msg = (f"Failed to load {self.name} as {type(self._nexus).__name__}: {e} " + "Falling back to loading HDF5 group children as scipp.DataGroup.") + warnings.warn(msg) - # TODO It is not clear if we want to support these convenience methods def __setitem__(self, key, value): + """Set a child group or child dataset. + + Note that due to the caching mechanisms in this class, reading the group + or its children may not reflect the changes made by this method.""" if hasattr(value, '__write_to_nexus_group__'): group = create_class(self._group, key, nx_class=value.nx_class) value.__write_to_nexus_group__(group) @@ -507,9 +581,26 @@ def __setitem__(self, key, value): create_field(self._group, key, value) def create_field(self, key: str, value: sc.Variable) -> H5Dataset: + """Create a child dataset with given name and value. + + Note that due to the caching mechanisms in this class, reading the group + or its children may not reflect the changes made by this method.""" return create_field(self._group, key, value) - def create_class(self, name, class_name: str) -> Group: + def create_class(self, name: str, class_name: str) -> Group: + """Create empty HDF5 group with given name and set the NX_class attribute. 
+ + Note that due to the caching mechanisms in this class, reading the group + or its children may not reflect the changes made by this method. + + Parameters + ---------- + name: + Group name. + nx_class: + Nexus class, can be a valid string for the NX_class attribute, or a + subclass of NXobject, such as NXdata or NXlog. + """ return Group(create_class(self._group, name, class_name), definitions=self._definitions) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index f01fe036..2bc22ce2 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -206,7 +206,7 @@ def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: if not self._valid: - return super().assemble(dg) + raise NexusStructureError("Could not determine signal field or dimensions.") aux = {name: dg.pop(name) for name in self._aux_signals} coords = sc.DataGroup(dg) signal = coords.pop(self._signal_name) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index f218ef51..0e291cdf 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -28,7 +28,6 @@ def nxroot(request): yield root -@pytest.mark.skip(reason="Warnings system not implemented yet.") def test_warns_if_no_data_found(nxroot): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) detector = nxroot.create_class('detector0', NXdetector) From a6f2c7f2939939962208fad9693c0125bc360443 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 30 Mar 2023 08:36:35 +0200 Subject: [PATCH 67/98] Comment NXdata logic --- src/scippnexus/v2/nxdata.py | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 2bc22ce2..85e38839 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -40,12 +40,6 @@ def __init__(self, 
fallback_signal_name: Optional[str] = None): super().__init__(attrs=attrs, children=children) self._valid = True - # Must do full consistency check here, to define self.sizes: - # - squeeze correctly - # - check if coord dims are compatible with signal dims - # - check if there is a signal - # If not the case, fall back do DataGroup.sizes - # Can we just set field dims here? self._signal_name = None self._signal = None self._aux_signals = attrs.get('auxiliary_signals', []) @@ -56,32 +50,34 @@ def __init__(self, else: # Legacy NXdata defines signal not as group attribute, but attr on dataset for name, field in children.items(): - # What is the meaning of the attribute value? It is undocumented, - # we simply ignore it. + # We ignore the signal value. Usually it is 1, but apparently one could + # multiple signals. We do not support this, since it is legacy anyway. if 'signal' in field.attrs: self._signal_name = name self._signal = children[name] break + # Latest way of defining axes axes = attrs.get('axes') + # Older way of defining axes signal_axes = None if self._signal is None else self._signal.attrs.get('axes') - + # Another old way of defining axes axis_index = {} for name, field in children.items(): if (axis := field.attrs.get('axis')) is not None: axis_index[name] = axis - # Apparently it is not possible to define dim labels unless there are - # corresponding coords. Special case of '.' entries means "no coord". - def _get_group_dims(): + def _get_group_dims() -> Optional[Tuple[str, ...]]: + """Try three ways of defining group dimensions.""" + # Apparently it is not possible to define dim labels unless there are + # corresponding coords. Special case of '.' entries means "no coord". if axes is not None: - return [f'dim_{i}' if a == '.' else a for i, a in enumerate(axes)] + return tuple(f'dim_{i}' if a == '.' 
else a for i, a in enumerate(axes)) if signal_axes is not None: return tuple(signal_axes.split(',')) if axis_index: - return [ - k for k, _ in sorted(axis_index.items(), key=lambda item: item[1]) - ] + return tuple( + k for k, _ in sorted(axis_index.items(), key=lambda item: item[1])) return None group_dims = _get_group_dims() @@ -91,6 +87,7 @@ def _get_group_dims(): else: if group_dims is not None: shape = self._signal.dataset.shape + # If we have explicit group dims, we can drop trailing 1s. shape = _squeeze_trailing(group_dims, shape) self._signal.sizes = dict(zip(group_dims, shape)) elif fallback_dims is not None: @@ -103,15 +100,14 @@ def _get_group_dims(): if axes is not None: # Unlike self.dims we *drop* entries that are '.' - named_axes = [a for a in axes if a != '.'] + named_axes = tuple(a for a in axes if a != '.') elif signal_axes is not None: named_axes = signal_axes.split(',') elif fallback_dims is not None: named_axes = fallback_dims else: - named_axes = [] + named_axes = () - # 3. Find field dims indices_suffix = '_indices' indices_attrs = { key[:-len(indices_suffix)]: attr @@ -128,16 +124,15 @@ def get_dims(name, field): # Newly written files should always contain indices attributes, but the # standard recommends that readers should also make "best effort" guess # since legacy files do not set this attribute. - # TODO signal and errors? 
if name in (self._signal_name, ): return group_dims - # if name in [self._signal_name, self._errors_name]: - # return self._get_group_dims() # if None, field determines dims itself if name in self._aux_signals: return _guess_dims(group_dims, self._signal.dataset.shape, field.dataset) + # Latest way of defining dims if (dims := dims_from_indices.get(name)) is not None: return dims + # Older way of defining dims via axis attribute if (axis := axis_index.get(name)) is not None: return (group_dims[axis - 1], ) if name in named_axes: @@ -197,6 +192,7 @@ def _bin_edge_dim(self, coord: Field) -> Union[None, str]: return None def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + """Same as NXobject.index_child but also handles bin edges.""" child_sel = to_child_select(tuple(self.sizes), child.dims, sel, @@ -236,6 +232,8 @@ def _coord_to_attr(self, da: sc.DataArray, name: str, coord: sc.Variable) -> boo return False def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: + """Add coords to data array, converting to attrs in the same way as slicing + scipp.DataArray would.""" for name, coord in coords.items(): if not isinstance(coord, sc.Variable): da.coords[name] = sc.scalar(coord) From be7298144561c69a5d9ad4f167f14d97800c88d1 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 30 Mar 2023 08:47:10 +0200 Subject: [PATCH 68/98] Cleanup event grouping code --- src/scippnexus/v2/nxdata.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 85e38839..0865e12c 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -308,10 +308,6 @@ def _group_events(*, # more efficient approach of binning from scratch instead of erasing the # 'event_time_zero' binning defined by NXevent_data. 
event_data = event_data.bins.constituents['data'].group(event_id) - # if self._grouping is None: - # event_data.coords[self._grouping_key] = event_data.coords.pop('event_id') - # else: - # del event_data.coords['event_id'] if grouping is None: return event_data return event_data.fold(dim='event_id', sizes=grouping.sizes) @@ -327,19 +323,25 @@ def _find_event_entries(dg: sc.DataGroup) -> List[str]: return event_entries -def group_events_by_detector_number(dg: sc.DataGroup) -> sc.DataArray: - event_entry = _find_event_entries(dg)[0] - events = dg.pop(event_entry) +def group_events_by_detector_number( + dg: sc.DataGroup) -> Union[sc.DataArray, sc.Dataset]: grouping_key = None for key in NXdetector._detector_number_fields: if (grouping := dg.get(key)) is not None: grouping_key = key break grouping = None if grouping_key is None else asarray(dg.pop(grouping_key)) - da = _group_events(event_data=events, grouping=grouping) - # TODO What about _coord_to_attr mapping as NXdata? - da.coords.update(dg) - return da + grouped_events = sc.DataGroup() + for event_entry in _find_event_entries(dg): + events = dg.pop(event_entry) + grouped_events[event_entry] = _group_events(event_data=events, + grouping=grouping) + if len(grouped_events) == 1: + out = next(iter(grouped_events.values())) + else: + out = sc.Dataset(grouped_events) + out.coords.update(dg) + return out base_definitions['NXdata'] = NXdata From 17cf4cfbe4227a89f1fe467f9f55fb130984a4c5 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 31 Mar 2023 12:56:02 +0200 Subject: [PATCH 69/98] Rename asarray -> asvariable --- src/scippnexus/v2/base.py | 2 +- src/scippnexus/v2/nxdata.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 0037b856..738bda85 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -23,7 +23,7 @@ from ..typing import H5Dataset, H5Group, ScippIndex -def asarray(obj: Union[Any, sc.Variable]) -> 
sc.Variable: +def asvariable(obj: Union[Any, sc.Variable]) -> sc.Variable: return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 0865e12c..5b4dfe10 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -11,7 +11,14 @@ from .._common import to_child_select from ..typing import H5Dataset, ScippIndex -from .base import Field, Group, NexusStructureError, NXobject, asarray, base_definitions +from .base import ( + Field, + Group, + NexusStructureError, + NXobject, + asvariable, + base_definitions, +) def _guess_dims(dims, shape, dataset: H5Dataset): @@ -330,7 +337,7 @@ def group_events_by_detector_number( if (grouping := dg.get(key)) is not None: grouping_key = key break - grouping = None if grouping_key is None else asarray(dg.pop(grouping_key)) + grouping = None if grouping_key is None else asvariable(dg.pop(grouping_key)) grouped_events = sc.DataGroup() for event_entry in _find_event_entries(dg): events = dg.pop(event_entry) From 2346df884d9e6ce2733de0644078e0b0bcbb141c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 31 Mar 2023 13:01:12 +0200 Subject: [PATCH 70/98] Use posixpath for depends_on handling --- src/scippnexus/v2/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 738bda85..60ff3a10 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -5,7 +5,7 @@ import datetime import inspect -import os +import posixpath import re import warnings from collections.abc import Mapping @@ -33,7 +33,7 @@ def depends_on_to_relative_path(depends_on: str, parent_path: str) -> str: After loading we will generally not have the same root so absolute paths cannot be resolved after loading.""" if depends_on.startswith('/'): - return os.path.relpath(depends_on, parent_path) + return posixpath.relpath(depends_on, parent_path) return depends_on From 
90c786a9a91f9e8aed13d0742c3c175dbae5dc49 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 31 Mar 2023 13:23:44 +0200 Subject: [PATCH 71/98] Address comments on base.py --- src/scippnexus/v2/base.py | 42 ++++++++++++------------------ src/scippnexus/v2/nexus_classes.py | 6 ++++- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 60ff3a10..b79a5779 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -42,7 +42,7 @@ class DimensionedArray(Protocol): """ A multi-dimensional array with a unit and dimension labels. - Could be, e.g., a scipp.Variable or a dimple dataclass wrapping a numpy array. + Could be, e.g., a scipp.Variable or a simple dataclass wrapping a numpy array. """ @property @@ -133,19 +133,19 @@ def _dtype_fromdataset(dataset: H5Dataset) -> sc.DType: @dataclass class Field: + """NeXus field. + + In HDF5 fields are represented as dataset. + """ dataset: H5Dataset parent: Group sizes: Optional[Dict[str, int]] = None dtype: Optional[sc.DType] = None errors: Optional[H5Dataset] = None _is_time: Optional[bool] = None - """NeXus field. - - In HDF5 fields are represented as dataset. - """ @cached_property - def attrs(self) -> Dict[str, Any]: + def attrs(self) -> Mapping[str, Any]: """The attributes of the dataset. Cannot be used for writing attributes, since they are cached for performance.""" @@ -153,7 +153,7 @@ def attrs(self) -> Dict[str, Any]: dict(self.dataset.attrs) if self.dataset.attrs else dict()) @property - def dims(self) -> Tuple[str]: + def dims(self) -> Tuple[str, ...]: return tuple(self.sizes.keys()) @property @@ -179,7 +179,7 @@ def _load_variances(self, var, index): var.variances = np.broadcast_to(sc.pow(stddevs, sc.scalar(2)).values, shape=var.shape) - def __getitem__(self, select) -> Union[Any, sc.Variable]: + def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: """Load the field as a :py:class:`scipp.Variable` or Python object. 
If the shape is empty and no unit is given this returns a Python object, such @@ -298,6 +298,8 @@ def _init_field(self, field: Field): field.dtype = _dtype_fromdataset(field.dataset) def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + """Subclasses should call this in their __init__ method, or ensure that they + initialize the fields in `children` with the correct sizes and dtypes.""" self._attrs = attrs self._children = children for field in children.values(): @@ -442,16 +444,17 @@ def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: return Group(obj, definitions=self._definitions) items = {name: _make_child(obj) for name, obj in self._group.items()} + items = {k: v for k, v in items.items() if not k.startswith('cue_')} for suffix in ('_errors', '_error'): field_with_errors = [name for name in items if f'{name}{suffix}' in items] for name in field_with_errors: values = items[name] errors = items[f'{name}{suffix}'] - if (values.unit == errors.unit + if (isinstance(values, Field) and isinstance(errors, Field) + and values.unit == errors.unit and values.dataset.shape == errors.dataset.shape): values.errors = errors.dataset del items[f'{name}{suffix}'] - items = {k: v for k, v in items.items() if not k.startswith('cue_')} return items @cached_property @@ -540,7 +543,8 @@ def __getitem__(self, sel): if sel.startswith('/'): return self.file[sel[1:]] else: - return self[sel.split('/')[0]][sel[sel.index('/') + 1:]] + grp, path = sel.split('/', 1) + return self[grp][path] child = self._children[sel] if isinstance(child, Field): self._populate_fields() @@ -558,7 +562,7 @@ def isclass(x): dg = self._nexus.assemble(dg) except (sc.DimensionError, NexusStructureError) as e: self._warn_fallback(e) - # For a time-dependent transformation in NXtransformations, and NXlog may + # For a time-dependent transformation in NXtransformations, an NXlog may # take the place of the `value` field. 
In this case, we need to read the # properties of the NXlog group to make the actual transformation. from .nxtransformations import maybe_transformation @@ -617,18 +621,7 @@ def shape(self) -> Tuple[int, ...]: return tuple(self.sizes.values()) -class NXgeometry(NXobject): - - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - super().__init__(attrs=attrs, children=children) - - @staticmethod - def assemble_as_child(children: sc.DataGroup, - detector_number: Optional[sc.Variable] = None) -> sc.Variable: - return sc.scalar(children) - - -def create_field(group: H5Group, name: str, data: DimensionedArray, +def create_field(group: H5Group, name: str, data: Union[np.ndarray, DimensionedArray], **kwargs) -> H5Dataset: if not isinstance(data, sc.Variable): return group.create_dataset(name, data=data, **kwargs) @@ -670,4 +663,3 @@ def _nx_class_registry(): base_definitions = {} -base_definitions['NXgeometry'] = NXgeometry diff --git a/src/scippnexus/v2/nexus_classes.py b/src/scippnexus/v2/nexus_classes.py index 06a0b5de..bcf43ab2 100644 --- a/src/scippnexus/v2/nexus_classes.py +++ b/src/scippnexus/v2/nexus_classes.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from .base import NXgeometry, NXobject, NXroot # noqa F401 +from .base import NXobject, NXroot # noqa F401 from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 from .nxdata import NXdata, NXdetector, NXlog, NXmonitor # noqa F401 from .nxevent_data import NXevent_data # noqa F401 @@ -90,6 +90,10 @@ class NXfresnel_zone_plate(NXobject): """NXfresnel_zone_plate""" +class NXgeometry(NXobject): + """NXgeometry""" + + class NXgrating(NXobject): """NXgrating""" From 856d2ac95a4a1d71d6c409755eb99d01e5011ffe Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 31 Mar 2023 13:26:37 +0200 Subject: [PATCH 72/98] Fix __all__ --- 
src/scippnexus/v2/application_definitions/nxcansas/__init__.py | 2 +- src/scippnexus/v2/nxsample.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py index 35761501..6f25aa44 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py +++ b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py @@ -6,4 +6,4 @@ from .nxcansas import SASdata, SASentry, definitions -__all__ = [definitions, SASentry, SASdata] +__all__ = ['definitions', 'SASentry', 'SASdata'] diff --git a/src/scippnexus/v2/nxsample.py b/src/scippnexus/v2/nxsample.py index 2bc29555..168d2994 100644 --- a/src/scippnexus/v2/nxsample.py +++ b/src/scippnexus/v2/nxsample.py @@ -7,7 +7,7 @@ from .base import Field, Group, NXobject, ScippIndex, base_definitions -_matrix_units = dict(zip(['orientation_matrix', 'ub_matrix'], ['one', '1/Angstrom'])) +_matrix_units = {'orientation_matrix': 'one', 'ub_matrix': '1/Angstrom'} def _fix_unit(name, value): From e6a20a8e5c1cdea184ce69eefc69ddc47d80ac11 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 31 Mar 2023 13:35:33 +0200 Subject: [PATCH 73/98] Avoid cached_property in cases where it makes logic intransparent --- src/scippnexus/v2/base.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index b79a5779..5fbcc086 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -391,6 +391,8 @@ class Group(Mapping): def __init__(self, group: H5Group, definitions: Optional[Dict[str, type]] = None): self._group = group self._definitions = {} if definitions is None else definitions + self._lazy_children = None + self._lazy_nexus = None @property def nx_class(self) -> Optional[type]: @@ -433,9 +435,14 @@ def parent(self) -> Group: def file(self) -> Group: return Group(self._group.file, 
definitions=self._definitions) - @cached_property + @property def _children(self) -> Dict[str, Union[Field, Group]]: - """Cached children of the group.""" + """Lazily initialized children of the group.""" + if self._lazy_children is None: + self._lazy_children = self._read_children() + return self._lazy_children + + def _read_children(self) -> Dict[str, Union[Field, Group]]: def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: if is_dataset(obj): @@ -457,17 +464,19 @@ def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: del items[f'{name}{suffix}'] return items - @cached_property + @property def _nexus(self) -> NXobject: """Instance of the NXobject subclass corresponding to the NX_class attribute. This is used to determine dims, unit, and other attributes of the group and its children, as well as defining how children will be read and assembled into the result object when the group is indexed. + + Lazily initialized since the NXobject subclass init can be costly. """ - return self._definitions.get(self.attrs.get('NX_class'), - NXobject)(attrs=self.attrs, - children=self._children) + if self._lazy_nexus is None: + self._populate_fields() + return self._lazy_nexus def _populate_fields(self) -> None: """Populate the fields of the group. @@ -479,7 +488,9 @@ def _populate_fields(self) -> None: of any other field. For example, field attributes may define which fields are axes, and dim labels of other fields can be defined by the names of the axes. 
""" - _ = self._nexus + self._lazy_nexus = self._definitions.get(self.attrs.get('NX_class'), + NXobject)(attrs=self.attrs, + children=self._children) def __len__(self) -> int: return len(self._children) From c4fb7c9bcefdf08624b3340571b3165f56f0cfcf Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 06:59:57 +0200 Subject: [PATCH 74/98] Cleaner and fixed mechanism for forcing NXlog time axes to datetime --- src/scippnexus/v2/base.py | 3 --- src/scippnexus/v2/nxdata.py | 12 +++++++++--- tests/nexus_test.py | 3 ++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 5fbcc086..fa96b1de 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -142,7 +142,6 @@ class Field: sizes: Optional[Dict[str, int]] = None dtype: Optional[sc.DType] = None errors: Optional[H5Dataset] = None - _is_time: Optional[bool] = None @cached_property def attrs(self) -> Mapping[str, Any]: @@ -241,8 +240,6 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: for name in self.attrs: if (dt := _as_datetime(self.attrs[name])) is not None: starts.append(dt) - if self._is_time and len(starts) == 0: - starts.append(sc.epoch(unit=self.unit)) if len(starts) == 1: variable = convert_time_to_datetime64( variable, diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 5b4dfe10..e173a1f1 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -9,7 +9,7 @@ import numpy as np import scipp as sc -from .._common import to_child_select +from .._common import convert_time_to_datetime64, to_child_select from ..typing import H5Dataset, ScippIndex from .base import ( Field, @@ -264,8 +264,14 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group children=children, fallback_dims=('time', ), fallback_signal_name='value') - if (time := children.get('time')) is not None: - time._is_time = True + + def assemble(self, + dg: 
sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: + if (time := dg.get('time')) is not None: + if time.dtype != sc.DType.datetime64: + dg['time'] = convert_time_to_datetime64(time, + start=sc.epoch(unit=time.unit)) + return super().assemble(dg) class NXdetector(NXdata): diff --git a/tests/nexus_test.py b/tests/nexus_test.py index cd3d2921..e2934c80 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -193,7 +193,8 @@ def test_nxlog_with_shape_0(nxroot): log = nxroot['entry'].create_class('log', NXlog) log['value'] = da.data log['time'] = da.coords['time'] - assert sc.identical(log[...], da.rename(ignored='dim_1')) + da.coords['time'] = sc.datetimes(dims=['time'], values=[], unit='ns') + assert_identical(log[...], da.rename(ignored='dim_1')) def test_nxobject_event_data(nxroot): From 49a18ef34640c827336112e0d1d2340fb49eb35f Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 07:09:30 +0200 Subject: [PATCH 75/98] Fix length-0 datetime fields --- src/scippnexus/v2/base.py | 23 ++++++++++++++--------- tests/nexus_test.py | 7 +++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index fa96b1de..c0f42afd 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -205,7 +205,7 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: # If the variable is empty, return early if np.prod(shape) == 0: - return variable + return self._maybe_datetime(variable) if self.dtype == sc.DType.string: try: @@ -235,6 +235,17 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: self._load_variances(variable, index) else: variable.values = self.dataset[index] + if variable.ndim == 0 and variable.unit is None and variable.fields is None: + # Work around scipp/scipp#2815, and avoid returning NumPy bool + if isinstance(variable.values, np.ndarray) and variable.dtype != 'bool': + return variable.values[()] + else: + return 
variable.value + variable = self._maybe_datetime(variable) + from .nxtransformations import maybe_transformation + return maybe_transformation(self, value=variable, sel=select) + + def _maybe_datetime(self, variable: sc.Variable) -> sc.Variable: if _is_time(variable): starts = [] for name in self.attrs: @@ -245,14 +256,8 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: variable, start=starts[0], scaling_factor=self.attrs.get('scaling_factor')) - if variable.ndim == 0 and variable.unit is None and variable.fields is None: - # Work around scipp/scipp#2815, and avoid returning NumPy bool - if isinstance(variable.values, np.ndarray) and variable.dtype != 'bool': - return variable.values[()] - else: - return variable.value - from .nxtransformations import maybe_transformation - return maybe_transformation(self, value=variable, sel=select) + + return variable def __repr__(self) -> str: return f'' diff --git a/tests/nexus_test.py b/tests/nexus_test.py index e2934c80..8b1642c7 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -409,6 +409,13 @@ def test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxro assert sc.identical(nxroot['mytime'][...], offsets.rename(ignored='dim_0')) +def test_length_0_field_with_datetime_attribute_loaded_as_datetime(nxroot): + nxroot['mytime'] = sc.arange('ignored', 0, unit='ms') + nxroot['mytime'].dataset.attrs['start_time'] = '2022-12-12T12:13:14' + assert_identical(nxroot['mytime'][...], + sc.datetimes(dims=['dim_0'], unit='ms', values=[])) + + def create_event_data_ids_1234(group): group['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2, 2]) group['event_time_offset'] = sc.array(dims=[''], From bd4c5ca20a1f3db7f13ac30bb10a8f36a9fc1491 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 07:14:16 +0200 Subject: [PATCH 76/98] Fix length-0 time-independent transformation loading --- src/scippnexus/v2/base.py | 5 +++-- tests/nxtransformations_test.py | 26 
++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index c0f42afd..593fd843 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -184,6 +184,7 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: If the shape is empty and no unit is given this returns a Python object, such as a string or integer. Otherwise a :py:class:`scipp.Variable` is returned. """ + from .nxtransformations import maybe_transformation index = to_plain_index(self.dims, select) if isinstance(index, (int, slice)): index = (index, ) @@ -205,7 +206,8 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: # If the variable is empty, return early if np.prod(shape) == 0: - return self._maybe_datetime(variable) + variable = self._maybe_datetime(variable) + return maybe_transformation(self, value=variable, sel=select) if self.dtype == sc.DType.string: try: @@ -242,7 +244,6 @@ def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: else: return variable.value variable = self._maybe_datetime(variable) - from .nxtransformations import maybe_transformation return maybe_transformation(self, value=variable, sel=select) def _maybe_datetime(self, variable: sc.Variable) -> sc.Variable: diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 3f88848a..38f84d95 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -56,6 +56,32 @@ def test_Transformation_with_single_value(h5root): assert_identical(t, expected) +def test_time_independent_Transformation_with_length_0(h5root): + detector = create_detector(h5root) + snx.create_field(detector, 'depends_on', + sc.scalar('/detector_0/transformations/t1')) + transformations = snx.create_class(detector, 'transformations', NXtransformations) + value = sc.array(dims=['dim_0'], values=[], unit='mm') + offset = sc.spatial.translation(value=[1, 2, 3], 
unit='mm') + vector = sc.vector(value=[0, 0, 1]) + t = value * vector + expected = sc.spatial.translations(dims=t.dims, values=t.values, unit=t.unit) + expected = expected * offset + value = snx.create_field(transformations, 't1', value) + value.attrs['depends_on'] = '.' + value.attrs['transformation_type'] = 'translation' + value.attrs['offset'] = offset.values + value.attrs['offset_units'] = str(offset.unit) + value.attrs['vector'] = vector.value + + expected = sc.DataArray(data=expected, attrs={'depends_on': sc.scalar('.')}) + detector = make_group(detector) + depends_on = detector['depends_on'][()] + assert depends_on == 'transformations/t1' + t = detector[depends_on][()] + assert_identical(t, expected) + + def test_depends_on_absolute_path_to_sibling_group_resolved_to_relative_path(h5root): det1 = snx.create_class(h5root, 'det1', NXtransformations) snx.create_field(det1, 'depends_on', sc.scalar('/det2/transformations/t1')) From 2f9a239769283a75f6a896363413e2b0f62398c4 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 07:22:34 +0200 Subject: [PATCH 77/98] Fix type hint --- src/scippnexus/v2/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 593fd843..39b121f5 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -635,7 +635,7 @@ def shape(self) -> Tuple[int, ...]: return tuple(self.sizes.values()) -def create_field(group: H5Group, name: str, data: Union[np.ndarray, DimensionedArray], +def create_field(group: H5Group, name: str, data: Union[np.ndarray, sc.Variable], **kwargs) -> H5Dataset: if not isinstance(data, sc.Variable): return group.create_dataset(name, data=data, **kwargs) From 3784fdff9dc2de632ee9368d0cb2813ba5ff80e0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 07:36:08 +0200 Subject: [PATCH 78/98] Small readability improvements --- src/scippnexus/v2/nxdata.py | 18 ++++++----- src/scippnexus/v2/nxevent_data.py | 
50 ++++++++++++++++--------------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index e173a1f1..89a10baf 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -46,7 +46,7 @@ def __init__(self, fallback_dims: Optional[Tuple[str, ...]] = None, fallback_signal_name: Optional[str] = None): super().__init__(attrs=attrs, children=children) - self._valid = True + self._valid = True # True if the children can be assembled self._signal_name = None self._signal = None self._aux_signals = attrs.get('auxiliary_signals', []) @@ -131,17 +131,17 @@ def get_dims(name, field): # Newly written files should always contain indices attributes, but the # standard recommends that readers should also make "best effort" guess # since legacy files do not set this attribute. - if name in (self._signal_name, ): + if name == self._signal_name: return group_dims - if name in self._aux_signals: - return _guess_dims(group_dims, self._signal.dataset.shape, - field.dataset) # Latest way of defining dims if (dims := dims_from_indices.get(name)) is not None: return dims # Older way of defining dims via axis attribute if (axis := axis_index.get(name)) is not None: return (group_dims[axis - 1], ) + if name in self._aux_signals: + return _guess_dims(group_dims, self._signal.dataset.shape, + field.dataset) if name in named_axes: # If there are named axes then items of same name are "dimension # coordinates", i.e., have a dim matching their name. 
@@ -189,7 +189,7 @@ def sizes(self) -> Dict[str, int]: def unit(self) -> Union[None, sc.Unit]: return self._signal.unit if self._valid else super().unit - def _bin_edge_dim(self, coord: Field) -> Union[None, str]: + def _bin_edge_dim(self, coord: Union[Any, Field]) -> Union[None, str]: if not isinstance(coord, Field): return None sizes = self.sizes @@ -210,9 +210,10 @@ def assemble(self, dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: if not self._valid: raise NexusStructureError("Could not determine signal field or dimensions.") + dg = dg.copy() aux = {name: dg.pop(name) for name in self._aux_signals} - coords = sc.DataGroup(dg) - signal = coords.pop(self._signal_name) + signal = dg.pop(self._signal_name) + coords = dg da = sc.DataArray(data=signal) da = self._add_coords(da, coords) if aux: @@ -338,6 +339,7 @@ def _find_event_entries(dg: sc.DataGroup) -> List[str]: def group_events_by_detector_number( dg: sc.DataGroup) -> Union[sc.DataArray, sc.Dataset]: + dg = dg.copy() grouping_key = None for key in NXdetector._detector_number_fields: if (grouping := dg.get(key)) is not None: diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index f2be1af5..e2e19647 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -58,34 +58,13 @@ def field_dims(self, name: str, field: Field) -> Tuple[str, ...]: return (_event_dimension, ) return None - def read_children(self, obj: Group, select: ScippIndex) -> sc.DataGroup: - children = obj - index = to_plain_index([_pulse_dimension], select) - + def read_children(self, children: Group, select: ScippIndex) -> sc.DataGroup: if not children: # TODO Check that select is trivial? 
return sc.DataGroup() - max_index = self.shape[0] + index = to_plain_index([_pulse_dimension], select) event_time_zero = children['event_time_zero'][index] - if index is Ellipsis or index == tuple(): - last_loaded = False - else: - if isinstance(index, int): - start, stop, _ = slice(index, None).indices(max_index) - if start == stop: - raise IndexError('Index {start} is out of range') - index = slice(start, start + 1) - start, stop, stride = index.indices(max_index) - if stop + stride > max_index: - last_loaded = False - elif start == stop: - last_loaded = True - else: - stop += stride - last_loaded = True - index = slice(start, stop, stride) - - event_index = children['event_index'][index].values + last_loaded, event_index = self._get_event_index(children, index) num_event = children["event_time_offset"].shape[0] # Some files contain uint64 "max" indices, which turn into negatives during @@ -120,6 +99,29 @@ def read_children(self, obj: Group, select: ScippIndex) -> sc.DataGroup: dg['event_id'] = event_id return dg + def _get_event_index(self, children: sc.DataGroup, index): + max_index = self.shape[0] + if index is Ellipsis or index == tuple(): + last_loaded = False + else: + if isinstance(index, int): + start, stop, _ = slice(index, None).indices(max_index) + if start == stop: + raise IndexError(f'Index {start} is out of range') + index = slice(start, start + 1) + start, stop, stride = index.indices(max_index) + if stop + stride > max_index: + last_loaded = False + elif start == stop: + last_loaded = True + else: + stop += stride + last_loaded = True + index = slice(start, stop, stride) + + event_index = children['event_index'][index].values + return last_loaded, event_index + def assemble(self, children: sc.DataGroup) -> sc.DataArray: _check_for_missing_fields(children) event_time_offset = children['event_time_offset'] From 538926da47199045353bf39b93d9182b5650fba0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 07:52:00 +0200 Subject: [PATCH 
79/98] Move Field to file --- src/scippnexus/v2/__init__.py | 2 +- .../nxcansas/nxcansas.py | 3 +- src/scippnexus/v2/base.py | 215 +---------------- src/scippnexus/v2/field.py | 224 ++++++++++++++++++ src/scippnexus/v2/nxcylindrical_geometry.py | 3 +- src/scippnexus/v2/nxdata.py | 10 +- src/scippnexus/v2/nxevent_data.py | 10 +- src/scippnexus/v2/nxoff_geometry.py | 3 +- src/scippnexus/v2/nxsample.py | 3 +- src/scippnexus/v2/nxtransformations.py | 10 +- tests/nexus_test.py | 9 +- 11 files changed, 245 insertions(+), 247 deletions(-) create mode 100644 src/scippnexus/v2/field.py diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v2/__init__.py index 732293dd..05c01bdd 100644 --- a/src/scippnexus/v2/__init__.py +++ b/src/scippnexus/v2/__init__.py @@ -12,7 +12,6 @@ from .. import typing from .base import ( - Field, Group, NexusStructureError, NXobject, @@ -20,6 +19,7 @@ create_class, create_field, ) +from .field import Field from .file import File from .nexus_classes import * from .nxdata import group_events_by_detector_number diff --git a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py index 9e3a868b..3e773e56 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py +++ b/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py @@ -6,7 +6,8 @@ import scipp as sc from ....typing import H5Group -from ...base import Field, Group, NXobject, base_definitions, create_field +from ...base import Group, NXobject, base_definitions, create_field +from ...field import Field from ...nxdata import NXdata diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 39b121f5..c267eb4d 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -3,40 +3,25 @@ # @author Simon Heybrock from __future__ import annotations -import datetime import inspect -import posixpath -import re import warnings from collections.abc import Mapping -from dataclasses 
import dataclass from functools import cached_property, lru_cache from types import MappingProxyType from typing import Any, Dict, Iterator, List, Optional, Protocol, Tuple, Union, overload -import dateutil.parser import numpy as np import scipp as sc -from .._common import convert_time_to_datetime64, to_child_select, to_plain_index -from .._hdf5_nexus import _warn_latin1_decode +from .._common import to_child_select from ..typing import H5Dataset, H5Group, ScippIndex +from .field import Field def asvariable(obj: Union[Any, sc.Variable]) -> sc.Variable: return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) -def depends_on_to_relative_path(depends_on: str, parent_path: str) -> str: - """Replace depends_on paths with relative paths. - - After loading we will generally not have the same root so absolute paths - cannot be resolved after loading.""" - if depends_on.startswith('/'): - return posixpath.relpath(depends_on, parent_path) - return depends_on - - # TODO move into scipp class DimensionedArray(Protocol): """ @@ -73,45 +58,6 @@ def is_dataset(obj: Union[H5Group, H5Dataset]) -> bool: return hasattr(obj, 'shape') -def _is_time(obj): - if (unit := obj.unit) is None: - return False - return unit.to_dict().get('powers') == {'s': 1} - - -def _as_datetime(obj: Any): - if isinstance(obj, str): - try: - # NumPy and scipp cannot handle timezone information. We therefore apply it, - # i.e., convert to UTC. - # Would like to use dateutil directly, but with Python's datetime we do not - # get nanosecond precision. Therefore we combine numpy and dateutil parsing. - date_only = 'T' not in obj - if date_only: - return sc.datetime(obj) - date, time = obj.split('T') - time_and_timezone_offset = re.split(r'Z|\+|-', time) - time = time_and_timezone_offset[0] - if len(time_and_timezone_offset) == 1: - # No timezone, parse directly (scipp based on numpy) - return sc.datetime(f'{date}T{time}') - else: - # There is timezone info. Parse with dateutil. 
- dt = dateutil.parser.isoparse(obj) - dt = dt.replace(microsecond=0) # handled by numpy - dt = dt.astimezone(datetime.timezone.utc) - dt = dt.replace(tzinfo=None).isoformat() - # We operate with string operations here and thus end up parsing date - # and time twice. The reason is that the timezone-offset arithmetic - # cannot be done, e.g., in nanoseconds without causing rounding errors. - if '.' in time: - dt += f".{time.split('.')[1]}" - return sc.datetime(dt) - except ValueError: - pass - return None - - _scipp_dtype = { np.dtype('int8'): sc.DType.int32, np.dtype('int16'): sc.DType.int32, @@ -131,163 +77,6 @@ def _dtype_fromdataset(dataset: H5Dataset) -> sc.DType: return _scipp_dtype.get(dataset.dtype, sc.DType.string) -@dataclass -class Field: - """NeXus field. - - In HDF5 fields are represented as dataset. - """ - dataset: H5Dataset - parent: Group - sizes: Optional[Dict[str, int]] = None - dtype: Optional[sc.DType] = None - errors: Optional[H5Dataset] = None - - @cached_property - def attrs(self) -> Mapping[str, Any]: - """The attributes of the dataset. - - Cannot be used for writing attributes, since they are cached for performance.""" - return MappingProxyType( - dict(self.dataset.attrs) if self.dataset.attrs else dict()) - - @property - def dims(self) -> Tuple[str, ...]: - return tuple(self.sizes.keys()) - - @property - def shape(self) -> Tuple[int, ...]: - return tuple(self.sizes.values()) - - @cached_property - def file(self) -> Group: - return self.parent.file - - def _load_variances(self, var, index): - stddevs = sc.empty(dims=var.dims, - shape=var.shape, - dtype=var.dtype, - unit=var.unit) - try: - self.errors.read_direct(stddevs.values, source_sel=index) - except TypeError: - stddevs.values = self.errors[index].squeeze() - # According to the standard, errors must have the same shape as the data. - # This is not the case in all files we observed, is there any harm in - # attempting a broadcast? 
- var.variances = np.broadcast_to(sc.pow(stddevs, sc.scalar(2)).values, - shape=var.shape) - - def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: - """Load the field as a :py:class:`scipp.Variable` or Python object. - - If the shape is empty and no unit is given this returns a Python object, such - as a string or integer. Otherwise a :py:class:`scipp.Variable` is returned. - """ - from .nxtransformations import maybe_transformation - index = to_plain_index(self.dims, select) - if isinstance(index, (int, slice)): - index = (index, ) - - base_dims = self.dims - base_shape = self.shape - dims = [] - shape = [] - for i, ind in enumerate(index): - if not isinstance(ind, int): - dims.append(base_dims[i]) - shape.append(len(range(*ind.indices(base_shape[i])))) - - variable = sc.empty(dims=dims, - shape=shape, - dtype=self.dtype, - unit=self.unit, - with_variances=self.errors is not None) - - # If the variable is empty, return early - if np.prod(shape) == 0: - variable = self._maybe_datetime(variable) - return maybe_transformation(self, value=variable, sel=select) - - if self.dtype == sc.DType.string: - try: - strings = self.dataset.asstr()[index] - except UnicodeDecodeError as e: - strings = self.dataset.asstr(encoding='latin-1')[index] - _warn_latin1_decode(self.dataset, strings, str(e)) - variable.values = np.asarray(strings).flatten() - if self.dataset.name.endswith('depends_on') and variable.ndim == 0: - variable.value = depends_on_to_relative_path(variable.value, - self.dataset.parent.name) - elif variable.values.flags["C_CONTIGUOUS"]: - # On versions of h5py prior to 3.2, a TypeError occurs in some cases - # where h5py cannot broadcast data with e.g. shape (20, 1) to a buffer - # of shape (20,). Note that broadcasting (1, 20) -> (20,) does work - # (see https://github.com/h5py/h5py/pull/1796). - # Therefore, we manually squeeze here. 
- # A pin of h5py<3.2 is currently required by Mantid and hence scippneutron - # (see https://github.com/h5py/h5py/issues/1880#issuecomment-823223154) - # hence this workaround. Once we can use a more recent h5py with Mantid, - # this try/except can be removed. - try: - self.dataset.read_direct(variable.values, source_sel=index) - except TypeError: - variable.values = self.dataset[index].squeeze() - if self.errors is not None: - self._load_variances(variable, index) - else: - variable.values = self.dataset[index] - if variable.ndim == 0 and variable.unit is None and variable.fields is None: - # Work around scipp/scipp#2815, and avoid returning NumPy bool - if isinstance(variable.values, np.ndarray) and variable.dtype != 'bool': - return variable.values[()] - else: - return variable.value - variable = self._maybe_datetime(variable) - return maybe_transformation(self, value=variable, sel=select) - - def _maybe_datetime(self, variable: sc.Variable) -> sc.Variable: - if _is_time(variable): - starts = [] - for name in self.attrs: - if (dt := _as_datetime(self.attrs[name])) is not None: - starts.append(dt) - if len(starts) == 1: - variable = convert_time_to_datetime64( - variable, - start=starts[0], - scaling_factor=self.attrs.get('scaling_factor')) - - return variable - - def __repr__(self) -> str: - return f'' - - @property - def name(self) -> str: - return self.dataset.name - - @property - def ndim(self) -> int: - """Total number of dimensions in the dataset. - - See the shape property for potential differences to the value returned by the - underlying h5py.Dataset.ndim. 
- """ - return len(self.shape) - - @cached_property - def unit(self) -> Union[sc.Unit, None]: - if (unit := self.attrs.get('units')) is not None: - try: - return sc.Unit(unit) - except sc.UnitError: - warnings.warn(f"Unrecognized unit '{unit}' for value dataset " - f"in '{self.name}'; setting unit as 'dimensionless'") - return sc.units.one - return None - - def _squeezed_field_sizes(dataset: H5Dataset) -> Dict[str, int]: if (shape := dataset.shape) == (1, ): return {} diff --git a/src/scippnexus/v2/field.py b/src/scippnexus/v2/field.py new file mode 100644 index 00000000..8b34a09f --- /dev/null +++ b/src/scippnexus/v2/field.py @@ -0,0 +1,224 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +import datetime +import posixpath +import re +import warnings +from dataclasses import dataclass +from functools import cached_property +from types import MappingProxyType +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union + +import dateutil.parser +import numpy as np +import scipp as sc + +from scippnexus._common import convert_time_to_datetime64, to_plain_index +from scippnexus._hdf5_nexus import _warn_latin1_decode +from scippnexus.typing import H5Dataset, ScippIndex + +if TYPE_CHECKING: + from .base import Group + + +def depends_on_to_relative_path(depends_on: str, parent_path: str) -> str: + """Replace depends_on paths with relative paths. + + After loading we will generally not have the same root so absolute paths + cannot be resolved after loading.""" + if depends_on.startswith('/'): + return posixpath.relpath(depends_on, parent_path) + return depends_on + + +def _is_time(obj): + if (unit := obj.unit) is None: + return False + return unit.to_dict().get('powers') == {'s': 1} + + +def _as_datetime(obj: Any): + if isinstance(obj, str): + try: + # NumPy and scipp cannot handle timezone information. We therefore apply it, + # i.e., convert to UTC. 
+ # Would like to use dateutil directly, but with Python's datetime we do not + # get nanosecond precision. Therefore we combine numpy and dateutil parsing. + date_only = 'T' not in obj + if date_only: + return sc.datetime(obj) + date, time = obj.split('T') + time_and_timezone_offset = re.split(r'Z|\+|-', time) + time = time_and_timezone_offset[0] + if len(time_and_timezone_offset) == 1: + # No timezone, parse directly (scipp based on numpy) + return sc.datetime(f'{date}T{time}') + else: + # There is timezone info. Parse with dateutil. + dt = dateutil.parser.isoparse(obj) + dt = dt.replace(microsecond=0) # handled by numpy + dt = dt.astimezone(datetime.timezone.utc) + dt = dt.replace(tzinfo=None).isoformat() + # We operate with string operations here and thus end up parsing date + # and time twice. The reason is that the timezone-offset arithmetic + # cannot be done, e.g., in nanoseconds without causing rounding errors. + if '.' in time: + dt += f".{time.split('.')[1]}" + return sc.datetime(dt) + except ValueError: + pass + return None + + +@dataclass +class Field: + """NeXus field. + In HDF5 fields are represented as dataset. + """ + dataset: H5Dataset + parent: 'Group' + sizes: Optional[Dict[str, int]] = None + dtype: Optional[sc.DType] = None + errors: Optional[H5Dataset] = None + + @cached_property + def attrs(self) -> Dict[str, Any]: + """The attributes of the dataset. 
+ Cannot be used for writing attributes, since they are cached for performance.""" + return MappingProxyType( + dict(self.dataset.attrs) if self.dataset.attrs else dict()) + + @property + def dims(self) -> Tuple[str, ...]: + return tuple(self.sizes.keys()) + + @property + def shape(self) -> Tuple[int, ...]: + return tuple(self.sizes.values()) + + @cached_property + def file(self) -> 'Group': + return self.parent.file + + def _load_variances(self, var, index): + stddevs = sc.empty(dims=var.dims, + shape=var.shape, + dtype=var.dtype, + unit=var.unit) + try: + self.errors.read_direct(stddevs.values, source_sel=index) + except TypeError: + stddevs.values = self.errors[index].squeeze() + # According to the standard, errors must have the same shape as the data. + # This is not the case in all files we observed, is there any harm in + # attempting a broadcast? + var.variances = np.broadcast_to(sc.pow(stddevs, sc.scalar(2)).values, + shape=var.shape) + + def __getitem__(self, select: ScippIndex) -> Union[Any, sc.Variable]: + """Load the field as a :py:class:`scipp.Variable` or Python object. + If the shape is empty and no unit is given this returns a Python object, such + as a string or integer. Otherwise a :py:class:`scipp.Variable` is returned. 
+ """ + from .nxtransformations import maybe_transformation + index = to_plain_index(self.dims, select) + if isinstance(index, (int, slice)): + index = (index, ) + + base_dims = self.dims + base_shape = self.shape + dims = [] + shape = [] + for i, ind in enumerate(index): + if not isinstance(ind, int): + dims.append(base_dims[i]) + shape.append(len(range(*ind.indices(base_shape[i])))) + + variable = sc.empty(dims=dims, + shape=shape, + dtype=self.dtype, + unit=self.unit, + with_variances=self.errors is not None) + + # If the variable is empty, return early + if np.prod(shape) == 0: + variable = self._maybe_datetime(variable) + return maybe_transformation(self, value=variable, sel=select) + + if self.dtype == sc.DType.string: + try: + strings = self.dataset.asstr()[index] + except UnicodeDecodeError as e: + strings = self.dataset.asstr(encoding='latin-1')[index] + _warn_latin1_decode(self.dataset, strings, str(e)) + variable.values = np.asarray(strings).flatten() + if self.dataset.name.endswith('depends_on') and variable.ndim == 0: + variable.value = depends_on_to_relative_path(variable.value, + self.dataset.parent.name) + elif variable.values.flags["C_CONTIGUOUS"]: + # On versions of h5py prior to 3.2, a TypeError occurs in some cases + # where h5py cannot broadcast data with e.g. shape (20, 1) to a buffer + # of shape (20,). Note that broadcasting (1, 20) -> (20,) does work + # (see https://github.com/h5py/h5py/pull/1796). + # Therefore, we manually squeeze here. + # A pin of h5py<3.2 is currently required by Mantid and hence scippneutron + # (see https://github.com/h5py/h5py/issues/1880#issuecomment-823223154) + # hence this workaround. Once we can use a more recent h5py with Mantid, + # this try/except can be removed. 
+ try: + self.dataset.read_direct(variable.values, source_sel=index) + except TypeError: + variable.values = self.dataset[index].squeeze() + if self.errors is not None: + self._load_variances(variable, index) + else: + variable.values = self.dataset[index] + if variable.ndim == 0 and variable.unit is None and variable.fields is None: + # Work around scipp/scipp#2815, and avoid returning NumPy bool + if isinstance(variable.values, np.ndarray) and variable.dtype != 'bool': + return variable.values[()] + else: + return variable.value + variable = self._maybe_datetime(variable) + return maybe_transformation(self, value=variable, sel=select) + + def _maybe_datetime(self, variable: sc.Variable) -> sc.Variable: + if _is_time(variable): + starts = [] + for name in self.attrs: + if (dt := _as_datetime(self.attrs[name])) is not None: + starts.append(dt) + if len(starts) == 1: + variable = convert_time_to_datetime64( + variable, + start=starts[0], + scaling_factor=self.attrs.get('scaling_factor')) + + return variable + + def __repr__(self) -> str: + return f'' + + @property + def name(self) -> str: + return self.dataset.name + + @property + def ndim(self) -> int: + """Total number of dimensions in the dataset. + See the shape property for potential differences to the value returned by the + underlying h5py.Dataset.ndim. 
+ """ + return len(self.shape) + + @cached_property + def unit(self) -> Union[sc.Unit, None]: + if (unit := self.attrs.get('units')) is not None: + try: + return sc.Unit(unit) + except sc.UnitError: + warnings.warn(f"Unrecognized unit '{unit}' for value dataset " + f"in '{self.name}'; setting unit as 'dimensionless'") + return sc.units.one + return None diff --git a/src/scippnexus/v2/nxcylindrical_geometry.py b/src/scippnexus/v2/nxcylindrical_geometry.py index a2716c01..25e2b8a9 100644 --- a/src/scippnexus/v2/nxcylindrical_geometry.py +++ b/src/scippnexus/v2/nxcylindrical_geometry.py @@ -5,7 +5,8 @@ import scipp as sc -from .base import Field, Group, NexusStructureError, NXobject, base_definitions +from .base import Group, NexusStructureError, NXobject, base_definitions +from .field import Field def _parse(*, diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 89a10baf..36607c1d 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -11,14 +11,8 @@ from .._common import convert_time_to_datetime64, to_child_select from ..typing import H5Dataset, ScippIndex -from .base import ( - Field, - Group, - NexusStructureError, - NXobject, - asvariable, - base_definitions, -) +from .base import Group, NexusStructureError, NXobject, asvariable, base_definitions +from .field import Field def _guess_dims(dims, shape, dataset: H5Dataset): diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index e2e19647..c8865519 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -7,14 +7,8 @@ import scipp as sc from .._common import to_plain_index -from .base import ( - Field, - Group, - NexusStructureError, - NXobject, - ScippIndex, - base_definitions, -) +from .base import Group, NexusStructureError, NXobject, ScippIndex, base_definitions +from .field import Field _event_dimension = "event" _pulse_dimension = "event_time_zero" diff --git a/src/scippnexus/v2/nxoff_geometry.py 
b/src/scippnexus/v2/nxoff_geometry.py index 712544a5..f66e6635 100644 --- a/src/scippnexus/v2/nxoff_geometry.py +++ b/src/scippnexus/v2/nxoff_geometry.py @@ -5,7 +5,8 @@ import scipp as sc -from .base import Field, Group, NexusStructureError, NXobject, base_definitions +from .base import Group, NexusStructureError, NXobject, base_definitions +from .field import Field def off_to_shape(*, diff --git a/src/scippnexus/v2/nxsample.py b/src/scippnexus/v2/nxsample.py index 168d2994..0964f4ab 100644 --- a/src/scippnexus/v2/nxsample.py +++ b/src/scippnexus/v2/nxsample.py @@ -5,7 +5,8 @@ import scipp as sc -from .base import Field, Group, NXobject, ScippIndex, base_definitions +from .base import Group, NXobject, ScippIndex, base_definitions +from .field import Field _matrix_units = {'orientation_matrix': 'one', 'ub_matrix': '1/Angstrom'} diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v2/nxtransformations.py index ae381e12..2a149ae2 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v2/nxtransformations.py @@ -9,14 +9,8 @@ import scipp as sc from scipp.scipy import interpolate -from .base import ( - Field, - Group, - NexusStructureError, - NXobject, - ScippIndex, - depends_on_to_relative_path, -) +from .base import Group, NexusStructureError, NXobject, ScippIndex +from .field import Field, depends_on_to_relative_path class TransformationError(NexusStructureError): diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 8b1642c7..3fb9fcb8 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -9,7 +9,6 @@ import scippnexus.v2 as snx from scippnexus.v2 import ( - Field, NexusStructureError, NXdetector, NXentry, @@ -242,8 +241,8 @@ def test_nxobject_getitem_by_class_get_fields(nxroot): nxroot['entry'].create_class('events_0', NXevent_data) nxroot['entry']['field1'] = sc.arange('event', 4.0, unit='ns') nxroot['entry']['field2'] = sc.arange('event', 2.0, unit='ns') - assert list(nxroot[Field]) == [] - assert 
set(nxroot['entry'][Field]) == {'field1', 'field2'} + assert list(nxroot[snx.Field]) == [] + assert set(nxroot['entry'][snx.Field]) == {'field1', 'field2'} def test_nxobject_getitem_by_class_list(nxroot): @@ -253,14 +252,14 @@ def test_nxobject_getitem_by_class_list(nxroot): nxroot['entry']['field1'] = sc.arange('event', 4.0, unit='ns') assert set(nxroot['entry'][[NXlog, NXevent_data]]) == {'log', 'events_0', 'events_1'} - assert set(nxroot['entry'][[NXlog, Field]]) == {'log', 'field1'} + assert set(nxroot['entry'][[NXlog, snx.Field]]) == {'log', 'field1'} def test_nxobject_dataset_items_are_returned_as_Field(nxroot): events = nxroot['entry'].create_class('events_0', NXevent_data) events['event_time_offset'] = sc.arange('event', 5) field = nxroot['entry/events_0/event_time_offset'] - assert isinstance(field, Field) + assert isinstance(field, snx.Field) def test_field_properties(nxroot): From 0d62b3a51e9d7f767d92268fc34b36677b733a76 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 07:53:50 +0200 Subject: [PATCH 80/98] Remove unused protocol class --- src/scippnexus/v2/base.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index c267eb4d..615ff911 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -8,7 +8,7 @@ from collections.abc import Mapping from functools import cached_property, lru_cache from types import MappingProxyType -from typing import Any, Dict, Iterator, List, Optional, Protocol, Tuple, Union, overload +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, overload import numpy as np import scipp as sc @@ -22,27 +22,6 @@ def asvariable(obj: Union[Any, sc.Variable]) -> sc.Variable: return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) -# TODO move into scipp -class DimensionedArray(Protocol): - """ - A multi-dimensional array with a unit and dimension labels. 
- - Could be, e.g., a scipp.Variable or a simple dataclass wrapping a numpy array. - """ - - @property - def values(self): - """Multi-dimensional array of values""" - - @property - def unit(self): - """Physical unit of the values""" - - @property - def dims(self) -> Tuple[str]: - """Dimension labels for the values""" - - class NexusStructureError(Exception): """Invalid or unsupported class and field structure in Nexus. """ From d9df2c22e8305f4cf796ca9ed958b0f9b1e066d4 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 08:48:05 +0200 Subject: [PATCH 81/98] Begin splitting NXdata.__init__ --- src/scippnexus/v2/nxdata.py | 136 +++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 63 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 36607c1d..e3f40d0c 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -58,30 +58,8 @@ def __init__(self, self._signal = children[name] break - # Latest way of defining axes - axes = attrs.get('axes') - # Older way of defining axes - signal_axes = None if self._signal is None else self._signal.attrs.get('axes') - # Another old way of defining axes - axis_index = {} - for name, field in children.items(): - if (axis := field.attrs.get('axis')) is not None: - axis_index[name] = axis - - def _get_group_dims() -> Optional[Tuple[str, ...]]: - """Try three ways of defining group dimensions.""" - # Apparently it is not possible to define dim labels unless there are - # corresponding coords. Special case of '.' entries means "no coord". - if axes is not None: - return tuple(f'dim_{i}' if a == '.' 
else a for i, a in enumerate(axes)) - if signal_axes is not None: - return tuple(signal_axes.split(',')) - if axis_index: - return tuple( - k for k, _ in sorted(axis_index.items(), key=lambda item: item[1])) - return None - - group_dims = _get_group_dims() + self._init_axes(attrs=attrs, children=children) + group_dims = self._get_group_dims() if self._signal is None: self._valid = False @@ -99,15 +77,8 @@ def _get_group_dims() -> Optional[Tuple[str, ...]]: ] self._signal.sizes = dict(zip(group_dims, shape)) - if axes is not None: - # Unlike self.dims we *drop* entries that are '.' - named_axes = tuple(a for a in axes if a != '.') - elif signal_axes is not None: - named_axes = signal_axes.split(',') - elif fallback_dims is not None: - named_axes = fallback_dims - else: - named_axes = () + self._group_dims = group_dims + self._named_axes = self._get_named_axes(fallback_dims) indices_suffix = '_indices' indices_attrs = { @@ -116,39 +87,11 @@ def _get_group_dims() -> Optional[Tuple[str, ...]]: } dims = np.array(group_dims) - dims_from_indices = { + self._dims_from_indices = { key: tuple(dims[np.array(indices).flatten()]) for key, indices in indices_attrs.items() } - def get_dims(name, field): - # Newly written files should always contain indices attributes, but the - # standard recommends that readers should also make "best effort" guess - # since legacy files do not set this attribute. - if name == self._signal_name: - return group_dims - # Latest way of defining dims - if (dims := dims_from_indices.get(name)) is not None: - return dims - # Older way of defining dims via axis attribute - if (axis := axis_index.get(name)) is not None: - return (group_dims[axis - 1], ) - if name in self._aux_signals: - return _guess_dims(group_dims, self._signal.dataset.shape, - field.dataset) - if name in named_axes: - # If there are named axes then items of same name are "dimension - # coordinates", i.e., have a dim matching their name. 
- # However, if the item is not 1-D we need more labels. Try to use labels - # of signal if dimensionality matches. - if self._signal is not None and len(field.dataset.shape) == len( - self._signal.dataset.shape): - return group_dims - return (name, ) - if self._signal is not None and group_dims is not None: - return _guess_dims(group_dims, self._signal.dataset.shape, - field.dataset) - for name, field in children.items(): if not isinstance(field, Field): # If the NXdata contains subgroups we can generally not define valid @@ -161,7 +104,7 @@ def get_dims(name, field): 'NXtransformations', ]: self._valid = False - elif (dims := get_dims(name, field)) is not None: + elif (dims := self._get_dims(name, field)) is not None: # The convention here is that the given dimensions apply to the shapes # starting from the left. So we only squeeze dimensions that are after # len(dims). @@ -175,6 +118,73 @@ def get_dims(name, field): elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): self._valid = False + def _get_named_axes(self, fallback_dims) -> Tuple[str, ...]: + if self._axes is not None: + # Unlike self.dims we *drop* entries that are '.' 
+ return tuple(a for a in self._axes if a != '.') + elif self._signal_axes is not None: + return self._signal_axes.split(',') + elif fallback_dims is not None: + return fallback_dims + else: + return () + + def _init_axes(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, + Group]]): + # Latest way of defining axes + self._axes = attrs.get('axes') + # Older way of defining axes + self._signal_axes = None if self._signal is None else self._signal.attrs.get( + 'axes') + # Another old way of defining axes + self._axis_index = {} + for name, field in children.items(): + if (axis := field.attrs.get('axis')) is not None: + self._axis_index[name] = axis + + def _get_group_dims(self) -> Optional[Tuple[str, ...]]: + """Try three ways of defining group dimensions.""" + # Apparently it is not possible to define dim labels unless there are + # corresponding coords. Special case of '.' entries means "no coord". + if self._axes is not None: + return tuple(f'dim_{i}' if a == '.' else a + for i, a in enumerate(self._axes)) + if self._signal_axes is not None: + return tuple(self._signal_axes.split(',')) + if self._axis_index: + return tuple( + k + for k, _ in sorted(self._axis_index.items(), key=lambda item: item[1])) + return None + + def _get_dims(self, name, field): + # Newly written files should always contain indices attributes, but the + # standard recommends that readers should also make "best effort" guess + # since legacy files do not set this attribute. 
+ if name == self._signal_name: + return self._group_dims + # Latest way of defining dims + if (dims := self._dims_from_indices.get(name)) is not None: + return dims + # Older way of defining dims via axis attribute + if (axis := self._axis_index.get(name)) is not None: + return (self._group_dims[axis - 1], ) + if name in self._aux_signals: + return _guess_dims(self._group_dims, self._signal.dataset.shape, + field.dataset) + if name in self._named_axes: + # If there are named axes then items of same name are "dimension + # coordinates", i.e., have a dim matching their name. + # However, if the item is not 1-D we need more labels. Try to use labels + # of signal if dimensionality matches. + if self._signal is not None and len(field.dataset.shape) == len( + self._signal.dataset.shape): + return self._group_dims + return (name, ) + if self._signal is not None and self._group_dims is not None: + return _guess_dims(self._group_dims, self._signal.dataset.shape, + field.dataset) + @cached_property def sizes(self) -> Dict[str, int]: return self._signal.sizes if self._valid else super().sizes From a8d2f4e5f5bdd32918cd94a68fc32e3980cad561 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 08:59:08 +0200 Subject: [PATCH 82/98] Finish split --- src/scippnexus/v2/nxdata.py | 120 +++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 55 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index e3f40d0c..59fe6cce 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -44,53 +44,11 @@ def __init__(self, self._signal_name = None self._signal = None self._aux_signals = attrs.get('auxiliary_signals', []) - if (name := attrs.get('signal', - fallback_signal_name)) is not None and name in children: - self._signal_name = name - self._signal = children[name] - else: - # Legacy NXdata defines signal not as group attribute, but attr on dataset - for name, field in children.items(): - # We ignore the 
signal value. Usually it is 1, but apparently one could - # multiple signals. We do not support this, since it is legacy anyway. - if 'signal' in field.attrs: - self._signal_name = name - self._signal = children[name] - break + self._init_signal(name=attrs.get('signal', fallback_signal_name), + children=children) self._init_axes(attrs=attrs, children=children) - group_dims = self._get_group_dims() - - if self._signal is None: - self._valid = False - else: - if group_dims is not None: - shape = self._signal.dataset.shape - # If we have explicit group dims, we can drop trailing 1s. - shape = _squeeze_trailing(group_dims, shape) - self._signal.sizes = dict(zip(group_dims, shape)) - elif fallback_dims is not None: - shape = self._signal.dataset.shape - group_dims = [ - fallback_dims[i] if i < len(fallback_dims) else f'dim_{i}' - for i in range(len(shape)) - ] - self._signal.sizes = dict(zip(group_dims, shape)) - - self._group_dims = group_dims - self._named_axes = self._get_named_axes(fallback_dims) - - indices_suffix = '_indices' - indices_attrs = { - key[:-len(indices_suffix)]: attr - for key, attr in attrs.items() if key.endswith(indices_suffix) - } - - dims = np.array(group_dims) - self._dims_from_indices = { - key: tuple(dims[np.array(indices).flatten()]) - for key, indices in indices_attrs.items() - } + self._init_group_dims(attrs=attrs, fallback_dims=fallback_dims) for name, field in children.items(): if not isinstance(field, Field): @@ -118,16 +76,19 @@ def __init__(self, elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): self._valid = False - def _get_named_axes(self, fallback_dims) -> Tuple[str, ...]: - if self._axes is not None: - # Unlike self.dims we *drop* entries that are '.' 
- return tuple(a for a in self._axes if a != '.') - elif self._signal_axes is not None: - return self._signal_axes.split(',') - elif fallback_dims is not None: - return fallback_dims + def _init_signal(self, name: Optional[str], children): + if name is not None and name in children: + self._signal_name = name + self._signal = children[name] else: - return () + # Legacy NXdata defines signal not as group attribute, but attr on dataset + for name, field in children.items(): + # We ignore the signal value. Usually it is 1, but apparently one could + # multiple signals. We do not support this, since it is legacy anyway. + if 'signal' in field.attrs: + self._signal_name = name + self._signal = children[name] + break def _init_axes(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): @@ -136,12 +97,25 @@ def _init_axes(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, # Older way of defining axes self._signal_axes = None if self._signal is None else self._signal.attrs.get( 'axes') + if self._signal_axes is not None: + self._signal_axes = tuple(self._signal_axes.split(',')) # Another old way of defining axes self._axis_index = {} for name, field in children.items(): if (axis := field.attrs.get('axis')) is not None: self._axis_index[name] = axis + def _get_named_axes(self, fallback_dims) -> Tuple[str, ...]: + if self._axes is not None: + # Unlike self.dims we *drop* entries that are '.' + return tuple(a for a in self._axes if a != '.') + elif self._signal_axes is not None: + return self._signal_axes + elif fallback_dims is not None: + return fallback_dims + else: + return () + def _get_group_dims(self) -> Optional[Tuple[str, ...]]: """Try three ways of defining group dimensions.""" # Apparently it is not possible to define dim labels unless there are @@ -150,13 +124,49 @@ def _get_group_dims(self) -> Optional[Tuple[str, ...]]: return tuple(f'dim_{i}' if a == '.' 
else a for i, a in enumerate(self._axes)) if self._signal_axes is not None: - return tuple(self._signal_axes.split(',')) + return self._signal_axes if self._axis_index: return tuple( k for k, _ in sorted(self._axis_index.items(), key=lambda item: item[1])) return None + def _init_group_dims(self, + attrs: Dict[str, Any], + fallback_dims: Optional[Tuple[str, ...]] = None): + group_dims = self._get_group_dims() + + if self._signal is None: + self._valid = False + else: + if group_dims is not None: + shape = self._signal.dataset.shape + # If we have explicit group dims, we can drop trailing 1s. + shape = _squeeze_trailing(group_dims, shape) + self._signal.sizes = dict(zip(group_dims, shape)) + elif fallback_dims is not None: + shape = self._signal.dataset.shape + group_dims = [ + fallback_dims[i] if i < len(fallback_dims) else f'dim_{i}' + for i in range(len(shape)) + ] + self._signal.sizes = dict(zip(group_dims, shape)) + + self._group_dims = group_dims + self._named_axes = self._get_named_axes(fallback_dims) + + indices_suffix = '_indices' + indices_attrs = { + key[:-len(indices_suffix)]: attr + for key, attr in attrs.items() if key.endswith(indices_suffix) + } + + dims = np.array(group_dims) + self._dims_from_indices = { + key: tuple(dims[np.array(indices).flatten()]) + for key, indices in indices_attrs.items() + } + def _get_dims(self, name, field): # Newly written files should always contain indices attributes, but the # standard recommends that readers should also make "best effort" guess From 2376aa8a77f67926d312d021bdc360f3b1f55bca Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 09:25:45 +0200 Subject: [PATCH 83/98] Remove unused fixture param --- tests/nexus_test.py | 4 ++-- tests/nx2_test.py | 2 +- tests/nxcylindrical_geometry_test.py | 2 +- tests/nxdata_test.py | 4 ++-- tests/nxdetector_test.py | 4 ++-- tests/nxmonitor_test.py | 4 ++-- tests/nxoff_geometry_test.py | 2 +- tests/nxsample_test.py | 2 +- tests/nxtransformations_test.py | 2 
+- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 3fb9fcb8..31e109fe 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -28,14 +28,14 @@ @pytest.fixture() -def h5root(request): +def h5root(): """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f @pytest.fixture() -def nxroot(request): +def nxroot(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: root = snx.Group(f, definitions=snx.base_definitions) diff --git a/tests/nx2_test.py b/tests/nx2_test.py index 2764d605..5a7af1a7 100644 --- a/tests/nx2_test.py +++ b/tests/nx2_test.py @@ -7,7 +7,7 @@ @pytest.fixture() -def h5root(request): +def h5root(): """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f diff --git a/tests/nxcylindrical_geometry_test.py b/tests/nxcylindrical_geometry_test.py index 89c87134..480d47ff 100644 --- a/tests/nxcylindrical_geometry_test.py +++ b/tests/nxcylindrical_geometry_test.py @@ -6,7 +6,7 @@ @pytest.fixture() -def nxroot(request): +def nxroot(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: root = snx.Group(f, snx.base_definitions) diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 1edd6ec0..51ff937b 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -9,14 +9,14 @@ @pytest.fixture() -def h5root(request): +def h5root(): """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f @pytest.fixture() -def nxroot(request): +def nxroot(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: root = snx.Group(f, 
definitions=snx.base_definitions) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 0e291cdf..f5ca6bed 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -13,14 +13,14 @@ def make_group(group: h5py.Group) -> snx.Group: @pytest.fixture() -def h5root(request): +def h5root(): """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f @pytest.fixture() -def nxroot(request): +def nxroot(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: root = make_group(f) diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index 7e7ee318..4fe3da3b 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -7,7 +7,7 @@ @pytest.fixture() -def h5root(request): +def h5root(): """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f @@ -18,7 +18,7 @@ def make_group(group: h5py.Group) -> snx.Group: @pytest.fixture() -def group(request): +def group(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield snx.Group(f, definitions=snx.base_definitions) diff --git a/tests/nxoff_geometry_test.py b/tests/nxoff_geometry_test.py index 59e2672b..fac70525 100644 --- a/tests/nxoff_geometry_test.py +++ b/tests/nxoff_geometry_test.py @@ -8,7 +8,7 @@ @pytest.fixture() -def group(request): +def group(): """Yield NXroot containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield snx.Group(f, definitions=snx.base_definitions) diff --git a/tests/nxsample_test.py b/tests/nxsample_test.py index e8738f35..0b03b8c6 100644 --- a/tests/nxsample_test.py +++ b/tests/nxsample_test.py @@ -9,7 +9,7 @@ @pytest.fixture() -def nxroot(request): +def nxroot(): """Yield NXroot 
containing a single NXentry named 'entry'""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: root = snx.Group(f, definitions=snx.base_definitions) diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 38f84d95..0945247d 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -13,7 +13,7 @@ def make_group(group: h5py.Group) -> snx.Group: @pytest.fixture() -def h5root(request): +def h5root(): """Yield h5py root group (file)""" with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: yield f From 1544f13caa1baf87eab97c0bc1fe9b62f9f1a4d2 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 09:35:36 +0200 Subject: [PATCH 84/98] Add test for errors field with different unit --- tests/nexus_test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 31e109fe..bdc779fe 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -299,6 +299,25 @@ def test_field_unit_is_none_if_no_units_attribute(nxroot): assert field.unit is None +def test_field_errors_with_same_unit_handles_them_with_value(nxroot): + entry = nxroot.create_class('group', snx.NXentry) + entry['value'] = sc.array(dims=['ignored'], values=[10.0], unit='m') + entry['value_errors'] = sc.array(dims=['ignored'], values=[2.0], unit='m') + value = nxroot['group']['value'][()] + assert_identical(value, sc.scalar(value=10.0, variance=4.0, unit='m')) + + +def test_field_errors_with_different_unit_handles_them_individually(nxroot): + entry = nxroot.create_class('group', snx.NXentry) + entry['value'] = sc.array(dims=['ignored'], values=[10.0], unit='m') + entry['value_errors'] = sc.array(dims=['ignored'], values=[200.0], unit='cm') + value = nxroot['group']['value'][()] + assert_identical(value, sc.scalar(value=10.0, unit='m')) + assert 'value_errors' in nxroot['group'] + errors = nxroot['group']['value_errors'][()] + 
assert_identical(errors, sc.scalar(value=200.0, unit='cm')) + + @pytest.mark.parametrize('value,type_', [(1.2, np.float32), (123, np.int32), ('abc', str), (True, bool)]) def test_field_is_returned_as_python_object_if_shape_empty_and_no_unit( From 398dd97d1ec638b3cd352169c6f9827f5acf01de Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 09:41:42 +0200 Subject: [PATCH 85/98] Move tests --- tests/nexus_test.py | 168 ++++++++++++++++++++--------- tests/nx2_test.py | 209 ------------------------------------- tests/nxevent_data_test.py | 140 +++++++++++++++++++++++++ 3 files changed, 258 insertions(+), 259 deletions(-) delete mode 100644 tests/nx2_test.py create mode 100644 tests/nxevent_data_test.py diff --git a/tests/nexus_test.py b/tests/nexus_test.py index bdc779fe..80453b7d 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -434,56 +434,6 @@ def test_length_0_field_with_datetime_attribute_loaded_as_datetime(nxroot): sc.datetimes(dims=['dim_0'], unit='ms', values=[])) -def create_event_data_ids_1234(group): - group['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2, 2]) - group['event_time_offset'] = sc.array(dims=[''], - unit='s', - values=[456, 7, 3, 345, 632, 23]) - group['event_time_zero'] = sc.array(dims=[''], unit='s', values=[1, 2, 3, 4]) - group['event_index'] = sc.array(dims=[''], unit=None, values=[0, 3, 3, -1000]) - - -def test_negative_event_index_converted_to_num_event(nxroot): - event_data = nxroot['entry'].create_class('events_0', NXevent_data) - create_event_data_ids_1234(event_data) - events = nxroot['entry/events_0'][...] 
- assert events.bins.size().values[2] == 3 - assert events.bins.size().values[3] == 0 - - -def test_bad_event_index_causes_load_as_DataGroup(nxroot): - event_data = nxroot['entry'].create_class('events_0', NXevent_data) - event_data['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2]) - event_data['event_time_offset'] = sc.array(dims=[''], unit='s', values=[0, 0, 0, 0]) - event_data['event_time_zero'] = sc.array(dims=[''], unit='s', values=[1, 2, 3, 4]) - event_data['event_index'] = sc.array(dims=[''], unit=None, values=[0, 3, 3, 666]) - dg = nxroot['entry/events_0'][...] - assert isinstance(dg, sc.DataGroup) - - -def create_event_data_without_event_id(group): - group['event_time_offset'] = sc.array(dims=[''], - unit='s', - values=[456, 7, 3, 345, 632, 23]) - group['event_time_zero'] = sc.array(dims=[''], unit='s', values=[1, 2, 3, 4]) - group['event_index'] = sc.array(dims=[''], unit=None, values=[0, 3, 3, 5]) - - -def test_event_data_without_event_id_can_be_loaded(nxroot): - event_data = nxroot['entry'].create_class('events_0', NXevent_data) - create_event_data_without_event_id(event_data) - da = event_data[...] - assert len(da.bins.coords) == 1 - assert 'event_time_offset' in da.bins.coords - - -def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): - monitor = nxroot['entry'].create_class('monitor', NXmonitor) - create_event_data_without_event_id(monitor) - da = monitor[...] - assert 'event_time_offset' in da - - @pytest.mark.skip(reason='Special attributes disabled for now. 
Do we keep them?') def test___getattr__for_unique_child_groups(nxroot): entry = nxroot['entry'] @@ -515,3 +465,121 @@ def test___dir__includes_non_dynamic_properties(nxroot): det.create_class('events', NXevent_data) # Ensure we are not replacing __dir__ but adding to it assert 'unit' in det.__dir__() + + +def test_read_recursive(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data['signal'] = np.arange(4) + data['signal'].attrs['units'] = 'm' + data['time'] = np.arange(5) + data['time'].attrs['units'] = 's' + obj = snx.Group(entry) + dg = obj[()] + assert obj.sizes == {'dim_0': None} + assert 'data' in dg + + +def test_errors_read_as_variances(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data['signal'] = np.arange(4.0) + data['signal'].attrs['units'] = 'm' + data['signal_errors'] = np.arange(4.0) + data['signal_errors'].attrs['units'] = 'm' + data['time'] = np.arange(5.0) + data['time'].attrs['units'] = 's' + data['time_errors'] = np.arange(5.0) + data['time_errors'].attrs['units'] = 's' + obj = snx.Group(data) + assert set(obj._children.keys()) == {'signal', 'time'} + dg = obj[()] + assert dg['signal'].variances is not None + assert dg['time'].variances is not None + assert np.array_equal(dg['signal'].variances, np.arange(4.0)**2) + assert np.array_equal(dg['time'].variances, np.arange(5.0)**2) + + +def test_read_field(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data['signal'] = np.arange(4) + data['signal'].attrs['units'] = 'm' + obj = snx.Group(data) + var = obj['signal'][()] + assert sc.identical(var, sc.array(dims=['dim_0'], values=np.arange(4), unit='m')) + + +def test_nxdata_with_signal_axes_indices_reads_as_data_array(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data.attrs['NX_class'] = 'NXdata' + data.attrs['signal'] = 'signal' + data.attrs['axes'] = ['time', 'temperature'] + 
data.attrs['time_indices'] = [0] + data.attrs['temperature_indices'] = [1] + ref = sc.DataArray( + data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) + ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') + ref.coords['temperature'] = sc.array(dims=['temperature'], + values=np.arange(4), + unit='K') + data['signal'] = ref.values + data['signal'].attrs['units'] = str(ref.unit) + data['time'] = ref.coords['time'].values + data['time'].attrs['units'] = str(ref.coords['time'].unit) + data['temperature'] = ref.coords['temperature'].values + data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) + obj = snx.Group(data, definitions=snx.base_definitions) + da = obj[()] + assert sc.identical(da, ref) + + +def test_nxdata_positional_indexing_returns_correct_slice(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data.attrs['NX_class'] = 'NXdata' + data.attrs['signal'] = 'signal' + data.attrs['axes'] = ['time', 'temperature'] + data.attrs['time_indices'] = [0] + data.attrs['temperature_indices'] = [1] + ref = sc.DataArray( + data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) + ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') + ref.coords['temperature'] = sc.array(dims=['temperature'], + values=np.arange(4), + unit='K') + data['signal'] = ref.values + data['signal'].attrs['units'] = str(ref.unit) + data['time'] = ref.coords['time'].values + data['time'].attrs['units'] = str(ref.coords['time'].unit) + data['temperature'] = ref.coords['temperature'].values + data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) + obj = snx.Group(data, definitions=snx.base_definitions) + da = obj['time', 0:2] + assert sc.identical(da, ref['time', 0:2]) + + +def test_nxdata_with_bin_edges_positional_indexing_returns_correct_slice(h5root): + entry = h5root.create_group('entry') + data = entry.create_group('data') + data.attrs['NX_class'] = 
'NXdata' + data.attrs['signal'] = 'signal' + data.attrs['axes'] = ['time', 'temperature'] + data.attrs['time_indices'] = [0] + data.attrs['temperature_indices'] = [1] + ref = sc.DataArray( + data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) + ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') + ref.coords['temperature'] = sc.array(dims=['temperature'], + values=np.arange(5), + unit='K') + data['signal'] = ref.values + data['signal'].attrs['units'] = str(ref.unit) + data['time'] = ref.coords['time'].values + data['time'].attrs['units'] = str(ref.coords['time'].unit) + data['temperature'] = ref.coords['temperature'].values + data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) + obj = snx.Group(data, definitions=snx.base_definitions) + da = obj['temperature', 0:2] + assert sc.identical(da, ref['temperature', 0:2]) diff --git a/tests/nx2_test.py b/tests/nx2_test.py deleted file mode 100644 index 5a7af1a7..00000000 --- a/tests/nx2_test.py +++ /dev/null @@ -1,209 +0,0 @@ -import h5py -import numpy as np -import pytest -import scipp as sc - -import scippnexus.v2 as snx - - -@pytest.fixture() -def h5root(): - """Yield h5py root group (file)""" - with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: - yield f - - -def test_does_not_see_changes(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data['signal'] = np.arange(4) - data['time'] = np.arange(4) - obj = snx.Group(entry) - dg = obj[()] - assert obj.sizes == {'dim_0': 4} - assert 'data' in dg - entry.create_group('data2') - assert 'data2' not in dg # inserted after NXobject creation - - -def test_read_recursive(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data['signal'] = np.arange(4) - data['signal'].attrs['units'] = 'm' - data['time'] = np.arange(5) - data['time'].attrs['units'] = 's' - obj = snx.Group(entry) - dg = obj[()] - assert obj.sizes == 
{'dim_0': None} - assert 'data' in dg - - -def test_errors_read_as_variances(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data['signal'] = np.arange(4.0) - data['signal'].attrs['units'] = 'm' - data['signal_errors'] = np.arange(4.0) - data['signal_errors'].attrs['units'] = 'm' - data['time'] = np.arange(5.0) - data['time'].attrs['units'] = 's' - data['time_errors'] = np.arange(5.0) - data['time_errors'].attrs['units'] = 's' - obj = snx.Group(data) - assert set(obj._children.keys()) == {'signal', 'time'} - dg = obj[()] - assert dg['signal'].variances is not None - assert dg['time'].variances is not None - assert np.array_equal(dg['signal'].variances, np.arange(4.0)**2) - assert np.array_equal(dg['time'].variances, np.arange(5.0)**2) - - -def test_read_field(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data['signal'] = np.arange(4) - data['signal'].attrs['units'] = 'm' - obj = snx.Group(data) - var = obj['signal'][()] - assert sc.identical(var, sc.array(dims=['dim_0'], values=np.arange(4), unit='m')) - - -def test_read_empty_nxevent_data(h5root): - entry = h5root.create_group('entry') - events = entry.create_group('events') - events.attrs['NX_class'] = 'NXevent_data' - root = snx.Group(entry) - event_data = root['events'] - dg = event_data[()] - assert sc.identical(dg, sc.DataGroup()) - - -def make_event_data(h5root): - entry = h5root.create_group('entry') - events = entry.create_group('events') - events.attrs['NX_class'] = 'NXevent_data' - rng = np.random.default_rng(0) - events['event_id'] = rng.integers(0, 2, size=4) - events['event_time_offset'] = np.arange(4) - events['event_time_offset'].attrs['units'] = 'ns' - events['event_time_zero'] = np.array([100, 200]) - events['event_time_zero'].attrs['units'] = 'ms' - events['event_index'] = np.array([0, 3]) - return entry - - -def test_nxevent_data_keys(h5root): - entry = make_event_data(h5root) - root = snx.Group(entry) - event_data = 
root['events'] - assert set(event_data.keys()) == { - 'event_id', 'event_time_offset', 'event_time_zero', 'event_index' - } - - -def test_nxevent_data_children_read_as_variables_with_correct_dims(h5root): - entry = make_event_data(h5root) - root = snx.Group(entry, definitions=snx.base_definitions) - event_data = root['events'] - assert sc.identical(event_data['event_id'][()], - sc.array(dims=['event'], values=[1, 1, 1, 0], unit=None)) - assert sc.identical(event_data['event_time_offset'][()], - sc.array(dims=['event'], values=[0, 1, 2, 3], unit='ns')) - assert sc.identical( - event_data['event_time_zero'][()], - sc.array(dims=['event_time_zero'], values=[100, 200], unit='ms')) - assert sc.identical(event_data['event_index'][()], - sc.array(dims=['event_time_zero'], values=[0, 3], unit=None)) - - -def test_nxevent_data_dims_and_sizes_ignore_pulse_contents(h5root): - entry = make_event_data(h5root) - root = snx.Group(entry, definitions=snx.base_definitions) - event_data = root['events'] - assert event_data.dims == ('event_time_zero', ) - assert event_data.sizes == {'event_time_zero': 2} - - -def test_read_nxevent_data(h5root): - entry = make_event_data(h5root) - root = snx.Group(entry, definitions=snx.base_definitions) - event_data = root['events'] - da = event_data[()] - assert sc.identical(da.data.bins.size(), - sc.array(dims=['event_time_zero'], values=[3, 1], unit=None)) - - -def test_nxdata_with_signal_axes_indices_reads_as_data_array(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data.attrs['NX_class'] = 'NXdata' - data.attrs['signal'] = 'signal' - data.attrs['axes'] = ['time', 'temperature'] - data.attrs['time_indices'] = [0] - data.attrs['temperature_indices'] = [1] - ref = sc.DataArray( - data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) - ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') - ref.coords['temperature'] = sc.array(dims=['temperature'], - values=np.arange(4), - 
unit='K') - data['signal'] = ref.values - data['signal'].attrs['units'] = str(ref.unit) - data['time'] = ref.coords['time'].values - data['time'].attrs['units'] = str(ref.coords['time'].unit) - data['temperature'] = ref.coords['temperature'].values - data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) - obj = snx.Group(data, definitions=snx.base_definitions) - da = obj[()] - assert sc.identical(da, ref) - - -def test_nxdata_positional_indexing_returns_correct_slice(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data.attrs['NX_class'] = 'NXdata' - data.attrs['signal'] = 'signal' - data.attrs['axes'] = ['time', 'temperature'] - data.attrs['time_indices'] = [0] - data.attrs['temperature_indices'] = [1] - ref = sc.DataArray( - data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) - ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), unit='s') - ref.coords['temperature'] = sc.array(dims=['temperature'], - values=np.arange(4), - unit='K') - data['signal'] = ref.values - data['signal'].attrs['units'] = str(ref.unit) - data['time'] = ref.coords['time'].values - data['time'].attrs['units'] = str(ref.coords['time'].unit) - data['temperature'] = ref.coords['temperature'].values - data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) - obj = snx.Group(data, definitions=snx.base_definitions) - da = obj['time', 0:2] - assert sc.identical(da, ref['time', 0:2]) - - -def test_nxdata_with_bin_edges_positional_indexing_returns_correct_slice(h5root): - entry = h5root.create_group('entry') - data = entry.create_group('data') - data.attrs['NX_class'] = 'NXdata' - data.attrs['signal'] = 'signal' - data.attrs['axes'] = ['time', 'temperature'] - data.attrs['time_indices'] = [0] - data.attrs['temperature_indices'] = [1] - ref = sc.DataArray( - data=sc.ones(dims=['time', 'temperature'], shape=[3, 4], unit='m')) - ref.coords['time'] = sc.array(dims=['time'], values=np.arange(3), 
unit='s') - ref.coords['temperature'] = sc.array(dims=['temperature'], - values=np.arange(5), - unit='K') - data['signal'] = ref.values - data['signal'].attrs['units'] = str(ref.unit) - data['time'] = ref.coords['time'].values - data['time'].attrs['units'] = str(ref.coords['time'].unit) - data['temperature'] = ref.coords['temperature'].values - data['temperature'].attrs['units'] = str(ref.coords['temperature'].unit) - obj = snx.Group(data, definitions=snx.base_definitions) - da = obj['temperature', 0:2] - assert sc.identical(da, ref['temperature', 0:2]) diff --git a/tests/nxevent_data_test.py b/tests/nxevent_data_test.py new file mode 100644 index 00000000..3dcc473b --- /dev/null +++ b/tests/nxevent_data_test.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) + +import h5py +import numpy as np +import pytest +import scipp as sc + +import scippnexus.v2 as snx + + +@pytest.fixture() +def h5root(): + """Yield h5py root group (file)""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + yield f + + +@pytest.fixture() +def nxroot(): + """Yield NXroot containing a single NXentry named 'entry'""" + with h5py.File('dummy.nxs', mode='w', driver="core", backing_store=False) as f: + root = snx.Group(f, definitions=snx.base_definitions) + root.create_class('entry', snx.NXentry) + yield root + + +def create_event_data_ids_1234(group): + group['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2, 2]) + group['event_time_offset'] = sc.array(dims=[''], + unit='s', + values=[456, 7, 3, 345, 632, 23]) + group['event_time_zero'] = sc.array(dims=[''], unit='s', values=[1, 2, 3, 4]) + group['event_index'] = sc.array(dims=[''], unit=None, values=[0, 3, 3, -1000]) + + +def test_negative_event_index_converted_to_num_event(nxroot): + event_data = nxroot['entry'].create_class('events_0', snx.NXevent_data) + create_event_data_ids_1234(event_data) + events = 
nxroot['entry/events_0'][...] + assert events.bins.size().values[2] == 3 + assert events.bins.size().values[3] == 0 + + +def test_bad_event_index_causes_load_as_DataGroup(nxroot): + event_data = nxroot['entry'].create_class('events_0', snx.NXevent_data) + event_data['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2]) + event_data['event_time_offset'] = sc.array(dims=[''], unit='s', values=[0, 0, 0, 0]) + event_data['event_time_zero'] = sc.array(dims=[''], unit='s', values=[1, 2, 3, 4]) + event_data['event_index'] = sc.array(dims=[''], unit=None, values=[0, 3, 3, 666]) + dg = nxroot['entry/events_0'][...] + assert isinstance(dg, sc.DataGroup) + + +def create_event_data_without_event_id(group): + group['event_time_offset'] = sc.array(dims=[''], + unit='s', + values=[456, 7, 3, 345, 632, 23]) + group['event_time_zero'] = sc.array(dims=[''], unit='s', values=[1, 2, 3, 4]) + group['event_index'] = sc.array(dims=[''], unit=None, values=[0, 3, 3, 5]) + + +def test_event_data_without_event_id_can_be_loaded(nxroot): + event_data = nxroot['entry'].create_class('events_0', snx.NXevent_data) + create_event_data_without_event_id(event_data) + da = event_data[...] + assert len(da.bins.coords) == 1 + assert 'event_time_offset' in da.bins.coords + + +def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): + monitor = nxroot['entry'].create_class('monitor', snx.NXmonitor) + create_event_data_without_event_id(monitor) + da = monitor[...] 
+ assert 'event_time_offset' in da + + +def test_read_empty_nxevent_data(h5root): + entry = h5root.create_group('entry') + events = entry.create_group('events') + events.attrs['NX_class'] = 'NXevent_data' + root = snx.Group(entry) + event_data = root['events'] + dg = event_data[()] + assert sc.identical(dg, sc.DataGroup()) + + +def make_event_data(h5root): + entry = h5root.create_group('entry') + events = entry.create_group('events') + events.attrs['NX_class'] = 'NXevent_data' + rng = np.random.default_rng(0) + events['event_id'] = rng.integers(0, 2, size=4) + events['event_time_offset'] = np.arange(4) + events['event_time_offset'].attrs['units'] = 'ns' + events['event_time_zero'] = np.array([100, 200]) + events['event_time_zero'].attrs['units'] = 'ms' + events['event_index'] = np.array([0, 3]) + return entry + + +def test_nxevent_data_keys(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry) + event_data = root['events'] + assert set(event_data.keys()) == { + 'event_id', 'event_time_offset', 'event_time_zero', 'event_index' + } + + +def test_nxevent_data_children_read_as_variables_with_correct_dims(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry, definitions=snx.base_definitions) + event_data = root['events'] + assert sc.identical(event_data['event_id'][()], + sc.array(dims=['event'], values=[1, 1, 1, 0], unit=None)) + assert sc.identical(event_data['event_time_offset'][()], + sc.array(dims=['event'], values=[0, 1, 2, 3], unit='ns')) + assert sc.identical( + event_data['event_time_zero'][()], + sc.array(dims=['event_time_zero'], values=[100, 200], unit='ms')) + assert sc.identical(event_data['event_index'][()], + sc.array(dims=['event_time_zero'], values=[0, 3], unit=None)) + + +def test_nxevent_data_dims_and_sizes_ignore_pulse_contents(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry, definitions=snx.base_definitions) + event_data = root['events'] + assert event_data.dims == ('event_time_zero', ) + assert 
event_data.sizes == {'event_time_zero': 2} + + +def test_read_nxevent_data(h5root): + entry = make_event_data(h5root) + root = snx.Group(entry, definitions=snx.base_definitions) + event_data = root['events'] + da = event_data[()] + assert sc.identical(da.data.bins.size(), + sc.array(dims=['event_time_zero'], values=[3, 1], unit=None)) From 77e2d7cea892a771cbc621ee958c2c6370355e16 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 11:45:12 +0200 Subject: [PATCH 86/98] Restore ability to load monitors with embedded event data --- src/scippnexus/v2/nxdata.py | 13 ++++++++++++- src/scippnexus/v2/nxevent_data.py | 3 ++- tests/nxevent_data_test.py | 2 +- tests/nxmonitor_test.py | 1 - 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 59fe6cce..0cf32dd9 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -13,6 +13,7 @@ from ..typing import H5Dataset, ScippIndex from .base import Group, NexusStructureError, NXobject, asvariable, base_definitions from .field import Field +from .nxevent_data import NXevent_data def _guess_dims(dims, shape, dataset: H5Dataset): @@ -373,7 +374,17 @@ def group_events_by_detector_number( return out +def nxmonitor_factory(attrs: Dict[str, Any], + children: Dict[str, Union[Field, Group]]) -> NXobject: + if ('signal' not in attrs and 'data' not in children + and all(name in children for name in NXevent_data.mandatory_fields)): + # NXevent_data fields embedded in NXmonitor. This is not probably not + # really valid NeXus, but it is used in practice. 
+ return NXevent_data(attrs=attrs, children=children) + return NXmonitor(attrs=attrs, children=children) + + base_definitions['NXdata'] = NXdata base_definitions['NXlog'] = NXlog base_definitions['NXdetector'] = NXdetector -base_definitions['NXmonitor'] = NXmonitor +base_definitions['NXmonitor'] = nxmonitor_factory diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index c8865519..9fc5b0c2 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -15,13 +15,14 @@ def _check_for_missing_fields(fields): - for field in ("event_time_zero", "event_index", "event_time_offset"): + for field in NXevent_data.mandatory_fields: if field not in fields: raise NexusStructureError( f"Required field {field} not found in NXevent_data") class NXevent_data(NXobject): + mandatory_fields = ("event_time_zero", "event_index", "event_time_offset") def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): super().__init__(attrs=attrs, children=children) diff --git a/tests/nxevent_data_test.py b/tests/nxevent_data_test.py index 3dcc473b..e13585f5 100644 --- a/tests/nxevent_data_test.py +++ b/tests/nxevent_data_test.py @@ -72,7 +72,7 @@ def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): monitor = nxroot['entry'].create_class('monitor', snx.NXmonitor) create_event_data_without_event_id(monitor) da = monitor[...] 
- assert 'event_time_offset' in da + assert 'event_time_offset' in da.bins.coords def test_read_empty_nxevent_data(h5root): diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index 4fe3da3b..e70d686d 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -46,7 +46,6 @@ def create_event_data_no_ids(group): values=[0, 3, 3, 5])) -@pytest.mark.skip(reason="For now we do not support this broken type of NXmonitor") def test_loads_event_data_in_current_group(group): monitor = group.create_class('monitor1', snx.NXmonitor) create_event_data_no_ids(monitor) From 6964f0b4309789209f5c5b913b1bc9b360805c58 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 12:20:21 +0200 Subject: [PATCH 87/98] Revert "Restore ability to load monitors with embedded event data" This reverts commit 77e2d7cea892a771cbc621ee958c2c6370355e16. --- src/scippnexus/v2/nxdata.py | 13 +------------ src/scippnexus/v2/nxevent_data.py | 3 +-- tests/nxevent_data_test.py | 2 +- tests/nxmonitor_test.py | 1 + 4 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 0cf32dd9..59fe6cce 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -13,7 +13,6 @@ from ..typing import H5Dataset, ScippIndex from .base import Group, NexusStructureError, NXobject, asvariable, base_definitions from .field import Field -from .nxevent_data import NXevent_data def _guess_dims(dims, shape, dataset: H5Dataset): @@ -374,17 +373,7 @@ def group_events_by_detector_number( return out -def nxmonitor_factory(attrs: Dict[str, Any], - children: Dict[str, Union[Field, Group]]) -> NXobject: - if ('signal' not in attrs and 'data' not in children - and all(name in children for name in NXevent_data.mandatory_fields)): - # NXevent_data fields embedded in NXmonitor. This is not probably not - # really valid NeXus, but it is used in practice. 
- return NXevent_data(attrs=attrs, children=children) - return NXmonitor(attrs=attrs, children=children) - - base_definitions['NXdata'] = NXdata base_definitions['NXlog'] = NXlog base_definitions['NXdetector'] = NXdetector -base_definitions['NXmonitor'] = nxmonitor_factory +base_definitions['NXmonitor'] = NXmonitor diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index 9fc5b0c2..c8865519 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -15,14 +15,13 @@ def _check_for_missing_fields(fields): - for field in NXevent_data.mandatory_fields: + for field in ("event_time_zero", "event_index", "event_time_offset"): if field not in fields: raise NexusStructureError( f"Required field {field} not found in NXevent_data") class NXevent_data(NXobject): - mandatory_fields = ("event_time_zero", "event_index", "event_time_offset") def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): super().__init__(attrs=attrs, children=children) diff --git a/tests/nxevent_data_test.py b/tests/nxevent_data_test.py index e13585f5..3dcc473b 100644 --- a/tests/nxevent_data_test.py +++ b/tests/nxevent_data_test.py @@ -72,7 +72,7 @@ def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): monitor = nxroot['entry'].create_class('monitor', snx.NXmonitor) create_event_data_without_event_id(monitor) da = monitor[...] 
- assert 'event_time_offset' in da.bins.coords + assert 'event_time_offset' in da def test_read_empty_nxevent_data(h5root): diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index e70d686d..4fe3da3b 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -46,6 +46,7 @@ def create_event_data_no_ids(group): values=[0, 3, 3, 5])) +@pytest.mark.skip(reason="For now we do not support this broken type of NXmonitor") def test_loads_event_data_in_current_group(group): monitor = group.create_class('monitor1', snx.NXmonitor) create_event_data_no_ids(monitor) From 69ec01e16eb7b93d9321959f33622e3e7111598a Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 12:29:04 +0200 Subject: [PATCH 88/98] Remove unreachable code --- src/scippnexus/v2/nxdata.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 59fe6cce..9c99f3a3 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -322,8 +322,6 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group def _group_events(*, event_data: sc.DataArray, grouping: Optional[sc.Variable] = None) -> sc.DataArray: - if isinstance(event_data, sc.DataGroup): - raise NexusStructureError("Invalid NXevent_data in NXdetector.") if grouping is None: event_id = 'event_id' else: From 266a56bcf40c49f3e45833d8ab2bbe4de28b2689 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 13:01:43 +0200 Subject: [PATCH 89/98] Restore handling of files with raw bytes instead of string attrs --- src/scippnexus/v2/attrs.py | 28 ++++++++++++++++++++++++++++ src/scippnexus/v2/base.py | 6 +++--- src/scippnexus/v2/field.py | 5 +++-- tests/nexus_test.py | 9 +++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 src/scippnexus/v2/attrs.py diff --git a/src/scippnexus/v2/attrs.py b/src/scippnexus/v2/attrs.py new file mode 100644 index 00000000..3e5f7f9e --- /dev/null +++ 
b/src/scippnexus/v2/attrs.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from collections.abc import Mapping +from typing import Any, Iterator + +from .._hdf5_nexus import _cset_to_encoding, _ensure_str + + +class Attrs(Mapping): + + def __init__(self, attrs: Mapping): + self._base_attrs = attrs + self._attrs = dict(attrs) if attrs else dict() + + def __getitem__(self, name: str) -> Any: + attr = self._attrs[name] + # Is this check for string attributes sufficient? Is there a better way? + if isinstance(attr, (str, bytes)): + cset = self._base_attrs.get_id(name.encode("utf-8")).get_type().get_cset() + return _ensure_str(attr, _cset_to_encoding(cset)) + return attr + + def __iter__(self) -> Iterator[str]: + return iter(self._attrs) + + def __len__(self) -> int: + return len(self._attrs) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 615ff911..5e876148 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -15,6 +15,7 @@ from .._common import to_child_select from ..typing import H5Dataset, H5Group, ScippIndex +from .attrs import Attrs from .field import Field @@ -187,8 +188,7 @@ def attrs(self) -> Dict[str, Any]: # We may expected a per-subgroup overhead of 1 ms for reading attributes, so if # all we want is access one subgroup, we may save, e.g., a second for a group # with 1000 subgroups (or subfields). - return MappingProxyType( - dict(self._group.attrs) if self._group.attrs else dict()) + return MappingProxyType(Attrs(self._group.attrs)) @property def name(self) -> str: @@ -433,7 +433,7 @@ def create_class(group: H5Group, name: str, nx_class: Union[str, type]) -> H5Gro subclass of NXobject, such as NXdata or NXlog. 
""" group = group.create_group(name) - attr = nx_class if isinstance(nx_class, str) else nx_class.__name__ + attr = nx_class if isinstance(nx_class, (str, bytes)) else nx_class.__name__ group.attrs['NX_class'] = attr return group diff --git a/src/scippnexus/v2/field.py b/src/scippnexus/v2/field.py index 8b34a09f..4294dfd8 100644 --- a/src/scippnexus/v2/field.py +++ b/src/scippnexus/v2/field.py @@ -18,6 +18,8 @@ from scippnexus._hdf5_nexus import _warn_latin1_decode from scippnexus.typing import H5Dataset, ScippIndex +from .attrs import Attrs + if TYPE_CHECKING: from .base import Group @@ -86,8 +88,7 @@ class Field: def attrs(self) -> Dict[str, Any]: """The attributes of the dataset. Cannot be used for writing attributes, since they are cached for performance.""" - return MappingProxyType( - dict(self.dataset.attrs) if self.dataset.attrs else dict()) + return MappingProxyType(Attrs(self.dataset.attrs)) @property def dims(self) -> Tuple[str, ...]: diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 80453b7d..d0a6e1ad 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -90,6 +90,15 @@ def test_nxobject_entry(nxroot): assert set(entry.keys()) == {'events_0', 'events_1', 'log'} +def test_nx_class_can_be_bytes(h5root): + log = h5root.create_group('log') + attr = np.chararray((), itemsize=5) + attr[()] = b'NXlog' + log.attrs['NX_class'] = attr + group = snx.Group(log, definitions=snx.base_definitions) + assert group.nx_class == NXlog + + def test_nxobject_log(h5root): da = sc.DataArray(sc.array(dims=['time'], values=[1.1, 2.2, 3.3]), coords={ From c55bcd351daf734bbb62fb45df5911ad51548501 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 3 Apr 2023 13:18:47 +0200 Subject: [PATCH 90/98] Bump scipp requirement to fix segfault with Python3.10 var.value --- conda/meta.yaml | 2 +- requirements/base.txt | 4 ++-- setup.cfg | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 9dab4150..bb3c46a5 
100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -11,7 +11,7 @@ requirements: run: - python>=3.8 - python-dateutil - - scipp>=23.01.1 + - scipp>=23.03.1 - scipy - h5py diff --git a/requirements/base.txt b/requirements/base.txt index 06cc0697..d290a903 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,4 +1,4 @@ -# SHA1:20e195123d87c5cf695f1f70a994a4a4b8d9178b +# SHA1:c3c05567d668423130fa3b892cc2cae373423b1c # # This file is autogenerated by pip-compile-multi # To update, run: @@ -20,7 +20,7 @@ python-dateutil==2.8.2 # via -r base.in pyyaml==6.0 # via confuse -scipp==23.3.0 +scipp==23.3.1 # via -r base.in scipy==1.10.0 # via -r base.in diff --git a/setup.cfg b/setup.cfg index f997c943..6e320060 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,7 @@ package_dir = packages = find: install_requires = python-dateutil - scipp>=23.01.1 + scipp>=23.03.1 scipy # we use scipp.interpolate which depends on this h5py python_requires = >=3.8 From 94357b5189e92b53a07901246f11f93b4cc64ff8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 4 Apr 2023 07:45:26 +0200 Subject: [PATCH 91/98] Comment on dropping of cue_* fields --- src/scippnexus/v2/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 5e876148..584c8e02 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -222,6 +222,12 @@ def _make_child(obj: Union[H5Dataset, H5Group]) -> Union[Field, Group]: return Group(obj, definitions=self._definitions) items = {name: _make_child(obj) for name, obj in self._group.items()} + # In the case of NXevent_data, the `cue_` fields are unusable, since + # the definition is broken (the cue_index points into the + # event_time_offset/event_id fields, instead of the + # event_time_zero/event_index fields). In the case of NXlog they may + # be some utility if we deal with extremely long time-series that + # could be leverage for label-based indexing in the future. 
items = {k: v for k, v in items.items() if not k.startswith('cue_')} for suffix in ('_errors', '_error'): field_with_errors = [name for name in items if f'{name}{suffix}' in items] From e338eae49d329beb1801259da042cbedf63d239e Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 4 Apr 2023 08:13:37 +0200 Subject: [PATCH 92/98] Try another way of handling embedded event data --- src/scippnexus/v2/nxdata.py | 19 +++++++++++++++++-- src/scippnexus/v2/nxevent_data.py | 3 ++- tests/nxevent_data_test.py | 2 +- tests/nxmonitor_test.py | 1 - 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 9c99f3a3..55854045 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -13,6 +13,7 @@ from ..typing import H5Dataset, ScippIndex from .base import Group, NexusStructureError, NXobject, asvariable, base_definitions from .field import Field +from .nxevent_data import NXevent_data def _guess_dims(dims, shape, dataset: H5Dataset): @@ -55,6 +56,8 @@ def __init__(self, # If the NXdata contains subgroups we can generally not define valid # sizes... except for some non-signal "special fields" that return # a DataGroup that will be wrapped in a scalar Variable. + if name == self._signal_name: + continue if field.attrs.get('NX_class') not in [ 'NXoff_geometry', 'NXcylindrical_geometry', @@ -144,6 +147,8 @@ def _init_group_dims(self, # If we have explicit group dims, we can drop trailing 1s. 
shape = _squeeze_trailing(group_dims, shape) self._signal.sizes = dict(zip(group_dims, shape)) + elif isinstance(self._signal, Group): + group_dims = self._signal.dims elif fallback_dims is not None: shape = self._signal.dataset.shape group_dims = [ @@ -228,7 +233,7 @@ def assemble(self, aux = {name: dg.pop(name) for name in self._aux_signals} signal = dg.pop(self._signal_name) coords = dg - da = sc.DataArray(data=signal) + da = sc.DataArray(data=signal) if isinstance(signal, sc.Variable) else signal da = self._add_coords(da, coords) if aux: signals = {self._signal_name: da} @@ -316,7 +321,17 @@ def detector_number(self) -> Optional[str]: class NXmonitor(NXdata): def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - super().__init__(attrs=attrs, children=children, fallback_signal_name='data') + if all(name in children for name in NXevent_data.mandatory_fields): + parent = children['event_index'].parent._group + event_group = Group(parent, definitions={'NXmonitor': NXevent_data}) + children['events'] = event_group + for name in list(children): + if name.startswith('event_'): + del children[name] + signal = 'events' + else: + signal = 'data' + super().__init__(attrs=attrs, children=children, fallback_signal_name=signal) def _group_events(*, diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index c8865519..9fc5b0c2 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -15,13 +15,14 @@ def _check_for_missing_fields(fields): - for field in ("event_time_zero", "event_index", "event_time_offset"): + for field in NXevent_data.mandatory_fields: if field not in fields: raise NexusStructureError( f"Required field {field} not found in NXevent_data") class NXevent_data(NXobject): + mandatory_fields = ("event_time_zero", "event_index", "event_time_offset") def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): super().__init__(attrs=attrs, 
children=children) diff --git a/tests/nxevent_data_test.py b/tests/nxevent_data_test.py index 3dcc473b..e13585f5 100644 --- a/tests/nxevent_data_test.py +++ b/tests/nxevent_data_test.py @@ -72,7 +72,7 @@ def test_event_mode_monitor_without_event_id_can_be_loaded(nxroot): monitor = nxroot['entry'].create_class('monitor', snx.NXmonitor) create_event_data_without_event_id(monitor) da = monitor[...] - assert 'event_time_offset' in da + assert 'event_time_offset' in da.bins.coords def test_read_empty_nxevent_data(h5root): diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index 4fe3da3b..e70d686d 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -46,7 +46,6 @@ def create_event_data_no_ids(group): values=[0, 3, 3, 5])) -@pytest.mark.skip(reason="For now we do not support this broken type of NXmonitor") def test_loads_event_data_in_current_group(group): monitor = group.create_class('monitor1', snx.NXmonitor) create_event_data_no_ids(monitor) From 0cba7d6e1cb43d33f97ada0f2839925c6d8425d8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 4 Apr 2023 08:44:19 +0200 Subject: [PATCH 93/98] Restore handling of embedded event fields in NXdetector --- src/scippnexus/v2/nxdata.py | 37 +++++++++++++++++++++---------- src/scippnexus/v2/nxevent_data.py | 1 + tests/nxdetector_test.py | 17 ++++++++++++++ 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index 55854045..b9976b88 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -3,6 +3,7 @@ # @author Simon Heybrock from __future__ import annotations +import uuid from functools import cached_property from typing import Any, Dict, Iterable, List, Optional, Tuple, Union @@ -192,7 +193,7 @@ def _get_dims(self, name, field): # coordinates", i.e., have a dim matching their name. # However, if the item is not 1-D we need more labels. Try to use labels # of signal if dimensionality matches. 
- if self._signal is not None and len(field.dataset.shape) == len( + if isinstance(self._signal, Field) and len(field.dataset.shape) == len( self._signal.dataset.shape): return self._group_dims return (name, ) @@ -294,6 +295,26 @@ def assemble(self, return super().assemble(dg) +def _fallback_signal_or_embedded_nxevent_data( + children: Dict[str, Union[Field, Group]]) -> str: + if all(name in children for name in NXevent_data.mandatory_fields): + parent = children['event_index'].parent._group + event_group = Group(parent, + definitions={ + 'NXmonitor': NXevent_data, + 'NXdetector': NXevent_data + }) + event_name = 'events' + if event_name in children: + event_name = uuid.uuid4().hex + children[event_name] = event_group + for name in list(children): + if name in NXevent_data.handled_fields: + del children[name] + return event_name + return 'data' + + class NXdetector(NXdata): _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] @@ -308,10 +329,11 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group if (det_num_name := NXdetector._detector_number(children)) is not None: if children[det_num_name].dataset.ndim == 1: fallback_dims = ('detector_number', ) + signal = _fallback_signal_or_embedded_nxevent_data(children) super().__init__(attrs=attrs, children=children, fallback_dims=fallback_dims, - fallback_signal_name='data') + fallback_signal_name=signal) @property def detector_number(self) -> Optional[str]: @@ -321,16 +343,7 @@ def detector_number(self) -> Optional[str]: class NXmonitor(NXdata): def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - if all(name in children for name in NXevent_data.mandatory_fields): - parent = children['event_index'].parent._group - event_group = Group(parent, definitions={'NXmonitor': NXevent_data}) - children['events'] = event_group - for name in list(children): - if name.startswith('event_'): - del children[name] - signal = 'events' - else: - signal = 
'data' + signal = _fallback_signal_or_embedded_nxevent_data(children) super().__init__(attrs=attrs, children=children, fallback_signal_name=signal) diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index 9fc5b0c2..f9438b78 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -23,6 +23,7 @@ def _check_for_missing_fields(fields): class NXevent_data(NXobject): mandatory_fields = ("event_time_zero", "event_index", "event_time_offset") + handled_fields = mandatory_fields + ("event_id", ) def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): super().__init__(attrs=attrs, children=children) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index f5ca6bed..e0b4353c 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -72,6 +72,23 @@ def test_loads_signal_and_events_when_both_found(nxroot): assert loaded['events'].bins is not None +def test_loads_embedded_events_as_subgroup(nxroot): + detector_number = sc.array(dims=[''], unit=None, values=np.array([1, 2])) + detector = nxroot.create_class('detector0', NXdetector) + detector.create_field('detector_number', detector_number) + detector.create_field('event_id', sc.array(dims=[''], unit=None, values=[1])) + detector.create_field('event_time_offset', sc.array(dims=[''], unit='s', + values=[1])) + detector.create_field('event_time_zero', sc.array(dims=[''], unit='s', values=[1])) + detector.create_field('event_index', sc.array(dims=[''], unit='None', values=[0])) + loaded = detector[...] 
+ assert_identical(loaded['detector_number'], + detector_number.rename({'': 'detector_number'})) + assert loaded['events'].bins is not None + event_data = snx.group_events_by_detector_number(loaded) + assert event_data.sizes == {'detector_number': 2} + + def detector_numbers_xx_yy_1234(): return sc.array(dims=['xx', 'yy'], unit=None, values=np.array([[1, 2], [3, 4]])) From d3ddad26d26d25c2da77984b12b0b9109eb322dc Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 4 Apr 2023 14:24:56 +0200 Subject: [PATCH 94/98] Fix issue that led to multi-init of NXobject subclasses --- src/scippnexus/v2/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/v2/base.py index 584c8e02..62b6d633 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/v2/base.py @@ -251,8 +251,7 @@ def _nexus(self) -> NXobject: Lazily initialized since the NXobject subclass init can be costly. """ - if self._lazy_nexus is None: - self._populate_fields() + self._populate_fields() return self._lazy_nexus def _populate_fields(self) -> None: @@ -265,6 +264,8 @@ def _populate_fields(self) -> None: of any other field. For example, field attributes may define which fields are axes, and dim labels of other fields can be defined by the names of the axes. 
""" + if self._lazy_nexus is not None: + return self._lazy_nexus = self._definitions.get(self.attrs.get('NX_class'), NXobject)(attrs=self.attrs, children=self._children) From 098b806bb43e5f3d02ee0ccd9b2c10a9b47882eb Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 4 Apr 2023 14:26:02 +0200 Subject: [PATCH 95/98] Try to load embedded events in NXdetector better --- src/scippnexus/v2/nxdata.py | 44 +++++++++++++++++++++++++++---------- tests/nxdetector_test.py | 4 ++-- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py index b9976b88..dcdd4d97 100644 --- a/src/scippnexus/v2/nxdata.py +++ b/src/scippnexus/v2/nxdata.py @@ -295,8 +295,8 @@ def assemble(self, return super().assemble(dg) -def _fallback_signal_or_embedded_nxevent_data( - children: Dict[str, Union[Field, Group]]) -> str: +def _find_embedded_nxevent_data( + children: Dict[str, Union[Field, Group]]) -> Optional[Group]: if all(name in children for name in NXevent_data.mandatory_fields): parent = children['event_index'].parent._group event_group = Group(parent, @@ -304,15 +304,10 @@ def _fallback_signal_or_embedded_nxevent_data( 'NXmonitor': NXevent_data, 'NXdetector': NXevent_data }) - event_name = 'events' - if event_name in children: - event_name = uuid.uuid4().hex - children[event_name] = event_group for name in list(children): if name in NXevent_data.handled_fields: del children[name] - return event_name - return 'data' + return event_group class NXdetector(NXdata): @@ -329,11 +324,34 @@ def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group if (det_num_name := NXdetector._detector_number(children)) is not None: if children[det_num_name].dataset.ndim == 1: fallback_dims = ('detector_number', ) - signal = _fallback_signal_or_embedded_nxevent_data(children) + self._embedded_events = _find_embedded_nxevent_data(children) super().__init__(attrs=attrs, children=children, fallback_dims=fallback_dims, - 
fallback_signal_name=signal) + fallback_signal_name='data') + + def assemble(self, + dg: sc.DataGroup) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: + if self._valid: + obj = super().assemble(dg) + else: + obj = NXobject.assemble(self, dg) + if self._embedded_events is None: + return obj + # If events are embedded we are currently not including them in the `sizes`, + # so indexing is not possible. We could extend this in the future. + events = self._embedded_events[()] + if isinstance(events, sc.DataGroup): + if isinstance(obj, sc.DataArray): + return sc.DataGroup({self._signal_name: obj, 'events': events}) + else: + obj.update(events) + return obj + if isinstance(obj, sc.DataArray): + return sc.Dataset({self._signal_name: obj, 'events': events}) + else: + obj[uuid.uuid4().hex if 'events' in obj else 'events'] = events + return obj @property def detector_number(self) -> Optional[str]: @@ -343,7 +361,11 @@ def detector_number(self) -> Optional[str]: class NXmonitor(NXdata): def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - signal = _fallback_signal_or_embedded_nxevent_data(children) + if (event_group := _find_embedded_nxevent_data(children)) is not None: + signal = uuid.uuid4().hex if 'events' in children else 'events' + children[signal] = event_group + else: + signal = 'data' super().__init__(attrs=attrs, children=children, fallback_signal_name=signal) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index e0b4353c..3c19e4c4 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -73,7 +73,7 @@ def test_loads_signal_and_events_when_both_found(nxroot): def test_loads_embedded_events_as_subgroup(nxroot): - detector_number = sc.array(dims=[''], unit=None, values=np.array([1, 2])) + detector_number = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3])) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_number', detector_number) 
detector.create_field('event_id', sc.array(dims=[''], unit=None, values=[1])) @@ -86,7 +86,7 @@ def test_loads_embedded_events_as_subgroup(nxroot): detector_number.rename({'': 'detector_number'})) assert loaded['events'].bins is not None event_data = snx.group_events_by_detector_number(loaded) - assert event_data.sizes == {'detector_number': 2} + assert event_data.sizes == {'detector_number': 3} def detector_numbers_xx_yy_1234(): From 7d199bfd999fc8ec2893866760cb7fba855f9806 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 4 Apr 2023 14:28:08 +0200 Subject: [PATCH 96/98] Update test --- tests/nxdetector_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 3c19e4c4..76aaa000 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -28,12 +28,11 @@ def nxroot(): yield root -def test_warns_if_no_data_found(nxroot): +def test_returns_as_datagroup_if_no_signal_found(nxroot): detector_numbers = sc.array(dims=[''], unit=None, values=np.array([1, 2, 3, 4])) detector = nxroot.create_class('detector0', NXdetector) detector.create_field('detector_numbers', detector_numbers) - with pytest.warns(UserWarning, match="Failed to load "): - dg = detector[...] + dg = detector[...] 
assert isinstance(dg, sc.DataGroup) From bd17388856fbd592bdc49c77d1621ad56d5139c2 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 5 Apr 2023 09:54:07 +0200 Subject: [PATCH 97/98] Minor datetime code simplification --- src/scippnexus/v2/field.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/v2/field.py b/src/scippnexus/v2/field.py index 4294dfd8..b88ea429 100644 --- a/src/scippnexus/v2/field.py +++ b/src/scippnexus/v2/field.py @@ -55,7 +55,7 @@ def _as_datetime(obj: Any): time = time_and_timezone_offset[0] if len(time_and_timezone_offset) == 1: # No timezone, parse directly (scipp based on numpy) - return sc.datetime(f'{date}T{time}') + return sc.datetime(obj) else: # There is timezone info. Parse with dateutil. dt = dateutil.parser.isoparse(obj) From d6cf7170c1c147d8df9a2608ac9c84ed9bcfc688 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 5 Apr 2023 09:57:41 +0200 Subject: [PATCH 98/98] Remove superfluous dtype check for event_id --- src/scippnexus/v2/nxevent_data.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py index f9438b78..3a007def 100644 --- a/src/scippnexus/v2/nxevent_data.py +++ b/src/scippnexus/v2/nxevent_data.py @@ -73,12 +73,6 @@ def read_children(self, children: Group, select: ScippIndex) -> sc.DataGroup: else: event_select = slice(None) - if (event_id := children.get('event_id')) is not None: - event_id = event_id[event_select] - if event_id.dtype not in [sc.DType.int32, sc.DType.int64]: - raise NexusStructureError( - "NXevent_data contains event_id field with non-integer values") - event_time_offset = children['event_time_offset'][event_select] event_index = sc.array(dims=[_pulse_dimension], @@ -91,8 +85,8 @@ def read_children(self, children: Group, select: ScippIndex) -> sc.DataGroup: dg = sc.DataGroup(event_time_zero=event_time_zero, event_index=event_index, event_time_offset=event_time_offset) - if 
event_id is not None: - dg['event_id'] = event_id + if (event_id := children.get('event_id')) is not None: + dg['event_id'] = event_id[event_select] return dg def _get_event_index(self, children: sc.DataGroup, index):