From 2d292fbd186d2c30de11bdca2bd9df45a527a2eb Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 14 Aug 2023 06:44:38 +0200 Subject: [PATCH 1/7] Move files and update imports --- src/scippnexus/__init__.py | 15 +- .../nxcansas/__init__.py | 0 .../nxcansas/nxcansas.py | 2 +- src/scippnexus/{v2 => }/attrs.py | 2 +- src/scippnexus/{v2 => }/base.py | 4 +- src/scippnexus/{v2 => }/field.py | 0 src/scippnexus/file.py | 31 +- src/scippnexus/nexus_classes.py | 22 +- src/scippnexus/nxcylindrical_geometry.py | 28 +- src/scippnexus/nxdata.py | 856 +++++++++++++----- src/scippnexus/nxevent_data.py | 166 ++-- src/scippnexus/nxoff_geometry.py | 46 +- src/scippnexus/nxsample.py | 44 +- src/scippnexus/nxtransformations.py | 92 +- src/scippnexus/{v2 => v1}/__init__.py | 15 +- src/scippnexus/{ => v1}/definition.py | 0 .../{ => v1}/definitions/nxcansas.py | 0 src/scippnexus/v1/file.py | 31 + src/scippnexus/{ => v1}/leaf.py | 2 +- src/scippnexus/{v2 => v1}/nexus_classes.py | 22 +- .../{v2 => v1}/nxcylindrical_geometry.py | 28 +- src/scippnexus/v1/nxdata.py | 302 ++++++ src/scippnexus/{ => v1}/nxdetector.py | 0 src/scippnexus/{ => v1}/nxdisk_chopper.py | 0 src/scippnexus/v1/nxevent_data.py | 138 +++ src/scippnexus/{ => v1}/nxfermi_chopper.py | 0 src/scippnexus/{ => v1}/nxlog.py | 0 src/scippnexus/{ => v1}/nxmonitor.py | 0 src/scippnexus/{ => v1}/nxobject.py | 6 +- src/scippnexus/{v2 => v1}/nxoff_geometry.py | 46 +- src/scippnexus/v1/nxsample.py | 27 + src/scippnexus/{ => v1}/nxsource.py | 0 .../{v2 => v1}/nxtransformations.py | 92 +- src/scippnexus/v2/file.py | 40 - src/scippnexus/v2/nxdata.py | 658 -------------- src/scippnexus/v2/nxevent_data.py | 166 ---- src/scippnexus/v2/nxsample.py | 39 - tests/nxoff_geometry_test.py | 2 +- tests/nxtransformations_test.py | 2 +- 39 files changed, 1462 insertions(+), 1462 deletions(-) rename src/scippnexus/{v2 => }/application_definitions/nxcansas/__init__.py (100%) rename src/scippnexus/{v2 => }/application_definitions/nxcansas/nxcansas.py 
(99%) rename src/scippnexus/{v2 => }/attrs.py (93%) rename src/scippnexus/{v2 => }/base.py (99%) rename src/scippnexus/{v2 => }/field.py (100%) rename src/scippnexus/{v2 => v1}/__init__.py (60%) rename src/scippnexus/{ => v1}/definition.py (100%) rename src/scippnexus/{ => v1}/definitions/nxcansas.py (100%) create mode 100644 src/scippnexus/v1/file.py rename src/scippnexus/{ => v1}/leaf.py (95%) rename src/scippnexus/{v2 => v1}/nexus_classes.py (88%) rename src/scippnexus/{v2 => v1}/nxcylindrical_geometry.py (72%) create mode 100644 src/scippnexus/v1/nxdata.py rename src/scippnexus/{ => v1}/nxdetector.py (100%) rename src/scippnexus/{ => v1}/nxdisk_chopper.py (100%) create mode 100644 src/scippnexus/v1/nxevent_data.py rename src/scippnexus/{ => v1}/nxfermi_chopper.py (100%) rename src/scippnexus/{ => v1}/nxlog.py (100%) rename src/scippnexus/{ => v1}/nxmonitor.py (100%) rename src/scippnexus/{ => v1}/nxobject.py (99%) rename src/scippnexus/{v2 => v1}/nxoff_geometry.py (59%) create mode 100644 src/scippnexus/v1/nxsample.py rename src/scippnexus/{ => v1}/nxsource.py (100%) rename src/scippnexus/{v2 => v1}/nxtransformations.py (68%) delete mode 100644 src/scippnexus/v2/file.py delete mode 100644 src/scippnexus/v2/nxdata.py delete mode 100644 src/scippnexus/v2/nxevent_data.py delete mode 100644 src/scippnexus/v2/nxsample.py diff --git a/src/scippnexus/__init__.py b/src/scippnexus/__init__.py index de3588e5..72a4e9e7 100644 --- a/src/scippnexus/__init__.py +++ b/src/scippnexus/__init__.py @@ -11,10 +11,15 @@ __version__ = "0.0.0" from . 
import typing -from .definition import ApplicationDefinition, make_definition +from .base import ( + Group, + NexusStructureError, + NXobject, + base_definitions, + create_class, + create_field, +) +from .field import Attrs, Field from .file import File from .nexus_classes import * -from .nxdata import NXdataStrategy -from .nxdetector import NXdetectorStrategy -from .nxlog import NXlogStrategy -from .nxobject import Attrs, Field, NexusStructureError, NXobject, NXobjectStrategy +from .nxdata import group_events_by_detector_number diff --git a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py b/src/scippnexus/application_definitions/nxcansas/__init__.py similarity index 100% rename from src/scippnexus/v2/application_definitions/nxcansas/__init__.py rename to src/scippnexus/application_definitions/nxcansas/__init__.py diff --git a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py b/src/scippnexus/application_definitions/nxcansas/nxcansas.py similarity index 99% rename from src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py rename to src/scippnexus/application_definitions/nxcansas/nxcansas.py index db04d126..a3cc542c 100644 --- a/src/scippnexus/v2/application_definitions/nxcansas/nxcansas.py +++ b/src/scippnexus/application_definitions/nxcansas/nxcansas.py @@ -5,10 +5,10 @@ import scipp as sc -from ....typing import H5Group from ...base import Group, NXobject, base_definitions, create_field from ...field import Field from ...nxdata import NXdata +from ...typing import H5Group class SASentry: diff --git a/src/scippnexus/v2/attrs.py b/src/scippnexus/attrs.py similarity index 93% rename from src/scippnexus/v2/attrs.py rename to src/scippnexus/attrs.py index 88858dea..45f4ec3d 100644 --- a/src/scippnexus/v2/attrs.py +++ b/src/scippnexus/attrs.py @@ -4,7 +4,7 @@ from collections.abc import Mapping from typing import Any, Iterator -from .._hdf5_nexus import _cset_to_encoding, _ensure_str +from ._hdf5_nexus import _cset_to_encoding, 
_ensure_str class Attrs(Mapping): diff --git a/src/scippnexus/v2/base.py b/src/scippnexus/base.py similarity index 99% rename from src/scippnexus/v2/base.py rename to src/scippnexus/base.py index a3bc5a91..7ecc4aa2 100644 --- a/src/scippnexus/v2/base.py +++ b/src/scippnexus/base.py @@ -13,10 +13,10 @@ import numpy as np import scipp as sc -from .._common import to_child_select -from ..typing import H5Dataset, H5Group, ScippIndex +from ._common import to_child_select from .attrs import Attrs from .field import Field +from .typing import H5Dataset, H5Group, ScippIndex def asvariable(obj: Union[Any, sc.Variable]) -> sc.Variable: diff --git a/src/scippnexus/v2/field.py b/src/scippnexus/field.py similarity index 100% rename from src/scippnexus/v2/field.py rename to src/scippnexus/field.py diff --git a/src/scippnexus/file.py b/src/scippnexus/file.py index c59b53a4..015d0635 100644 --- a/src/scippnexus/file.py +++ b/src/scippnexus/file.py @@ -1,24 +1,33 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -import warnings from contextlib import AbstractContextManager +from typing import Mapping import h5py -from scipp import VisibleDeprecationWarning -from .nexus_classes import NXroot +from .base import Group, base_definitions +_default_definitions = object() -class File(AbstractContextManager, NXroot): - def __init__(self, *args, definition=None, **kwargs): - warnings.warn( - "This API is deprecated and will be removed and replaced in release 23.06. " - "Switch to 'import scippnexus.v2 as snx' to prepare for this.", - VisibleDeprecationWarning, - ) + +class File(AbstractContextManager, Group): + def __init__(self, *args, definitions: Mapping = _default_definitions, **kwargs): + """Context manager for NeXus files, similar to h5py.File. + + Arguments other than documented are as in :py:class:`h5py.File`. 
+ + Parameters + ---------- + definitions: + Mapping of NX_class names to application-specific definitions. + The default is to use the base definitions as defined in the + NeXus standard. + """ + if definitions is _default_definitions: + definitions = base_definitions() self._file = h5py.File(*args, **kwargs) - NXroot.__init__(self, self._file, definition=definition) + super().__init__(self._file, definitions=definitions) def __enter__(self): self._file.__enter__() diff --git a/src/scippnexus/nexus_classes.py b/src/scippnexus/nexus_classes.py index 7fc5080f..bcf43ab2 100644 --- a/src/scippnexus/nexus_classes.py +++ b/src/scippnexus/nexus_classes.py @@ -1,18 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock +from .base import NXobject, NXroot # noqa F401 from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 -from .nxdata import NXdata # noqa F401 -from .nxdetector import NXdetector # noqa F401 -from .nxdisk_chopper import NXdisk_chopper # noqa F401 +from .nxdata import NXdata, NXdetector, NXlog, NXmonitor # noqa F401 from .nxevent_data import NXevent_data # noqa F401 -from .nxfermi_chopper import NXfermi_chopper # noqa F401 -from .nxlog import NXlog # noqa F401 -from .nxmonitor import NXmonitor # noqa F401 -from .nxobject import NXobject, NXroot # noqa F401 from .nxoff_geometry import NXoff_geometry # noqa F401 from .nxsample import NXsample # noqa F401 -from .nxsource import NXsource # noqa F401 from .nxtransformations import NXtransformations # noqa F401 @@ -72,10 +66,18 @@ class NXdetector_module(NXobject): """NXdetector_module""" +class NXdisk_chopper(NXobject): + """NXdisk_chopper""" + + class NXenvironment(NXobject): """NXenvironment""" +class NXfermi_chopper(NXobject): + """NXfermi_chopper""" + + class NXfilter(NXobject): """NXfilter""" @@ -164,6 +166,10 @@ class NXshape(NXobject): """NXshape""" +class NXsource(NXobject): + """NXsource""" + + class 
NXslit(NXobject): """NXslit""" diff --git a/src/scippnexus/nxcylindrical_geometry.py b/src/scippnexus/nxcylindrical_geometry.py index 145174c1..4e325961 100644 --- a/src/scippnexus/nxcylindrical_geometry.py +++ b/src/scippnexus/nxcylindrical_geometry.py @@ -1,11 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional, Tuple, Union +from typing import Any, Dict, Optional, Union import scipp as sc -from .nxobject import NexusStructureError, NXobject +from .base import Group, NexusStructureError, NXobject, base_definitions_dict +from .field import Field def _parse( @@ -55,15 +56,24 @@ class NXcylindrical_geometry(NXobject): 'cylinders': ('cylinder', 'vertex_index'), } - def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: - return self._dims.get(name) - - def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: - if name == 'vertices': - return sc.DType.vector3 - return None + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for name, field in children.items(): + if isinstance(field, Field): + field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) + if name == 'vertices': + field.dtype = sc.DType.vector3 def load_as_array( self, detector_number: Optional[sc.Variable] = None ) -> sc.Variable: return _parse(**self[()], parent_detector_number=detector_number) + + @staticmethod + def assemble_as_child( + children: sc.DataGroup, detector_number: Optional[sc.Variable] = None + ) -> sc.Variable: + return _parse(**children, parent_detector_number=detector_number) + + +base_definitions_dict['NXcylindrical_geometry'] = NXcylindrical_geometry diff --git a/src/scippnexus/nxdata.py b/src/scippnexus/nxdata.py index 095f6c47..c7639618 100644 --- a/src/scippnexus/nxdata.py +++ b/src/scippnexus/nxdata.py @@ -3,231 +3,362 @@ # @author Simon Heybrock from 
__future__ import annotations -from typing import List, Optional, Union -from warnings import warn +import uuid +from functools import cached_property +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import numpy as np import scipp as sc -from ._common import to_child_select -from .nxcylindrical_geometry import NXcylindrical_geometry -from .nxobject import Field, NexusStructureError, NXobject, ScippIndex, asarray -from .nxoff_geometry import NXoff_geometry -from .nxtransformations import NXtransformations -from .typing import H5Group - - -class NXdataStrategy: - """ - Strategy used by :py:class:`scippnexus.NXdata`. - - May be subclassed to customize behavior. - """ - - _error_suffixes = ['_errors', '_error'] # _error is the deprecated suffix - - @staticmethod - def axes(group): - """Names of the axes (dimension labels).""" - return group.attrs.get('axes') - - @staticmethod - def signal(group): - """Name of the signal field.""" - if (name := group.attrs.get('signal')) is not None: - if name in group: - return name - # Legacy NXdata defines signal not as group attribute, but attr on dataset - for name in group.keys(): - # What is the meaning of the attribute value? It is undocumented, we simply - # ignore it. 
- if 'signal' in group._get_child(name).attrs: - return name +from ._common import _to_canonical_select, convert_time_to_datetime64, to_child_select +from .base import ( + Group, + NexusStructureError, + NXobject, + asvariable, + base_definitions_dict, +) +from .field import Field, _is_time +from .nxevent_data import NXevent_data +from .typing import H5Dataset, ScippIndex + + +def _guess_dims(dims, shape, dataset: H5Dataset): + """Guess dims of non-signal dataset based on shape.""" + if shape is None: return None - - @staticmethod - def signal_errors(group) -> Optional[str]: - """Name of the field to use for standard-deviations of the signal.""" - name = f'{NXdataStrategy.signal(group)}_errors' - if name in group: - return name - # This is a legacy named, deprecated in the NeXus format. - if 'errors' in group: - return 'errors' - - @staticmethod - def coord_errors(group, name): - """Name of the field to use for standard-deviations of a coordinate.""" - errors = [f'{name}{suffix}' for suffix in NXdataStrategy._error_suffixes] - errors = [x for x in errors if x in group] - if len(errors) == 0: - return None - if len(errors) == 2: - warn( - f"Found {name}_errors as well as the deprecated " - f"{name}_error. The latter will be ignored." 
- ) - return errors[0] + if shape == dataset.shape: + return dims + lut = {} + for d, s in zip(dims, shape): + if shape.count(s) == 1: + lut[s] = d + try: + return [lut[s] for s in dataset.shape] + except KeyError: + try: # Inner dimension may be bin-edges + shape = list(dataset.shape) + shape[-1] -= 1 + return [lut[s] for s in shape] + except KeyError: + pass + return None class NXdata(NXobject): def __init__( self, - group: H5Group, - *, - definition=None, - strategy=None, - signal_override: Union[Field, '_EventField'] = None, # noqa: F821 - skip: List[str] = None, + attrs: Dict[str, Any], + children: Dict[str, Union[Field, Group]], + fallback_dims: Optional[Tuple[str, ...]] = None, + fallback_signal_name: Optional[str] = None, ): - """ - Parameters - ---------- - signal_override: - Field-like to use instead of trying to read signal from the file. This is - used when there is no signal or to provide a signal computed from - NXevent_data. - skip: - Names of fields to skip when loading coords. 
- """ - super().__init__(group, definition=definition, strategy=strategy) - self._signal_override = signal_override - self._skip = skip if skip is not None else [] - - def _default_strategy(self): - return NXdataStrategy - - @property - def shape(self) -> List[int]: - return self._signal.shape - - def _get_group_dims(self) -> Union[None, List[str]]: + super().__init__(attrs=attrs, children=children) + self._valid = True # True if the children can be assembled + self._signal_name = None + self._signal = None + self._aux_signals = attrs.get('auxiliary_signals', []) + + self._init_signal( + name=attrs.get('signal', fallback_signal_name), children=children + ) + if (errors := children.get('errors')) is not None: + if ( + isinstance(self._signal, Field) + and isinstance(errors, Field) + and self._signal.errors is None + and (errors.unit is None or self._signal.unit == errors.unit) + and self._signal.dataset.shape == errors.dataset.shape + ): + self._signal.errors = errors.dataset + del children['errors'] + self._init_axes(attrs=attrs, children=children) + self._init_group_dims(attrs=attrs, fallback_dims=fallback_dims) + + for name, field in children.items(): + self._init_field_dims(name, field) + + def _init_field_dims(self, name: str, field: Union[Field, Group]) -> None: + if not isinstance(field, Field): + # If the NXdata contains subgroups we can generally not define valid + # sizes... except for some non-signal "special fields" that return + # a DataGroup that will be wrapped in a scalar Variable. + if name == self._signal_name or name in self._aux_signals: + return + if field.attrs.get('NX_class') not in [ + 'NXoff_geometry', + 'NXcylindrical_geometry', + 'NXgeometry', + 'NXtransformations', + ]: + self._valid = False + elif (dims := self._get_dims(name, field)) is not None: + # The convention here is that the given dimensions apply to the shapes + # starting from the left. So we only squeeze dimensions that are after + # len(dims). 
+ shape = _squeeze_trailing(dims, field.dataset.shape) + field.sizes = dict(zip(dims, shape)) + elif self._valid: + s1 = self._signal.sizes + s2 = field.sizes + if not set(s2.keys()).issubset(set(s1.keys())): + self._valid = False + elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): + self._valid = False + + def _init_signal(self, name: Optional[str], children): + # There are multiple ways NeXus can define the "signal" dataset. The latest + # version uses `signal` attribute on the group (passed as `name`). However, + # we must give precedence to the `signal` attribute on the dataset, since + # older files may use that (and the `signal` group attribute is unrelated). + # Finally, NXlog and NXevent_data can take the role of the signal. In practice + # those may not be indicated by a `signal` attribute, but we support that + # anyway since otherwise we would not be able to find NXevent_data signals + # in many common files. + if name is not None and name in children: + self._signal_name = name + self._signal = children[name] + # Legacy NXdata defines signal not as group attribute, but attr on dataset + for name, field in children.items(): + # We ignore the signal value. Usually it is 1, but apparently one could + # have multiple signals. We do not support this, since it is legacy anyway. + if 'signal' in field.attrs: + self._signal_name = name + self._signal = children[name] + break + # NXlog or NXevent_data can take the role of the signal.
+ for name, field in children.items(): + if name == self._signal_name: + # Avoid duplicate handling + continue + if isinstance(field, EventField) or ( + isinstance(field, Group) and field.nx_class in [NXlog, NXevent_data] + ): + if self._signal is None: + self._signal_name = name + self._signal = field + else: + self._aux_signals.append(name) + + def _init_axes( + self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]] + ): + # Latest way of defining axes + self._axes = attrs.get('axes') + # Older way of defining axes + self._signal_axes = ( + None if self._signal is None else self._signal.attrs.get('axes') + ) + if self._signal_axes is not None: + self._signal_axes = tuple(self._signal_axes.split(':')) + # The standard says that the axes should be colon-separated, but some + # files use comma-separated. + if len(self._signal_axes) == 1 and self._signal.dataset.ndim > 1: + self._signal_axes = tuple(self._signal_axes[0].split(',')) + # Another old way of defining axes. Apparently there are two different ways in + # which this is used: A value of '1' indicates "this is an axis". As this would + # not allow for determining an order, we have to assume that the signal field + # has an "axes" attribute that defines the order. We can then ignore the "axis" + # attributes, since they hold no further information. If there is no "axes" + # attribute on the signal field then we have to assume that "axis" gives the + # 1-based index of the axis. + self._axis_index = {} + if self._signal_axes is None: + for name, field in children.items(): + if (axis := field.attrs.get('axis')) is not None: + self._axis_index[name] = axis + + def _get_named_axes(self, fallback_dims) -> Tuple[str, ...]: + if self._axes is not None: + # Unlike self.dims we *drop* entries that are '.'
+ return tuple(a for a in self._axes if a != '.') + elif self._signal_axes is not None: + return self._signal_axes + elif fallback_dims is not None: + return fallback_dims + else: + return () + + def _get_group_dims(self) -> Optional[Tuple[str, ...]]: + """Try three ways of defining group dimensions.""" # Apparently it is not possible to define dim labels unless there are # corresponding coords. Special case of '.' entries means "no coord". - if (axes := self._strategy.axes(self)) is not None: - return [f'dim_{i}' if a == '.' else a for i, a in enumerate(axes)] - axes = [] - # Names of axes that have an "axis" attribute serve as dim labels in legacy case - for name, field in self._group.items(): - if (axis := field.attrs.get('axis')) is not None: - axes.append((axis, name)) - if axes: - return [x[1] for x in sorted(axes)] - return None - - @property - def dims(self) -> List[str]: - if (d := self._get_group_dims()) is not None: - return d - # Legacy NXdata defines axes not as group attribute, but attr on dataset. - # This is handled by class Field. - return self._signal.dims - - @property - def unit(self) -> Union[sc.Unit, None]: - return self._signal.unit - - @property - def _signal_name(self) -> str: - return self._strategy.signal(self) - - @property - def _errors_name(self) -> Optional[str]: - return self._strategy.signal_errors(self) - - @property - def _signal(self) -> Union[Field, '_EventField', None]: # noqa: F821 - if self._signal_override is not None: - return self._signal_override - if self._signal_name is not None: - if self._signal_name not in self: - raise NexusStructureError( - f"Signal field '{self._signal_name}' not found in group." - ) - return self[self._signal_name] - return None - - def _get_axes(self): - """Return labels of named axes. Does not include default 'dim_{i}' names.""" - if (axes := self._strategy.axes(self)) is not None: - # Unlike self.dims we *drop* entries that are '.' 
- return [a for a in axes if a != '.'] - elif (signal := self._signal) is not None: - if (axes := signal.attrs.get('axes')) is not None: - dims = axes.split(':') - # The standard says that the axes should be colon-separated, but some - # files use comma-separated. - if len(dims) == 1 and self._signal.ndim > 1: - dims = tuple(axes.split(',')) - return dims - return [] - - def _guess_dims(self, name: str): - """Guess dims of non-signal dataset based on shape. - - Does not check for potential bin-edge coord. - """ - shape = self._get_child(name).shape - if self.shape == shape: - return self.dims - lut = {} - if self._signal is not None: - for d, s in zip(self.dims, self.shape): - if self.shape.count(s) == 1: - lut[s] = d - try: - dims = [lut[s] for s in shape] - except KeyError: - raise NexusStructureError( - f"Could not determine axis indices for {self.name}/{name}" + if self._axes is not None: + return tuple( + f'dim_{i}' if a == '.' else a for i, a in enumerate(self._axes) ) - return dims - - def _try_guess_dims(self, name): - try: - return self._guess_dims(name) - except NexusStructureError: - return None + if self._signal_axes is not None: + return self._signal_axes + if self._axis_index: + return tuple( + k for k, _ in sorted(self._axis_index.items(), key=lambda item: item[1]) + ) + return None - def _get_field_dims(self, name: str) -> Union[None, List[str]]: + def _init_group_dims( + self, attrs: Dict[str, Any], fallback_dims: Optional[Tuple[str, ...]] = None + ): + group_dims = self._get_group_dims() + + if self._signal is None: + self._valid = False + elif isinstance(self._signal, EventField): + group_dims = self._signal.dims + else: + if group_dims is not None: + shape = self._signal.dataset.shape + # If we have explicit group dims, we can drop trailing 1s. 
+ shape = _squeeze_trailing(group_dims, shape) + self._signal.sizes = dict(zip(group_dims, shape)) + elif isinstance(self._signal, Group): + group_dims = self._signal.dims + elif fallback_dims is not None: + shape = self._signal.dataset.shape + group_dims = [ + fallback_dims[i] if i < len(fallback_dims) else f'dim_{i}' + for i in range(len(shape)) + ] + self._signal.sizes = dict(zip(group_dims, shape)) + + self._group_dims = group_dims + self._named_axes = self._get_named_axes(fallback_dims) + + indices_suffix = '_indices' + indices_attrs = { + key[: -len(indices_suffix)]: attr + for key, attr in attrs.items() + if key.endswith(indices_suffix) + } + + dims = np.array(self._axes) + self._dims_from_indices = { + key: tuple(dims[np.array(indices).flatten()]) + for key, indices in indices_attrs.items() + } + + def _get_dims(self, name, field): # Newly written files should always contain indices attributes, but the # standard recommends that readers should also make "best effort" guess # since legacy files do not set this attribute. - if (indices := self.attrs.get(f'{name}_indices')) is not None: - return list(np.array(self.dims)[np.array(indices).flatten()]) - if (axis := self._get_child(name).attrs.get('axis')) is not None: - return (self._get_group_dims()[axis - 1],) - if name in [self._signal_name, self._errors_name]: - return self._get_group_dims() # if None, field determines dims itself - if name in list(self.attrs.get('auxiliary_signals', [])): - return self._try_guess_dims(name) - if name in self._get_axes(): + if name == self._signal_name: + return self._group_dims + # Latest way of defining dims + if (dims := self._dims_from_indices.get(name)) is not None: + if '.' in dims: + hdf5_dims = self._dims_from_hdf5(field) + return tuple( + dim if dim != '.' 
else hdf5_dim + for dim, hdf5_dim in zip(dims, hdf5_dims) + ) + return dims + # Older way of defining dims via axis attribute + if (axis := self._axis_index.get(name)) is not None: + return (self._group_dims[axis - 1],) + if name in self._aux_signals: + return _guess_dims( + self._group_dims, self._signal.dataset.shape, field.dataset + ) + if name in self._named_axes: # If there are named axes then items of same name are "dimension # coordinates", i.e., have a dim matching their name. - # However, if the item is not 1-D we need more labels. Try to use labels of - # signal if dimensionality matches. - if self._signal_name in self and self._get_child(name).ndim == len( - self.shape + # However, if the item is not 1-D we need more labels. Try to use labels + # of signal if dimensionality matches. + if isinstance(self._signal, Field) and len(field.dataset.shape) == len( + self._signal.dataset.shape ): - return self[self._signal_name].dims - return [name] - return self._try_guess_dims(name) + return self._group_dims + return (name,) + if self._signal is not None and self._group_dims is not None: + signal_shape = ( + self._signal.dataset.shape + if isinstance(self._signal, Field) + else ( + self._signal.shape if isinstance(self._signal, EventField) else None + ) + ) + return _guess_dims(self._group_dims, signal_shape, field.dataset) + # While not mandated or recommended by the standard, we can try to find HDF5 + # dim labels as a fallback option for defining dimension labels. Ideally we + # would like to do so in NXobject._init_field, but this causes significant + # overhead for small files with many datasets. Defined here, this will only + # take effect for NXdata, NXdetector, NXlog, and NXmonitor. 
+ return self._dims_from_hdf5(field) + + def _dims_from_hdf5(self, field): + hdf5_dims = [dim.label for dim in getattr(field.dataset, 'dims', [])] + if any(dim != '' for dim in hdf5_dims): + while hdf5_dims and hdf5_dims[-1] == '': + hdf5_dims.pop() + return [f'dim_{i}' if dim == '' else dim for i, dim in enumerate(hdf5_dims)] + + @cached_property + def sizes(self) -> Dict[str, int]: + if not self._valid: + return super().sizes + sizes = dict(self._signal.sizes) + for name in self._aux_signals: + sizes.update(self._children[name].sizes) + return sizes + + @property + def unit(self) -> Union[None, sc.Unit]: + return self._signal.unit if self._valid else super().unit - def _bin_edge_dim(self, coord: Field) -> Union[None, str]: - sizes = dict(zip(self.dims, self.shape)) + def _bin_edge_dim(self, coord: Union[Any, Field]) -> Union[None, str]: + if not isinstance(coord, Field): + return None + sizes = self.sizes for dim, size in zip(coord.dims, coord.shape): - if dim in sizes and sizes[dim] + 1 == size: + if (sz := sizes.get(dim)) is not None and sz + 1 == size: return dim return None - def _dim_of_coord(self, name: str, coord: Field) -> Union[None, str]: + def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: + """Same as NXobject.index_child but also handles bin edges.""" + child_sel = to_child_select( + tuple(self.sizes), child.dims, sel, bin_edge_dim=self._bin_edge_dim(child) + ) + return child[child_sel] + + def assemble( + self, dg: sc.DataGroup + ) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: + if not self._valid: + raise NexusStructureError("Could not determine signal field or dimensions.") + dg = dg.copy(deep=False) + aux = {name: dg.pop(name) for name in self._aux_signals} + signal = dg.pop(self._signal_name) + coords = dg + if isinstance(signal, sc.DataGroup): + raise NexusStructureError("Signal is not an array-like.") + da = ( + signal + if isinstance(signal, sc.DataArray) + else sc.DataArray(data=asvariable(signal)) + ) + da 
= self._add_coords(da, coords) + if aux: + signals = {self._signal_name: da} + signals.update(aux) + if all( + isinstance(v, (sc.Variable, sc.DataArray)) for v in signals.values() + ): + return sc.Dataset(signals) + return sc.DataGroup(signals) + return da + + def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: if len(coord.dims) == 1: return coord.dims[0] if name in coord.dims and name in self.dims: return name return self._bin_edge_dim(coord) - def _should_be_aligned(self, da: sc.DataArray, name: str, coord: Field) -> bool: + def _should_be_aligned( + self, da: sc.DataArray, name: str, coord: sc.Variable + ) -> bool: + if name == 'depends_on': + return True dim_of_coord = self._dim_of_coord(name, coord) if dim_of_coord is None: return True @@ -235,68 +366,293 @@ def _should_be_aligned(self, da: sc.DataArray, name: str, coord: Field) -> bool: return False return True - def _getitem(self, select: ScippIndex) -> sc.DataArray: - from .nexus_classes import NXgeometry - - signal = self._signal - if signal is None: - raise NexusStructureError("No signal field found, cannot load group.") - signal = signal[select] - if self._errors_name is not None: - stddevs = self[self._errors_name][select] - # According to the standard, errors must have the same shape as the data. - # This is not the case in all files we observed, is there any harm in - # attempting a broadcast? - signal.variances = np.broadcast_to( - sc.pow(stddevs, sc.scalar(2)).values, shape=signal.shape - ) + def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: + """Add coords to a data array. - da = ( - signal - if isinstance(signal, sc.DataArray) - else sc.DataArray(data=asarray(signal)) + Sets alignment in the same way as slicing scipp.DataArray would. 
+ """ + for name, coord in coords.items(): + if not isinstance(coord, sc.Variable): + da.coords[name] = sc.scalar(coord) + else: + da.coords[name] = coord + # We need the shape *before* slicing to determine dims, so we get the + # field from the group for the conditional. + da.coords.set_aligned( + name, self._should_be_aligned(da, name, self._children[name]) + ) + return da + + +def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[int, ...]: + return shape[: len(dims)] + tuple(size for size in shape[len(dims) :] if size != 1) + + +class NXlog(NXdata): + """ + NXlog, a time-series that can be loaded as a DataArray. + + In some cases the NXlog may contain additional time series, such as a connection + status or alarm. These cannot be handled in a standard way, since the result cannot + be represented as a single DataArray. Furthermore, they prevent positional + time-indexing, since the time coord of each time-series is different. We can + support label-based indexing for this in the future. If additional time-series + are contained within the NXlog then loading will return a DataGroup of the + individual time-series (DataArrays). + """ + + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + children = dict(children) + self._sublogs = [] + self._sublog_children = {} + for name in children: + if name.endswith('_time'): + self._sublogs.append(name[:-5]) + # Extract all fields that belong to sublogs, since they will interfere with the + # setup logic in the base class (NXdata). 
+ for name in self._sublogs: + for k in list(children): + if k.startswith(name): + field = children.pop(k) + self._init_field(field) + field.sizes = { + 'time' if i == 0 else f'dim_{i}': size + for i, size in enumerate(field.dataset.shape) + } + self._sublog_children[k] = field + + super().__init__( + attrs=attrs, + children=children, + fallback_dims=('time',), + fallback_signal_name='value', ) - skip = self._skip - skip += [self._signal_name, self._errors_name] - skip += list(self.attrs.get('auxiliary_signals', [])) - for name in self: - if (errors := self._strategy.coord_errors(self, name)) is not None: - skip += [errors] - for name in self: - if name in skip: - continue - # It is not entirely clear whether skipping NXtransformations is the right - # solution. In principle NXobject will load them via the 'depends_on' - # mechanism, so for valid files this should be sufficient. - allowed = ( - Field, - NXtransformations, - NXcylindrical_geometry, - NXoff_geometry, - NXgeometry, + def read_children(self, sel: ScippIndex) -> sc.DataGroup: + # Sublogs have distinct time axes (with a different length). Must disable + # positional indexing. + if self._sublogs and ('time' in _to_canonical_select(list(self.sizes), sel)): + raise sc.DimensionError( + "Cannot positionally select time since there are multiple " + "time fields. Label-based selection is not supported yet." 
) - if not isinstance(self._get_child(name), allowed): - raise NexusStructureError( - "Invalid NXdata: may not contain nested groups" + dg = super().read_children(sel) + for name, field in self._sublog_children.items(): + dg[name] = field[sel] + return dg + + def _time_to_datetime(self, mapping): + if (time := mapping.get('time')) is not None: + if time.dtype != sc.DType.datetime64 and _is_time(time): + mapping['time'] = convert_time_to_datetime64( + time, start=sc.epoch(unit=time.unit) ) - for name, field in self[Field].items(): - if name in skip: - continue - sel = to_child_select( - self.dims, field.dims, select, bin_edge_dim=self._bin_edge_dim(field) - ) - coord: sc.Variable = asarray(self[name][sel]) - if (error_name := self._strategy.coord_errors(self, name)) is not None: - stddevs = asarray(self[error_name][sel]) - coord.variances = sc.pow(stddevs, sc.scalar(2)).values - try: - da.coords[name] = coord - da.coords.set_aligned(name, self._should_be_aligned(da, name, field)) - except sc.DimensionError as e: - raise NexusStructureError( - f"Field in NXdata incompatible with dims or shape of signal: {e}" - ) from e + def _assemble_sublog( + self, dg: sc.DataGroup, name: str, value_name: Optional[str] = None + ) -> sc.DataArray: + value_name = name if value_name is None else f'{name}_{value_name}' + da = sc.DataArray(dg.pop(value_name), coords={'time': dg.pop(f'{name}_time')}) + for k in list(dg): + if k.startswith(name): + da.coords[k[len(name) + 1 :]] = dg.pop(k) + self._time_to_datetime(da.coords) + return da + + def assemble( + self, dg: sc.DataGroup + ) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: + self._time_to_datetime(dg) + dg = sc.DataGroup(dg) + sublogs = sc.DataGroup() + for name in self._sublogs: + # Somewhat arbitrary definition of which fields is the "value" + value_name = 'severity' if name == 'alarm' else None + sublogs[name] = self._assemble_sublog(dg, name, value_name=value_name) + out = super().assemble(dg) + return out if not sublogs else 
sc.DataGroup(value=out, **sublogs) + + +def _find_embedded_nxevent_data( + children: Dict[str, Union[Field, Group]] +) -> Optional[Group]: + if all(name in children for name in NXevent_data.mandatory_fields): + parent = children['event_index'].parent._group + event_group = Group( + parent, definitions={'NXmonitor': NXevent_data, 'NXdetector': NXevent_data} + ) + for name in list(children): + if name in NXevent_data.handled_fields: + del children[name] + return event_group + + +class EventField: + def __init__(self, event_data: Group, grouping_name: str, grouping: Field) -> None: + """Create a field that represents an event data group. + + Parameters + ---------- + event_data: + The event data group holding the NXevent_data fields. + grouping_name: + The name of the field that contains the grouping information. + grouping: + The field that contains the grouping keys. These are IDs corresponding to + the event_id field of the NXevent_data group, such as the detector_number + field of an NXdetector. 
+ """ + self._event_data = event_data + self._grouping_name = grouping_name + self._grouping = grouping + + @property + def attrs(self) -> Dict[str, Any]: + return self._event_data.attrs + + @property + def sizes(self) -> Dict[str, int]: + return {**self._grouping.sizes, **self._event_data.sizes} + + @property + def dims(self) -> Tuple[str, ...]: + return self._grouping.dims + self._event_data.dims + @property + def shape(self) -> Tuple[int, ...]: + return self._grouping.shape + self._event_data.shape + + def __getitem__(self, sel: ScippIndex) -> sc.DataArray: + event_sel = to_child_select(self.dims, self._event_data.dims, sel) + events = self._event_data[event_sel] + detector_sel = to_child_select(self.dims, self._grouping.dims, sel) + if not isinstance(events, sc.DataArray): + return events + da = _group_events(event_data=events, grouping=self._grouping[detector_sel]) + da.coords[self._grouping_name] = da.coords.pop('event_id') return da + + +class NXdetector(NXdata): + _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] + + @staticmethod + def _detector_number(children: Iterable[str]) -> Optional[str]: + for name in NXdetector._detector_number_fields: + if name in children: + return name + + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + fallback_dims = None + if (det_num_name := NXdetector._detector_number(children)) is not None: + if (detector_number := children[det_num_name]).dataset.ndim == 1: + fallback_dims = (det_num_name,) + detector_number.sizes = {det_num_name: detector_number.dataset.shape[0]} + + if (event_group := _find_embedded_nxevent_data(children)) is not None: + embedded_events = uuid.uuid4().hex if 'events' in children else 'events' + children[embedded_events] = event_group + else: + embedded_events = None + + def _maybe_event_field(name: str, child: Union[Field, Group]): + if ( + name == embedded_events + or (isinstance(child, Group) and child.nx_class == NXevent_data) + ) and 
det_num_name is not None: + event_field = EventField( + event_data=child, + grouping_name=det_num_name, + grouping=children.get(det_num_name), + ) + return event_field + return child + + children = { + name: _maybe_event_field(name, child) for name, child in children.items() + } + + super().__init__( + attrs=attrs, + children=children, + fallback_dims=fallback_dims, + fallback_signal_name='data', + ) + + @property + def detector_number(self) -> Optional[str]: + return self._detector_number(self._children) + + +class NXmonitor(NXdata): + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + if (event_group := _find_embedded_nxevent_data(children)) is not None: + signal = uuid.uuid4().hex if 'events' in children else 'events' + children[signal] = event_group + else: + signal = 'data' + super().__init__(attrs=attrs, children=children, fallback_signal_name=signal) + + +def _group_events( + *, event_data: sc.DataArray, grouping: Optional[sc.Variable] = None +) -> sc.DataArray: + if grouping is None: + event_id = 'event_id' + else: + # copy since sc.bin cannot deal with a non-contiguous view + grouping = asvariable(grouping) + event_id = grouping.flatten(to='event_id').copy() + event_data.bins.coords['event_time_zero'] = sc.bins_like( + event_data, fill_value=event_data.coords['event_time_zero'] + ) + # After loading raw NXevent_data it is guaranteed that the event table + # is contiguous and that there is no masking. We can therefore use the + # more efficient approach of binning from scratch instead of erasing the + # 'event_time_zero' binning defined by NXevent_data. 
+ event_data = event_data.bins.constituents['data'].group(event_id) + if grouping is None: + return event_data + return event_data.fold(dim='event_id', sizes=grouping.sizes) + + +def _find_event_entries(dg: sc.DataGroup) -> List[str]: + event_entries = [] + for name, value in dg.items(): + if ( + isinstance(value, sc.DataArray) + and 'event_time_zero' in value.coords + and value.bins is not None + ): + event_entries.append(name) + return event_entries + + +def group_events_by_detector_number( + dg: sc.DataGroup, +) -> Union[sc.DataArray, sc.Dataset]: + dg = dg.copy(deep=False) + grouping_key = None + for key in NXdetector._detector_number_fields: + if (grouping := dg.get(key)) is not None: + grouping_key = key + break + grouping = None if grouping_key is None else asvariable(dg.pop(grouping_key)) + grouped_events = sc.DataGroup() + for event_entry in _find_event_entries(dg): + events = dg.pop(event_entry) + grouped_events[event_entry] = _group_events( + event_data=events, grouping=grouping + ) + if len(grouped_events) == 1: + out = next(iter(grouped_events.values())) + else: + out = sc.Dataset(grouped_events) + out.coords.update(dg) + return out + + +base_definitions_dict['NXdata'] = NXdata +base_definitions_dict['NXlog'] = NXlog +base_definitions_dict['NXdetector'] = NXdetector +base_definitions_dict['NXmonitor'] = NXmonitor diff --git a/src/scippnexus/nxevent_data.py b/src/scippnexus/nxevent_data.py index 931e042b..27e0b224 100644 --- a/src/scippnexus/nxevent_data.py +++ b/src/scippnexus/nxevent_data.py @@ -1,85 +1,135 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import List, Union +from typing import Any, Dict, List, Tuple, Union import numpy as np import scipp as sc from ._common import to_plain_index -from .nxobject import NexusStructureError, NXobject, ScippIndex +from .base import ( + Group, + NexusStructureError, + NXobject, + ScippIndex, + 
base_definitions_dict, +) +from .field import Field _event_dimension = "event" -_pulse_dimension = "pulse" +_pulse_dimension = "event_time_zero" + + +def _check_for_missing_fields(fields): + for field in NXevent_data.mandatory_fields: + if field not in fields: + raise NexusStructureError( + f"Required field {field} not found in NXevent_data" + ) class NXevent_data(NXobject): + mandatory_fields = ("event_time_zero", "event_index", "event_time_offset") + handled_fields = mandatory_fields + ("event_id",) + + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for name, field in children.items(): + if name in ['event_time_zero', 'event_index']: + field.sizes = {_pulse_dimension: field.dataset.shape[0]} + elif name in ['event_time_offset', 'event_id']: + field.sizes = {_event_dimension: field.dataset.shape[0]} + @property - def shape(self) -> List[int]: - return self['event_index'].shape + def shape(self) -> Tuple[int]: + if (event_index := self._children.get('event_index')) is not None: + return event_index.shape + return () @property def dims(self) -> List[str]: - return [_pulse_dimension] + return (_pulse_dimension,)[: len(self.shape)] @property - def unit(self) -> None: - # Binned data, bins do not have a unit - return None + def sizes(self) -> Dict[str, int]: + return dict(zip(self.dims, self.shape)) - def _get_field_dims(self, name: str) -> Union[None, List[str]]: + def field_dims(self, name: str, field: Field) -> Tuple[str, ...]: if name in ['event_time_zero', 'event_index']: - return [_pulse_dimension] + return (_pulse_dimension,) if name in ['event_time_offset', 'event_id']: - return [_event_dimension] + return (_event_dimension,) return None - def _getitem(self, select: ScippIndex) -> sc.DataArray: - self._check_for_missing_fields() + def read_children(self, select: ScippIndex) -> sc.DataGroup: + children = self._children + if not children: # TODO Check that select is 
trivial? + return sc.DataGroup() + index = to_plain_index([_pulse_dimension], select) + event_time_zero = children['event_time_zero'][index] + last_loaded, event_index = self._get_event_index(children, index) + + num_event = children["event_time_offset"].shape[0] + # Some files contain uint64 "max" indices, which turn into negatives during + # conversion to int64. This is a hack to get around this. + event_index[event_index < 0] = num_event + + if len(event_index) > 0: + event_select = slice( + event_index[0], event_index[-1] if last_loaded else num_event + ) + else: + event_select = slice(0, 0) + + event_time_offset = children['event_time_offset'][event_select] + + event_index = sc.array( + dims=[_pulse_dimension], + values=event_index[:-1] if last_loaded else event_index, + dtype=sc.DType.int64, + unit=None, + ) + + event_index -= event_index.min() + dg = sc.DataGroup( + event_time_zero=event_time_zero, + event_index=event_index, + event_time_offset=event_time_offset, + ) + if (event_id := children.get('event_id')) is not None: + dg['event_id'] = event_id[event_select] + return dg + + def _get_event_index(self, children: sc.DataGroup, index): max_index = self.shape[0] - single = False if index is Ellipsis or index == tuple(): last_loaded = False else: if isinstance(index, int): - single = True start, stop, _ = slice(index, None).indices(max_index) if start == stop: - raise IndexError('Index {start} is out of range') + raise IndexError(f'Index {start} is out of range') index = slice(start, start + 1) start, stop, stride = index.indices(max_index) if stop + stride > max_index: last_loaded = False + elif start == stop: + last_loaded = True else: stop += stride last_loaded = True index = slice(start, stop, stride) - event_index = self['event_index'][index].values - event_time_zero = self['event_time_zero'][index] - - num_event = self["event_time_offset"].shape[0] - # Some files contain uint64 "max" indices, which turn into negatives during - # conversion to int64. 
This is a hack to get around this. - event_index[event_index < 0] = num_event - - if len(event_index) > 0: - event_select = slice( - event_index[0], event_index[-1] if last_loaded else num_event - ) - else: - event_select = slice(None) - - if (event_id := self.get('event_id')) is not None: - event_id = event_id[event_select] - if event_id.dtype not in [sc.DType.int32, sc.DType.int64]: - raise NexusStructureError( - "NXevent_data contains event_id field with non-integer values" - ) + event_index = children['event_index'][index].values + return last_loaded, event_index - event_time_offset = self['event_time_offset'][event_select] + def assemble(self, children: sc.DataGroup) -> sc.DataArray: + _check_for_missing_fields(children) + event_time_offset = children['event_time_offset'] + event_time_zero = children['event_time_zero'] + event_index = children['event_index'] # Weights are not stored in NeXus, so use 1s weights = sc.ones( @@ -92,47 +142,25 @@ def _getitem(self, select: ScippIndex) -> sc.DataArray: events = sc.DataArray( data=weights, coords={'event_time_offset': event_time_offset} ) - if event_id is not None: + if (event_id := children.get('event_id')) is not None: events.coords['event_id'] = event_id - if not last_loaded: - event_index = np.append(event_index, num_event) - else: - # Not a bin-edge coord, all events in bin are associated with same - # (previous) pulse time value - # Copy to avoid confusing size display in _repr_html_ - event_time_zero = event_time_zero[:-1].copy() - - event_index = sc.array( - dims=[_pulse_dimension], values=event_index, dtype=sc.DType.int64, unit=None - ) - - event_index -= event_index.min() - # There is some variation in the last recorded event_index in files from # different institutions. We try to make sure here that it is what would be the # first index of the next pulse. In other words, ensure that event_index # includes the bin edge for the last pulse. 
- if single: + if event_time_zero.ndim == 0: begins = event_index[_pulse_dimension, 0] - ends = event_index[_pulse_dimension, 1] - event_time_zero = event_time_zero[_pulse_dimension, 0].copy() else: - begins = event_index[_pulse_dimension, :-1] - ends = event_index[_pulse_dimension, 1:] + begins = event_index try: - binned = sc.bins(data=events, dim=_event_dimension, begin=begins, end=ends) + binned = sc.bins(data=events, dim=_event_dimension, begin=begins) except IndexError as e: - raise NexusStructureError( - f"Invalid index in NXevent_data at {self.name}/event_index:\n{e}." - ) + path = self._children['event_index'].name + raise NexusStructureError(f"Invalid index in NXevent_data at {path}:\n{e}") return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) - def _check_for_missing_fields(self): - for field in ("event_time_zero", "event_index", "event_time_offset"): - if field not in self: - raise NexusStructureError( - f"Required field {field} not found in NXevent_data" - ) + +base_definitions_dict['NXevent_data'] = NXevent_data diff --git a/src/scippnexus/nxoff_geometry.py b/src/scippnexus/nxoff_geometry.py index 0e4d550f..01a3a358 100644 --- a/src/scippnexus/nxoff_geometry.py +++ b/src/scippnexus/nxoff_geometry.py @@ -1,11 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Optional, Tuple, Union +from typing import Any, Dict, Optional, Union import scipp as sc -from .nxobject import NexusStructureError, NXobject +from .base import Group, NexusStructureError, NXobject, base_definitions_dict +from .field import Field def off_to_shape( @@ -21,13 +22,21 @@ def off_to_shape( """ # Vertices in winding order. This duplicates vertices if they are part of more than # one faces. - vw = vertices[winding_order.values] + # TODO Should use this: + # vw = vertices[winding_order.values] + # but NumPy is currently much faster. 
+ # See https://github.com/scipp/scipp/issues/3044 + vw = sc.vectors( + dims=vertices.dims, + values=vertices.values[winding_order.values], + unit=vertices.unit, + ) # Same as above, grouped by face. fvw = sc.bins(begin=faces, data=vw, dim=vw.dim) low = fvw.bins.size().min().value high = fvw.bins.size().max().value if low == high: - # Vertices in winding order, groupbed by face. Unlike `fvw` above we now know + # Vertices in winding order, grouped by face. Unlike `fvw` above we now know # that each face has the same number of vertices, so we can fold instead of # using binned data. shapes = vw.fold(dim=vertices.dim, sizes={faces.dim: -1, vertices.dim: low}) @@ -43,8 +52,8 @@ def off_to_shape( "`detector_number` not given but NXoff_geometry " "contains `detector_faces`." ) - shape_index = detector_faces['column', 0].copy() - detid = detector_faces['column', 1].copy() + shape_index = detector_faces['face_index|detector_number', 0].copy() + detid = detector_faces['face_index|detector_number', 1].copy() da = sc.DataArray(shape_index, coords={'detector_number': detid}).group( detector_number.flatten(to='detector_number') ) @@ -55,21 +64,30 @@ def off_to_shape( class NXoff_geometry(NXobject): _dims = { - 'detector_faces': ('face', 'column'), + 'detector_faces': ('face', 'face_index|detector_number'), 'vertices': ('vertex',), 'winding_order': ('winding_order',), 'faces': ('face',), } - def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: - return self._dims.get(name) - - def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: - if name == 'vertices': - return sc.DType.vector3 - return None + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) + for name, field in children.items(): + if isinstance(field, Field): + field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) + if name == 'vertices': + field.dtype = sc.DType.vector3 def load_as_array( self, 
detector_number: Optional[sc.Variable] = None ) -> sc.Variable: return off_to_shape(**self[()], detector_number=detector_number) + + @staticmethod + def assemble_as_child( + children: sc.DataGroup, detector_number: Optional[sc.Variable] = None + ) -> sc.Variable: + return off_to_shape(**children, detector_number=detector_number) + + +base_definitions_dict['NXoff_geometry'] = NXoff_geometry diff --git a/src/scippnexus/nxsample.py b/src/scippnexus/nxsample.py index ed732ed1..9efb145f 100644 --- a/src/scippnexus/nxsample.py +++ b/src/scippnexus/nxsample.py @@ -1,27 +1,39 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Dict, Union +from typing import Any, Dict, Union import scipp as sc -from scipp.spatial import linear_transform -from .leaf import Leaf -from .nxobject import ScippIndex +from .base import Group, NXobject, ScippIndex, base_definitions_dict +from .field import Field -_matrix_units = dict(zip(['orientation_matrix', 'ub_matrix'], ['one', '1/Angstrom'])) +_matrix_units = {'orientation_matrix': 'one', 'ub_matrix': '1/Angstrom'} -class NXsample(Leaf): - """Sample information, can be read as a dict.""" +def _fix_unit(name, value): + if (unit := _matrix_units.get(name)) is not None: + value.unit = unit + return value - def _getitem( - self, select: ScippIndex - ) -> Dict[str, Union[sc.Variable, sc.DataArray]]: - content = super()._getitem(select) + +class NXsample(NXobject): + """NXsample""" + + def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): + super().__init__(attrs=attrs, children=children) for key in _matrix_units: - if (item := content.get(key)) is not None: - content[key] = linear_transform( - value=item.values, unit=_matrix_units[key] - ) - return content + if (field := children.get(key)) is not None: + field.sizes = {k: field.sizes[k] for k in field.dims[:-2]} + field.dtype = sc.DType.linear_transform3 + + def 
read_children(self, sel: ScippIndex) -> sc.DataGroup: + return sc.DataGroup( + { + name: _fix_unit(name, self.index_child(child, sel)) + for name, child in self._children.items() + } + ) + + +base_definitions_dict['NXsample'] = NXsample diff --git a/src/scippnexus/nxtransformations.py b/src/scippnexus/nxtransformations.py index 628c5133..d20fe540 100644 --- a/src/scippnexus/nxtransformations.py +++ b/src/scippnexus/nxtransformations.py @@ -3,13 +3,14 @@ # @author Simon Heybrock from __future__ import annotations -from typing import List, Optional, Union +from typing import List, Optional, Tuple, Union import numpy as np import scipp as sc from scipp.scipy import interpolate -from .nxobject import Field, NexusStructureError, NXobject, ScippIndex +from .base import Group, NexusStructureError, NXobject, ScippIndex +from .field import Field, depends_on_to_relative_path class TransformationError(NexusStructureError): @@ -18,30 +19,32 @@ class TransformationError(NexusStructureError): def make_transformation(obj, /, path) -> Optional[Transformation]: if path.startswith('/'): - return Transformation(obj.file[path]) + return obj.file[path] elif path != '.': - return Transformation(obj.parent[path]) + return obj.parent[path] return None # end of chain class NXtransformations(NXobject): """Group of transformations.""" - def _getitem(self, index: ScippIndex) -> sc.DataGroup: - return sc.DataGroup( - { - name: get_full_transformation_starting_at( - Transformation(child), index=index - ) - for name, child in self.items() - } - ) - class Transformation: def __init__(self, obj: Union[Field, NXobject]): # could be an NXlog self._obj = obj + @property + def sizes(self) -> dict: + return self._obj.sizes + + @property + def dims(self) -> Tuple[str, ...]: + return self._obj.dims + + @property + def shape(self) -> Tuple[int, ...]: + return self._obj.shape + @property def attrs(self): return self._obj.attrs @@ -77,15 +80,20 @@ def __getitem__(self, select: ScippIndex): # shape=[1] for 
single values. It is unclear how and if this could be # distinguished from a scan of length 1. value = self._obj[select] + return self.make_transformation(value, transformation_type, select) + + def make_transformation( + self, + value: Union[sc.Variable, sc.DataArray], + transformation_type: str, + select: ScippIndex, + ): try: if isinstance(value, sc.DataGroup): - raise TransformationError( - f"Failed to load transformation at {self.name}." - ) + return value t = value * self.vector v = t if isinstance(t, sc.Variable) else t.data if transformation_type == 'translation': - v = v.to(unit='m', copy=False) v = sc.spatial.translations(dims=v.dims, values=v.values, unit=v.unit) elif transformation_type == 'rotation': v = sc.spatial.rotations_from_rotvecs(v) @@ -99,14 +107,24 @@ def __getitem__(self, select: ScippIndex): else: t.data = v if (offset := self.offset) is None: - return t - offset = sc.vector(value=offset.values, unit=offset.unit).to(unit='m') - offset = sc.spatial.translation(value=offset.value, unit=offset.unit) - return t * offset - except (sc.DimensionError, sc.UnitError) as e: - raise NexusStructureError( - f"Invalid transformation in NXtransformations: {e}" - ) from e + transform = t + else: + offset = sc.vector(value=offset.values, unit=offset.unit) + offset = sc.spatial.translation(value=offset.value, unit=offset.unit) + if transformation_type == 'translation': + offset = offset.to(unit=t.unit, copy=False) + transform = t * offset + if (depends_on := self.attrs.get('depends_on')) is not None: + if not isinstance(transform, sc.DataArray): + transform = sc.DataArray(transform) + transform.attrs['depends_on'] = sc.scalar( + depends_on_to_relative_path(depends_on, self._obj.parent.name) + ) + return transform + except (sc.DimensionError, sc.UnitError, TransformationError): + # TODO We should probably try to return some other data structure and + # also insert offset and other attributes. 
+ return value def _interpolate_transform(transform, xnew): @@ -192,3 +210,25 @@ def _get_transformations( # to deal with changing beamline components (e.g. pixel positions) during a # live data stream (see https://github.com/scipp/scippneutron/issues/76). return transformations + + +def maybe_transformation( + obj: Union[Field, Group], + value: Union[sc.Variable, sc.DataArray, sc.DataGroup], + sel: ScippIndex, +) -> Union[sc.Variable, sc.DataArray, sc.DataGroup]: + """ + Return a loaded field, possibly modified if it is a transformation. + + Transformations are usually stored in NXtransformations groups. However, identifying + transformation fields in this way requires inspecting the parent group, which + is cumbersome to implement. Furthermore, according to the NXdetector documentation + transformations are not necessarily placed inside NXtransformations. + Instead we use the presence of the attribute 'transformation_type' to identify + transformation fields. + """ + if (transformation_type := obj.attrs.get('transformation_type')) is not None: + from .nxtransformations import Transformation + + return Transformation(obj).make_transformation(value, transformation_type, sel) + return value diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v1/__init__.py similarity index 60% rename from src/scippnexus/v2/__init__.py rename to src/scippnexus/v1/__init__.py index 7cbb1b82..9b04cff0 100644 --- a/src/scippnexus/v2/__init__.py +++ b/src/scippnexus/v1/__init__.py @@ -11,15 +11,10 @@ __version__ = "0.0.0" from .. 
import typing -from .base import ( - Group, - NexusStructureError, - NXobject, - base_definitions, - create_class, - create_field, -) -from .field import Attrs, Field +from .definition import ApplicationDefinition, make_definition from .file import File from .nexus_classes import * -from .nxdata import group_events_by_detector_number +from .nxdata import NXdataStrategy +from .nxdetector import NXdetectorStrategy +from .nxlog import NXlogStrategy +from .nxobject import Attrs, Field, NexusStructureError, NXobject, NXobjectStrategy diff --git a/src/scippnexus/definition.py b/src/scippnexus/v1/definition.py similarity index 100% rename from src/scippnexus/definition.py rename to src/scippnexus/v1/definition.py diff --git a/src/scippnexus/definitions/nxcansas.py b/src/scippnexus/v1/definitions/nxcansas.py similarity index 100% rename from src/scippnexus/definitions/nxcansas.py rename to src/scippnexus/v1/definitions/nxcansas.py diff --git a/src/scippnexus/v1/file.py b/src/scippnexus/v1/file.py new file mode 100644 index 00000000..c59b53a4 --- /dev/null +++ b/src/scippnexus/v1/file.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +import warnings +from contextlib import AbstractContextManager + +import h5py +from scipp import VisibleDeprecationWarning + +from .nexus_classes import NXroot + + +class File(AbstractContextManager, NXroot): + def __init__(self, *args, definition=None, **kwargs): + warnings.warn( + "This API is deprecated and will be removed and replaced in release 23.06. 
" + "Switch to 'import scippnexus.v2 as snx' to prepare for this.", + VisibleDeprecationWarning, + ) + self._file = h5py.File(*args, **kwargs) + NXroot.__init__(self, self._file, definition=definition) + + def __enter__(self): + self._file.__enter__() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._file.close() + + def close(self): + self._file.close() diff --git a/src/scippnexus/leaf.py b/src/scippnexus/v1/leaf.py similarity index 95% rename from src/scippnexus/leaf.py rename to src/scippnexus/v1/leaf.py index 2b93943e..bb617cf0 100644 --- a/src/scippnexus/leaf.py +++ b/src/scippnexus/v1/leaf.py @@ -5,7 +5,7 @@ import scipp as sc -from ._common import to_plain_index +from .._common import to_plain_index from .nxobject import NXobject, ScippIndex diff --git a/src/scippnexus/v2/nexus_classes.py b/src/scippnexus/v1/nexus_classes.py similarity index 88% rename from src/scippnexus/v2/nexus_classes.py rename to src/scippnexus/v1/nexus_classes.py index bcf43ab2..7fc5080f 100644 --- a/src/scippnexus/v2/nexus_classes.py +++ b/src/scippnexus/v1/nexus_classes.py @@ -1,12 +1,18 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from .base import NXobject, NXroot # noqa F401 from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 -from .nxdata import NXdata, NXdetector, NXlog, NXmonitor # noqa F401 +from .nxdata import NXdata # noqa F401 +from .nxdetector import NXdetector # noqa F401 +from .nxdisk_chopper import NXdisk_chopper # noqa F401 from .nxevent_data import NXevent_data # noqa F401 +from .nxfermi_chopper import NXfermi_chopper # noqa F401 +from .nxlog import NXlog # noqa F401 +from .nxmonitor import NXmonitor # noqa F401 +from .nxobject import NXobject, NXroot # noqa F401 from .nxoff_geometry import NXoff_geometry # noqa F401 from .nxsample import NXsample # noqa F401 +from .nxsource import NXsource # noqa F401 from .nxtransformations import 
NXtransformations # noqa F401 @@ -66,18 +72,10 @@ class NXdetector_module(NXobject): """NXdetector_module""" -class NXdisk_chopper(NXobject): - """NXdisk_chopper""" - - class NXenvironment(NXobject): """NXenvironment""" -class NXfermi_chopper(NXobject): - """NXfermi_chopper""" - - class NXfilter(NXobject): """NXfilter""" @@ -166,10 +164,6 @@ class NXshape(NXobject): """NXshape""" -class NXsource(NXobject): - """NXsource""" - - class NXslit(NXobject): """NXslit""" diff --git a/src/scippnexus/v2/nxcylindrical_geometry.py b/src/scippnexus/v1/nxcylindrical_geometry.py similarity index 72% rename from src/scippnexus/v2/nxcylindrical_geometry.py rename to src/scippnexus/v1/nxcylindrical_geometry.py index 4e325961..145174c1 100644 --- a/src/scippnexus/v2/nxcylindrical_geometry.py +++ b/src/scippnexus/v1/nxcylindrical_geometry.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Any, Dict, Optional, Union +from typing import Optional, Tuple, Union import scipp as sc -from .base import Group, NexusStructureError, NXobject, base_definitions_dict -from .field import Field +from .nxobject import NexusStructureError, NXobject def _parse( @@ -56,24 +55,15 @@ class NXcylindrical_geometry(NXobject): 'cylinders': ('cylinder', 'vertex_index'), } - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - super().__init__(attrs=attrs, children=children) - for name, field in children.items(): - if isinstance(field, Field): - field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) - if name == 'vertices': - field.dtype = sc.DType.vector3 + def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: + return self._dims.get(name) + + def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: + if name == 'vertices': + return sc.DType.vector3 + return None def load_as_array( self, detector_number: Optional[sc.Variable] = 
None ) -> sc.Variable: return _parse(**self[()], parent_detector_number=detector_number) - - @staticmethod - def assemble_as_child( - children: sc.DataGroup, detector_number: Optional[sc.Variable] = None - ) -> sc.Variable: - return _parse(**children, parent_detector_number=detector_number) - - -base_definitions_dict['NXcylindrical_geometry'] = NXcylindrical_geometry diff --git a/src/scippnexus/v1/nxdata.py b/src/scippnexus/v1/nxdata.py new file mode 100644 index 00000000..8e4a71f3 --- /dev/null +++ b/src/scippnexus/v1/nxdata.py @@ -0,0 +1,302 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from __future__ import annotations + +from typing import List, Optional, Union +from warnings import warn + +import numpy as np +import scipp as sc + +from .._common import to_child_select +from ..typing import H5Group +from .nxcylindrical_geometry import NXcylindrical_geometry +from .nxobject import Field, NexusStructureError, NXobject, ScippIndex, asarray +from .nxoff_geometry import NXoff_geometry +from .nxtransformations import NXtransformations + + +class NXdataStrategy: + """ + Strategy used by :py:class:`scippnexus.NXdata`. + + May be subclassed to customize behavior. + """ + + _error_suffixes = ['_errors', '_error'] # _error is the deprecated suffix + + @staticmethod + def axes(group): + """Names of the axes (dimension labels).""" + return group.attrs.get('axes') + + @staticmethod + def signal(group): + """Name of the signal field.""" + if (name := group.attrs.get('signal')) is not None: + if name in group: + return name + # Legacy NXdata defines signal not as group attribute, but attr on dataset + for name in group.keys(): + # What is the meaning of the attribute value? It is undocumented, we simply + # ignore it. 
+ if 'signal' in group._get_child(name).attrs: + return name + return None + + @staticmethod + def signal_errors(group) -> Optional[str]: + """Name of the field to use for standard-deviations of the signal.""" + name = f'{NXdataStrategy.signal(group)}_errors' + if name in group: + return name + # This is a legacy named, deprecated in the NeXus format. + if 'errors' in group: + return 'errors' + + @staticmethod + def coord_errors(group, name): + """Name of the field to use for standard-deviations of a coordinate.""" + errors = [f'{name}{suffix}' for suffix in NXdataStrategy._error_suffixes] + errors = [x for x in errors if x in group] + if len(errors) == 0: + return None + if len(errors) == 2: + warn( + f"Found {name}_errors as well as the deprecated " + f"{name}_error. The latter will be ignored." + ) + return errors[0] + + +class NXdata(NXobject): + def __init__( + self, + group: H5Group, + *, + definition=None, + strategy=None, + signal_override: Union[Field, '_EventField'] = None, # noqa: F821 + skip: List[str] = None, + ): + """ + Parameters + ---------- + signal_override: + Field-like to use instead of trying to read signal from the file. This is + used when there is no signal or to provide a signal computed from + NXevent_data. + skip: + Names of fields to skip when loading coords. + """ + super().__init__(group, definition=definition, strategy=strategy) + self._signal_override = signal_override + self._skip = skip if skip is not None else [] + + def _default_strategy(self): + return NXdataStrategy + + @property + def shape(self) -> List[int]: + return self._signal.shape + + def _get_group_dims(self) -> Union[None, List[str]]: + # Apparently it is not possible to define dim labels unless there are + # corresponding coords. Special case of '.' entries means "no coord". + if (axes := self._strategy.axes(self)) is not None: + return [f'dim_{i}' if a == '.' 
else a for i, a in enumerate(axes)] + axes = [] + # Names of axes that have an "axis" attribute serve as dim labels in legacy case + for name, field in self._group.items(): + if (axis := field.attrs.get('axis')) is not None: + axes.append((axis, name)) + if axes: + return [x[1] for x in sorted(axes)] + return None + + @property + def dims(self) -> List[str]: + if (d := self._get_group_dims()) is not None: + return d + # Legacy NXdata defines axes not as group attribute, but attr on dataset. + # This is handled by class Field. + return self._signal.dims + + @property + def unit(self) -> Union[sc.Unit, None]: + return self._signal.unit + + @property + def _signal_name(self) -> str: + return self._strategy.signal(self) + + @property + def _errors_name(self) -> Optional[str]: + return self._strategy.signal_errors(self) + + @property + def _signal(self) -> Union[Field, '_EventField', None]: # noqa: F821 + if self._signal_override is not None: + return self._signal_override + if self._signal_name is not None: + if self._signal_name not in self: + raise NexusStructureError( + f"Signal field '{self._signal_name}' not found in group." + ) + return self[self._signal_name] + return None + + def _get_axes(self): + """Return labels of named axes. Does not include default 'dim_{i}' names.""" + if (axes := self._strategy.axes(self)) is not None: + # Unlike self.dims we *drop* entries that are '.' + return [a for a in axes if a != '.'] + elif (signal := self._signal) is not None: + if (axes := signal.attrs.get('axes')) is not None: + dims = axes.split(':') + # The standard says that the axes should be colon-separated, but some + # files use comma-separated. + if len(dims) == 1 and self._signal.ndim > 1: + dims = tuple(axes.split(',')) + return dims + return [] + + def _guess_dims(self, name: str): + """Guess dims of non-signal dataset based on shape. + + Does not check for potential bin-edge coord. 
+ """ + shape = self._get_child(name).shape + if self.shape == shape: + return self.dims + lut = {} + if self._signal is not None: + for d, s in zip(self.dims, self.shape): + if self.shape.count(s) == 1: + lut[s] = d + try: + dims = [lut[s] for s in shape] + except KeyError: + raise NexusStructureError( + f"Could not determine axis indices for {self.name}/{name}" + ) + return dims + + def _try_guess_dims(self, name): + try: + return self._guess_dims(name) + except NexusStructureError: + return None + + def _get_field_dims(self, name: str) -> Union[None, List[str]]: + # Newly written files should always contain indices attributes, but the + # standard recommends that readers should also make "best effort" guess + # since legacy files do not set this attribute. + if (indices := self.attrs.get(f'{name}_indices')) is not None: + return list(np.array(self.dims)[np.array(indices).flatten()]) + if (axis := self._get_child(name).attrs.get('axis')) is not None: + return (self._get_group_dims()[axis - 1],) + if name in [self._signal_name, self._errors_name]: + return self._get_group_dims() # if None, field determines dims itself + if name in list(self.attrs.get('auxiliary_signals', [])): + return self._try_guess_dims(name) + if name in self._get_axes(): + # If there are named axes then items of same name are "dimension + # coordinates", i.e., have a dim matching their name. + # However, if the item is not 1-D we need more labels. Try to use labels of + # signal if dimensionality matches. 
+ if self._signal_name in self and self._get_child(name).ndim == len( + self.shape + ): + return self[self._signal_name].dims + return [name] + return self._try_guess_dims(name) + + def _bin_edge_dim(self, coord: Field) -> Union[None, str]: + sizes = dict(zip(self.dims, self.shape)) + for dim, size in zip(coord.dims, coord.shape): + if dim in sizes and sizes[dim] + 1 == size: + return dim + return None + + def _dim_of_coord(self, name: str, coord: Field) -> Union[None, str]: + if len(coord.dims) == 1: + return coord.dims[0] + if name in coord.dims and name in self.dims: + return name + return self._bin_edge_dim(coord) + + def _should_be_aligned(self, da: sc.DataArray, name: str, coord: Field) -> bool: + dim_of_coord = self._dim_of_coord(name, coord) + if dim_of_coord is None: + return True + if dim_of_coord not in da.dims: + return False + return True + + def _getitem(self, select: ScippIndex) -> sc.DataArray: + from .nexus_classes import NXgeometry + + signal = self._signal + if signal is None: + raise NexusStructureError("No signal field found, cannot load group.") + signal = signal[select] + if self._errors_name is not None: + stddevs = self[self._errors_name][select] + # According to the standard, errors must have the same shape as the data. + # This is not the case in all files we observed, is there any harm in + # attempting a broadcast? + signal.variances = np.broadcast_to( + sc.pow(stddevs, sc.scalar(2)).values, shape=signal.shape + ) + + da = ( + signal + if isinstance(signal, sc.DataArray) + else sc.DataArray(data=asarray(signal)) + ) + + skip = self._skip + skip += [self._signal_name, self._errors_name] + skip += list(self.attrs.get('auxiliary_signals', [])) + for name in self: + if (errors := self._strategy.coord_errors(self, name)) is not None: + skip += [errors] + for name in self: + if name in skip: + continue + # It is not entirely clear whether skipping NXtransformations is the right + # solution. 
In principle NXobject will load them via the 'depends_on' + # mechanism, so for valid files this should be sufficient. + allowed = ( + Field, + NXtransformations, + NXcylindrical_geometry, + NXoff_geometry, + NXgeometry, + ) + if not isinstance(self._get_child(name), allowed): + raise NexusStructureError( + "Invalid NXdata: may not contain nested groups" + ) + + for name, field in self[Field].items(): + if name in skip: + continue + sel = to_child_select( + self.dims, field.dims, select, bin_edge_dim=self._bin_edge_dim(field) + ) + coord: sc.Variable = asarray(self[name][sel]) + if (error_name := self._strategy.coord_errors(self, name)) is not None: + stddevs = asarray(self[error_name][sel]) + coord.variances = sc.pow(stddevs, sc.scalar(2)).values + try: + da.coords[name] = coord + da.coords.set_aligned(name, self._should_be_aligned(da, name, field)) + except sc.DimensionError as e: + raise NexusStructureError( + f"Field in NXdata incompatible with dims or shape of signal: {e}" + ) from e + + return da diff --git a/src/scippnexus/nxdetector.py b/src/scippnexus/v1/nxdetector.py similarity index 100% rename from src/scippnexus/nxdetector.py rename to src/scippnexus/v1/nxdetector.py diff --git a/src/scippnexus/nxdisk_chopper.py b/src/scippnexus/v1/nxdisk_chopper.py similarity index 100% rename from src/scippnexus/nxdisk_chopper.py rename to src/scippnexus/v1/nxdisk_chopper.py diff --git a/src/scippnexus/v1/nxevent_data.py b/src/scippnexus/v1/nxevent_data.py new file mode 100644 index 00000000..f532f092 --- /dev/null +++ b/src/scippnexus/v1/nxevent_data.py @@ -0,0 +1,138 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import List, Union + +import numpy as np +import scipp as sc + +from .._common import to_plain_index +from .nxobject import NexusStructureError, NXobject, ScippIndex + +_event_dimension = "event" +_pulse_dimension = "pulse" + + +class 
NXevent_data(NXobject): + @property + def shape(self) -> List[int]: + return self['event_index'].shape + + @property + def dims(self) -> List[str]: + return [_pulse_dimension] + + @property + def unit(self) -> None: + # Binned data, bins do not have a unit + return None + + def _get_field_dims(self, name: str) -> Union[None, List[str]]: + if name in ['event_time_zero', 'event_index']: + return [_pulse_dimension] + if name in ['event_time_offset', 'event_id']: + return [_event_dimension] + return None + + def _getitem(self, select: ScippIndex) -> sc.DataArray: + self._check_for_missing_fields() + index = to_plain_index([_pulse_dimension], select) + + max_index = self.shape[0] + single = False + if index is Ellipsis or index == tuple(): + last_loaded = False + else: + if isinstance(index, int): + single = True + start, stop, _ = slice(index, None).indices(max_index) + if start == stop: + raise IndexError('Index {start} is out of range') + index = slice(start, start + 1) + start, stop, stride = index.indices(max_index) + if stop + stride > max_index: + last_loaded = False + else: + stop += stride + last_loaded = True + index = slice(start, stop, stride) + + event_index = self['event_index'][index].values + event_time_zero = self['event_time_zero'][index] + + num_event = self["event_time_offset"].shape[0] + # Some files contain uint64 "max" indices, which turn into negatives during + # conversion to int64. This is a hack to get around this. 
+ event_index[event_index < 0] = num_event + + if len(event_index) > 0: + event_select = slice( + event_index[0], event_index[-1] if last_loaded else num_event + ) + else: + event_select = slice(None) + + if (event_id := self.get('event_id')) is not None: + event_id = event_id[event_select] + if event_id.dtype not in [sc.DType.int32, sc.DType.int64]: + raise NexusStructureError( + "NXevent_data contains event_id field with non-integer values" + ) + + event_time_offset = self['event_time_offset'][event_select] + + # Weights are not stored in NeXus, so use 1s + weights = sc.ones( + dims=[_event_dimension], + shape=event_time_offset.shape, + unit='counts', + dtype=np.float32, + ) + + events = sc.DataArray( + data=weights, coords={'event_time_offset': event_time_offset} + ) + if event_id is not None: + events.coords['event_id'] = event_id + + if not last_loaded: + event_index = np.append(event_index, num_event) + else: + # Not a bin-edge coord, all events in bin are associated with same + # (previous) pulse time value + # Copy to avoid confusing size display in _repr_html_ + event_time_zero = event_time_zero[:-1].copy() + + event_index = sc.array( + dims=[_pulse_dimension], values=event_index, dtype=sc.DType.int64, unit=None + ) + + event_index -= event_index.min() + + # There is some variation in the last recorded event_index in files from + # different institutions. We try to make sure here that it is what would be the + # first index of the next pulse. In other words, ensure that event_index + # includes the bin edge for the last pulse. 
+ if single: + begins = event_index[_pulse_dimension, 0] + ends = event_index[_pulse_dimension, 1] + event_time_zero = event_time_zero[_pulse_dimension, 0].copy() + else: + begins = event_index[_pulse_dimension, :-1] + ends = event_index[_pulse_dimension, 1:] + + try: + binned = sc.bins(data=events, dim=_event_dimension, begin=begins, end=ends) + except IndexError as e: + raise NexusStructureError( + f"Invalid index in NXevent_data at {self.name}/event_index:\n{e}." + ) + + return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) + + def _check_for_missing_fields(self): + for field in ("event_time_zero", "event_index", "event_time_offset"): + if field not in self: + raise NexusStructureError( + f"Required field {field} not found in NXevent_data" + ) diff --git a/src/scippnexus/nxfermi_chopper.py b/src/scippnexus/v1/nxfermi_chopper.py similarity index 100% rename from src/scippnexus/nxfermi_chopper.py rename to src/scippnexus/v1/nxfermi_chopper.py diff --git a/src/scippnexus/nxlog.py b/src/scippnexus/v1/nxlog.py similarity index 100% rename from src/scippnexus/nxlog.py rename to src/scippnexus/v1/nxlog.py diff --git a/src/scippnexus/nxmonitor.py b/src/scippnexus/v1/nxmonitor.py similarity index 100% rename from src/scippnexus/nxmonitor.py rename to src/scippnexus/v1/nxmonitor.py diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/v1/nxobject.py similarity index 99% rename from src/scippnexus/nxobject.py rename to src/scippnexus/v1/nxobject.py index 263a9153..db7a4883 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/v1/nxobject.py @@ -15,14 +15,14 @@ import numpy as np import scipp as sc -from ._common import convert_time_to_datetime64, to_plain_index -from ._hdf5_nexus import ( +from .._common import convert_time_to_datetime64, to_plain_index +from .._hdf5_nexus import ( _cset_to_encoding, _ensure_str, _ensure_supported_int_type, _warn_latin1_decode, ) -from .typing import H5Dataset, H5Group, ScippIndex +from ..typing import 
H5Dataset, H5Group, ScippIndex NXobjectIndex = Union[str, ScippIndex] diff --git a/src/scippnexus/v2/nxoff_geometry.py b/src/scippnexus/v1/nxoff_geometry.py similarity index 59% rename from src/scippnexus/v2/nxoff_geometry.py rename to src/scippnexus/v1/nxoff_geometry.py index 01a3a358..0e4d550f 100644 --- a/src/scippnexus/v2/nxoff_geometry.py +++ b/src/scippnexus/v1/nxoff_geometry.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock -from typing import Any, Dict, Optional, Union +from typing import Optional, Tuple, Union import scipp as sc -from .base import Group, NexusStructureError, NXobject, base_definitions_dict -from .field import Field +from .nxobject import NexusStructureError, NXobject def off_to_shape( @@ -22,21 +21,13 @@ def off_to_shape( """ # Vertices in winding order. This duplicates vertices if they are part of more than # one faces. - # TODO Should use this: - # vw = vertices[winding_order.values] - # but NumPy is currently much faster. - # See https://github.com/scipp/scipp/issues/3044 - vw = sc.vectors( - dims=vertices.dims, - values=vertices.values[winding_order.values], - unit=vertices.unit, - ) + vw = vertices[winding_order.values] # Same as above, grouped by face. fvw = sc.bins(begin=faces, data=vw, dim=vw.dim) low = fvw.bins.size().min().value high = fvw.bins.size().max().value if low == high: - # Vertices in winding order, grouped by face. Unlike `fvw` above we now know + # Vertices in winding order, groupbed by face. Unlike `fvw` above we now know # that each face has the same number of vertices, so we can fold instead of # using binned data. shapes = vw.fold(dim=vertices.dim, sizes={faces.dim: -1, vertices.dim: low}) @@ -52,8 +43,8 @@ def off_to_shape( "`detector_number` not given but NXoff_geometry " "contains `detector_faces`." 
) - shape_index = detector_faces['face_index|detector_number', 0].copy() - detid = detector_faces['face_index|detector_number', 1].copy() + shape_index = detector_faces['column', 0].copy() + detid = detector_faces['column', 1].copy() da = sc.DataArray(shape_index, coords={'detector_number': detid}).group( detector_number.flatten(to='detector_number') ) @@ -64,30 +55,21 @@ def off_to_shape( class NXoff_geometry(NXobject): _dims = { - 'detector_faces': ('face', 'face_index|detector_number'), + 'detector_faces': ('face', 'column'), 'vertices': ('vertex',), 'winding_order': ('winding_order',), 'faces': ('face',), } - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - super().__init__(attrs=attrs, children=children) - for name, field in children.items(): - if isinstance(field, Field): - field.sizes = dict(zip(self._dims.get(name), field.dataset.shape)) - if name == 'vertices': - field.dtype = sc.DType.vector3 + def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: + return self._dims.get(name) + + def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: + if name == 'vertices': + return sc.DType.vector3 + return None def load_as_array( self, detector_number: Optional[sc.Variable] = None ) -> sc.Variable: return off_to_shape(**self[()], detector_number=detector_number) - - @staticmethod - def assemble_as_child( - children: sc.DataGroup, detector_number: Optional[sc.Variable] = None - ) -> sc.Variable: - return off_to_shape(**children, detector_number=detector_number) - - -base_definitions_dict['NXoff_geometry'] = NXoff_geometry diff --git a/src/scippnexus/v1/nxsample.py b/src/scippnexus/v1/nxsample.py new file mode 100644 index 00000000..ed732ed1 --- /dev/null +++ b/src/scippnexus/v1/nxsample.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +# @author Simon Heybrock +from typing import Dict, Union + +import scipp as sc +from scipp.spatial 
import linear_transform + +from .leaf import Leaf +from .nxobject import ScippIndex + +_matrix_units = dict(zip(['orientation_matrix', 'ub_matrix'], ['one', '1/Angstrom'])) + + +class NXsample(Leaf): + """Sample information, can be read as a dict.""" + + def _getitem( + self, select: ScippIndex + ) -> Dict[str, Union[sc.Variable, sc.DataArray]]: + content = super()._getitem(select) + for key in _matrix_units: + if (item := content.get(key)) is not None: + content[key] = linear_transform( + value=item.values, unit=_matrix_units[key] + ) + return content diff --git a/src/scippnexus/nxsource.py b/src/scippnexus/v1/nxsource.py similarity index 100% rename from src/scippnexus/nxsource.py rename to src/scippnexus/v1/nxsource.py diff --git a/src/scippnexus/v2/nxtransformations.py b/src/scippnexus/v1/nxtransformations.py similarity index 68% rename from src/scippnexus/v2/nxtransformations.py rename to src/scippnexus/v1/nxtransformations.py index d20fe540..628c5133 100644 --- a/src/scippnexus/v2/nxtransformations.py +++ b/src/scippnexus/v1/nxtransformations.py @@ -3,14 +3,13 @@ # @author Simon Heybrock from __future__ import annotations -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Union import numpy as np import scipp as sc from scipp.scipy import interpolate -from .base import Group, NexusStructureError, NXobject, ScippIndex -from .field import Field, depends_on_to_relative_path +from .nxobject import Field, NexusStructureError, NXobject, ScippIndex class TransformationError(NexusStructureError): @@ -19,32 +18,30 @@ class TransformationError(NexusStructureError): def make_transformation(obj, /, path) -> Optional[Transformation]: if path.startswith('/'): - return obj.file[path] + return Transformation(obj.file[path]) elif path != '.': - return obj.parent[path] + return Transformation(obj.parent[path]) return None # end of chain class NXtransformations(NXobject): """Group of transformations.""" + def _getitem(self, index: ScippIndex) 
-> sc.DataGroup: + return sc.DataGroup( + { + name: get_full_transformation_starting_at( + Transformation(child), index=index + ) + for name, child in self.items() + } + ) + class Transformation: def __init__(self, obj: Union[Field, NXobject]): # could be an NXlog self._obj = obj - @property - def sizes(self) -> dict: - return self._obj.sizes - - @property - def dims(self) -> Tuple[str, ...]: - return self._obj.dims - - @property - def shape(self) -> Tuple[int, ...]: - return self._obj.shape - @property def attrs(self): return self._obj.attrs @@ -80,20 +77,15 @@ def __getitem__(self, select: ScippIndex): # shape=[1] for single values. It is unclear how and if this could be # distinguished from a scan of length 1. value = self._obj[select] - return self.make_transformation(value, transformation_type, select) - - def make_transformation( - self, - value: Union[sc.Variable, sc.DataArray], - transformation_type: str, - select: ScippIndex, - ): try: if isinstance(value, sc.DataGroup): - return value + raise TransformationError( + f"Failed to load transformation at {self.name}." 
+ ) t = value * self.vector v = t if isinstance(t, sc.Variable) else t.data if transformation_type == 'translation': + v = v.to(unit='m', copy=False) v = sc.spatial.translations(dims=v.dims, values=v.values, unit=v.unit) elif transformation_type == 'rotation': v = sc.spatial.rotations_from_rotvecs(v) @@ -107,24 +99,14 @@ def make_transformation( else: t.data = v if (offset := self.offset) is None: - transform = t - else: - offset = sc.vector(value=offset.values, unit=offset.unit) - offset = sc.spatial.translation(value=offset.value, unit=offset.unit) - if transformation_type == 'translation': - offset = offset.to(unit=t.unit, copy=False) - transform = t * offset - if (depends_on := self.attrs.get('depends_on')) is not None: - if not isinstance(transform, sc.DataArray): - transform = sc.DataArray(transform) - transform.attrs['depends_on'] = sc.scalar( - depends_on_to_relative_path(depends_on, self._obj.parent.name) - ) - return transform - except (sc.DimensionError, sc.UnitError, TransformationError): - # TODO We should probably try to return some other data structure and - # also insert offset and other attributes. - return value + return t + offset = sc.vector(value=offset.values, unit=offset.unit).to(unit='m') + offset = sc.spatial.translation(value=offset.value, unit=offset.unit) + return t * offset + except (sc.DimensionError, sc.UnitError) as e: + raise NexusStructureError( + f"Invalid transformation in NXtransformations: {e}" + ) from e def _interpolate_transform(transform, xnew): @@ -210,25 +192,3 @@ def _get_transformations( # to deal with changing beamline components (e.g. pixel positions) during a # live data stream (see https://github.com/scipp/scippneutron/issues/76). return transformations - - -def maybe_transformation( - obj: Union[Field, Group], - value: Union[sc.Variable, sc.DataArray, sc.DataGroup], - sel: ScippIndex, -) -> Union[sc.Variable, sc.DataArray, sc.DataGroup]: - """ - Return a loaded field, possibly modified if it is a transformation. 
- - Transformations are usually stored in NXtransformations groups. However, identifying - transformation fields in this way requires inspecting the parent group, which - is cumbersome to implement. Furthermore, according to the NXdetector documentation - transformations are not necessarily placed inside NXtransformations. - Instead we use the presence of the attribute 'transformation_type' to identify - transformation fields. - """ - if (transformation_type := obj.attrs.get('transformation_type')) is not None: - from .nxtransformations import Transformation - - return Transformation(obj).make_transformation(value, transformation_type, sel) - return value diff --git a/src/scippnexus/v2/file.py b/src/scippnexus/v2/file.py deleted file mode 100644 index 015d0635..00000000 --- a/src/scippnexus/v2/file.py +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from contextlib import AbstractContextManager -from typing import Mapping - -import h5py - -from .base import Group, base_definitions - -_default_definitions = object() - - -class File(AbstractContextManager, Group): - def __init__(self, *args, definitions: Mapping = _default_definitions, **kwargs): - """Context manager for NeXus files, similar to h5py.File. - - Arguments other than documented are as in :py:class:`h5py.File`. - - Parameters - ---------- - definitions: - Mapping of NX_class names to application-specific definitions. - The default is to use the base definitions as defined in the - NeXus standard. 
- """ - if definitions is _default_definitions: - definitions = base_definitions() - self._file = h5py.File(*args, **kwargs) - super().__init__(self._file, definitions=definitions) - - def __enter__(self): - self._file.__enter__() - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._file.close() - - def close(self): - self._file.close() diff --git a/src/scippnexus/v2/nxdata.py b/src/scippnexus/v2/nxdata.py deleted file mode 100644 index 493167a5..00000000 --- a/src/scippnexus/v2/nxdata.py +++ /dev/null @@ -1,658 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from __future__ import annotations - -import uuid -from functools import cached_property -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union - -import numpy as np -import scipp as sc - -from .._common import _to_canonical_select, convert_time_to_datetime64, to_child_select -from ..typing import H5Dataset, ScippIndex -from .base import ( - Group, - NexusStructureError, - NXobject, - asvariable, - base_definitions_dict, -) -from .field import Field, _is_time -from .nxevent_data import NXevent_data - - -def _guess_dims(dims, shape, dataset: H5Dataset): - """Guess dims of non-signal dataset based on shape.""" - if shape is None: - return None - if shape == dataset.shape: - return dims - lut = {} - for d, s in zip(dims, shape): - if shape.count(s) == 1: - lut[s] = d - try: - return [lut[s] for s in dataset.shape] - except KeyError: - try: # Inner dimension may be bin-edges - shape = list(dataset.shape) - shape[-1] -= 1 - return [lut[s] for s in shape] - except KeyError: - pass - return None - - -class NXdata(NXobject): - def __init__( - self, - attrs: Dict[str, Any], - children: Dict[str, Union[Field, Group]], - fallback_dims: Optional[Tuple[str, ...]] = None, - fallback_signal_name: Optional[str] = None, - ): - super().__init__(attrs=attrs, children=children) - self._valid = 
True # True if the children can be assembled - self._signal_name = None - self._signal = None - self._aux_signals = attrs.get('auxiliary_signals', []) - - self._init_signal( - name=attrs.get('signal', fallback_signal_name), children=children - ) - if (errors := children.get('errors')) is not None: - if ( - isinstance(self._signal, Field) - and isinstance(errors, Field) - and self._signal.errors is None - and (errors.unit is None or self._signal.unit == errors.unit) - and self._signal.dataset.shape == errors.dataset.shape - ): - self._signal.errors = errors.dataset - del children['errors'] - self._init_axes(attrs=attrs, children=children) - self._init_group_dims(attrs=attrs, fallback_dims=fallback_dims) - - for name, field in children.items(): - self._init_field_dims(name, field) - - def _init_field_dims(self, name: str, field: Union[Field, Group]) -> None: - if not isinstance(field, Field): - # If the NXdata contains subgroups we can generally not define valid - # sizes... except for some non-signal "special fields" that return - # a DataGroup that will be wrapped in a scalar Variable. - if name == self._signal_name or name in self._aux_signals: - return - if field.attrs.get('NX_class') not in [ - 'NXoff_geometry', - 'NXcylindrical_geometry', - 'NXgeometry', - 'NXtransformations', - ]: - self._valid = False - elif (dims := self._get_dims(name, field)) is not None: - # The convention here is that the given dimensions apply to the shapes - # starting from the left. So we only squeeze dimensions that are after - # len(dims). - shape = _squeeze_trailing(dims, field.dataset.shape) - field.sizes = dict(zip(dims, shape)) - elif self._valid: - s1 = self._signal.sizes - s2 = field.sizes - if not set(s2.keys()).issubset(set(s1.keys())): - self._valid = False - elif any(s1[k] != s2[k] for k in s1.keys() & s2.keys()): - self._valid = False - - def _init_signal(self, name: Optional[str], children): - # There are multiple ways NeXus can define the "signal" dataset. 
The latest - # version uses `signal` attribute on the group (passed as `name`). However, - # we must give precedence to the `signal` attribute on the dataset, since - # older files may use that (and the `signal` group attribute is unrelated). - # Finally, NXlog and NXevent_data can take the role of the signal. In practice - # those may not be indicate by a `signal` attribute, but we support that - # anyway since otherwise we would not be able to find NXevent_data signals - # in many common files. - if name is not None and name in children: - self._signal_name = name - self._signal = children[name] - # Legacy NXdata defines signal not as group attribute, but attr on dataset - for name, field in children.items(): - # We ignore the signal value. Usually it is 1, but apparently one could - # multiple signals. We do not support this, since it is legacy anyway. - if 'signal' in field.attrs: - self._signal_name = name - self._signal = children[name] - break - # NXlog or NXevent_data can take the role of the signal. - for name, field in children.items(): - if name == self._signal_name: - # Avoid duplicate handling - continue - if isinstance(field, EventField) or ( - isinstance(field, Group) and field.nx_class in [NXlog, NXevent_data] - ): - if self._signal is None: - self._signal_name = name - self._signal = field - else: - self._aux_signals.append(name) - - def _init_axes( - self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]] - ): - # Latest way of defining axes - self._axes = attrs.get('axes') - # Older way of defining axes - self._signal_axes = ( - None if self._signal is None else self._signal.attrs.get('axes') - ) - if self._signal_axes is not None: - self._signal_axes = tuple(self._signal_axes.split(':')) - # The standard says that the axes should be colon-separated, but some - # files use comma-separated. 
- if len(self._signal_axes) == 1 and self._signal.dataset.ndim > 1: - self._signal_axes = tuple(self._signal_axes[0].split(',')) - # Another old way of defining axes. Apparently there are two different ways in - # which this is used: A value of '1' indicates "this is an axis". As this would - # not allow for determining an order, we have to assume that the signal field - # has an "axes" attribute that defines the order. We can then ignore the "axis" - # attributes, since they hold no further information. If there is not "axes" - # attribute on the signal field then we have to assume that "axis" gives the - # 1-based index of the axis. - self._axis_index = {} - if self._signal_axes is None: - for name, field in children.items(): - if (axis := field.attrs.get('axis')) is not None: - self._axis_index[name] = axis - - def _get_named_axes(self, fallback_dims) -> Tuple[str, ...]: - if self._axes is not None: - # Unlike self.dims we *drop* entries that are '.' - return tuple(a for a in self._axes if a != '.') - elif self._signal_axes is not None: - return self._signal_axes - elif fallback_dims is not None: - return fallback_dims - else: - return () - - def _get_group_dims(self) -> Optional[Tuple[str, ...]]: - """Try three ways of defining group dimensions.""" - # Apparently it is not possible to define dim labels unless there are - # corresponding coords. Special case of '.' entries means "no coord". - if self._axes is not None: - return tuple( - f'dim_{i}' if a == '.' 
else a for i, a in enumerate(self._axes) - ) - if self._signal_axes is not None: - return self._signal_axes - if self._axis_index: - return tuple( - k for k, _ in sorted(self._axis_index.items(), key=lambda item: item[1]) - ) - return None - - def _init_group_dims( - self, attrs: Dict[str, Any], fallback_dims: Optional[Tuple[str, ...]] = None - ): - group_dims = self._get_group_dims() - - if self._signal is None: - self._valid = False - elif isinstance(self._signal, EventField): - group_dims = self._signal.dims - else: - if group_dims is not None: - shape = self._signal.dataset.shape - # If we have explicit group dims, we can drop trailing 1s. - shape = _squeeze_trailing(group_dims, shape) - self._signal.sizes = dict(zip(group_dims, shape)) - elif isinstance(self._signal, Group): - group_dims = self._signal.dims - elif fallback_dims is not None: - shape = self._signal.dataset.shape - group_dims = [ - fallback_dims[i] if i < len(fallback_dims) else f'dim_{i}' - for i in range(len(shape)) - ] - self._signal.sizes = dict(zip(group_dims, shape)) - - self._group_dims = group_dims - self._named_axes = self._get_named_axes(fallback_dims) - - indices_suffix = '_indices' - indices_attrs = { - key[: -len(indices_suffix)]: attr - for key, attr in attrs.items() - if key.endswith(indices_suffix) - } - - dims = np.array(self._axes) - self._dims_from_indices = { - key: tuple(dims[np.array(indices).flatten()]) - for key, indices in indices_attrs.items() - } - - def _get_dims(self, name, field): - # Newly written files should always contain indices attributes, but the - # standard recommends that readers should also make "best effort" guess - # since legacy files do not set this attribute. - if name == self._signal_name: - return self._group_dims - # Latest way of defining dims - if (dims := self._dims_from_indices.get(name)) is not None: - if '.' in dims: - hdf5_dims = self._dims_from_hdf5(field) - return tuple( - dim if dim != '.' 
else hdf5_dim - for dim, hdf5_dim in zip(dims, hdf5_dims) - ) - return dims - # Older way of defining dims via axis attribute - if (axis := self._axis_index.get(name)) is not None: - return (self._group_dims[axis - 1],) - if name in self._aux_signals: - return _guess_dims( - self._group_dims, self._signal.dataset.shape, field.dataset - ) - if name in self._named_axes: - # If there are named axes then items of same name are "dimension - # coordinates", i.e., have a dim matching their name. - # However, if the item is not 1-D we need more labels. Try to use labels - # of signal if dimensionality matches. - if isinstance(self._signal, Field) and len(field.dataset.shape) == len( - self._signal.dataset.shape - ): - return self._group_dims - return (name,) - if self._signal is not None and self._group_dims is not None: - signal_shape = ( - self._signal.dataset.shape - if isinstance(self._signal, Field) - else ( - self._signal.shape if isinstance(self._signal, EventField) else None - ) - ) - return _guess_dims(self._group_dims, signal_shape, field.dataset) - # While not mandated or recommended by the standard, we can try to find HDF5 - # dim labels as a fallback option for defining dimension labels. Ideally we - # would like to do so in NXobject._init_field, but this causes significant - # overhead for small files with many datasets. Defined here, this will only - # take effect for NXdata, NXdetector, NXlog, and NXmonitor. 
- return self._dims_from_hdf5(field) - - def _dims_from_hdf5(self, field): - hdf5_dims = [dim.label for dim in getattr(field.dataset, 'dims', [])] - if any(dim != '' for dim in hdf5_dims): - while hdf5_dims and hdf5_dims[-1] == '': - hdf5_dims.pop() - return [f'dim_{i}' if dim == '' else dim for i, dim in enumerate(hdf5_dims)] - - @cached_property - def sizes(self) -> Dict[str, int]: - if not self._valid: - return super().sizes - sizes = dict(self._signal.sizes) - for name in self._aux_signals: - sizes.update(self._children[name].sizes) - return sizes - - @property - def unit(self) -> Union[None, sc.Unit]: - return self._signal.unit if self._valid else super().unit - - def _bin_edge_dim(self, coord: Union[Any, Field]) -> Union[None, str]: - if not isinstance(coord, Field): - return None - sizes = self.sizes - for dim, size in zip(coord.dims, coord.shape): - if (sz := sizes.get(dim)) is not None and sz + 1 == size: - return dim - return None - - def index_child(self, child: Union[Field, Group], sel: ScippIndex) -> ScippIndex: - """Same as NXobject.index_child but also handles bin edges.""" - child_sel = to_child_select( - tuple(self.sizes), child.dims, sel, bin_edge_dim=self._bin_edge_dim(child) - ) - return child[child_sel] - - def assemble( - self, dg: sc.DataGroup - ) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: - if not self._valid: - raise NexusStructureError("Could not determine signal field or dimensions.") - dg = dg.copy(deep=False) - aux = {name: dg.pop(name) for name in self._aux_signals} - signal = dg.pop(self._signal_name) - coords = dg - if isinstance(signal, sc.DataGroup): - raise NexusStructureError("Signal is not an array-like.") - da = ( - signal - if isinstance(signal, sc.DataArray) - else sc.DataArray(data=asvariable(signal)) - ) - da = self._add_coords(da, coords) - if aux: - signals = {self._signal_name: da} - signals.update(aux) - if all( - isinstance(v, (sc.Variable, sc.DataArray)) for v in signals.values() - ): - return 
sc.Dataset(signals) - return sc.DataGroup(signals) - return da - - def _dim_of_coord(self, name: str, coord: sc.Variable) -> Union[None, str]: - if len(coord.dims) == 1: - return coord.dims[0] - if name in coord.dims and name in self.dims: - return name - return self._bin_edge_dim(coord) - - def _should_be_aligned( - self, da: sc.DataArray, name: str, coord: sc.Variable - ) -> bool: - if name == 'depends_on': - return True - dim_of_coord = self._dim_of_coord(name, coord) - if dim_of_coord is None: - return True - if dim_of_coord not in da.dims: - return False - return True - - def _add_coords(self, da: sc.DataArray, coords: sc.DataGroup) -> sc.DataArray: - """Add coords to a data array. - - Sets alignment in the same way as slicing scipp.DataArray would. - """ - for name, coord in coords.items(): - if not isinstance(coord, sc.Variable): - da.coords[name] = sc.scalar(coord) - else: - da.coords[name] = coord - # We need the shape *before* slicing to determine dims, so we get the - # field from the group for the conditional. - da.coords.set_aligned( - name, self._should_be_aligned(da, name, self._children[name]) - ) - return da - - -def _squeeze_trailing(dims: Tuple[str, ...], shape: Tuple[int, ...]) -> Tuple[int, ...]: - return shape[: len(dims)] + tuple(size for size in shape[len(dims) :] if size != 1) - - -class NXlog(NXdata): - """ - NXlog, a time-series that can be loaded as a DataArray. - - In some cases the NXlog may contain additional time series, such as a connection - status or alarm. These cannot be handled in a standard way, since the result cannot - be represented as a single DataArray. Furthermore, they prevent positional - time-indexing, since the time coord of each time-series is different. We can - support label-based indexing for this in the future. If additional time-series - are contained within the NXlog then loading will return a DataGroup of the - individual time-series (DataArrays). 
- """ - - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - children = dict(children) - self._sublogs = [] - self._sublog_children = {} - for name in children: - if name.endswith('_time'): - self._sublogs.append(name[:-5]) - # Extract all fields that belong to sublogs, since they will interfere with the - # setup logic in the base class (NXdata). - for name in self._sublogs: - for k in list(children): - if k.startswith(name): - field = children.pop(k) - self._init_field(field) - field.sizes = { - 'time' if i == 0 else f'dim_{i}': size - for i, size in enumerate(field.dataset.shape) - } - self._sublog_children[k] = field - - super().__init__( - attrs=attrs, - children=children, - fallback_dims=('time',), - fallback_signal_name='value', - ) - - def read_children(self, sel: ScippIndex) -> sc.DataGroup: - # Sublogs have distinct time axes (with a different length). Must disable - # positional indexing. - if self._sublogs and ('time' in _to_canonical_select(list(self.sizes), sel)): - raise sc.DimensionError( - "Cannot positionally select time since there are multiple " - "time fields. Label-based selection is not supported yet." 
- ) - dg = super().read_children(sel) - for name, field in self._sublog_children.items(): - dg[name] = field[sel] - return dg - - def _time_to_datetime(self, mapping): - if (time := mapping.get('time')) is not None: - if time.dtype != sc.DType.datetime64 and _is_time(time): - mapping['time'] = convert_time_to_datetime64( - time, start=sc.epoch(unit=time.unit) - ) - - def _assemble_sublog( - self, dg: sc.DataGroup, name: str, value_name: Optional[str] = None - ) -> sc.DataArray: - value_name = name if value_name is None else f'{name}_{value_name}' - da = sc.DataArray(dg.pop(value_name), coords={'time': dg.pop(f'{name}_time')}) - for k in list(dg): - if k.startswith(name): - da.coords[k[len(name) + 1 :]] = dg.pop(k) - self._time_to_datetime(da.coords) - return da - - def assemble( - self, dg: sc.DataGroup - ) -> Union[sc.DataGroup, sc.DataArray, sc.Dataset]: - self._time_to_datetime(dg) - dg = sc.DataGroup(dg) - sublogs = sc.DataGroup() - for name in self._sublogs: - # Somewhat arbitrary definition of which fields is the "value" - value_name = 'severity' if name == 'alarm' else None - sublogs[name] = self._assemble_sublog(dg, name, value_name=value_name) - out = super().assemble(dg) - return out if not sublogs else sc.DataGroup(value=out, **sublogs) - - -def _find_embedded_nxevent_data( - children: Dict[str, Union[Field, Group]] -) -> Optional[Group]: - if all(name in children for name in NXevent_data.mandatory_fields): - parent = children['event_index'].parent._group - event_group = Group( - parent, definitions={'NXmonitor': NXevent_data, 'NXdetector': NXevent_data} - ) - for name in list(children): - if name in NXevent_data.handled_fields: - del children[name] - return event_group - - -class EventField: - def __init__(self, event_data: Group, grouping_name: str, grouping: Field) -> None: - """Create a field that represents an event data group. - - Parameters - ---------- - event_data: - The event data group holding the NXevent_data fields. 
- grouping_name: - The name of the field that contains the grouping information. - grouping: - The field that contains the grouping keys. These are IDs corresponding to - the event_id field of the NXevent_data group, such as the detector_number - field of an NXdetector. - """ - self._event_data = event_data - self._grouping_name = grouping_name - self._grouping = grouping - - @property - def attrs(self) -> Dict[str, Any]: - return self._event_data.attrs - - @property - def sizes(self) -> Dict[str, int]: - return {**self._grouping.sizes, **self._event_data.sizes} - - @property - def dims(self) -> Tuple[str, ...]: - return self._grouping.dims + self._event_data.dims - - @property - def shape(self) -> Tuple[int, ...]: - return self._grouping.shape + self._event_data.shape - - def __getitem__(self, sel: ScippIndex) -> sc.DataArray: - event_sel = to_child_select(self.dims, self._event_data.dims, sel) - events = self._event_data[event_sel] - detector_sel = to_child_select(self.dims, self._grouping.dims, sel) - if not isinstance(events, sc.DataArray): - return events - da = _group_events(event_data=events, grouping=self._grouping[detector_sel]) - da.coords[self._grouping_name] = da.coords.pop('event_id') - return da - - -class NXdetector(NXdata): - _detector_number_fields = ['detector_number', 'pixel_id', 'spectrum_index'] - - @staticmethod - def _detector_number(children: Iterable[str]) -> Optional[str]: - for name in NXdetector._detector_number_fields: - if name in children: - return name - - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - fallback_dims = None - if (det_num_name := NXdetector._detector_number(children)) is not None: - if (detector_number := children[det_num_name]).dataset.ndim == 1: - fallback_dims = (det_num_name,) - detector_number.sizes = {det_num_name: detector_number.dataset.shape[0]} - - if (event_group := _find_embedded_nxevent_data(children)) is not None: - embedded_events = uuid.uuid4().hex if 'events' in 
children else 'events' - children[embedded_events] = event_group - else: - embedded_events = None - - def _maybe_event_field(name: str, child: Union[Field, Group]): - if ( - name == embedded_events - or (isinstance(child, Group) and child.nx_class == NXevent_data) - ) and det_num_name is not None: - event_field = EventField( - event_data=child, - grouping_name=det_num_name, - grouping=children.get(det_num_name), - ) - return event_field - return child - - children = { - name: _maybe_event_field(name, child) for name, child in children.items() - } - - super().__init__( - attrs=attrs, - children=children, - fallback_dims=fallback_dims, - fallback_signal_name='data', - ) - - @property - def detector_number(self) -> Optional[str]: - return self._detector_number(self._children) - - -class NXmonitor(NXdata): - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - if (event_group := _find_embedded_nxevent_data(children)) is not None: - signal = uuid.uuid4().hex if 'events' in children else 'events' - children[signal] = event_group - else: - signal = 'data' - super().__init__(attrs=attrs, children=children, fallback_signal_name=signal) - - -def _group_events( - *, event_data: sc.DataArray, grouping: Optional[sc.Variable] = None -) -> sc.DataArray: - if grouping is None: - event_id = 'event_id' - else: - # copy since sc.bin cannot deal with a non-contiguous view - grouping = asvariable(grouping) - event_id = grouping.flatten(to='event_id').copy() - event_data.bins.coords['event_time_zero'] = sc.bins_like( - event_data, fill_value=event_data.coords['event_time_zero'] - ) - # After loading raw NXevent_data it is guaranteed that the event table - # is contiguous and that there is no masking. We can therefore use the - # more efficient approach of binning from scratch instead of erasing the - # 'event_time_zero' binning defined by NXevent_data. 
- event_data = event_data.bins.constituents['data'].group(event_id) - if grouping is None: - return event_data - return event_data.fold(dim='event_id', sizes=grouping.sizes) - - -def _find_event_entries(dg: sc.DataGroup) -> List[str]: - event_entries = [] - for name, value in dg.items(): - if ( - isinstance(value, sc.DataArray) - and 'event_time_zero' in value.coords - and value.bins is not None - ): - event_entries.append(name) - return event_entries - - -def group_events_by_detector_number( - dg: sc.DataGroup, -) -> Union[sc.DataArray, sc.Dataset]: - dg = dg.copy(deep=False) - grouping_key = None - for key in NXdetector._detector_number_fields: - if (grouping := dg.get(key)) is not None: - grouping_key = key - break - grouping = None if grouping_key is None else asvariable(dg.pop(grouping_key)) - grouped_events = sc.DataGroup() - for event_entry in _find_event_entries(dg): - events = dg.pop(event_entry) - grouped_events[event_entry] = _group_events( - event_data=events, grouping=grouping - ) - if len(grouped_events) == 1: - out = next(iter(grouped_events.values())) - else: - out = sc.Dataset(grouped_events) - out.coords.update(dg) - return out - - -base_definitions_dict['NXdata'] = NXdata -base_definitions_dict['NXlog'] = NXlog -base_definitions_dict['NXdetector'] = NXdetector -base_definitions_dict['NXmonitor'] = NXmonitor diff --git a/src/scippnexus/v2/nxevent_data.py b/src/scippnexus/v2/nxevent_data.py deleted file mode 100644 index 0f2845ea..00000000 --- a/src/scippnexus/v2/nxevent_data.py +++ /dev/null @@ -1,166 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Any, Dict, List, Tuple, Union - -import numpy as np -import scipp as sc - -from .._common import to_plain_index -from .base import ( - Group, - NexusStructureError, - NXobject, - ScippIndex, - base_definitions_dict, -) -from .field import Field - -_event_dimension = "event" 
-_pulse_dimension = "event_time_zero" - - -def _check_for_missing_fields(fields): - for field in NXevent_data.mandatory_fields: - if field not in fields: - raise NexusStructureError( - f"Required field {field} not found in NXevent_data" - ) - - -class NXevent_data(NXobject): - mandatory_fields = ("event_time_zero", "event_index", "event_time_offset") - handled_fields = mandatory_fields + ("event_id",) - - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - super().__init__(attrs=attrs, children=children) - for name, field in children.items(): - if name in ['event_time_zero', 'event_index']: - field.sizes = {_pulse_dimension: field.dataset.shape[0]} - elif name in ['event_time_offset', 'event_id']: - field.sizes = {_event_dimension: field.dataset.shape[0]} - - @property - def shape(self) -> Tuple[int]: - if (event_index := self._children.get('event_index')) is not None: - return event_index.shape - return () - - @property - def dims(self) -> List[str]: - return (_pulse_dimension,)[: len(self.shape)] - - @property - def sizes(self) -> Dict[str, int]: - return dict(zip(self.dims, self.shape)) - - def field_dims(self, name: str, field: Field) -> Tuple[str, ...]: - if name in ['event_time_zero', 'event_index']: - return (_pulse_dimension,) - if name in ['event_time_offset', 'event_id']: - return (_event_dimension,) - return None - - def read_children(self, select: ScippIndex) -> sc.DataGroup: - children = self._children - if not children: # TODO Check that select is trivial? - return sc.DataGroup() - - index = to_plain_index([_pulse_dimension], select) - event_time_zero = children['event_time_zero'][index] - last_loaded, event_index = self._get_event_index(children, index) - - num_event = children["event_time_offset"].shape[0] - # Some files contain uint64 "max" indices, which turn into negatives during - # conversion to int64. This is a hack to get around this. 
- event_index[event_index < 0] = num_event - - if len(event_index) > 0: - event_select = slice( - event_index[0], event_index[-1] if last_loaded else num_event - ) - else: - event_select = slice(0, 0) - - event_time_offset = children['event_time_offset'][event_select] - - event_index = sc.array( - dims=[_pulse_dimension], - values=event_index[:-1] if last_loaded else event_index, - dtype=sc.DType.int64, - unit=None, - ) - - event_index -= event_index.min() - - dg = sc.DataGroup( - event_time_zero=event_time_zero, - event_index=event_index, - event_time_offset=event_time_offset, - ) - if (event_id := children.get('event_id')) is not None: - dg['event_id'] = event_id[event_select] - return dg - - def _get_event_index(self, children: sc.DataGroup, index): - max_index = self.shape[0] - if index is Ellipsis or index == tuple(): - last_loaded = False - else: - if isinstance(index, int): - start, stop, _ = slice(index, None).indices(max_index) - if start == stop: - raise IndexError(f'Index {start} is out of range') - index = slice(start, start + 1) - start, stop, stride = index.indices(max_index) - if stop + stride > max_index: - last_loaded = False - elif start == stop: - last_loaded = True - else: - stop += stride - last_loaded = True - index = slice(start, stop, stride) - - event_index = children['event_index'][index].values - return last_loaded, event_index - - def assemble(self, children: sc.DataGroup) -> sc.DataArray: - _check_for_missing_fields(children) - event_time_offset = children['event_time_offset'] - event_time_zero = children['event_time_zero'] - event_index = children['event_index'] - - # Weights are not stored in NeXus, so use 1s - weights = sc.ones( - dims=[_event_dimension], - shape=event_time_offset.shape, - unit='counts', - dtype=np.float32, - ) - - events = sc.DataArray( - data=weights, coords={'event_time_offset': event_time_offset} - ) - if (event_id := children.get('event_id')) is not None: - events.coords['event_id'] = event_id - - # There is 
some variation in the last recorded event_index in files from - # different institutions. We try to make sure here that it is what would be the - # first index of the next pulse. In other words, ensure that event_index - # includes the bin edge for the last pulse. - if event_time_zero.ndim == 0: - begins = event_index[_pulse_dimension, 0] - else: - begins = event_index - - try: - binned = sc.bins(data=events, dim=_event_dimension, begin=begins) - except IndexError as e: - path = self._children['event_index'].name - raise NexusStructureError(f"Invalid index in NXevent_data at {path}:\n{e}") - - return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) - - -base_definitions_dict['NXevent_data'] = NXevent_data diff --git a/src/scippnexus/v2/nxsample.py b/src/scippnexus/v2/nxsample.py deleted file mode 100644 index 9efb145f..00000000 --- a/src/scippnexus/v2/nxsample.py +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Any, Dict, Union - -import scipp as sc - -from .base import Group, NXobject, ScippIndex, base_definitions_dict -from .field import Field - -_matrix_units = {'orientation_matrix': 'one', 'ub_matrix': '1/Angstrom'} - - -def _fix_unit(name, value): - if (unit := _matrix_units.get(name)) is not None: - value.unit = unit - return value - - -class NXsample(NXobject): - """NXsample""" - - def __init__(self, attrs: Dict[str, Any], children: Dict[str, Union[Field, Group]]): - super().__init__(attrs=attrs, children=children) - for key in _matrix_units: - if (field := children.get(key)) is not None: - field.sizes = {k: field.sizes[k] for k in field.dims[:-2]} - field.dtype = sc.DType.linear_transform3 - - def read_children(self, sel: ScippIndex) -> sc.DataGroup: - return sc.DataGroup( - { - name: _fix_unit(name, self.index_child(child, sel)) - for name, child in self._children.items() - } - ) - - 
-base_definitions_dict['NXsample'] = NXsample diff --git a/tests/nxoff_geometry_test.py b/tests/nxoff_geometry_test.py index dc682210..f15a4ba2 100644 --- a/tests/nxoff_geometry_test.py +++ b/tests/nxoff_geometry_test.py @@ -4,7 +4,7 @@ import scipp as sc import scippnexus.v2 as snx -from scippnexus.v2.nxoff_geometry import NXoff_geometry, off_to_shape +from scippnexus.nxoff_geometry import NXoff_geometry, off_to_shape @pytest.fixture() diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index be41c8bf..71813a8c 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -5,7 +5,7 @@ from scipp.testing import assert_identical import scippnexus.v2 as snx -from scippnexus.v2.nxtransformations import NXtransformations +from scippnexus.nxtransformations import NXtransformations def make_group(group: h5py.Group) -> snx.Group: From 263e4123f78648c3796a4a853ee6e7d218cc1b54 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 14 Aug 2023 06:45:52 +0200 Subject: [PATCH 2/7] Add v2 forwarders --- src/scippnexus/v2/__init__.py | 2 ++ src/scippnexus/v2/application_definitions/nxcansas/__init__.py | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 src/scippnexus/v2/__init__.py create mode 100644 src/scippnexus/v2/application_definitions/nxcansas/__init__.py diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v2/__init__.py new file mode 100644 index 00000000..a4c1bae3 --- /dev/null +++ b/src/scippnexus/v2/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from scippnexus import * diff --git a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py new file mode 100644 index 00000000..84072bf2 --- /dev/null +++ b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from scippnexus.application_definitions.nxcansas import * From 3e63e7edf858d7be7cdf9be45e42d272601b1eba Mon Sep 17 00:00:00 2001 From: 
Simon Heybrock Date: Mon, 14 Aug 2023 06:48:02 +0200 Subject: [PATCH 3/7] Update docs --- docs/getting-started/quick-start-guide.ipynb | 7 ++++--- docs/user-guide/nexus-classes.ipynb | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/getting-started/quick-start-guide.ipynb b/docs/getting-started/quick-start-guide.ipynb index da7e6f96..3951b141 100644 --- a/docs/getting-started/quick-start-guide.ipynb +++ b/docs/getting-started/quick-start-guide.ipynb @@ -65,8 +65,8 @@ "outputs": [], "source": [ "# To use the legacy interface, use:\n", - "# import scippnexus as snx\n", - "import scippnexus.v2 as snx\n", + "# import scippnexus.v1 as snx\n", + "import scippnexus as snx\n", "\n", "with snx.File(filename) as f:\n", " print(list(f.keys()))" @@ -354,7 +354,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.8.16" } }, "nbformat": 4, diff --git a/docs/user-guide/nexus-classes.ipynb b/docs/user-guide/nexus-classes.ipynb index 74fb33e9..044339df 100644 --- a/docs/user-guide/nexus-classes.ipynb +++ b/docs/user-guide/nexus-classes.ipynb @@ -62,7 +62,7 @@ "source": [ "from scippnexus import data\n", "filename = data.get_path('PG3_4844_event.nxs')\n", - "import scippnexus.v2 as snx\n", + "import scippnexus as snx\n", "f = snx.File(filename)" ] }, @@ -330,7 +330,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.8.16" } }, "nbformat": 4, From 475902a8c9d3e1f146b476ba23c5b5a16f89d96f Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 14 Aug 2023 06:51:12 +0200 Subject: [PATCH 4/7] Release notes --- docs/about/release-notes.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/about/release-notes.rst b/docs/about/release-notes.rst index 8b331fea..54a348fd 100644 --- a/docs/about/release-notes.rst +++ 
b/docs/about/release-notes.rst @@ -21,6 +21,15 @@ Release Notes Deprecations ~~~~~~~~~~~~ +v23.08.0 +-------- + +Breaking changes +~~~~~~~~~~~~~~~~ + +* ``scippnexus.v2`` is now the default when running ``import scippnexus``. + Legacy interface has moved to ``scippnexus.v1``, but this will be removed in a future release `#158 <https://github.com/scipp/scippnexus/pull/158>`_. + v23.07.0 -------- From 292b310f12711e22970ba16c677d0ee8fff14527 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 14 Aug 2023 06:52:20 +0200 Subject: [PATCH 5/7] Update front page --- docs/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index e028dd08..62e22bb3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -27,6 +27,8 @@ This is especially powerful since a number of concepts of Scipp map well to conc News ---- +- [August 2023] scippnexus-23.08.0 has been released. + The "v2" API is now the default. - [April 2023] scippnexus-23.04.0 has been released. This adds ``scippnexus.v2``, which provides the future API of ``scippnexus``. The new API avoids performance bottlenecks when working with small files that contain many groups and datasets. 
From 5c4fe9bdb812c678bdbafdc5d48d8d64c11784ed Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 14 Aug 2023 07:00:59 +0200 Subject: [PATCH 6/7] Update other imports --- docs/user-guide/application-definitions.ipynb | 4 ++-- src/scippnexus/v1/file.py | 4 ++-- tests/application_definition_test.py | 4 ++-- tests/load_files_test.py | 2 +- tests/nexus_test.py | 4 ++-- tests/nxcylindrical_geometry_test.py | 2 +- tests/nxdata_test.py | 4 ++-- tests/nxdetector_test.py | 4 ++-- tests/nxevent_data_test.py | 2 +- tests/nxlog_test.py | 4 ++-- tests/nxmonitor_test.py | 2 +- tests/nxoff_geometry_test.py | 2 +- tests/nxsample_test.py | 2 +- tests/nxtransformations_test.py | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/user-guide/application-definitions.ipynb b/docs/user-guide/application-definitions.ipynb index 06623d53..f5b84d50 100644 --- a/docs/user-guide/application-definitions.ipynb +++ b/docs/user-guide/application-definitions.ipynb @@ -78,7 +78,7 @@ "source": [ "import h5py\n", "import scipp as sc\n", - "import scippnexus.v2 as snx\n", + "import scippnexus as snx\n", "\n", "\n", "class MyData:\n", @@ -200,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "import scippnexus.v2 as snx\n", + "import scippnexus as snx\n", "\n", "\n", "def skip(name, obj):\n", diff --git a/src/scippnexus/v1/file.py b/src/scippnexus/v1/file.py index c59b53a4..88308fb0 100644 --- a/src/scippnexus/v1/file.py +++ b/src/scippnexus/v1/file.py @@ -13,8 +13,8 @@ class File(AbstractContextManager, NXroot): def __init__(self, *args, definition=None, **kwargs): warnings.warn( - "This API is deprecated and will be removed and replaced in release 23.06. " - "Switch to 'import scippnexus.v2 as snx' to prepare for this.", + "The scippnexus.v1 API is deprecated and will be removed in release 24.01. 
" + "Switch to 'import scippnexus as snx' to use API version 2.", VisibleDeprecationWarning, ) self._file = h5py.File(*args, **kwargs) diff --git a/tests/application_definition_test.py b/tests/application_definition_test.py index cdb09fd5..6b09d992 100644 --- a/tests/application_definition_test.py +++ b/tests/application_definition_test.py @@ -3,8 +3,8 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx -from scippnexus.v2.application_definitions import nxcansas +import scippnexus as snx +from scippnexus.application_definitions import nxcansas @pytest.fixture() diff --git a/tests/load_files_test.py b/tests/load_files_test.py index ecd4bbc1..e2fc9bd5 100644 --- a/tests/load_files_test.py +++ b/tests/load_files_test.py @@ -5,7 +5,7 @@ import pytest import scipp as sc -import scippnexus.v2 as snx +import scippnexus as snx externalfile = pytest.importorskip('externalfile') diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 9ed49e1f..7e21c37d 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -7,8 +7,8 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx -from scippnexus.v2 import ( +import scippnexus as snx +from scippnexus import ( NexusStructureError, NXdetector, NXentry, diff --git a/tests/nxcylindrical_geometry_test.py b/tests/nxcylindrical_geometry_test.py index 63a47a1e..155b16ca 100644 --- a/tests/nxcylindrical_geometry_test.py +++ b/tests/nxcylindrical_geometry_test.py @@ -2,7 +2,7 @@ import pytest import scipp as sc -import scippnexus.v2 as snx +import scippnexus as snx @pytest.fixture() diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 9dca5353..61c8d90c 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -4,8 +4,8 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx -from scippnexus.v2 import NXdata, NXlog +import scippnexus as snx +from scippnexus import NXdata, NXlog @pytest.fixture() diff 
--git a/tests/nxdetector_test.py b/tests/nxdetector_test.py index 969f7214..c0a1b904 100644 --- a/tests/nxdetector_test.py +++ b/tests/nxdetector_test.py @@ -4,8 +4,8 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx -from scippnexus.v2 import NXdetector, NXentry, NXoff_geometry +import scippnexus as snx +from scippnexus import NXdetector, NXentry, NXoff_geometry def make_group(group: h5py.Group) -> snx.Group: diff --git a/tests/nxevent_data_test.py b/tests/nxevent_data_test.py index f27bcd46..e1f18558 100644 --- a/tests/nxevent_data_test.py +++ b/tests/nxevent_data_test.py @@ -6,7 +6,7 @@ import pytest import scipp as sc -import scippnexus.v2 as snx +import scippnexus as snx @pytest.fixture() diff --git a/tests/nxlog_test.py b/tests/nxlog_test.py index ed52a18e..112a11ec 100644 --- a/tests/nxlog_test.py +++ b/tests/nxlog_test.py @@ -7,8 +7,8 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx -from scippnexus.v2 import NXentry, NXlog +import scippnexus as snx +from scippnexus import NXentry, NXlog # representative sample of UTF-8 test strings from # https://www.w3.org/2001/06/utf-8-test/UTF-8-demo.html diff --git a/tests/nxmonitor_test.py b/tests/nxmonitor_test.py index ccefa204..1dce7f31 100644 --- a/tests/nxmonitor_test.py +++ b/tests/nxmonitor_test.py @@ -3,7 +3,7 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx +import scippnexus as snx @pytest.fixture() diff --git a/tests/nxoff_geometry_test.py b/tests/nxoff_geometry_test.py index f15a4ba2..2c4db44b 100644 --- a/tests/nxoff_geometry_test.py +++ b/tests/nxoff_geometry_test.py @@ -3,7 +3,7 @@ import pytest import scipp as sc -import scippnexus.v2 as snx +import scippnexus as snx from scippnexus.nxoff_geometry import NXoff_geometry, off_to_shape diff --git a/tests/nxsample_test.py b/tests/nxsample_test.py index 12d64c88..a3911e74 100644 --- a/tests/nxsample_test.py +++ 
b/tests/nxsample_test.py @@ -5,7 +5,7 @@ from scipp import spatial from scipp.testing import assert_identical -import scippnexus.v2 as snx +import scippnexus as snx @pytest.fixture() diff --git a/tests/nxtransformations_test.py b/tests/nxtransformations_test.py index 71813a8c..a341d90d 100644 --- a/tests/nxtransformations_test.py +++ b/tests/nxtransformations_test.py @@ -4,7 +4,7 @@ import scipp as sc from scipp.testing import assert_identical -import scippnexus.v2 as snx +import scippnexus as snx from scippnexus.nxtransformations import NXtransformations From a3a19b6ae777792285e1cdfb0c8c3452ed45126b Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 14 Aug 2023 07:02:23 +0200 Subject: [PATCH 7/7] Update API docs module --- docs/user-guide/classes.rst | 2 +- docs/user-guide/functions.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/classes.rst b/docs/user-guide/classes.rst index 68573bf5..bef3683a 100644 --- a/docs/user-guide/classes.rst +++ b/docs/user-guide/classes.rst @@ -1,4 +1,4 @@ -.. currentmodule:: scippnexus.v2 +.. currentmodule:: scippnexus Classes ======= diff --git a/docs/user-guide/functions.rst b/docs/user-guide/functions.rst index 4c28c939..7856f76e 100644 --- a/docs/user-guide/functions.rst +++ b/docs/user-guide/functions.rst @@ -1,4 +1,4 @@ -.. currentmodule:: scippnexus.v2 +.. currentmodule:: scippnexus Functions =========