diff --git a/docs/user-guide/quick-start-guide.ipynb b/docs/user-guide/quick-start-guide.ipynb index 763608a1..d164ccf3 100644 --- a/docs/user-guide/quick-start-guide.ipynb +++ b/docs/user-guide/quick-start-guide.ipynb @@ -117,8 +117,6 @@ "metadata": {}, "outputs": [], "source": [ - "# To use the legacy interface, use:\n", - "# import scippnexus.v1 as snx\n", "import scippnexus as snx\n", "\n", "with snx.File(filename) as f:\n", diff --git a/src/scippnexus/v1/__init__.py b/src/scippnexus/v1/__init__.py deleted file mode 100644 index 9b04cff0..00000000 --- a/src/scippnexus/v1/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock - -# flake8: noqa -import importlib.metadata - -try: - __version__ = importlib.metadata.version(__package__ or __name__) -except importlib.metadata.PackageNotFoundError: - __version__ = "0.0.0" - -from .. import typing -from .definition import ApplicationDefinition, make_definition -from .file import File -from .nexus_classes import * -from .nxdata import NXdataStrategy -from .nxdetector import NXdetectorStrategy -from .nxlog import NXlogStrategy -from .nxobject import Attrs, Field, NexusStructureError, NXobject, NXobjectStrategy diff --git a/src/scippnexus/v1/definition.py b/src/scippnexus/v1/definition.py deleted file mode 100644 index 2d32cc40..00000000 --- a/src/scippnexus/v1/definition.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from abc import ABC, abstractmethod -from typing import Dict - -from .nxobject import NXobject - - -class ApplicationDefinition(ABC): - """Abstract base class that can be subclassed for custom application definitions.""" - - @abstractmethod - def make_strategy(self, group: NXobject) -> type: - """Return a strategy that ScippNexus should use to load the given group.""" - ... - - -def make_definition(mapping: Dict[NXobject, type]) -> ApplicationDefinition: - """ - Create an application definition from a mapping of NeXus classes to strategies. - """ - - class Definition(ApplicationDefinition): - def make_strategy(self, group: NXobject) -> type: - return mapping.get(group.nx_class) - - return Definition() diff --git a/src/scippnexus/v1/definitions/nxcansas.py b/src/scippnexus/v1/definitions/nxcansas.py deleted file mode 100644 index 95a35950..00000000 --- a/src/scippnexus/v1/definitions/nxcansas.py +++ /dev/null @@ -1,146 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Literal, Optional, Tuple, Union - -import scipp as sc - -from ..definition import ApplicationDefinition as BaseDef -from ..nxobject import NXobject - - -class ApplicationDefinition(BaseDef): - def __init__(self, class_attribute: str, default: str = None): - self._default_class = default - self._class_attribute = class_attribute - self._strategies = {} - - def make_strategy(self, group: NXobject): - # This approach will likely need to be generalized as many application - # definitions to not define a "class attribute" in the style of canSAS_class, - # but seem to rely on basic structure and the NX_class attribute. - if ( - definition_class := group.attrs.get( - self._class_attribute, self._default_class - ) - ) is not None: - return self._strategies.get(definition_class) - - def register(self, sas_class): - def decorator(strategy): - self._strategies[sas_class] = strategy - return strategy - - return decorator - - -NXcanSAS = ApplicationDefinition("canSAS_class", "SASroot") - - -class SASdata: - nx_class = "NXdata" - - def __init__( - self, - data: sc.DataArray, - Q_variances: Optional[Literal["uncertainties", "resolutions"]] = None, - ): - self.data = data - valid = ("uncertainties", "resolutions") - if Q_variances not in (None,) + valid: - raise ValueError(f"Q_variances must be in {valid}") - self._variances = Q_variances - - def __write_to_nexus_group__(self, group: NXobject): - da = self.data - group.attrs["canSAS_class"] = "SASdata" - group.attrs["signal"] = "I" - group.attrs["axes"] = da.dims # for NeXus compliance, same as I_axes - group.attrs["I_axes"] = da.dims - group.attrs["Q_indices"] = tuple(da.dims.index(d) for d in da.coords["Q"].dims) - signal = group.create_field("I", sc.values(da.data)) - # We use the _errors suffix for NeXus compliance, unlike the examples given in - # NXcanSAS. - if da.variances is not None: - signal.attrs["uncertainties"] = "I_errors" - group.create_field("I_errors", sc.stddevs(da.data)) - if da.coords.is_edges("Q"): - raise ValueError( - "Q is given as bin-edges, but NXcanSAS requires Q points (such as " - "bin centers)." - ) - coord = group.create_field("Q", da.coords["Q"]) - if da.coords["Q"].variances is not None: - if self._variances is None: - raise ValueError( - "Q has variances, must specify whether these represent " - "'uncertainties' or 'resolutions' using the 'Q_variances' option'" - ) - - coord.attrs[self._variances] = "Q_errors" - group.create_field("Q_errors", sc.stddevs(da.coords["Q"])) - - -@NXcanSAS.register("SASdata") -class SASdataStrategy: - @staticmethod - def axes(group: NXobject) -> Tuple[str]: - return group.attrs.get("I_axes") - - @staticmethod - def signal(group: NXobject) -> str: - return group.attrs.get("signal", "I") - - @staticmethod - def signal_errors(group: NXobject) -> Optional[str]: - signal_name = group.attrs.get("signal", "I") - signal = group._group[signal_name] - return signal.attrs.get("uncertainties") - - def coord_errors(group: NXobject, name: str) -> Optional[str]: - if name != "Q": - return None - # TODO This naively stores this as Scipp errors, which are just Gaussian. - # This is probably not correct in all cases. - uncertainties = group[name].attrs.get("uncertainties") - resolutions = group[name].attrs.get("resolutions") - if uncertainties is None: - return resolutions - elif resolutions is None: - return uncertainties - raise RuntimeError("Cannot handle both uncertainties and resolutions for Q") - - -@NXcanSAS.register("SAStransmission_spectrum") -class SAStransmission_spectrumStrategy: - @staticmethod - def dims(group: NXobject) -> Tuple[str]: - # TODO A valid file should have T_axes, do we need to fallback? - if (axes := group.attrs.get("T_axes")) is not None: - return (axes,) - return ("lambda",) - - -class SASentry: - nx_class = "NXentry" - - def __init__(self, *, title: str, run: Union[str, int]): - self.title = title - self.run = run - - def __write_to_nexus_group__(self, group: NXobject): - group.attrs["canSAS_class"] = "SASentry" - group.attrs["version"] = "1.0" - group.attrs["definition"] = "NXcanSAS" - group.create_field("title", self.title) - group.create_field("run", self.run) - - -@NXcanSAS.register("SASentry") -class SASentryStrategy: - pass - - -@NXcanSAS.register("SASroot") -class SASrootStrategy: - pass diff --git a/src/scippnexus/v1/file.py b/src/scippnexus/v1/file.py deleted file mode 100644 index c797f104..00000000 --- a/src/scippnexus/v1/file.py +++ /dev/null @@ -1,32 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -import warnings -from contextlib import AbstractContextManager - -import h5py -from scipp import VisibleDeprecationWarning - -from .nexus_classes import NXroot - - -class File(AbstractContextManager, NXroot): - def __init__(self, *args, definition=None, **kwargs): - warnings.warn( - "The scippnexus.v1 API is deprecated and will be removed in release 24.01. " - "Switch to 'import scippnexus as snx' to use API version 2.", - VisibleDeprecationWarning, - stacklevel=2, - ) - self._file = h5py.File(*args, **kwargs) - NXroot.__init__(self, self._file, definition=definition) - - def __enter__(self): - self._file.__enter__() - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._file.close() - - def close(self): - self._file.close() diff --git a/src/scippnexus/v1/leaf.py b/src/scippnexus/v1/leaf.py deleted file mode 100644 index bb617cf0..00000000 --- a/src/scippnexus/v1/leaf.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Dict, Union - -import scipp as sc - -from .._common import to_plain_index -from .nxobject import NXobject, ScippIndex - - -class Leaf(NXobject): - """Base class for "leaf" groups than can be loaded as a dict.""" - - def _getitem( - self, select: ScippIndex - ) -> Dict[str, Union[sc.Variable, sc.DataArray]]: - from .nexus_classes import NXtransformations - - index = to_plain_index([], select) - if index != tuple(): - raise ValueError(f"Cannot select slice when loading {type(self).__name__}") - content = sc.DataGroup() - for key, obj in self.items(): - if key == 'depends_on' or isinstance(obj, NXtransformations): - continue - content[key] = obj[()] - return content diff --git a/src/scippnexus/v1/nexus_classes.py b/src/scippnexus/v1/nexus_classes.py deleted file mode 100644 index 7fc5080f..00000000 --- a/src/scippnexus/v1/nexus_classes.py +++ /dev/null @@ -1,192 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from .nxcylindrical_geometry import NXcylindrical_geometry # noqa F401 -from .nxdata import NXdata # noqa F401 -from .nxdetector import NXdetector # noqa F401 -from .nxdisk_chopper import NXdisk_chopper # noqa F401 -from .nxevent_data import NXevent_data # noqa F401 -from .nxfermi_chopper import NXfermi_chopper # noqa F401 -from .nxlog import NXlog # noqa F401 -from .nxmonitor import NXmonitor # noqa F401 -from .nxobject import NXobject, NXroot # noqa F401 -from .nxoff_geometry import NXoff_geometry # noqa F401 -from .nxsample import NXsample # noqa F401 -from .nxsource import NXsource # noqa F401 -from .nxtransformations import NXtransformations # noqa F401 - - -class NXentry(NXobject): - """Entry in a NeXus file.""" - - -class NXinstrument(NXobject): - """Group of instrument-related information.""" - - -class NXaperture(NXobject): - """NXaperture""" - - -class NXattenuator(NXobject): - """NXattenuator""" - - -class NXbeam(NXobject): - """NXbeam""" - - -class NXbeam_stop(NXobject): - """NXbeam_stop""" - - -class NXbending_magnet(NXobject): - """NXbending_magnet""" - - -class NXcapillary(NXobject): - """NXcapillary""" - - -class NXcite(NXobject): - """NXcite""" - - -class NXcollection(NXobject): - """NXcollection""" - - -class NXcollimator(NXobject): - """NXcollimator""" - - -class NXcrystal(NXobject): - """NXcrystal""" - - -class NXdetector_group(NXobject): - """NXdetector_group""" - - -class NXdetector_module(NXobject): - """NXdetector_module""" - - -class NXenvironment(NXobject): - """NXenvironment""" - - -class NXfilter(NXobject): - """NXfilter""" - - -class NXflipper(NXobject): - """NXflipper""" - - -class NXfresnel_zone_plate(NXobject): - """NXfresnel_zone_plate""" - - -class NXgeometry(NXobject): - """NXgeometry""" - - -class NXgrating(NXobject): - """NXgrating""" - - -class NXguide(NXobject): - """NXguide""" - - -class NXinsertion_device(NXobject): - """NXinsertion_device""" - - -class NXmirror(NXobject): - """NXmirror""" - - -class NXmoderator(NXobject): - """NXmoderator""" - - -class NXmonochromator(NXobject): - """NXmonochromator""" - - -class NXnote(NXobject): - """NXnote""" - - -class NXorientation(NXobject): - """NXorientation""" - - -class NXparameters(NXobject): - """NXparameters""" - - -class NXpdb(NXobject): - """NXpdb""" - - -class NXpinhole(NXobject): - """NXpinhole""" - - -class NXpolarizer(NXobject): - """NXpolarizer""" - - -class NXpositioner(NXobject): - """NXpositioner""" - - -class NXprocess(NXobject): - """NXprocess""" - - -class NXreflections(NXobject): - """NXreflections""" - - -class NXsample_component(NXobject): - """NXsample_component""" - - -class NXsensor(NXobject): - """NXsensor""" - - -class NXshape(NXobject): - """NXshape""" - - -class NXslit(NXobject): - """NXslit""" - - -class NXsubentry(NXobject): - """NXsubentry""" - - -class NXtranslation(NXobject): - """NXtranslation""" - - -class NXuser(NXobject): - """NXuser""" - - -class NXvelocity_selector(NXobject): - """NXvelocity_selector""" - - -class NXxraylens(NXobject): - """NXxraylens""" - - -# Not included in list of NeXus classes since this is the "base" of all others -del NXobject diff --git a/src/scippnexus/v1/nxcylindrical_geometry.py b/src/scippnexus/v1/nxcylindrical_geometry.py deleted file mode 100644 index 6fb7905a..00000000 --- a/src/scippnexus/v1/nxcylindrical_geometry.py +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Optional, Tuple, Union - -import scipp as sc - -from .nxobject import NexusStructureError, NXobject - - -def _parse( - *, - vertices: sc.Variable, - cylinders: sc.Variable, - detector_number: Optional[sc.Variable] = None, - parent_detector_number: Optional[sc.Variable] = None, -) -> sc.Variable: - face1_center = cylinders['vertex_index', 0] - face1_edge = cylinders['vertex_index', 1] - face2_center = cylinders['vertex_index', 2] - ds = sc.Dataset( - { - 'face1_center': vertices[face1_center.values], - 'face1_edge': vertices[face1_edge.values], - 'face2_center': vertices[face2_center.values], - } - ) - ds = ds.rename(**{vertices.dim: 'cylinder'}) - if detector_number is None: - # All cylinders belong to the same shape - return sc.bins(begin=sc.index(0), dim='cylinder', data=ds) - if parent_detector_number is None: - raise NexusStructureError( - "`detector_number` not given, but " - "NXcylindrical_geometry contains mapping to `detector_number`." - ) - # detector_number gives indices into cylinders, the naming in the NeXus - # standard appears to be misleading - if parent_detector_number.values.size != detector_number.values.size: - raise NexusStructureError( - "Number of detector numbers in NXcylindrical_geometry " - "does not match the one given by the parent." - ) - detecting_cylinders = ds['cylinder', detector_number.values] - # One cylinder per detector - begin = sc.arange( - 'dummy', parent_detector_number.values.size, unit=None, dtype='int64' - ) - end = begin + sc.index(1) - shape = sc.bins(begin=begin, end=end, dim='cylinder', data=detecting_cylinders) - return shape.fold(dim='dummy', sizes=parent_detector_number.sizes) - - -class NXcylindrical_geometry(NXobject): - _dims = { - 'vertices': ('vertex',), - 'detector_number': ('detector_number',), - 'cylinders': ('cylinder', 'vertex_index'), - } - - def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: - return self._dims.get(name) - - def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: - if name == 'vertices': - return sc.DType.vector3 - return None - - def load_as_array( - self, detector_number: Optional[sc.Variable] = None - ) -> sc.Variable: - return _parse(**self[()], parent_detector_number=detector_number) diff --git a/src/scippnexus/v1/nxdata.py b/src/scippnexus/v1/nxdata.py deleted file mode 100644 index aae98ea0..00000000 --- a/src/scippnexus/v1/nxdata.py +++ /dev/null @@ -1,302 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from __future__ import annotations - -from typing import List, Optional, Union -from warnings import warn - -import numpy as np -import scipp as sc - -from .._common import to_child_select -from ..typing import H5Group -from .nxcylindrical_geometry import NXcylindrical_geometry -from .nxobject import Field, NexusStructureError, NXobject, ScippIndex, asarray -from .nxoff_geometry import NXoff_geometry -from .nxtransformations import NXtransformations - - -class NXdataStrategy: - """ - Strategy used by :py:class:`scippnexus.NXdata`. - - May be subclassed to customize behavior. - """ - - _error_suffixes = ["_errors", "_error"] # _error is the deprecated suffix - - @staticmethod - def axes(group): - """Names of the axes (dimension labels).""" - return group.attrs.get("axes") - - @staticmethod - def signal(group): - """Name of the signal field.""" - if (name := group.attrs.get("signal")) is not None: - if name in group: - return name - # Legacy NXdata defines signal not as group attribute, but attr on dataset - for name in group.keys(): - # What is the meaning of the attribute value? It is undocumented, we simply - # ignore it. - if "signal" in group._get_child(name).attrs: - return name - return None - - @staticmethod - def signal_errors(group) -> str | None: - """Name of the field to use for standard-deviations of the signal.""" - name = f"{NXdataStrategy.signal(group)}_errors" - if name in group: - return name - # This is a legacy named, deprecated in the NeXus format. - if "errors" in group: - return "errors" - - @staticmethod - def coord_errors(group, name): - """Name of the field to use for standard-deviations of a coordinate.""" - errors = [f"{name}{suffix}" for suffix in NXdataStrategy._error_suffixes] - errors = [x for x in errors if x in group] - if len(errors) == 0: - return None - if len(errors) == 2: - warn( - f"Found {name}_errors as well as the deprecated " - f"{name}_error. The latter will be ignored." - ) - return errors[0] - - -class NXdata(NXobject): - def __init__( - self, - group: H5Group, - *, - definition=None, - strategy=None, - signal_override: Field | _EventField = None, # noqa: F821 - skip: list[str] = None, - ): - """ - Parameters - ---------- - signal_override: - Field-like to use instead of trying to read signal from the file. This is - used when there is no signal or to provide a signal computed from - NXevent_data. - skip: - Names of fields to skip when loading coords. - """ - super().__init__(group, definition=definition, strategy=strategy) - self._signal_override = signal_override - self._skip = skip if skip is not None else [] - - def _default_strategy(self): - return NXdataStrategy - - @property - def shape(self) -> list[int]: - return self._signal.shape - - def _get_group_dims(self) -> None | list[str]: - # Apparently it is not possible to define dim labels unless there are - # corresponding coords. Special case of '.' entries means "no coord". - if (axes := self._strategy.axes(self)) is not None: - return [f"dim_{i}" if a == "." else a for i, a in enumerate(axes)] - axes = [] - # Names of axes that have an "axis" attribute serve as dim labels in legacy case - for name, field in self._group.items(): - if (axis := field.attrs.get("axis")) is not None: - axes.append((axis, name)) - if axes: - return [x[1] for x in sorted(axes)] - return None - - @property - def dims(self) -> list[str]: - if (d := self._get_group_dims()) is not None: - return d - # Legacy NXdata defines axes not as group attribute, but attr on dataset. - # This is handled by class Field. - return self._signal.dims - - @property - def unit(self) -> sc.Unit | None: - return self._signal.unit - - @property - def _signal_name(self) -> str: - return self._strategy.signal(self) - - @property - def _errors_name(self) -> str | None: - return self._strategy.signal_errors(self) - - @property - def _signal(self) -> Field | _EventField | None: # noqa: F821 - if self._signal_override is not None: - return self._signal_override - if self._signal_name is not None: - if self._signal_name not in self: - raise NexusStructureError( - f"Signal field '{self._signal_name}' not found in group." - ) - return self[self._signal_name] - return None - - def _get_axes(self): - """Return labels of named axes. Does not include default 'dim_{i}' names.""" - if (axes := self._strategy.axes(self)) is not None: - # Unlike self.dims we *drop* entries that are '.' - return [a for a in axes if a != "."] - elif (signal := self._signal) is not None: - if (axes := signal.attrs.get("axes")) is not None: - dims = axes.split(":") - # The standard says that the axes should be colon-separated, but some - # files use comma-separated. - if len(dims) == 1 and self._signal.ndim > 1: - dims = tuple(axes.split(",")) - return dims - return [] - - def _guess_dims(self, name: str): - """Guess dims of non-signal dataset based on shape. - - Does not check for potential bin-edge coord. - """ - shape = self._get_child(name).shape - if self.shape == shape: - return self.dims - lut = {} - if self._signal is not None: - for d, s in zip(self.dims, self.shape): - if self.shape.count(s) == 1: - lut[s] = d - try: - dims = [lut[s] for s in shape] - except KeyError: - raise NexusStructureError( - f"Could not determine axis indices for {self.name}/{name}" - ) - return dims - - def _try_guess_dims(self, name): - try: - return self._guess_dims(name) - except NexusStructureError: - return None - - def _get_field_dims(self, name: str) -> None | list[str]: - # Newly written files should always contain indices attributes, but the - # standard recommends that readers should also make "best effort" guess - # since legacy files do not set this attribute. - if (indices := self.attrs.get(f"{name}_indices")) is not None: - return list(np.array(self.dims)[np.array(indices).flatten()]) - if (axis := self._get_child(name).attrs.get("axis")) is not None: - return (self._get_group_dims()[axis - 1],) - if name in [self._signal_name, self._errors_name]: - return self._get_group_dims() # if None, field determines dims itself - if name in list(self.attrs.get("auxiliary_signals", [])): - return self._try_guess_dims(name) - if name in self._get_axes(): - # If there are named axes then items of same name are "dimension - # coordinates", i.e., have a dim matching their name. - # However, if the item is not 1-D we need more labels. Try to use labels of - # signal if dimensionality matches. - if self._signal_name in self and self._get_child(name).ndim == len( - self.shape - ): - return self[self._signal_name].dims - return [name] - return self._try_guess_dims(name) - - def _bin_edge_dim(self, coord: Field) -> None | str: - sizes = dict(zip(self.dims, self.shape)) - for dim, size in zip(coord.dims, coord.shape): - if dim in sizes and sizes[dim] + 1 == size: - return dim - return None - - def _dim_of_coord(self, name: str, coord: Field) -> None | str: - if len(coord.dims) == 1: - return coord.dims[0] - if name in coord.dims and name in self.dims: - return name - return self._bin_edge_dim(coord) - - def _should_be_aligned(self, da: sc.DataArray, name: str, coord: Field) -> bool: - dim_of_coord = self._dim_of_coord(name, coord) - if dim_of_coord is None: - return True - if dim_of_coord not in da.dims: - return False - return True - - def _getitem(self, select: ScippIndex) -> sc.DataArray: - from .nexus_classes import NXgeometry - - signal = self._signal - if signal is None: - raise NexusStructureError("No signal field found, cannot load group.") - signal = signal[select] - if self._errors_name is not None: - stddevs = self[self._errors_name][select] - # According to the standard, errors must have the same shape as the data. - # This is not the case in all files we observed, is there any harm in - # attempting a broadcast? - signal.variances = np.broadcast_to( - sc.pow(stddevs, sc.scalar(2)).values, shape=signal.shape - ) - - da = ( - signal - if isinstance(signal, sc.DataArray) - else sc.DataArray(data=asarray(signal)) - ) - - skip = self._skip - skip += [self._signal_name, self._errors_name] - skip += list(self.attrs.get("auxiliary_signals", [])) - for name in self: - if (errors := self._strategy.coord_errors(self, name)) is not None: - skip += [errors] - for name in self: - if name in skip: - continue - # It is not entirely clear whether skipping NXtransformations is the right - # solution. In principle NXobject will load them via the 'depends_on' - # mechanism, so for valid files this should be sufficient. - allowed = ( - Field, - NXtransformations, - NXcylindrical_geometry, - NXoff_geometry, - NXgeometry, - ) - if not isinstance(self._get_child(name), allowed): - raise NexusStructureError( - "Invalid NXdata: may not contain nested groups" - ) - - for name, field in self[Field].items(): - if name in skip: - continue - sel = to_child_select( - self.dims, field.dims, select, bin_edge_dim=self._bin_edge_dim(field) - ) - coord: sc.Variable = asarray(self[name][sel]) - if (error_name := self._strategy.coord_errors(self, name)) is not None: - stddevs = asarray(self[error_name][sel]) - coord.variances = sc.pow(stddevs, sc.scalar(2)).values - try: - da.coords[name] = coord - da.coords.set_aligned(name, self._should_be_aligned(da, name, field)) - except sc.DimensionError as e: - raise NexusStructureError( - f"Field in NXdata incompatible with dims or shape of signal: {e}" - ) from e - - return da diff --git a/src/scippnexus/v1/nxdetector.py b/src/scippnexus/v1/nxdetector.py deleted file mode 100644 index 8b3ad24c..00000000 --- a/src/scippnexus/v1/nxdetector.py +++ /dev/null @@ -1,267 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from __future__ import annotations - -from copy import copy -from typing import Any, Dict, List, Optional, Union - -import scipp as sc - -from .nxdata import NXdata, NXdataStrategy -from .nxevent_data import NXevent_data -from .nxobject import ( - Field, - NexusStructureError, - NXobject, - ScippIndex, - asarray, - is_dataset, -) - - -class NXdetectorStrategy(NXdataStrategy): - @staticmethod - def signal(group): - # NXdata uses the 'signal' attribute to define the field name of the signal. - # NXdetector uses a "hard-coded" signal name 'data', without specifying the - # attribute in the file, so we pass this explicitly to NXdata. - # Note the special case of an NXevent_data subgroup named 'data', which we - # avoid by checking if 'data' is a dataset. - if (name := NXdataStrategy.signal(group)) is not None: - return name - return "data" if "data" in group and is_dataset(group._group["data"]) else None - - -def group(da: sc.DataArray, groups: sc.Variable) -> sc.DataArray: - if hasattr(da, "group"): - return da.group(groups) - else: - return sc.bin(da, groups=[groups]) - - -class EventSelector: - """A proxy object for creating an NXdetector based on a selection of events.""" - - def __init__(self, detector): - self._detector = detector - - def __getitem__(self, select: ScippIndex) -> NXdetector: - """Return an NXdetector based on a selection (slice) of events.""" - det = copy(self._detector) - det._event_select = select - return det - - -class _EventField: - """Field-like wrapper of NXevent_data binned into pixels. - - This has no equivalent in the NeXus format, but represents the conceptual - event-data "signal" dataset of an NXdetector. - """ - - def __init__( - self, - nxevent_data: NXevent_data, - event_select: ScippIndex, - grouping_key: str | None = "detector_number", - grouping: Field | None = None, - ): - self._nxevent_data = nxevent_data - self._event_select = event_select - self._grouping_key = grouping_key - self._grouping = grouping - - @property - def name(self) -> str: - return self._nxevent_data.name - - @property - def attrs(self): - return self._nxevent_data.attrs - - @property - def dims(self): - if self._grouping is None: - return [self._grouping_key] - return self._grouping.dims - - @property - def shape(self): - if self._grouping is None: - raise NexusStructureError( - "Cannot get shape of NXdetector since no 'detector_number' " - "field found but detector contains event data." - ) - return self._grouping.shape - - @property - def unit(self) -> None: - return self._nxevent_data.unit - - def __getitem__(self, select: ScippIndex) -> sc.DataArray: - event_data = self._nxevent_data[self._event_select] - if isinstance(event_data, sc.DataGroup): - raise NexusStructureError("Invalid NXevent_data in NXdetector.") - if self._grouping is None: - if select not in (Ellipsis, tuple(), slice(None)): - raise NexusStructureError( - "Cannot load slice of NXdetector since it contains event data " - "but no 'detector_number' field, i.e., the shape is unknown. " - "Use ellipsis or an empty tuple to load the full detector." - ) - # Ideally we would prefer to use np.unique, but a quick experiment shows - # that this can easily be 100x slower, so it is not an option. In - # practice most files have contiguous event_id values within a bank - # (NXevent_data). - id_min = event_data.bins.coords["event_id"].min() - id_max = event_data.bins.coords["event_id"].max() - grouping = sc.arange( - dim=self._grouping_key, - unit=None, - start=id_min.value, - stop=id_max.value + 1, - dtype=id_min.dtype, - ) - else: - grouping = asarray(self._grouping[select]) - if (self._grouping_key in event_data.coords) and sc.identical( - grouping, event_data.coords[self._grouping_key] - ): - return event_data - # copy since sc.bin cannot deal with a non-contiguous view - event_id = grouping.flatten(to="event_id").copy() - event_data.bins.coords["event_time_zero"] = sc.bins_like( - event_data, fill_value=event_data.coords["event_time_zero"] - ) - # After loading raw NXevent_data it is guaranteed that the event table - # is contiguous and that there is no masking. We can therefore use the - # more efficient approach of binning from scratch instead of erasing the - # 'pulse' binning defined by NXevent_data. - event_data = group(event_data.bins.constituents["data"], groups=event_id) - if self._grouping is None: - event_data.coords[self._grouping_key] = event_data.coords.pop("event_id") - else: - del event_data.coords["event_id"] - return event_data.fold(dim="event_id", sizes=grouping.sizes) - - -class NXdetector(NXobject): - """A detector or detector bank providing an array of values or events. - - If the detector stores event data then the 'detector_number' field (if present) - is used to map events to detector pixels. Otherwise this returns event data in the - same format as NXevent_data. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._event_select = tuple() - self._nxevent_data_fields = [ - "event_time_zero", - "event_index", - "event_time_offset", - "event_id", - "cue_timestamp_zero", - "cue_index", - "pulse_height", - ] - self._detector_number_fields = ["detector_number", "pixel_id", "spectrum_index"] - - @property - def shape(self) -> list[int]: - return self._signal.shape - - @property - def dims(self) -> list[str]: - return self._signal.dims - - @property - def unit(self) -> sc.Unit | None: - return self._signal.unit - - @property - def detector_number(self) -> Field | None: - for key in self._detector_number_fields: - if key in self: - return key - return None - - @property - def _event_grouping(self) -> None | dict[str, Any]: - for key in self._detector_number_fields: - if key in self: - return {"grouping_key": key, "grouping": self[key]} - return {} - - @property - def _signal(self) -> Field | _EventField: - return self._nxdata()._signal - - def _nxdata(self, use_event_signal=True) -> NXdata: - events = self.events - if use_event_signal and events is not None: - signal = _EventField(events, self._event_select, **self._event_grouping) - else: - signal = None - skip = None - if events is not None: - if events.name == self.name: - skip = self._nxevent_data_fields - else: - skip = [events.name.split("/")[-1]] # name of the subgroup - return NXdata( - self._group, strategy=NXdetectorStrategy, signal_override=signal, skip=skip - ) - - @property - def events(self) -> None | NXevent_data: - """Return the underlying NXevent_data group, None if not event data.""" - # The standard is unclear on whether the 'data' field may be NXevent_data or - # whether the fields of NXevent_data should be stored directly within this - # NXdetector. Both cases are observed in the wild. - event_entries = self[NXevent_data] - if len(event_entries) > 1: - raise NexusStructureError( - "No unique NXevent_data entry in NXdetector. " - f"Found {len(event_entries)}." - ) - if len(event_entries) == 1: - # If there is also a signal dataset (not events) it will be ignored - # (except for possibly using it to deduce shape and dims). - return next(iter(event_entries.values())) - if "event_time_offset" in self: - return NXevent_data(self._group) - return None - - @property - def select_events(self) -> EventSelector: - """ - Return a proxy object for selecting a slice of the underlying NXevent_data - group, while keeping wrapping the NXdetector. - """ - if self.events is None: - raise NexusStructureError( - "Cannot select events in NXdetector not containing NXevent_data." - ) - return EventSelector(self) - - def _get_field_dims(self, name: str) -> None | list[str]: - if self.events is not None: - if name in self._nxevent_data_fields: - # Event field is direct child of this class - return self.events._get_field_dims(name) - if name in self._detector_number_fields: - # If there is a signal field in addition to the event data it can be - # used to define dimension labels - nxdata = self._nxdata(use_event_signal=False) - if nxdata._signal_name is not None: - return nxdata._get_field_dims(name) - # If grouping is 1-D then we use this name as the dim - if self._get_child(name).ndim == 1: - return [name] - return None - return self._nxdata()._get_field_dims(name) - - def _getitem(self, select: ScippIndex) -> sc.DataArray: - return self._nxdata()._getitem(select) diff --git a/src/scippnexus/v1/nxdisk_chopper.py b/src/scippnexus/v1/nxdisk_chopper.py deleted file mode 100644 index 231c8156..00000000 --- a/src/scippnexus/v1/nxdisk_chopper.py +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from .leaf import Leaf - - -class NXdisk_chopper(Leaf): - """Disk chopper information, can be read as a dict.""" diff --git a/src/scippnexus/v1/nxevent_data.py b/src/scippnexus/v1/nxevent_data.py deleted file mode 100644 index f532f092..00000000 --- a/src/scippnexus/v1/nxevent_data.py +++ /dev/null @@ -1,138 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import List, Union - -import numpy as np -import scipp as sc - -from .._common import to_plain_index -from .nxobject import NexusStructureError, NXobject, ScippIndex - -_event_dimension = "event" -_pulse_dimension = "pulse" - - -class NXevent_data(NXobject): - @property - def shape(self) -> List[int]: - return self['event_index'].shape - - @property - def dims(self) -> List[str]: - return [_pulse_dimension] - - @property - def unit(self) -> None: - # Binned data, bins do not have a unit - return None - - def _get_field_dims(self, name: str) -> Union[None, List[str]]: - if name in ['event_time_zero', 'event_index']: - return [_pulse_dimension] - if name in ['event_time_offset', 'event_id']: - return [_event_dimension] - return None - - def _getitem(self, select: ScippIndex) -> sc.DataArray: - self._check_for_missing_fields() - index = to_plain_index([_pulse_dimension], select) - - max_index = self.shape[0] - single = False - if index is Ellipsis or index == tuple(): - last_loaded = False - else: - if isinstance(index, int): - single = True - start, stop, _ = slice(index, None).indices(max_index) - if start == stop: - raise IndexError('Index {start} is out of range') - index = slice(start, start + 1) - start, stop, stride = index.indices(max_index) - if stop + stride > max_index: - last_loaded = False - else: - stop += stride - last_loaded = True - index = slice(start, stop, stride) - - event_index = self['event_index'][index].values - event_time_zero = self['event_time_zero'][index] - - num_event = self["event_time_offset"].shape[0] - # Some files contain uint64 "max" indices, which turn into negatives during - # conversion to int64. This is a hack to get around this. - event_index[event_index < 0] = num_event - - if len(event_index) > 0: - event_select = slice( - event_index[0], event_index[-1] if last_loaded else num_event - ) - else: - event_select = slice(None) - - if (event_id := self.get('event_id')) is not None: - event_id = event_id[event_select] - if event_id.dtype not in [sc.DType.int32, sc.DType.int64]: - raise NexusStructureError( - "NXevent_data contains event_id field with non-integer values" - ) - - event_time_offset = self['event_time_offset'][event_select] - - # Weights are not stored in NeXus, so use 1s - weights = sc.ones( - dims=[_event_dimension], - shape=event_time_offset.shape, - unit='counts', - dtype=np.float32, - ) - - events = sc.DataArray( - data=weights, coords={'event_time_offset': event_time_offset} - ) - if event_id is not None: - events.coords['event_id'] = event_id - - if not last_loaded: - event_index = np.append(event_index, num_event) - else: - # Not a bin-edge coord, all events in bin are associated with same - # (previous) pulse time value - # Copy to avoid confusing size display in _repr_html_ - event_time_zero = event_time_zero[:-1].copy() - - event_index = sc.array( - dims=[_pulse_dimension], values=event_index, dtype=sc.DType.int64, unit=None - ) - - event_index -= event_index.min() - - # There is some variation in the last recorded event_index in files from - # different institutions. We try to make sure here that it is what would be the - # first index of the next pulse. In other words, ensure that event_index - # includes the bin edge for the last pulse. - if single: - begins = event_index[_pulse_dimension, 0] - ends = event_index[_pulse_dimension, 1] - event_time_zero = event_time_zero[_pulse_dimension, 0].copy() - else: - begins = event_index[_pulse_dimension, :-1] - ends = event_index[_pulse_dimension, 1:] - - try: - binned = sc.bins(data=events, dim=_event_dimension, begin=begins, end=ends) - except IndexError as e: - raise NexusStructureError( - f"Invalid index in NXevent_data at {self.name}/event_index:\n{e}." - ) - - return sc.DataArray(data=binned, coords={'event_time_zero': event_time_zero}) - - def _check_for_missing_fields(self): - for field in ("event_time_zero", "event_index", "event_time_offset"): - if field not in self: - raise NexusStructureError( - f"Required field {field} not found in NXevent_data" - ) diff --git a/src/scippnexus/v1/nxfermi_chopper.py b/src/scippnexus/v1/nxfermi_chopper.py deleted file mode 100644 index ed2299bf..00000000 --- a/src/scippnexus/v1/nxfermi_chopper.py +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from .leaf import Leaf - - -class NXfermi_chopper(Leaf): - """Fermi chopper information, can be read as a dict.""" diff --git a/src/scippnexus/v1/nxlog.py b/src/scippnexus/v1/nxlog.py deleted file mode 100644 index 362788b6..00000000 --- a/src/scippnexus/v1/nxlog.py +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock - -from typing import List, Union - -import scipp as sc - -from .nxdata import NXdata, NXdataStrategy -from .nxobject import NXobject, ScippIndex - - -class NXlogStrategy(NXdataStrategy): - @staticmethod - def axes(group): - if (ax := NXdataStrategy.axes(group)) is not None: - return ax - # We get the shape from the original dataset, to make sure we do not squeeze - # dimensions too early - child_dataset = group._get_child('value')._dataset - ndim = child_dataset.ndim - shape = child_dataset.shape - # The outermost axis in NXlog is pre-defined to 'time' (if present). Note - # that this may be overridden by an `axes` attribute, if defined for the group. - if 'time' in group: - raw_axes = ['time'] + (['.'] * (ndim - 1)) - else: - raw_axes = ['.'] * ndim - axes = [] - for i, ax in enumerate(raw_axes): - # Squeeze dimensions that have size 1 and are not 'time' - if (ax == 'time') or (shape[i] != 1): - axes.append(ax) - return axes - - @staticmethod - def signal(group): - # NXdata uses the 'signal' attribute to define the field name of the signal. - # NXlog uses a "hard-coded" signal name 'value', without specifying the - # attribute in the file, so we pass this explicitly to NXdata. - return group.attrs.get('signal', 'value') - - -class NXlog(NXobject): - @property - def shape(self): - return self._nxbase.shape - - @property - def dims(self): - return self._nxbase.dims - - @property - def unit(self): - return self._nxbase.unit - - @property - def _nxbase(self) -> NXdata: - return NXdata( - self._group, - strategy=NXlogStrategy, - skip=['cue_timestamp_zero', 'cue_index'], - ) - - def _getitem(self, select: ScippIndex) -> sc.DataArray: - base = self._nxbase - # Field loads datetime offset attributes automatically, but for NXlog this - # may apparently be omitted and must then interpreted as relative to epoch. - base.child_params['time'] = {'is_time': True} - return base[select] - - def _get_field_dims(self, name: str) -> Union[None, List[str]]: - return self._nxbase._get_field_dims(name) diff --git a/src/scippnexus/v1/nxmonitor.py b/src/scippnexus/v1/nxmonitor.py deleted file mode 100644 index 2d5045dc..00000000 --- a/src/scippnexus/v1/nxmonitor.py +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Dict, Union - -from .nxdetector import NXdetector -from .nxobject import Field - - -class NXmonitor(NXdetector): - @property - def _event_grouping(self) -> Dict[str, Union[str, Field]]: - # Unlike NXdetector, NXmonitor does not group by 'detector_number'. We pass - # grouping information that matches the underlying binning of NXevent_data - # such that no addition binning will need to be performed. That is, the by-pulse - # binning present in the file (in NXevent_data) is preserved. - return { - 'grouping_key': 'event_time_zero', - 'grouping': self.events.get('event_time_zero'), - } diff --git a/src/scippnexus/v1/nxobject.py b/src/scippnexus/v1/nxobject.py deleted file mode 100644 index 1f2cc036..00000000 --- a/src/scippnexus/v1/nxobject.py +++ /dev/null @@ -1,684 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from __future__ import annotations - -import datetime -import functools -import inspect -import re -import warnings -from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple, Union, overload - -import dateutil.parser -import h5py -import numpy as np -import scipp as sc - -from .._common import convert_time_to_datetime64, to_plain_index -from .._hdf5_nexus import ( - _cset_to_encoding, - _ensure_str, - _ensure_supported_int_type, - _warn_latin1_decode, -) -from ..typing import H5Dataset, H5Group, ScippIndex - -NXobjectIndex = Union[str, ScippIndex] - - -def asarray(obj: Any | sc.Variable) -> sc.Variable: - return obj if isinstance(obj, sc.Variable) else sc.scalar(obj, unit=None) - - -# TODO move into scipp -class DimensionedArray(Protocol): - """ - A multi-dimensional array with a unit and dimension labels. - - Could be, e.g., a scipp.Variable or a dimple dataclass wrapping a numpy array. - """ - - @property - def values(self): - """Multi-dimensional array of values""" - - @property - def unit(self): - """Physical unit of the values""" - - @property - def dims(self) -> list[str]: - """Dimension labels for the values""" - - -class AttributeManager(Protocol): - def __getitem__(self, name: str): - """Get attribute""" - - -class NexusStructureError(Exception): - """Invalid or unsupported class and field structure in Nexus.""" - - pass - - -class Attrs: - """HDF5 attributes.""" - - def __init__(self, attrs: AttributeManager): - self._attrs = attrs - - def __contains__(self, name: str) -> bool: - return name in self._attrs - - def __getitem__(self, name: str) -> Any: - attr = self._attrs[name] - # Is this check for string attributes sufficient? Is there a better way? - if isinstance(attr, (str, bytes)): - cset = self._attrs.get_id(name.encode("utf-8")).get_type().get_cset() - return _ensure_str(attr, _cset_to_encoding(cset)) - return attr - - def __setitem__(self, name: str, val: Any): - self._attrs[name] = val - - def __iter__(self): - yield from self._attrs - - def get(self, name: str, default=None) -> Any: - return self[name] if name in self else default - - def keys(self): - return self._attrs.keys() - - -def _is_time(obj): - dummy = sc.empty(dims=[], shape=[], unit=obj.unit) - try: - dummy.to(unit="s") - return True - except sc.UnitError: - return False - - -def _as_datetime(obj: Any): - if isinstance(obj, str): - try: - # NumPy and scipp cannot handle timezone information. We therefore apply it, - # i.e., convert to UTC. - # Would like to use dateutil directly, but with Python's datetime we do not - # get nanosecond precision. Therefore we combine numpy and dateutil parsing. - date_only = "T" not in obj - if date_only: - return sc.datetime(obj) - date, time = obj.split("T") - time_and_timezone_offset = re.split(r"Z|\+|-", time) - time = time_and_timezone_offset[0] - if len(time_and_timezone_offset) == 1: - # No timezone, parse directly (scipp based on numpy) - return sc.datetime(f"{date}T{time}") - else: - # There is timezone info. Parse with dateutil. - dt = dateutil.parser.isoparse(obj) - dt = dt.replace(microsecond=0) # handled by numpy - dt = dt.astimezone(datetime.timezone.utc) - dt = dt.replace(tzinfo=None).isoformat() - # We operate with string operations here and thus end up parsing date - # and time twice. The reason is that the timezone-offset arithmetic - # cannot be done, e.g., in nanoseconds without causing rounding errors. - if "." in time: - dt += f".{time.split('.')[1]}" - return sc.datetime(dt) - except ValueError: - pass - return None - - -def _dtype_from_dataset(dataset: H5Dataset) -> sc.DType: - dtype = dataset.dtype - if str(dtype).startswith("str") or h5py.check_string_dtype(dtype): - dtype = sc.DType.string - else: - dtype = sc.DType(_ensure_supported_int_type(str(dtype))) - return dtype - - -class Field: - """NeXus field. - - In HDF5 fields are represented as dataset. - """ - - def __init__( - self, - dataset: H5Dataset, - *, - ancestor, - dims=None, - dtype: sc.DType | None = None, - is_time=None, - ): - self._ancestor = ancestor # Usually the parent, but may be grandparent, etc. - self._dataset = dataset - self._dtype = _dtype_from_dataset(dataset) if dtype is None else dtype - self._shape = self._dataset.shape - if self._dtype == sc.DType.vector3: - self._shape = self._shape[:-1] - self._is_time = is_time - # NeXus treats [] and [1] interchangeably. In general this is ill-defined, but - # the best we can do appears to be squeezing unless the file provides names for - # dimensions. The shape property of this class does thus not necessarily return - # the same as the shape of the underlying dataset. - if dims is not None: - self._dims = tuple(dims) - if len(self._dims) < len(self._shape): - # The convention here is that the given dimensions apply to the shapes - # starting from the left. So we only squeeze dimensions that are after - # len(dims). - self._shape = self._shape[: len(self._dims)] + tuple( - size for size in self._shape[len(self._dims) :] if size != 1 - ) - elif (axes := self.attrs.get("axes")) is not None: - self._dims = tuple(axes.split(":")) - # The standard says that the axes should be colon-separated, but some - # files use comma-separated. - if len(self._dims) == 1 and self._dataset.ndim > 1: - self._dims = tuple(axes.split(",")) - else: - self._shape = tuple(size for size in self._shape if size != 1) - self._dims = tuple(f"dim_{i}" for i in range(self.ndim)) - - def __getitem__(self, select) -> Any | sc.Variable: - """Load the field as a :py:class:`scipp.Variable` or Python object. - - If the shape is empty and no unit is given this returns a Python object, such - as a string or integer. Otherwise a :py:class:`scipp.Variable` is returned. - """ - index = to_plain_index(self.dims, select) - if isinstance(index, (int, slice)): - index = (index,) - - base_dims = self.dims - base_shape = self.shape - dims = [] - shape = [] - for i, ind in enumerate(index): - if not isinstance(ind, int): - dims.append(base_dims[i]) - shape.append(len(range(*ind.indices(base_shape[i])))) - - variable = sc.empty(dims=dims, shape=shape, dtype=self.dtype, unit=self.unit) - - # If the variable is empty, return early - if np.prod(shape) == 0: - return variable - - if self.dtype == sc.DType.string: - try: - strings = self._dataset.asstr()[index] - except UnicodeDecodeError as e: - strings = self._dataset.asstr(encoding="latin-1")[index] - _warn_latin1_decode(self._dataset, strings, str(e)) - variable.values = np.asarray(strings).flatten() - elif variable.values.flags["C_CONTIGUOUS"]: - # On versions of h5py prior to 3.2, a TypeError occurs in some cases - # where h5py cannot broadcast data with e.g. shape (20, 1) to a buffer - # of shape (20,). Note that broadcasting (1, 20) -> (20,) does work - # (see https://github.com/h5py/h5py/pull/1796). - # Therefore, we manually squeeze here. - # A pin of h5py<3.2 is currently required by Mantid and hence scippneutron - # (see https://github.com/h5py/h5py/issues/1880#issuecomment-823223154) - # hence this workaround. Once we can use a more recent h5py with Mantid, - # this try/except can be removed. - try: - self._dataset.read_direct(variable.values, source_sel=index) - except TypeError: - variable.values = self._dataset[index].squeeze() - else: - variable.values = self._dataset[index] - if _is_time(variable): - starts = [] - for name in self.attrs: - if (dt := _as_datetime(self.attrs[name])) is not None: - starts.append(dt) - if self._is_time and len(starts) == 0: - starts.append(sc.epoch(unit=self.unit)) - if len(starts) == 1: - variable = convert_time_to_datetime64( - variable, - start=starts[0], - scaling_factor=self.attrs.get("scaling_factor"), - ) - if variable.ndim == 0 and variable.unit is None: - # Work around scipp/scipp#2815, and avoid returning NumPy bool - if isinstance(variable.values, np.ndarray) and variable.dtype != "bool": - return variable.values[()] - else: - return variable.value - return variable - - def __repr__(self) -> str: - return f'' - - @property - def attrs(self) -> Attrs: - return Attrs(self._dataset.attrs) - - @property - def dtype(self) -> sc.DType: - return self._dtype - - @property - def name(self) -> str: - return self._dataset.name - - @property - def file(self) -> NXroot: - return NXroot(self._dataset.file) - - @property - def parent(self) -> NXobject: - return self._ancestor._make(self._dataset.parent) - - @property - def ndim(self) -> int: - """Total number of dimensions in the dataset. - - See the shape property for potential differences to the value returned by the - underlying h5py.Dataset.ndim. - """ - return len(self.shape) - - @property - def shape(self) -> list[int]: - """Shape of the field. - - NeXus may use extra dimensions of length one to store data, such as shape=[1] - instead of shape=[]. This property returns the *squeezed* shape, dropping all - length-1 dimensions that are not explicitly named. The returned shape may thus - be different from the shape of the underlying h5py.Dataset. - """ - return self._shape - - @property - def dims(self) -> list[str]: - return self._dims - - @property - def sizes(self) -> dict[str, int]: - return dict(zip(self.dims, self.shape)) - - @property - def unit(self) -> sc.Unit | None: - if (unit := self.attrs.get("units")) is not None: - try: - return sc.Unit(unit) - except sc.UnitError: - warnings.warn( - f"Unrecognized unit '{unit}' for value dataset " - f"in '{self.name}'; setting unit as 'dimensionless'", - stacklevel=2, - ) - return sc.units.one - return None - - -def is_dataset(obj: H5Group | H5Dataset) -> bool: - """Return true if the object is an h5py.Dataset or equivalent. - - Use this instead of isinstance(obj, h5py.Dataset) to ensure that code is compatible - with other h5py-alike interfaces. - """ - return hasattr(obj, "shape") - - -class NXobjectStrategy: - @staticmethod - def include_child(_) -> bool: - """Return True if the child should be included when loading.""" - return True - - -class NXobject: - """Base class for all NeXus groups.""" - - def __init__( - self, - group: H5Group, - *, - definition: Any = None, - strategy: Callable | None = None, - ): - self._group = group - # TODO can strategies replace child-params? - self.child_params = {} - self._definition = definition - self._strategy = strategy - if strategy is None and self._definition is not None: - self._strategy = self._definition.make_strategy(self) - if self._strategy is None: - self._strategy = self._default_strategy() - - def _default_strategy(self): - """ - Default strategy to use when none given and when the application definition - does not provide one. Override in child classes to set a default. - """ - return NXobjectStrategy - - def _make(self, group) -> NXobject: - if (nx_class := Attrs(group.attrs).get("NX_class")) is not None: - return _nx_class_registry().get(nx_class, NXobject)( - group, definition=self._definition - ) - return group # Return underlying (h5py) group - - def _get_child( - self, name: NXobjectIndex, use_field_dims: bool = False - ) -> NXobject | Field | sc.DataArray: - """Get item, with flag to control whether fields dims should be inferred""" - if name is None: - raise KeyError("None is not a valid index") - if isinstance(name, str): - item = self._group[name] - if is_dataset(item): - try: - dims = self._get_field_dims(name) if use_field_dims else None - except Exception as e: - msg = ( - f"Failed to determine axis names of {item.name}: {e}. " - "Falling back to default dimension labels." - ) - warnings.warn(msg, stacklevel=2) - dims = None - dtype = self._get_field_dtype(name) - return Field( - item, - dims=dims, - dtype=dtype, - ancestor=self, - **self.child_params.get(name, {}), - ) - else: - return self._make(item) - - try: - da = self._getitem(name) - self._insert_leaf_properties(da) - except NexusStructureError as e: - # If the child class cannot load this group, we fall back to returning the - # underlying datasets in a DataGroup. - if type(self)._getitem == NXobject._getitem: - raise - else: - msg = ( - f"Failed to load {self.name} as {type(self).__name__}: {e} " - "Falling back to loading HDF5 group children as scipp.DataGroup." - ) - warnings.warn(msg, stacklevel=2) - da = NXobject._getitem(self, name) - return da - - def _insert_leaf_properties(self, container): - from .nexus_classes import NXgeometry - from .nxcylindrical_geometry import NXcylindrical_geometry - from .nxoff_geometry import NXoff_geometry - - def insert(container, name, obj): - if hasattr(container, "coords"): - container.coords[name] = ( - obj if isinstance(obj, sc.Variable) else sc.scalar(obj) - ) - else: - container[name] = obj - - detector_number = getattr(self, "detector_number", None) - if detector_number is not None: - detector_number = container.coords[detector_number] - for key, child in self[[NXcylindrical_geometry, NXoff_geometry]].items(): - insert(container, key, child.load_as_array(detector_number=detector_number)) - for key, child in self[NXgeometry].items(): - insert(container, key, child[()]) - if (t := self.depends_on) is not None: - insert(container, "depends_on", t) - # If loading the transformation failed, 'depends_on' returns a string, the - # path to the transformation. If this is a nested group, we load it here. - # Note that this info is currently incomplete, since attributes are not - # loaded. - if isinstance(t, str): - from .nexus_classes import NXtransformations - - for key, group in self[NXtransformations].items(): - insert(container, key, group[()]) - - def _get_children_by_nx_class( - self, select: type | list[type] - ) -> dict[str, NXobject | Field]: - children = {} - select = tuple(select) if isinstance(select, list) else select - for key in self.keys(): - if issubclass(type(self._get_child(key)), select): - # Get child again via __getitem__ so correct field dims are used. - children[key] = self[key] - return children - - @overload - def __getitem__(self, name: str) -> NXobject | Field: ... - - @overload - def __getitem__(self, name: ScippIndex) -> sc.DataArray | sc.DataGroup: ... - - @overload - def __getitem__(self, name: type | list[type]) -> dict[str, NXobject]: ... - - def __getitem__(self, name): - """ - Get a child group or child dataset, a selection of child groups, or load and - return the current group. - - Three cases are supported: - - - String name: The child group or child dataset of that name is returned. - - Class such as ``NXdata`` or ``NXlog``: A dict containing all direct children - with a matching ``NX_class`` attribute are returned. Also accepts a tuple of - classes. ``Field`` selects all child fields, i.e., all datasets but not - groups. - - Scipp-style index: Load the specified slice of the current group, returning - a :class:`scipp.DataArray` or :class:`scipp.DataGroup`. - - Parameters - ---------- - name: - Child name, class, or index. - - Returns - ------- - : - Field, group, dict of fields, or loaded data. - """ - - def isclass(x): - return inspect.isclass(x) and issubclass(x, (Field, NXobject)) - - if isclass(name) or ( - isinstance(name, list) and len(name) and all(isclass(x) for x in name) - ): - return self._get_children_by_nx_class(name) - return self._get_child(name, use_field_dims=True) - - def _getitem(self, index: ScippIndex) -> sc.DataArray | sc.DataGroup: - include = getattr(self._strategy, "include_child", lambda x: True) - return sc.DataGroup( - {name: child[index] for name, child in self.items() if include(child)} - ) - - def _get_field_dims(self, name: str) -> None | list[str]: - """Subclasses should reimplement this to provide dimension labels for fields.""" - return None - - def _get_field_dtype(self, name: str) -> None | sc.DType: - """Subclasses should reimplement this to override the dtype for fields.""" - return None - - def __contains__(self, name: str) -> bool: - return name in self._group - - def get(self, name: str, default=None) -> NXobject | Field | sc.DataArray: - return self[name] if name in self else default - - @property - def attrs(self) -> Attrs: - return Attrs(self._group.attrs) - - @property - def name(self) -> str: - return self._group.name - - @property - def file(self) -> NXroot: - return NXroot(self._group.file) - - @property - def parent(self) -> NXobject: - return self._make(self._group.parent) - - def _ipython_key_completions_(self) -> list[str]: - return list(self.keys()) - - def __iter__(self): - yield from self._group - - def keys(self) -> list[str]: - return self._group.keys() - - def values(self) -> list[Field | NXobject]: - return [self[name] for name in self.keys()] - - def items(self) -> list[tuple[str, Field | NXobject]]: - return list(zip(self.keys(), self.values())) - - @property - def nx_class(self) -> type | None: - """The value of the NX_class attribute of the group. - - In case of the subclass NXroot this returns 'NXroot' even if the attribute - is not actually set. This is to support the majority of all legacy files, which - do not have this attribute. - """ - if (nxclass := self.attrs.get("NX_class")) is not None: - return _nx_class_registry().get(nxclass) - - @property - def depends_on(self) -> sc.Variable | sc.DataArray | None: - if (depends_on := self.get("depends_on")) is not None: - # Imported late to avoid cyclic import - from .nxtransformations import TransformationError, get_full_transformation - - try: - return get_full_transformation(depends_on) - except (NexusStructureError, TransformationError) as e: - warnings.warn( - f"Failed to load transformation {self.name}/{depends_on}:\n{e}\n" - "Falling back to returning the path to the transformation.", - stacklevel=2, - ) - return depends_on[()] - return None - - def __repr__(self) -> str: - return f'<{type(self).__name__} "{self._group.name}">' - - def create_field(self, name: str, data: DimensionedArray, **kwargs) -> Field: - if not isinstance(data, sc.Variable): - return self._group.create_dataset(name, data=data, **kwargs) - values = data.values - if data.dtype == sc.DType.string: - values = np.array(data.values, dtype=object) - elif data.dtype == sc.DType.datetime64: - start = sc.epoch(unit=data.unit) - values = (data - start).values - dataset = self._group.create_dataset(name, data=values, **kwargs) - if data.unit is not None: - dataset.attrs["units"] = str(data.unit) - if data.dtype == sc.DType.datetime64: - dataset.attrs["start"] = str(start.value) - return Field(dataset, dims=data.dims, ancestor=self) - - def create_class(self, name: str, nx_class: str | type) -> NXobject: - """Create empty HDF5 group with given name and set the NX_class attribute. - - Parameters - ---------- - name: - Group name. - nx_class: - Nexus class, can be a valid string for the NX_class attribute, or a - subclass of NXobject, such as NXdata or NXlog. - """ - group = self._group.create_group(name) - attr = nx_class if isinstance(nx_class, str) else nx_class.__name__ - group.attrs["NX_class"] = attr - return self._make(group) - - def __setitem__(self, name: str, value: Field | NXobject | DimensionedArray): - """Create a link or a new field.""" - if isinstance(value, Field): - self._group[name] = value._dataset - elif isinstance(value, NXobject): - self._group[name] = value._group - elif hasattr(value, "__write_to_nexus_group__"): - group = self.create_class(name, nx_class=value.nx_class) - value.__write_to_nexus_group__(group) - else: - self.create_field(name, value) - - def __getattr__(self, attr: str) -> Any | NXobject: - nxclass = _nx_class_registry().get(f"NX{attr}") - if nxclass is None: - raise AttributeError(f"'NXobject' object has no attribute {attr}") - matches = self[nxclass] - if len(matches) == 0: - raise NexusStructureError(f"No group with requested NX_class='{nxclass}'") - if len(matches) == 1: - return next(iter(matches.values())) - raise NexusStructureError( - f"Multiple keys match {nxclass}, use obj[{nxclass}] " - f"to obtain all matches instead of obj.{attr}." - ) - - def __dir__(self): - keys = super().__dir__() - nxclasses = [] - # Avoiding self.values() since it is more costly, but mainly since there may be - # edge cases where creation of Field/NXobject may raise on unrelated children. - for _, val in self._group.items(): - if not is_dataset(val): - nxclasses.append(self._make(val).nx_class) - for key in set(nxclasses): - if key is None: - continue - if key in keys: - continue - if nxclasses.count(key) == 1: - keys.append(key.__name__[2:]) - return keys - - -class NXroot(NXobject): - """Root of a NeXus file.""" - - @property - def nx_class(self) -> type: - # As an oversight in the NeXus standard and the reference implementation, - # the NX_class was never set to NXroot. This applies to essentially all - # files in existence before 2016, and files written by other implementations - # that were inspired by the reference implementation. We thus hardcode NXroot: - return NXroot - - -@functools.lru_cache -def _nx_class_registry(): - from . import nexus_classes - - return dict(inspect.getmembers(nexus_classes, inspect.isclass)) diff --git a/src/scippnexus/v1/nxoff_geometry.py b/src/scippnexus/v1/nxoff_geometry.py deleted file mode 100644 index 654cfc1f..00000000 --- a/src/scippnexus/v1/nxoff_geometry.py +++ /dev/null @@ -1,75 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Optional, Tuple, Union - -import scipp as sc - -from .nxobject import NexusStructureError, NXobject - - -def off_to_shape( - *, - vertices: sc.Variable, - winding_order: sc.Variable, - faces: sc.Variable, - detector_faces: Optional[sc.Variable] = None, - detector_number: Optional[sc.Variable] = None, -) -> sc.Variable: - """ - Convert OFF shape description to simpler shape representation. - """ - # Vertices in winding order. This duplicates vertices if they are part of more than - # one faces. - vw = vertices[winding_order.values] - # Same as above, grouped by face. - fvw = sc.bins(begin=faces, data=vw, dim=vw.dim) - low = fvw.bins.size().min().value - high = fvw.bins.size().max().value - if low == high: - # Vertices in winding order, groupbed by face. Unlike `fvw` above we now know - # that each face has the same number of vertices, so we can fold instead of - # using binned data. - shapes = vw.fold(dim=vertices.dim, sizes={faces.dim: -1, vertices.dim: low}) - else: - raise NotImplementedError( - "Conversion from OFF to shape not implemented for " - "inconsistent number of vertices in faces." - ) - if detector_faces is None: # if detector_number is not None, all have same shape - return sc.bins(begin=sc.index(0), dim=faces.dim, data=shapes) - if detector_number is None: - raise NexusStructureError( - "`detector_number` not given but NXoff_geometry " - "contains `detector_faces`." - ) - shape_index = detector_faces['column', 0].copy() - detid = detector_faces['column', 1].copy() - da = sc.DataArray(shape_index, coords={'detector_number': detid}).group( - detector_number.flatten(to='detector_number') - ) - comps = da.bins.constituents - comps['data'] = shapes[faces.dim, comps['data'].values] - return sc.bins(**comps).fold(dim='detector_number', sizes=detector_number.sizes) - - -class NXoff_geometry(NXobject): - _dims = { - 'detector_faces': ('face', 'column'), - 'vertices': ('vertex',), - 'winding_order': ('winding_order',), - 'faces': ('face',), - } - - def _get_field_dims(self, name: str) -> Union[None, Tuple[str]]: - return self._dims.get(name) - - def _get_field_dtype(self, name: str) -> Union[None, sc.DType]: - if name == 'vertices': - return sc.DType.vector3 - return None - - def load_as_array( - self, detector_number: Optional[sc.Variable] = None - ) -> sc.Variable: - return off_to_shape(**self[()], detector_number=detector_number) diff --git a/src/scippnexus/v1/nxsample.py b/src/scippnexus/v1/nxsample.py deleted file mode 100644 index 24894293..00000000 --- a/src/scippnexus/v1/nxsample.py +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from typing import Dict, Union - -import scipp as sc -from scipp.spatial import linear_transform - -from .leaf import Leaf -from .nxobject import ScippIndex - -_matrix_units = { - "orientation_matrix": "one", - "ub_matrix": "1/Angstrom", -} - - -class NXsample(Leaf): - """Sample information, can be read as a dict.""" - - def _getitem( - self, select: ScippIndex - ) -> Dict[str, Union[sc.Variable, sc.DataArray]]: - content = super()._getitem(select) - for key in _matrix_units: - if (item := content.get(key)) is not None: - content[key] = linear_transform( - value=item.values, unit=_matrix_units[key] - ) - return content diff --git a/src/scippnexus/v1/nxsource.py b/src/scippnexus/v1/nxsource.py deleted file mode 100644 index 85640e6f..00000000 --- a/src/scippnexus/v1/nxsource.py +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from .leaf import Leaf - - -class NXsource(Leaf): - """Source information, can be read as a dict.""" diff --git a/src/scippnexus/v1/nxtransformations.py b/src/scippnexus/v1/nxtransformations.py deleted file mode 100644 index e97218c0..00000000 --- a/src/scippnexus/v1/nxtransformations.py +++ /dev/null @@ -1,194 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) -# @author Simon Heybrock -from __future__ import annotations - -from typing import List, Optional, Union - -import numpy as np -import scipp as sc -from scipp.scipy import interpolate - -from .nxobject import Field, NexusStructureError, NXobject, ScippIndex - - -class TransformationError(NexusStructureError): - pass - - -def make_transformation(obj, /, path) -> Transformation | None: - if path.startswith("/"): - return Transformation(obj.file[path]) - elif path != ".": - return Transformation(obj.parent[path]) - return None # end of chain - - -class NXtransformations(NXobject): - """Group of transformations.""" - - def _getitem(self, index: ScippIndex) -> sc.DataGroup: - return sc.DataGroup( - { - name: get_full_transformation_starting_at( - Transformation(child), index=index - ) - for name, child in self.items() - } - ) - - -class Transformation: - def __init__(self, obj: Field | NXobject): # could be an NXlog - self._obj = obj - - @property - def attrs(self): - return self._obj.attrs - - @property - def name(self): - return self._obj.name - - @property - def depends_on(self): - if (path := self.attrs.get("depends_on")) is not None: - return make_transformation(self._obj, path) - return None - - @property - def offset(self): - if (offset := self.attrs.get("offset")) is None: - return None - if (offset_units := self.attrs.get("offset_units")) is None: - raise TransformationError( - f"Found {offset=} but no corresponding 'offset_units' " - f"attribute at {self.name}" - ) - return sc.spatial.translation(value=offset, unit=offset_units) - - @property - def vector(self) -> sc.Variable: - return sc.vector(value=self.attrs.get("vector")) - - def __getitem__(self, select: ScippIndex): - transformation_type = self.attrs.get("transformation_type") - # According to private communication with Tobias Richter, NeXus allows 0-D or - # shape=[1] for single values. It is unclear how and if this could be - # distinguished from a scan of length 1. - value = self._obj[select] - try: - if isinstance(value, sc.DataGroup): - raise TransformationError( - f"Failed to load transformation at {self.name}." - ) - t = value * self.vector - v = t if isinstance(t, sc.Variable) else t.data - if transformation_type == "translation": - v = v.to(unit="m", copy=False) - v = sc.spatial.translations(dims=v.dims, values=v.values, unit=v.unit) - elif transformation_type == "rotation": - v = sc.spatial.rotations_from_rotvecs(v) - else: - raise TransformationError( - f"{transformation_type=} attribute at {self.name}," - " expected 'translation' or 'rotation'." - ) - if isinstance(t, sc.Variable): - t = v - else: - t.data = v - if (offset := self.offset) is None: - return t - offset = sc.vector(value=offset.values, unit=offset.unit).to(unit="m") - offset = sc.spatial.translation(value=offset.value, unit=offset.unit) - return t * offset - except (sc.DimensionError, sc.UnitError) as e: - raise NexusStructureError( - f"Invalid transformation in NXtransformations: {e}" - ) from e - - -def _interpolate_transform(transform, xnew): - # scipy can't interpolate with a single value - if transform.sizes["time"] == 1: - transform = sc.concat([transform, transform], dim="time") - return interpolate.interp1d( - transform, "time", kind="previous", fill_value="extrapolate" - )(xnew=xnew) - - -def _smaller_unit(a, b): - if a.unit == b.unit: - return a.unit - ratio = sc.scalar(1.0, unit=a.unit).to(unit=b.unit) - if ratio.value < 1.0: - return a.unit - else: - return b.unit - - -def get_full_transformation( - depends_on: Field, -) -> None | sc.DataArray | sc.Variable: - """ - Get the 4x4 transformation matrix for a component, resulting - from the full chain of transformations linked by "depends_on" - attributes - """ - if (t0 := make_transformation(depends_on, depends_on[()])) is None: - return None - return get_full_transformation_starting_at(t0) - - -def get_full_transformation_starting_at( - t0: Transformation, *, index: ScippIndex = None -) -> None | sc.DataArray | sc.Variable: - transformations = _get_transformations(t0, index=() if index is None else index) - - total_transform = None - for transform in transformations: - if total_transform is None: - total_transform = transform - elif isinstance(total_transform, sc.DataArray) and isinstance( - transform, sc.DataArray - ): - unit = _smaller_unit( - transform.coords["time"], total_transform.coords["time"] - ) - total_transform.coords["time"] = total_transform.coords["time"].to( - unit=unit, copy=False - ) - transform.coords["time"] = transform.coords["time"].to( - unit=unit, copy=False - ) - time = sc.concat( - [total_transform.coords["time"], transform.coords["time"]], dim="time" - ) - time = sc.datetimes(values=np.unique(time.values), dims=["time"], unit=unit) - total_transform = _interpolate_transform( - transform, time - ) * _interpolate_transform(total_transform, time) - else: - total_transform = transform * total_transform - if isinstance(total_transform, sc.DataArray): - time_dependent = [t for t in transformations if isinstance(t, sc.DataArray)] - times = [da.coords["time"][0] for da in time_dependent] - latest_log_start = sc.reduce(times).max() - return total_transform["time", latest_log_start:].copy() - return total_transform - - -def _get_transformations( - transform: Transformation, *, index: ScippIndex -) -> list[sc.DataArray | sc.Variable]: - """Get all transformations in the depends_on chain.""" - transformations = [] - t = transform - while t is not None: - transformations.append(t[index]) - t = t.depends_on - # TODO: this list of transformation should probably be cached in the future - # to deal with changing beamline components (e.g. pixel positions) during a - # live data stream (see https://github.com/scipp/scippneutron/issues/76). - return transformations diff --git a/src/scippnexus/v1/ruff.toml b/src/scippnexus/v1/ruff.toml deleted file mode 100644 index 6d3809f8..00000000 --- a/src/scippnexus/v1/ruff.toml +++ /dev/null @@ -1,4 +0,0 @@ -exclude = ["."] -lint.select = [] -format.exclude = ["."] -format.quote-style = "preserve" \ No newline at end of file diff --git a/src/scippnexus/v2/__init__.py b/src/scippnexus/v2/__init__.py deleted file mode 100644 index a4c1bae3..00000000 --- a/src/scippnexus/v2/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from scippnexus import * diff --git a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py b/src/scippnexus/v2/application_definitions/nxcansas/__init__.py deleted file mode 100644 index 84072bf2..00000000 --- a/src/scippnexus/v2/application_definitions/nxcansas/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from scippnexus.application_definitions.nxcansas import *