diff --git a/changelog/513.feature.1.rst b/changelog/513.feature.1.rst new file mode 100644 index 00000000..bb63e492 --- /dev/null +++ b/changelog/513.feature.1.rst @@ -0,0 +1 @@ +History of the ASDF file, such as versions of packages and extensions used when writing it, is now exposed as ``TiledDataset.meta["history"]`` and ``Dataset.meta["history"]``. diff --git a/changelog/513.feature.rst b/changelog/513.feature.rst new file mode 100644 index 00000000..a5d0af8b --- /dev/null +++ b/changelog/513.feature.rst @@ -0,0 +1 @@ +``TiledDataset`` now has a ``.meta`` dictionary like that of ``Dataset``. diff --git a/dkist/conftest.py b/dkist/conftest.py index d19c6ff7..4e466c47 100644 --- a/dkist/conftest.py +++ b/dkist/conftest.py @@ -305,7 +305,8 @@ def simple_tiled_dataset(dataset): for ds in datasets: ds.meta["inventory"] = dataset.meta["inventory"] dataset_array = np.array(datasets).reshape((2,2)) - return TiledDataset(dataset_array, dataset.meta["inventory"]) + meta = {"inventory": dataset.meta["inventory"]} + return TiledDataset(dataset_array, meta=meta) @pytest.fixture diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 4c1aaecb..bb3ec04b 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -108,9 +108,7 @@ def _load_from_results(results): return _load_from_iterable(results) -# In Python 3.11 we can use the Union type here -@load_dataset.register(list) -@load_dataset.register(tuple) +@load_dataset.register(tuple | list) def _load_from_iterable(iterable): """ A list or tuple of valid inputs to ``load_dataset``. 
@@ -245,6 +243,7 @@ def _load_from_asdf(filepath): with asdf.open(filepath, custom_schema=schema_path.as_posix(), lazy_load=False, **asdf_open_memory_mapping_kwarg(memmap=False)) as ff: ds = ff.tree["dataset"] + ds.meta["history"] = ff.tree["history"] if isinstance(ds, TiledDataset): for sub in ds.flat: sub.files.basepath = base_path diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py index 2c5fb9f2..30749866 100644 --- a/dkist/dataset/tests/test_tiled_dataset.py +++ b/dkist/dataset/tests/test_tiled_dataset.py @@ -44,16 +44,17 @@ def test_tiled_dataset_headers(simple_tiled_dataset, dataset): def test_tiled_dataset_invalid_construction(dataset, dataset_4d): + meta = {"inventory": dataset.meta["inventory"]} with pytest.raises(ValueError, match="inventory record of the first dataset"): TiledDataset(np.array((dataset, dataset_4d))) with pytest.raises(ValueError, match="physical types do not match"): - TiledDataset(np.array((dataset, dataset_4d)), inventory=dataset.meta["inventory"]) + TiledDataset(np.array((dataset, dataset_4d)), meta=meta) ds2 = copy.deepcopy(dataset) ds2.meta["inventory"] = {"hello": "world"} with pytest.raises(ValueError, match="inventory records of all the datasets"): - TiledDataset(np.array((dataset, ds2)), dataset.meta["inventory"]) + TiledDataset(np.array((dataset, ds2)), meta=meta) def test_tiled_dataset_from_components(dataset): @@ -85,7 +86,7 @@ def test_tileddataset_plot(share_zscale): # https://github.com/sunpy/ndcube/issues/815 for tile in newtiles: tile.meta["inventory"] = ori_ds.inventory - ds = TiledDataset(np.array(newtiles).reshape(ori_ds.shape), inventory=newtiles[0].inventory) + ds = TiledDataset(np.array(newtiles).reshape(ori_ds.shape), meta={"inventory": newtiles[0].inventory}) fig = plt.figure(figsize=(12, 15)) ds.plot(0, share_zscale=share_zscale, figure=fig) return plt.gcf() diff --git a/dkist/dataset/tiled_dataset.py b/dkist/dataset/tiled_dataset.py index 29a5dca7..129b78c9 
100644 --- a/dkist/dataset/tiled_dataset.py +++ b/dkist/dataset/tiled_dataset.py @@ -5,6 +5,7 @@ but not representable in a single NDCube derived object as the array data are not contiguous in the spatial dimensions (due to overlaps and offsets). """ +import warnings from textwrap import dedent from collections.abc import Collection @@ -16,6 +17,7 @@ from dkist.io.file_manager import FileManager, StripedExternalArray from dkist.io.loaders import AstropyFITSLoader +from dkist.utils.exceptions import DKISTDeprecationWarning from .dataset import Dataset from .utils import dataset_info_str @@ -27,15 +29,15 @@ class TiledDatasetSlicer: """ Basic class to provide the slicing """ - def __init__(self, data, inventory): + def __init__(self, data, meta): self.data = data - self.inventory = inventory + self.meta = meta def __getitem__(self, slice_): new_data = [] for tile in self.data.flat: new_data.append(tile[slice_]) - return TiledDataset(np.array(new_data).reshape(self.data.shape), self.inventory) + return TiledDataset(np.array(new_data).reshape(self.data.shape), meta=self.meta) class TiledDataset(Collection): @@ -80,12 +82,20 @@ def _from_components(cls, shape, file_managers, wcses, header_tables, inventory) datasets[i]._file_manager = fm datasets = datasets.reshape(shape) - return cls(datasets, inventory) + return cls(datasets, meta={"inventory": inventory}) - def __init__(self, dataset_array, inventory=None): + def __init__(self, dataset_array, inventory=None, *, meta=None): + if inventory is not None: + warnings.warn( + "The inventory= kwarg is deprecated, inventory should be passed as part of the meta argument", + DKISTDeprecationWarning, + ) self._data = np.array(dataset_array, dtype=object) - self._inventory = inventory or {} + meta = meta or {} + inventory = meta.get("inventory", inventory or {}) self._validate_component_datasets(self._data, inventory) + self._meta = meta + self._meta["inventory"] = inventory def __contains__(self, x): return any(ele is x for ele 
in self._data.flat) @@ -101,7 +111,7 @@ def __getitem__(self, aslice): if isinstance(new_data, Dataset): return new_data - return type(self)(new_data, inventory=self.inventory) + return type(self)(new_data, meta=self.meta) @staticmethod def _validate_component_datasets(datasets, inventory): @@ -122,14 +132,21 @@ def flat(self): """ Represent this `.TiledDataset` as a 1D array. """ - return type(self)(self._data.flat, self.inventory) + return type(self)(self._data.flat, meta=self.meta) + + @property + def meta(self): + """ + A dictionary of extra metadata about the dataset. + """ + return self._meta @property def inventory(self): """ The inventory record as kept by the data center for this dataset. """ - return self._inventory + return self._meta["inventory"] @property def combined_headers(self): @@ -260,7 +277,7 @@ def slice_tiles(self): helioprojective latitude | x | x """ - return TiledDatasetSlicer(self._data, self.inventory) + return TiledDatasetSlicer(self._data, self.meta) # TODO: def regrid() diff --git a/dkist/io/asdf/converters/dataset.py b/dkist/io/asdf/converters/dataset.py index 3167c1ff..202242d3 100644 --- a/dkist/io/asdf/converters/dataset.py +++ b/dkist/io/asdf/converters/dataset.py @@ -1,3 +1,5 @@ +import copy + from asdf.extension import Converter @@ -62,7 +64,10 @@ def to_yaml_tree(self, dataset, tag, ctx): raise ValueError("This Dataset object can not be saved to asdf as " "it was not constructed from a set of FITS files.") node = {} - node["meta"] = dataset.meta or {} + # Copy the meta so we don't pop from the one in memory + node["meta"] = copy.copy(dataset.meta) or {} + # If the history key has been injected into the meta, do not save it + node["meta"].pop("history", None) node["wcs"] = dataset.wcs node["data"] = dataset.files if dataset.unit: diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py index 588754fa..3bd0c82b 100644 --- a/dkist/io/asdf/converters/tiled_dataset.py +++ 
b/dkist/io/asdf/converters/tiled_dataset.py @@ -1,3 +1,5 @@ +import copy + from asdf.extension import Converter @@ -5,16 +7,26 @@ class TiledDatasetConverter(Converter): tags = [ "tag:dkist.nso.edu:dkist/tiled_dataset-0.1.0", "asdf://dkist.nso.edu/tags/tiled_dataset-1.0.0", + "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0", ] types = ["dkist.dataset.tiled_dataset.TiledDataset"] def from_yaml_tree(cls, node, tag, ctx): from dkist.dataset.tiled_dataset import TiledDataset - return TiledDataset(node["datasets"], node["inventory"]) + # Support old files without meta, but with inventory + meta = node.get("meta", {}) + if "inventory" not in meta and (inventory := node.get("inventory", None)): + meta["inventory"] = inventory + + return TiledDataset(node["datasets"], meta=meta) def to_yaml_tree(cls, tiled_dataset, tag, ctx): tree = {} - tree["inventory"] = tiled_dataset._inventory + # Copy the meta so we don't pop from the one in memory + meta = copy.copy(tiled_dataset.meta) + # If the history key has been injected into the meta, do not save it + meta.pop("history", None) + tree["meta"] = meta tree["datasets"] = tiled_dataset._data.tolist() return tree diff --git a/dkist/io/asdf/entry_points.py b/dkist/io/asdf/entry_points.py index 8d86cf0f..6994bfe3 100644 --- a/dkist/io/asdf/entry_points.py +++ b/dkist/io/asdf/entry_points.py @@ -39,6 +39,8 @@ def get_extensions(): dkist_converters = [FileManagerConverter(), DatasetConverter(), TiledDatasetConverter()] wcs_converters = [VaryingCelestialConverter(), CoupledCompoundConverter(), RavelConverter(), AsymmetricMappingConverter()] return [ + ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.3.0", + converters=dkist_converters), ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.2.0", converters=dkist_converters), ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.1.0", diff --git a/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml 
b/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml new file mode 100644 index 00000000..4107302c --- /dev/null +++ b/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml @@ -0,0 +1,14 @@ +%YAML 1.1 +--- +id: asdf://dkist.nso.edu/manifests/dkist-1.3.0 +extension_uri: asdf://dkist.nso.edu/dkist/extensions/dkist-1.3.0 +title: DKIST extension +description: ASDF schemas and tags for DKIST classes. + +tags: + - schema_uri: "asdf://dkist.nso.edu/schemas/file_manager-1.0.0" + tag_uri: "asdf://dkist.nso.edu/tags/file_manager-1.0.0" + - schema_uri: "asdf://dkist.nso.edu/schemas/dataset-1.1.0" + tag_uri: "asdf://dkist.nso.edu/tags/dataset-1.2.0" + - schema_uri: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0" + tag_uri: "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0" diff --git a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml new file mode 100644 index 00000000..56bb710e --- /dev/null +++ b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml @@ -0,0 +1,38 @@ +%YAML 1.1 +--- +$schema: "http://stsci.edu/schemas/yaml-schema/draft-01" +id: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0" + +title: | + A DKIST Tiled Dataset object. +description: + The container for a set of Dataset objects. + +type: object +properties: + datasets: + description: A nested structure of Dataset objects + type: array + items: + type: array + items: + - tag: "asdf://dkist.nso.edu/tags/dataset-1.*" + + meta: + description: Dataset metadata, describing the whole dataset. + type: object + properties: + quality: + description: A copy of the quality report of these observations. + type: object + + inventory: + description: A copy of the inventory record for this dataset. + type: object + + required: [inventory] + additionalProperties: true + +required: [datasets, meta] +additionalProperties: false +... 
diff --git a/dkist/io/asdf/tests/test_dataset.py b/dkist/io/asdf/tests/test_dataset.py index f475e6b6..4d0d843e 100644 --- a/dkist/io/asdf/tests/test_dataset.py +++ b/dkist/io/asdf/tests/test_dataset.py @@ -103,6 +103,7 @@ def test_save_dataset_with_file_schema(tagobj, tmpdir): tree = {"dataset": tagobj} with importlib_resources.as_file(importlib_resources.files("dkist.io") / "level_1_dataset_schema.yaml") as schema_path: with asdf.AsdfFile(tree, custom_schema=schema_path.as_posix()) as afile: + afile.validate() # it seems that asdf 4.0 does not validate the custom schema on write? afile.write_to(Path(tmpdir / "test.asdf")) diff --git a/dkist/io/level_1_dataset_schema.yaml b/dkist/io/level_1_dataset_schema.yaml index bbcc67d6..63a0a584 100644 --- a/dkist/io/level_1_dataset_schema.yaml +++ b/dkist/io/level_1_dataset_schema.yaml @@ -19,6 +19,7 @@ properties: - $ref: "asdf://dkist.nso.edu/schemas/dataset-1.1.0" - $ref: "asdf://dkist.nso.edu/schemas/tiled_dataset-0.1.0" - $ref: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.0.0" + - $ref: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0" required: [dataset] additionalProperties: true