diff --git a/changelog/487.feature.rst b/changelog/487.feature.rst
new file mode 100644
index 00000000..29fea312
--- /dev/null
+++ b/changelog/487.feature.rst
@@ -0,0 +1 @@
+Add support to TiledDataset for mosaic datasets with missing tiles or where tiles are irregularly arranged.
diff --git a/dkist/conftest.py b/dkist/conftest.py
index d19c6ff7..7b88d795 100644
--- a/dkist/conftest.py
+++ b/dkist/conftest.py
@@ -299,16 +299,23 @@ def eit_dataset():
         return f.tree["dataset"]
 
 
-@pytest.fixture
-def simple_tiled_dataset(dataset):
+@pytest.fixture(params=[False,
+                        [[False, False],
+                         [True, False]]],
+                ids=["simple-nomask", "simple-masked"])
+def simple_tiled_dataset(dataset, request):
     datasets = [copy.deepcopy(dataset) for i in range(4)]
     for ds in datasets:
         ds.meta["inventory"] = dataset.meta["inventory"]
     dataset_array = np.array(datasets).reshape((2,2))
-    return TiledDataset(dataset_array, dataset.meta["inventory"])
+    return TiledDataset(dataset_array, dataset.meta["inventory"], mask=request.param)
 
 
-@pytest.fixture
+@pytest.fixture(params=[False,
+                        [[False, True, False],
+                         [True, False, True],
+                         [False, True, False]]],
+                ids=["large-nomask", "large-masked"])
 def large_tiled_dataset(tmp_path_factory):
     vbidir = tmp_path_factory.mktemp("data")
     with gzip.open(Path(rootdir) / "large_vbi.asdf.gz", mode="rb") as gfo:
diff --git a/dkist/dataset/tiled_dataset.py b/dkist/dataset/tiled_dataset.py
index f1a04385..914cce6c 100644
--- a/dkist/dataset/tiled_dataset.py
+++ b/dkist/dataset/tiled_dataset.py
@@ -82,8 +82,8 @@ def _from_components(cls, shape, file_managers, wcses, header_tables, inventory)
 
         return cls(datasets, inventory)
 
-    def __init__(self, dataset_array, inventory=None):
-        self._data = np.array(dataset_array, dtype=object)
+    def __init__(self, dataset_array, inventory=None, mask=False):
+        self._data = np.ma.masked_array(dataset_array, dtype=object, mask=mask)
         self._inventory = inventory or {}
         self._validate_component_datasets(self._data, inventory)
 
@@ -105,7 +105,7 @@ def __getitem__(self, aslice):
 
     @staticmethod
     def _validate_component_datasets(datasets, inventory):
-        datasets = datasets.flat
+        datasets = datasets.compressed()
         inv_1 = datasets[0].meta["inventory"]
         if inv_1 and inv_1 is not inventory:
             raise ValueError("The inventory record of the first dataset does not match the one passed to TiledDataset")
@@ -122,7 +122,7 @@ def flat(self):
         """
         Represent this `.TiledDataset` as a 1D array.
         """
-        return type(self)(self._data.flat, self.inventory)
+        return type(self)(self._data.compressed(), self.inventory)
 
     @property
     def inventory(self):
@@ -137,7 +137,7 @@ def combined_headers(self):
         A single `astropy.table.Table` containing all the FITS headers for all
         files in this dataset.
         """
-        return vstack([ds.meta["headers"] for ds in self._data.flat])
+        return vstack([ds.meta["headers"] for ds in self._data.compressed()])
 
     @property
     def shape(self):
diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py
index 588754fa..a156fe66 100644
--- a/dkist/io/asdf/converters/tiled_dataset.py
+++ b/dkist/io/asdf/converters/tiled_dataset.py
@@ -5,16 +5,21 @@ class TiledDatasetConverter(Converter):
     tags = [
         "tag:dkist.nso.edu:dkist/tiled_dataset-0.1.0",
         "asdf://dkist.nso.edu/tags/tiled_dataset-1.0.0",
+        "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0",
     ]
     types = ["dkist.dataset.tiled_dataset.TiledDataset"]
 
     def from_yaml_tree(cls, node, tag, ctx):
         from dkist.dataset.tiled_dataset import TiledDataset
 
-        return TiledDataset(node["datasets"], node["inventory"])
+        # Trees written with the older tags carry no "mask" key; fall back to the constructor default (nothing masked).
+        mask = node.get("mask", False)
+
+        return TiledDataset(node["datasets"], node["inventory"], mask=mask)
 
     def to_yaml_tree(cls, tiled_dataset, tag, ctx):
         tree = {}
         tree["inventory"] = tiled_dataset._inventory
         tree["datasets"] = tiled_dataset._data.tolist()
+        tree["mask"] = tiled_dataset._data.mask
         return tree
diff --git a/dkist/io/asdf/entry_points.py b/dkist/io/asdf/entry_points.py
index 8d86cf0f..6994bfe3 100644
--- a/dkist/io/asdf/entry_points.py
+++ b/dkist/io/asdf/entry_points.py
@@ -39,6 +39,8 @@ def get_extensions():
     dkist_converters = [FileManagerConverter(), DatasetConverter(), TiledDatasetConverter()]
     wcs_converters = [VaryingCelestialConverter(), CoupledCompoundConverter(), RavelConverter(), AsymmetricMappingConverter()]
     return [
+        ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.3.0",
+                                   converters=dkist_converters),
         ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.2.0",
                                    converters=dkist_converters),
         ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.1.0",
diff --git a/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml b/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml
new file mode 100644
index 00000000..4107302c
--- /dev/null
+++ b/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml
@@ -0,0 +1,14 @@
+%YAML 1.1
+---
+id: asdf://dkist.nso.edu/manifests/dkist-1.3.0
+extension_uri: asdf://dkist.nso.edu/dkist/extensions/dkist-1.3.0
+title: DKIST extension
+description: ASDF schemas and tags for DKIST classes.
+
+tags:
+  - schema_uri: "asdf://dkist.nso.edu/schemas/file_manager-1.0.0"
+    tag_uri: "asdf://dkist.nso.edu/tags/file_manager-1.0.0"
+  - schema_uri: "asdf://dkist.nso.edu/schemas/dataset-1.1.0"
+    tag_uri: "asdf://dkist.nso.edu/tags/dataset-1.2.0"
+  - schema_uri: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0"
+    tag_uri: "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0"
diff --git a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml
new file mode 100644
index 00000000..b2084a4b
--- /dev/null
+++ b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml
@@ -0,0 +1,31 @@
+%YAML 1.1
+---
+$schema: "http://stsci.edu/schemas/yaml-schema/draft-01"
+id: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0"
+
+title: |
+  A DKIST Tiled Dataset object.
+description:
+  The container for a set of Dataset objects.
+
+type: object
+properties:
+  datasets:
+    description: A nested structure of Dataset objects
+    type: array
+    items:
+      type: array
+      items:
+        anyOf:
+          - tag: "asdf://dkist.nso.edu/tags/dataset-1.*"
+          - type: "null"
+  inventory:
+    description: A copy of the inventory record for this dataset.
+    type: object
+  mask:
+    description: A mask to indicate if invalid or missing Datasets should be ignored.
+    datatype: bool8
+
+required: [datasets, inventory, mask]
+additionalProperties: false
+...
diff --git a/dkist/io/asdf/tests/test_dataset.py b/dkist/io/asdf/tests/test_dataset.py
index f475e6b6..4fdc04f2 100644
--- a/dkist/io/asdf/tests/test_dataset.py
+++ b/dkist/io/asdf/tests/test_dataset.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import pytest
+from pytest_lazy_fixtures import lf
 
 import asdf
 import astropy.table
@@ -68,9 +69,9 @@ def test_roundtrip_tiled_dataset(simple_tiled_dataset):
 @pytest.mark.parametrize("tagobj",
                          [
-                             "dataset",
-                             "simple_tiled_dataset",
+                             lf("dataset"),
+                             lf("simple_tiled_dataset"),
                          ],
-                         indirect=True)
+                         indirect=False)
 def test_save_dataset_without_file_schema(tagobj, tmp_path):
     tree = {"dataset": tagobj}
     with asdf.AsdfFile(tree) as afile:
@@ -96,9 +97,9 @@ def test_asdf_tags(dataset, tmp_path):
 @pytest.mark.parametrize("tagobj",
                          [
-                             "dataset",
-                             "simple_tiled_dataset",
+                             lf("dataset"),
+                             lf("simple_tiled_dataset"),
                          ],
-                         indirect=True)
+                         indirect=False)
 def test_save_dataset_with_file_schema(tagobj, tmpdir):
     tree = {"dataset": tagobj}
     with importlib_resources.as_file(importlib_resources.files("dkist.io") / "level_1_dataset_schema.yaml") as schema_path:
diff --git a/pyproject.toml b/pyproject.toml
index 79a421da..4823c2ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ tests = [
   "pytest-filter-subpackage",
   "pytest-benchmark",
   "pytest-xdist",
+  "pytest-lazy-fixtures",
   "hypothesis",
   "tox",
   "pydot",