From 6280934559c6742dea01cebf2caa52f73a06c43a Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 10:31:43 +0100 Subject: [PATCH 1/9] Add FileManager to TiledDataset --- dkist/dataset/tiled_dataset.py | 13 ++++++++++++- dkist/io/asdf/converters/tiled_dataset.py | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/tiled_dataset.py b/dkist/dataset/tiled_dataset.py index f63cfc44..529062c4 100644 --- a/dkist/dataset/tiled_dataset.py +++ b/dkist/dataset/tiled_dataset.py @@ -13,7 +13,9 @@ from astropy.table import vstack -from .dataset import Dataset +from dkist.io.file_manager import FileManager + +from .dataset import Dataset, FileManagerDescriptor from .utils import dataset_info_str __all__ = ["TiledDataset"] @@ -56,6 +58,8 @@ class TiledDataset(Collection): """ + _file_manager = FileManagerDescriptor(default_type=FileManager) + @classmethod def _from_components(cls, shape, file_managers, wcses, header_tables, inventory): """ @@ -190,3 +194,10 @@ def __repr__(self): def __str__(self): return dataset_info_str(self) + + @property + def files(self): + """ + A `~.FileManager` helper for interacting with the files backing the data in this ``Dataset``. + """ + return self._file_manager diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py index 588754fa..37ed669d 100644 --- a/dkist/io/asdf/converters/tiled_dataset.py +++ b/dkist/io/asdf/converters/tiled_dataset.py @@ -1,5 +1,8 @@ from asdf.extension import Converter +from dkist.io.file_manager import FileManager, StripedExternalArray +from dkist.io.loaders import AstropyFITSLoader + class TiledDatasetConverter(Converter): tags = [ @@ -11,7 +14,19 @@ class TiledDatasetConverter(Converter): def from_yaml_tree(cls, node, tag, ctx): from dkist.dataset.tiled_dataset import TiledDataset - return TiledDataset(node["datasets"], node["inventory"]) + tiled_dataset = TiledDataset(node["datasets"], node["inventory"]) + tiled_dataset._file_manager = FileManager( + StripedExternalArray( + fileuris = [[tile.files.filenames for tile in row] for row in tiled_dataset], + target = 1, + dtype = tiled_dataset[0, 0].files.fileuri_array.dtype, + shape = tiled_dataset[0, 0]._data.chunksize, + loader = AstropyFITSLoader, + basepath = tiled_dataset[0, 0].files.basepath, + chunksize = tiled_dataset[0, 0]._data.chunksize + ) + ) + return tiled_dataset def to_yaml_tree(cls, tiled_dataset, tag, ctx): tree = {} From c5183709c23d05291b89ceb14ddc4d92f4d30c76 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 13:55:27 +0100 Subject: [PATCH 2/9] Add test for TiledDataset filemanager --- dkist/dataset/tests/test_tiled_dataset.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py index 0db139b9..bcd7419a 100644 --- a/dkist/dataset/tests/test_tiled_dataset.py +++ b/dkist/dataset/tests/test_tiled_dataset.py @@ -96,3 +96,19 @@ def test_repr(simple_tiled_dataset): @pytest.mark.accept_cli_tiled_dataset def test_tiles_shape(simple_tiled_dataset): assert simple_tiled_dataset.tiles_shape == [[tile.data.shape for tile in row] for row in simple_tiled_dataset] + + +def test_file_manager(dataset): + from dkist.data.sample import VBI_AJQWW + ds = load_dataset(VBI_AJQWW) + + with pytest.raises(AttributeError): + ds.files = 10 + + assert len(ds.files.filenames) == 27 + assert ds.files.shape == (1, 4096, 4096) + assert ds.files.output_shape == (3, 3, 3, 4096, 4096) + + # Have some slicing tests here + assert len(ds.slice_tiles[0].files.filenames) == 9 + assert len(ds[:2, :2].files.filenames) == 12 From f114d4ec6dc84f0bcf818e28f9f426eb97b0c62d Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 13:56:56 +0100 Subject: [PATCH 3/9] Move file manager construction into property on TiledDataset --- dkist/dataset/tiled_dataset.py | 17 ++++++++++++++++- dkist/io/asdf/converters/tiled_dataset.py | 17 +---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/dkist/dataset/tiled_dataset.py b/dkist/dataset/tiled_dataset.py index 529062c4..e40ed020 100644 --- a/dkist/dataset/tiled_dataset.py +++ b/dkist/dataset/tiled_dataset.py @@ -13,7 +13,8 @@ from astropy.table import vstack -from dkist.io.file_manager import FileManager +from dkist.io.file_manager import FileManager, StripedExternalArray +from dkist.io.loaders import AstropyFITSLoader from .dataset import Dataset, FileManagerDescriptor from .utils import dataset_info_str @@ -201,3 +202,17 @@ def files(self): A `~.FileManager` helper for interacting with the files backing the data in this ``Dataset``. """ return self._file_manager + + @property + def _file_manager(self): + return FileManager( + StripedExternalArray( + fileuris = [[tile.files.filenames for tile in row] for row in self], + target = 1, + dtype = self[0, 0].files.fileuri_array.dtype, + shape = self[0, 0]._data.chunksize, + loader = AstropyFITSLoader, + basepath = self[0, 0].files.basepath, + chunksize = self[0, 0]._data.chunksize + ) + ) diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py index 37ed669d..588754fa 100644 --- a/dkist/io/asdf/converters/tiled_dataset.py +++ b/dkist/io/asdf/converters/tiled_dataset.py @@ -1,8 +1,5 @@ from asdf.extension import Converter -from dkist.io.file_manager import FileManager, StripedExternalArray -from dkist.io.loaders import AstropyFITSLoader - class TiledDatasetConverter(Converter): tags = [ @@ -14,19 +11,7 @@ class TiledDatasetConverter(Converter): def from_yaml_tree(cls, node, tag, ctx): from dkist.dataset.tiled_dataset import TiledDataset - tiled_dataset = TiledDataset(node["datasets"], node["inventory"]) - tiled_dataset._file_manager = FileManager( - StripedExternalArray( - fileuris = [[tile.files.filenames for tile in row] for row in tiled_dataset], - target = 1, - dtype = tiled_dataset[0, 0].files.fileuri_array.dtype, - shape = tiled_dataset[0, 0]._data.chunksize, - loader = AstropyFITSLoader, - basepath = tiled_dataset[0, 0].files.basepath, - chunksize = tiled_dataset[0, 0]._data.chunksize - ) - ) - return tiled_dataset + return TiledDataset(node["datasets"], node["inventory"]) def to_yaml_tree(cls, tiled_dataset, tag, ctx): tree = {} From f6c164b2cf5a248af8289d88d29d9cd87c5e5135 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 13:57:36 +0100 Subject: [PATCH 4/9] Trim duplicate line --- dkist/dataset/tests/test_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py index 966a62bb..0dc06532 100644 --- a/dkist/dataset/tests/test_dataset.py +++ b/dkist/dataset/tests/test_dataset.py @@ -130,7 +130,6 @@ def test_file_manager(): dataset.files = 10 assert len(dataset.files.filenames) == 11 - assert len(dataset.files.filenames) == 11 assert isinstance(dataset[5]._file_manager, FileManager) assert len(dataset[..., 5].files.filenames) == 11 From 01687bb1e845614400d1d72093a947e5dde0bf8d Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 14:23:25 +0100 Subject: [PATCH 5/9] Don't use sample data for that test because internet --- dkist/dataset/tests/test_tiled_dataset.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py index bcd7419a..a0d8e17b 100644 --- a/dkist/dataset/tests/test_tiled_dataset.py +++ b/dkist/dataset/tests/test_tiled_dataset.py @@ -98,10 +98,8 @@ def test_tiles_shape(simple_tiled_dataset): assert simple_tiled_dataset.tiles_shape == [[tile.data.shape for tile in row] for row in simple_tiled_dataset] -def test_file_manager(dataset): - from dkist.data.sample import VBI_AJQWW - ds = load_dataset(VBI_AJQWW) - +def test_file_manager(large_tiled_dataset): + ds = large_tiled_dataset with pytest.raises(AttributeError): ds.files = 10 From 611e33a0f2c92ef1f2964e66d6e47bb49a95e5ca Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 14:48:44 +0100 Subject: [PATCH 6/9] Add changelog --- changelog/347.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/347.feature.rst diff --git a/changelog/347.feature.rst b/changelog/347.feature.rst new file mode 100644 index 00000000..550c3320 --- /dev/null +++ b/changelog/347.feature.rst @@ -0,0 +1 @@ +Add a FileManager property to TiledDataset for tracking files more easily. From a66d6820c3aa201fb4aea27a260e305d42678cb4 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 13 Sep 2024 14:53:01 +0100 Subject: [PATCH 7/9] Typo --- changelog/{347.feature.rst => 437.feature.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog/{347.feature.rst => 437.feature.rst} (100%) diff --git a/changelog/347.feature.rst b/changelog/437.feature.rst similarity index 100% rename from changelog/347.feature.rst rename to changelog/437.feature.rst From 5388352239aad24b45c00526b91a0acdf1232b11 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 17 Sep 2024 10:25:28 +0100 Subject: [PATCH 8/9] Don't need a thing --- dkist/dataset/tiled_dataset.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dkist/dataset/tiled_dataset.py b/dkist/dataset/tiled_dataset.py index e40ed020..5f9547fe 100644 --- a/dkist/dataset/tiled_dataset.py +++ b/dkist/dataset/tiled_dataset.py @@ -16,7 +16,7 @@ from dkist.io.file_manager import FileManager, StripedExternalArray from dkist.io.loaders import AstropyFITSLoader -from .dataset import Dataset, FileManagerDescriptor +from .dataset import Dataset from .utils import dataset_info_str __all__ = ["TiledDataset"] @@ -59,8 +59,6 @@ class TiledDataset(Collection): """ - _file_manager = FileManagerDescriptor(default_type=FileManager) - @classmethod def _from_components(cls, shape, file_managers, wcses, header_tables, inventory): """ From ec2048f17608a8691bb78046fe2e0efb14f67e95 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 18 Sep 2024 11:38:03 +0100 Subject: [PATCH 9/9] Check that file manager values match all tiles --- dkist/dataset/tests/test_tiled_dataset.py | 6 +++++ dkist/dataset/tiled_dataset.py | 29 +++++++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py index a0d8e17b..e72b8117 100644 --- a/dkist/dataset/tests/test_tiled_dataset.py +++ b/dkist/dataset/tests/test_tiled_dataset.py @@ -110,3 +110,9 @@ def test_file_manager(large_tiled_dataset): # Have some slicing tests here assert len(ds.slice_tiles[0].files.filenames) == 9 assert len(ds[:2, :2].files.filenames) == 12 + + # TODO Also test that the other checks raise errors + # This at least demonstrates that the structure works + ds[1, 1].files.fileuri_array.dtype = np.dtype("