From 7063a0eba5d4eb18e33ddd9da8387219deea8576 Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Mon, 26 Feb 2024 12:51:16 +0000 Subject: [PATCH] Make changes for datasets with only one file (#335) * Test scalar array to filename * Make changes for datasets with only one file This is towards fixing VBI single shot mosaics * Add changelog --------- Co-authored-by: Drew Leonard --- changelog/335.trivial.rst | 1 + dkist/conftest.py | 4 +++- dkist/io/dask_utils.py | 7 +++++-- dkist/io/file_manager.py | 5 ++--- 4 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 changelog/335.trivial.rst diff --git a/changelog/335.trivial.rst b/changelog/335.trivial.rst new file mode 100644 index 00000000..8ef309f8 --- /dev/null +++ b/changelog/335.trivial.rst @@ -0,0 +1 @@ +Adjust file loading to support single-frame datasets with no time axis. diff --git a/dkist/conftest.py b/dkist/conftest.py index 5bbced56..03344457 100644 --- a/dkist/conftest.py +++ b/dkist/conftest.py @@ -228,7 +228,9 @@ def dataset(array, identity_gwcs): assert ds.data is array assert ds.wcs is identity_gwcs - ds._file_manager = FileManager.from_parts(['test1.fits'], 0, 'float', array.shape, + # Construct the filename here as a scalar array to make sure that works as + # it's what dkist-inventory does + ds._file_manager = FileManager.from_parts(np.array('test1.fits'), 0, 'float', array.shape, loader=AstropyFITSLoader) return ds diff --git a/dkist/io/dask_utils.py b/dkist/io/dask_utils.py index feb8a3fb..c66d4d13 100644 --- a/dkist/io/dask_utils.py +++ b/dkist/io/dask_utils.py @@ -23,6 +23,8 @@ def stack_loader_array(loader_array, chunksize): # If the chunksize sin't specified then use the whole array shape chunksize = chunksize or loader_array.flat[0].shape + if loader_array.size == 1: + return tuple(loader_to_dask(loader_array, chunksize))[0] if len(loader_array.shape) == 1: return da.stack(loader_to_dask(loader_array, chunksize)) stacks = [] @@ -38,10 +40,11 @@ def loader_to_dask(loader_array, chunksize): This is done so that an explicit ``meta=`` argument can be provided to prevent loading data from disk. """ - - if len(loader_array.shape) != 1: + if loader_array.size != 1 and len(loader_array.shape) != 1: raise ValueError("Can only be used on one dimensional arrays") + loader_array = np.atleast_1d(loader_array) + # The meta argument to from array is used to determine properties of the # array, such as dtype. We explicitly specify it here to prevent dask # trying to auto calculate it by reading from the actual array on disk. diff --git a/dkist/io/file_manager.py b/dkist/io/file_manager.py index dbe7a23d..fe05c581 100644 --- a/dkist/io/file_manager.py +++ b/dkist/io/file_manager.py @@ -56,7 +56,7 @@ def _output_shape_from_ref_array(shape, loader_array) -> Tuple[int]: if shape[0] == 1: shape = shape[1:] - if len(loader_array) == 1: + if loader_array.size == 1: return shape else: return tuple(list(loader_array.shape) + list(shape)) @@ -99,8 +99,7 @@ def __init__( self._basepath = None self.basepath = basepath # Use the setter to convert to a Path self.chunksize = chunksize - - self._fileuri_array = np.array(fileuris) + self._fileuri_array = np.atleast_1d(np.array(fileuris)) loader_array = np.empty_like(self._fileuri_array, dtype=object) for i, fileuri in enumerate(self._fileuri_array.flat):