From e1f3ca3a6e093cb59315d16a5b936179954826cb Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Thu, 16 May 2019 14:39:17 +0100 Subject: [PATCH 1/3] Allow access of the filenames from the dataset object --- dkist/dataset/dataset.py | 11 +++++++++-- dkist/io/fits.py | 6 ++++-- dkist/io/reference_collections.py | 18 ++++++++++++++++-- dkist/io/tests/test_collections.py | 24 ++++++++++++++++++++++++ dkist/io/tests/test_fits.py | 4 ++-- 5 files changed, 55 insertions(+), 8 deletions(-) diff --git a/dkist/dataset/dataset.py b/dkist/dataset/dataset.py index 4a5d3708..831f8c3a 100644 --- a/dkist/dataset/dataset.py +++ b/dkist/dataset/dataset.py @@ -81,7 +81,7 @@ def __init__(self, data, uncertainty=None, mask=None, wcs=None, else: self.missing_axis = missing_axis - self.array_container = None + self._array_container = None @classmethod def from_directory(cls, directory): @@ -128,9 +128,16 @@ def from_asdf(cls, filepath): raise TypeError(f"This file is not a valid DKIST asdf file, it fails validation with: {e.message}.") cls = cls(data, wcs=wcs) - cls.array_container = array_container + cls._array_container = array_container return cls + @property + def array_container(self): + """ + A reference to the files containing the data. 
+ """ + return self._array_container + @property def pixel_axes_names(self): if self.wcs.input_frame: diff --git a/dkist/io/fits.py b/dkist/io/fits.py index b71016cc..9a729c5c 100644 --- a/dkist/io/fits.py +++ b/dkist/io/fits.py @@ -51,14 +51,16 @@ def __repr__(self): if self._array is None: return "".format( self.fitsarray.fileuri, self.fitsarray.shape, self.fitsarray.dtype) - return repr(self._array) + return "\n{3!r}".format( + self.fitsarray.fileuri, self.fitsarray.shape, self.fitsarray.dtype, self._array) def __str__(self): # str alone should not force loading of the data if self._array is None: return "".format( self.fitsarray.fileuri, self.fitsarray.shape, self.fitsarray.dtype) - return str(self._array) + return "\n{3!r}".format( + self.fitsarray.fileuri, self.fitsarray.shape, self.fitsarray.dtype, self._array) def __array__(self): return self.fits_array diff --git a/dkist/io/reference_collections.py b/dkist/io/reference_collections.py index 6ea5b33a..03fa65ee 100644 --- a/dkist/io/reference_collections.py +++ b/dkist/io/reference_collections.py @@ -37,9 +37,7 @@ class BaseFITSArrayContainer(metaclass=abc.ABCMeta): """ def __init__(self, reference_array, *, loader, **kwargs): - reference_array = np.asarray(reference_array, dtype=object) - self._check_contents(reference_array) # If the first dimension is one we are going to squash it. 
@@ -52,7 +50,10 @@ def __init__(self, reference_array, *, loader, **kwargs): loader_array = np.empty_like(reference_array, dtype=object) for i, ele in enumerate(reference_array.flat): loader_array.flat[i] = loader(ele, **kwargs) + self.loader_array = loader_array + self._loader = partial(loader, **kwargs) + self.reference_array = reference_array def _check_contents(self, reference_array): """ @@ -67,6 +68,19 @@ def _check_contents(self, reference_array): assert ele.dtype == dtype assert ele.shape == shape + def __getitem__(self, item): + return type(self)(self.reference_array[item], loader=self._loader) + + @property + def filenames(self): + """ + Return a list of file names referenced by this Array Container. + """ + names = [] + for ear in self.reference_array.flat: + names.append(ear.fileuri) + return names + @abc.abstractproperty def array(self): """ diff --git a/dkist/io/tests/test_collections.py b/dkist/io/tests/test_collections.py index 9be19c6e..50f1d089 100644 --- a/dkist/io/tests/test_collections.py +++ b/dkist/io/tests/test_collections.py @@ -24,6 +24,30 @@ def externalarray(): return f.tree['data'] +def test_slicing(externalarray): + ac = NumpyFITSArrayContainer(externalarray, loader=AstropyFITSLoader, basepath=eitdir) + ext_shape = np.array(externalarray, dtype=object).shape + assert ac.loader_array.shape == ext_shape + assert ac.shape == tuple(list(ext_shape) + [128, 128]) + + assert isinstance(ac.array, np.ndarray) + assert_allclose(ac.array, np.array(ac)) + + ac = ac[5:8] + ext_shape = np.array(externalarray[5:8], dtype=object).shape + assert ac.loader_array.shape == ext_shape + assert ac.shape == tuple(list(ext_shape) + [128, 128]) + + assert isinstance(ac.array, np.ndarray) + assert_allclose(ac.array, np.array(ac)) + + +def test_filenames(externalarray): + ac = NumpyFITSArrayContainer(externalarray, loader=AstropyFITSLoader, basepath=eitdir) + assert len(ac.filenames) == len(externalarray) + assert ac.filenames == [e.fileuri for e in 
externalarray] + + def test_numpy(externalarray): ac = NumpyFITSArrayContainer(externalarray, loader=AstropyFITSLoader, basepath=eitdir) ext_shape = np.array(externalarray, dtype=object).shape diff --git a/dkist/io/tests/test_fits.py b/dkist/io/tests/test_fits.py index fc05b453..e69e59c9 100644 --- a/dkist/io/tests/test_fits.py +++ b/dkist/io/tests/test_fits.py @@ -59,8 +59,8 @@ def test_array(absolute_fl): assert isinstance(absolute_fl.fits_header, fits.Header) for contain in ("efz20040301.000010_s.fits", str(absolute_fl.shape), absolute_fl.dtype): - assert contain not in repr(absolute_fl) - assert contain not in str(absolute_fl) + assert contain in repr(absolute_fl) + assert contain in str(absolute_fl) def test_nan(): From 93e233ebf3df8b7196ed1e5d9efc99fc494b0f44 Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Thu, 16 May 2019 14:46:48 +0100 Subject: [PATCH 2/3] Add changelogs --- changelog/56.feature.1.rst | 2 ++ changelog/56.feature.2.rst | 1 + 2 files changed, 3 insertions(+) create mode 100644 changelog/56.feature.1.rst create mode 100644 changelog/56.feature.2.rst diff --git a/changelog/56.feature.1.rst b/changelog/56.feature.1.rst new file mode 100644 index 00000000..9bb35caf --- /dev/null +++ b/changelog/56.feature.1.rst @@ -0,0 +1,2 @@ +Allow easy access to the filenames contained in a +`dkist.io.BaseFITSArrayContainer` object via a `~dkist.io.BaseFITSArrayContainer.filenames` property. diff --git a/changelog/56.feature.2.rst b/changelog/56.feature.2.rst new file mode 100644 index 00000000..15d278bf --- /dev/null +++ b/changelog/56.feature.2.rst @@ -0,0 +1 @@ +`dkist.io.BaseFITSArrayContainer` objects are now sliceable.
From 5449696efc9836459f9425bf0f86f487edeeb33d Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Thu, 16 May 2019 15:01:02 +0100 Subject: [PATCH 3/3] Add a test for `Dataset.array_container` --- dkist/dataset/tests/test_dataset.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py index db6862ec..24fa4e83 100644 --- a/dkist/dataset/tests/test_dataset.py +++ b/dkist/dataset/tests/test_dataset.py @@ -118,3 +118,12 @@ def test_no_wcs_slice(dataset): def test_crop_few_slices(dataset_4d): sds = dataset_4d[0, 0] assert len(sds.wcs.input_frame.axes_order) + + +def test_array_container(): + dataset = Dataset.from_directory(os.path.join(rootdir, 'EIT')) + assert dataset.array_container is dataset._array_container + with pytest.raises(AttributeError): + dataset.array_container = 10 + + assert len(dataset.array_container.filenames) == 11