Implement generation of ASDF inventory from headers and WCS #76

Merged
merged 13 commits on Mar 27, 2020
2 changes: 2 additions & 0 deletions changelog/76.feature.rst
@@ -0,0 +1,2 @@
Implement correct extraction of dataset inventory from headers and gwcs. Also
update some data to be closer to the in-progress outgoing header spec (214).
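In outline, the inventory is now derived from the headers and the gwcs built from them. A condensed sketch of the generator change further down (the function names match the diff; the variables are illustrative):

    headers = headers_from_filenames(filenames, hdu=0)
    table_headers, sorted_filenames, sorted_headers = preprocess_headers(headers, filenames)
    ds_wcs = TransformBuilder(sorted_headers).gwcs
    inventory = extract_inventory(table_headers, ds_wcs)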
96 changes: 85 additions & 11 deletions dkist/conftest.py
@@ -12,6 +12,7 @@

from dkist.dataset import Dataset
from dkist.io import AstropyFITSLoader, DaskFITSArrayContainer
from dkist.io.asdf.generator.transforms import generate_lookup_table


@pytest.fixture
@@ -28,13 +29,17 @@ def identity_gwcs():
"""
identity = m.Multiply(1*u.arcsec/u.pixel) & m.Multiply(1*u.arcsec/u.pixel)
sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
reference_frame=Helioprojective(obstime="2018-01-01"))
reference_frame=Helioprojective(obstime="2018-01-01"),
unit=(u.arcsec, u.arcsec))
detector_frame = cf.CoordinateFrame(name="detector", naxes=2,
axes_order=(0, 1),
axes_type=("pixel", "pixel"),
axes_names=("x", "y"),
unit=(u.pix, u.pix))
return gwcs.wcs.WCS(forward_transform=identity, output_frame=sky_frame, input_frame=detector_frame)
wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=sky_frame, input_frame=detector_frame)
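# pixel_shape is in pixel (x, y) order while array_shape is the reversed
# numpy (row, column) order, so one is kept as the reverse of the other.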
wcs.pixel_shape = (10, 20)
wcs.array_shape = wcs.pixel_shape[::-1]
return wcs


@pytest.fixture
@@ -48,7 +53,8 @@ def identity_gwcs_3d():

sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
reference_frame=Helioprojective(obstime="2018-01-01"),
axes_names=("longitude", "latitude"))
axes_names=("longitude", "latitude"),
unit=(u.arcsec, u.arcsec))
wave_frame = cf.SpectralFrame(axes_order=(2, ), unit=u.nm, axes_names=("wavelength",))

frame = cf.CompositeFrame([sky_frame, wave_frame])
@@ -58,43 +64,105 @@ def identity_gwcs_3d():
axes_type=("pixel", "pixel", "pixel"),
axes_names=("x", "y", "z"), unit=(u.pix, u.pix, u.pix))

return gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
wcs.pixel_shape = (10, 20, 30)
wcs.array_shape = wcs.pixel_shape[::-1]

return wcs


@pytest.fixture
def identity_gwcs_3d_temporal():
"""
A simple 1-1 gwcs that converts from pixels to arcseconds and seconds
"""
identity = (m.Multiply(1 * u.arcsec / u.pixel) &
m.Multiply(1 * u.arcsec / u.pixel) &
m.Multiply(1 * u.s / u.pixel))

sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
reference_frame=Helioprojective(obstime="2018-01-01"),
axes_names=("longitude", "latitude"),
unit=(u.arcsec, u.arcsec))
time_frame = cf.TemporalFrame(Time("2020-01-01T00:00", format="isot", scale="utc"),
axes_order=(2,), unit=u.s)

frame = cf.CompositeFrame([sky_frame, time_frame])

detector_frame = cf.CoordinateFrame(name="detector", naxes=3,
axes_order=(0, 1, 2),
axes_type=("pixel", "pixel", "pixel"),
axes_names=("x", "y", "z"), unit=(u.pix, u.pix, u.pix))
wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
wcs.pixel_shape = (10, 20, 30)
wcs.array_shape = wcs.pixel_shape[::-1]
return wcs


@pytest.fixture
def identity_gwcs_4d():
"""
A simple 1-1 gwcs that converts from pixels to arcseconds, nanometres and seconds
"""
identity = (m.Multiply(1*u.arcsec/u.pixel) & m.Multiply(1*u.arcsec/u.pixel) &
m.Multiply(1*u.nm/u.pixel) & m.Multiply(1*u.nm/u.pixel))
identity = (m.Multiply(1 * u.arcsec/u.pixel) & m.Multiply(1 * u.arcsec/u.pixel) &
m.Multiply(1 * u.nm/u.pixel) & m.Multiply(1 * u.s/u.pixel))
sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
reference_frame=Helioprojective(obstime="2018-01-01"))
reference_frame=Helioprojective(obstime="2018-01-01"),
unit=(u.arcsec, u.arcsec))
wave_frame = cf.SpectralFrame(axes_order=(2, ), unit=u.nm)
time_frame = cf.TemporalFrame(Time([], format="isot", scale="utc"), axes_order=(3, ), unit=u.s)
time_frame = cf.TemporalFrame(Time("2020-01-01T00:00", format="isot", scale="utc"), axes_order=(3, ), unit=u.s)

frame = cf.CompositeFrame([sky_frame, wave_frame, time_frame])

detector_frame = cf.CoordinateFrame(name="detector", naxes=4,
axes_order=(0, 1, 2, 3),
axes_type=("pixel", "pixel", "pixel", "pixel"),
axes_names=("x", "y", "z", "s"), unit=(u.pix, u.pix, u.pix, u.pix))
axes_names=("x", "y", "z", "s"),
unit=(u.pix, u.pix, u.pix, u.pix))

wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
wcs.pixel_shape = (10, 20, 30, 40)
wcs.array_shape = wcs.pixel_shape[::-1]

return gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
return wcs


@pytest.fixture
def identity_gwcs_5d_stokes(identity_gwcs_4d):
stokes_frame = cf.StokesFrame(axes_order=(4,))
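# The Stokes axis is modelled as a nearest-neighbour lookup table over the four
# pixel indices, which the StokesFrame interprets as the Stokes parameters.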
stokes_model = generate_lookup_table([0, 1, 2, 3] * u.one, interpolation='nearest')
transform = identity_gwcs_4d.forward_transform
frame = cf.CompositeFrame(identity_gwcs_4d.output_frame.frames + [stokes_frame])

detector_frame = cf.CoordinateFrame(name="detector", naxes=5,
axes_order=(0, 1, 2, 3, 4),
axes_type=("pixel", "pixel", "pixel", "pixel", "pixel"),
axes_names=("x", "y", "z", "t", "s"),
unit=(u.pix, u.pix, u.pix, u.pix, u.pix))

wcs = gwcs.wcs.WCS(forward_transform=transform & stokes_model, output_frame=frame,
input_frame=detector_frame)
wcs.pixel_shape = (10, 20, 30, 40, 4)
wcs.array_shape = wcs.pixel_shape[::-1]

return wcs


@pytest.fixture
def dataset(array, identity_gwcs):
meta = {'bucket': 'data',
'dataset_id': 'test_dataset',
'asdf_object_key': 'test_dataset.asdf'}

identity_gwcs.array_shape = array.shape
identity_gwcs.pixel_shape = array.shape[::-1]
ds = Dataset(array, wcs=identity_gwcs, meta=meta, headers=Table())
# Sanity checks
assert ds.data is array
assert ds.wcs is identity_gwcs

ds._array_container = DaskFITSArrayContainer(['test1.fits'], 0, 'float', array.shape,
loader=AstropyFITSLoader)
loader=AstropyFITSLoader)

return ds

@@ -105,6 +173,9 @@ def dataset_3d(identity_gwcs_3d):
x = np.ones(shape)
array = da.from_array(x, tuple(shape))

identity_gwcs_3d.pixel_shape = array.shape[::-1]
identity_gwcs_3d.array_shape = array.shape

return Dataset(array, wcs=identity_gwcs_3d)


@@ -114,4 +185,7 @@ def dataset_4d(identity_gwcs_4d):
x = np.ones(shape)
array = da.from_array(x, tuple(shape))

identity_gwcs_4d.pixel_shape = array.shape[::-1]
identity_gwcs_4d.array_shape = array.shape

return Dataset(array, wcs=identity_gwcs_4d)
Binary file modified dkist/data/test/datasettestfiles/visp.zip
Binary file modified dkist/data/test/datasettestfiles/vtf.zip
10 changes: 8 additions & 2 deletions dkist/dataset/dataset.py
@@ -80,8 +80,14 @@ def __init__(self, data, wcs, uncertainty=None, mask=None, meta=None,

if isinstance(wcs, gwcs.WCS):
# Set the array shape to be that of the data.
wcs.array_shape = data.shape
wcs.pixel_shape = data.shape[::-1]
if wcs.array_shape is None:
wcs.array_shape = data.shape
if wcs.pixel_shape is None:
wcs.pixel_shape = data.shape[::-1]

if (wcs.pixel_shape != data.shape[::-1] or wcs.array_shape != data.shape):
raise ValueError("The pixel and array shape on the WCS object "
"do not match the given array.")

if headers is not None and not isinstance(headers, astropy.table.Table):
raise ValueError("The headers keyword argument must be an Astropy Table instance.")
46 changes: 30 additions & 16 deletions dkist/io/asdf/generator/generator.py
@@ -8,8 +8,8 @@

from dkist.dataset import Dataset
from dkist.io import AstropyFITSLoader, DaskFITSArrayContainer
from dkist.io.asdf.generator.helpers import headers_from_filenames, preprocess_headers
from dkist.io.asdf.generator.simulated_data import generate_datset_inventory_from_headers
from dkist.io.asdf.generator.helpers import (extract_inventory, headers_from_filenames,
preprocess_headers)
from dkist.io.asdf.generator.transforms import TransformBuilder

try:
@@ -46,9 +46,9 @@ def references_from_filenames(filenames, headers, array_shape, hdu_index=0, rela
Returns
-------
`dkist.io.DaskFITSArrayContainer`
A container that represents a set of FITS files, and can generate a `dask.array.Array` from them.
A container that represents a set of FITS files, and can generate a
`dask.array.Array` from them.
"""

filenames = np.asanyarray(filenames)
filepaths = np.empty(array_shape, dtype=object)
if filenames.size != filepaths.size:
@@ -79,39 +79,53 @@ def references_from_filenames(filenames, headers, array_shape, hdu_index=0, rela
raise ValueError("Not all the shapes of these files are the same")
shape = list(shape)[0]

return DaskFITSArrayContainer(filepaths.tolist(), hdu_index, dtype, shape, loader=AstropyFITSLoader)
return DaskFITSArrayContainer(filepaths.tolist(), hdu_index, dtype, shape,
loader=AstropyFITSLoader)


def asdf_tree_from_filenames(filenames, asdf_filename, inventory=None, hdu=0,
def asdf_tree_from_filenames(filenames, headers=None, inventory=None, hdu=0,
relative_to=None, extra_inventory=None):
"""
Build a DKIST asdf tree from a list of (unsorted) filenames.

Parameters
----------

filenames : `list`
The filenames to process into a DKIST asdf dataset.

headers : `list`
The FITS headers, if already known. If not specified they will be read
from the files.

inventory : `dict`
The frame inventory to put in the tree. If not specified one will be
generated from the headers and gwcs.

hdu : `int`
The HDU to read from the FITS files.
The HDU to read the headers from and reference the data to.

relative_to : `pathlib.Path`
The path to reference the FITS files to inside the asdf. If not
specified, it will be local to the asdf (i.e. ``./``).

extra_inventory : `dict`
An extra set of inventory to override the generated one.
"""
# In case filenames is a generator we cast to list.
filenames = list(filenames)

# headers is an iterator
headers = headers_from_filenames(filenames, hdu=hdu)
if not headers:
headers = headers_from_filenames(filenames, hdu=hdu)

table_headers, sorted_filenames, sorted_headers = preprocess_headers(headers, filenames)

if not inventory:
inventory = generate_datset_inventory_from_headers(table_headers, asdf_filename)
if extra_inventory:
inventory.update(extra_inventory)


ds_wcs = TransformBuilder(sorted_headers).gwcs

if extra_inventory is None:
extra_inventory = {}
inventory = extract_inventory(table_headers, ds_wcs, **extra_inventory)

# Get the array shape
shape = tuple((headers[0][f'DNAXIS{n}'] for n in range(headers[0]['DNAXIS'],
headers[0]['DAAXES'], -1)))
@@ -158,7 +172,7 @@ def dataset_from_fits(path, asdf_filename, inventory=None, hdu=0, relative_to=No

files = path.glob("*fits")

tree = asdf_tree_from_filenames(list(files), asdf_filename, inventory=inventory,
tree = asdf_tree_from_filenames(list(files), inventory=inventory,
hdu=hdu, relative_to=relative_to)

with resources.path("dkist.io", "level_1_dataset_schema.yaml") as schema_path: