diff --git a/changelog/76.feature.rst b/changelog/76.feature.rst new file mode 100644 index 00000000..b19f3633 --- /dev/null +++ b/changelog/76.feature.rst @@ -0,0 +1,2 @@ +Implement correct extraction of dataset inventory from headers and gwcs. Also +updates some data to be closer to the in progress outgoing header spec (214) diff --git a/dkist/conftest.py b/dkist/conftest.py index 24b9865d..03b0d96b 100644 --- a/dkist/conftest.py +++ b/dkist/conftest.py @@ -12,6 +12,7 @@ from dkist.dataset import Dataset from dkist.io import AstropyFITSLoader, DaskFITSArrayContainer +from dkist.io.asdf.generator.transforms import generate_lookup_table @pytest.fixture @@ -28,13 +29,17 @@ def identity_gwcs(): """ identity = m.Multiply(1*u.arcsec/u.pixel) & m.Multiply(1*u.arcsec/u.pixel) sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective', - reference_frame=Helioprojective(obstime="2018-01-01")) + reference_frame=Helioprojective(obstime="2018-01-01"), + unit=(u.arcsec, u.arcsec)) detector_frame = cf.CoordinateFrame(name="detector", naxes=2, axes_order=(0, 1), axes_type=("pixel", "pixel"), axes_names=("x", "y"), unit=(u.pix, u.pix)) - return gwcs.wcs.WCS(forward_transform=identity, output_frame=sky_frame, input_frame=detector_frame) + wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=sky_frame, input_frame=detector_frame) + wcs.pixel_shape = (10, 20) + wcs.array_shape = wcs.pixel_shape[::-1] + return wcs @pytest.fixture @@ -48,7 +53,8 @@ def identity_gwcs_3d(): sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective', reference_frame=Helioprojective(obstime="2018-01-01"), - axes_names=("longitude", "latitude")) + axes_names=("longitude", "latitude"), + unit=(u.arcsec, u.arcsec)) wave_frame = cf.SpectralFrame(axes_order=(2, ), unit=u.nm, axes_names=("wavelength",)) frame = cf.CompositeFrame([sky_frame, wave_frame]) @@ -58,7 +64,39 @@ def identity_gwcs_3d(): axes_type=("pixel", "pixel", "pixel"), axes_names=("x", "y", "z"), unit=(u.pix, u.pix, u.pix)) - return gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame) + wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame) + wcs.pixel_shape = (10, 20, 30) + wcs.array_shape = wcs.pixel_shape[::-1] + + return wcs + + +@pytest.fixture +def identity_gwcs_3d_temporal(): + """ + A simple 1-1 gwcs that converts from pixels to arcseconds + """ + identity = (m.Multiply(1 * u.arcsec / u.pixel) & + m.Multiply(1 * u.arcsec / u.pixel) & + m.Multiply(1 * u.s / u.pixel)) + + sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective', + reference_frame=Helioprojective(obstime="2018-01-01"), + axes_names=("longitude", "latitude"), + unit=(u.arcsec, u.arcsec)) + time_frame = cf.TemporalFrame(Time("2020-01-01T00:00", format="isot", scale="utc"), + axes_order=(2,), unit=u.s) + + frame = cf.CompositeFrame([sky_frame, time_frame]) + + detector_frame = cf.CoordinateFrame(name="detector", naxes=3, + axes_order=(0, 1, 2), + axes_type=("pixel", "pixel", "pixel"), + axes_names=("x", "y", "z"), unit=(u.pix, u.pix, u.pix)) + wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame) + wcs.pixel_shape = (10, 20, 30) + wcs.array_shape = wcs.pixel_shape[::-1] + return wcs @pytest.fixture @@ -66,21 +104,48 @@ def identity_gwcs_4d(): """ A simple 1-1 gwcs that converts from pixels to arcseconds """ - identity = (m.Multiply(1*u.arcsec/u.pixel) & m.Multiply(1*u.arcsec/u.pixel) & - m.Multiply(1*u.nm/u.pixel) & m.Multiply(1*u.nm/u.pixel)) + identity = (m.Multiply(1 * u.arcsec/u.pixel) & m.Multiply(1 * u.arcsec/u.pixel) & + m.Multiply(1 * u.nm/u.pixel) & m.Multiply(1 * u.s/u.pixel)) sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective', - reference_frame=Helioprojective(obstime="2018-01-01")) + reference_frame=Helioprojective(obstime="2018-01-01"), + unit=(u.arcsec, u.arcsec)) wave_frame = cf.SpectralFrame(axes_order=(2, ), unit=u.nm) - time_frame = cf.TemporalFrame(Time([], format="isot", scale="utc"), axes_order=(3, ), unit=u.s) + time_frame = cf.TemporalFrame(Time("2020-01-01T00:00", format="isot", scale="utc"), axes_order=(3, ), unit=u.s) frame = cf.CompositeFrame([sky_frame, wave_frame, time_frame]) detector_frame = cf.CoordinateFrame(name="detector", naxes=4, axes_order=(0, 1, 2, 3), axes_type=("pixel", "pixel", "pixel", "pixel"), - axes_names=("x", "y", "z", "s"), unit=(u.pix, u.pix, u.pix, u.pix)) + axes_names=("x", "y", "z", "s"), + unit=(u.pix, u.pix, u.pix, u.pix)) + + wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame) + wcs.pixel_shape = (10, 20, 30, 40) + wcs.array_shape = wcs.pixel_shape[::-1] - return gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame) + return wcs + + +@pytest.fixture +def identity_gwcs_5d_stokes(identity_gwcs_4d): + stokes_frame = cf.StokesFrame(axes_order=(4,)) + stokes_model = generate_lookup_table([0, 1, 2, 3] * u.one, interpolation='nearest') + transform = identity_gwcs_4d.forward_transform + frame = cf.CompositeFrame(identity_gwcs_4d.output_frame.frames + [stokes_frame]) + + detector_frame = cf.CoordinateFrame(name="detector", naxes=5, + axes_order=(0, 1, 2, 3, 4), + axes_type=("pixel", "pixel", "pixel", "pixel", "pixel"), + axes_names=("x", "y", "z", "t", "s"), + unit=(u.pix, u.pix, u.pix, u.pix, u.pix)) + + wcs = gwcs.wcs.WCS(forward_transform=transform & stokes_model, output_frame=frame, + input_frame=detector_frame) + wcs.pixel_shape = (10, 20, 30, 40, 4) + wcs.array_shape = wcs.pixel_shape[::-1] + + return wcs @pytest.fixture @@ -88,13 +153,16 @@ def dataset(array, identity_gwcs): meta = {'bucket': 'data', 'dataset_id': 'test_dataset', 'asdf_object_key': 'test_dataset.asdf'} + + identity_gwcs.array_shape = array.shape + identity_gwcs.pixel_shape = array.shape[::-1] ds = Dataset(array, wcs=identity_gwcs, meta=meta, headers=Table()) # Sanity checks assert ds.data is array assert ds.wcs is identity_gwcs ds._array_container = DaskFITSArrayContainer(['test1.fits'], 0, 'float', array.shape, - loader=AstropyFITSLoader) + loader=AstropyFITSLoader) return ds @@ -105,6 +173,9 @@ def dataset_3d(identity_gwcs_3d): x = np.ones(shape) array = da.from_array(x, tuple(shape)) + identity_gwcs_3d.pixel_shape = array.shape[::-1] + identity_gwcs_3d.array_shape = array.shape + return Dataset(array, wcs=identity_gwcs_3d) @@ -114,4 +185,7 @@ def dataset_4d(identity_gwcs_4d): x = np.ones(shape) array = da.from_array(x, tuple(shape)) + identity_gwcs_4d.pixel_shape = array.shape[::-1] + identity_gwcs_4d.array_shape = array.shape + return Dataset(array, wcs=identity_gwcs_4d) diff --git a/dkist/data/test/datasettestfiles/visp.zip b/dkist/data/test/datasettestfiles/visp.zip index ad1aee1d..1fa76300 100644 Binary files a/dkist/data/test/datasettestfiles/visp.zip and b/dkist/data/test/datasettestfiles/visp.zip differ diff --git a/dkist/data/test/datasettestfiles/vtf.zip b/dkist/data/test/datasettestfiles/vtf.zip index 915708f9..d25ec169 100644 Binary files a/dkist/data/test/datasettestfiles/vtf.zip and b/dkist/data/test/datasettestfiles/vtf.zip differ diff --git a/dkist/dataset/dataset.py b/dkist/dataset/dataset.py index 894edf48..24d403db 100644 --- a/dkist/dataset/dataset.py +++ b/dkist/dataset/dataset.py @@ -80,8 +80,14 @@ def __init__(self, data, wcs, uncertainty=None, mask=None, meta=None, if isinstance(wcs, gwcs.WCS): # Set the array shape to be that of the data. - wcs.array_shape = data.shape - wcs.pixel_shape = data.shape[::-1] + if wcs.array_shape is None: + wcs.array_shape = data.shape + if wcs.pixel_shape is None: + wcs.pixel_shape = data.shape[::-1] + + if (wcs.pixel_shape != data.shape[::-1] or wcs.array_shape != data.shape): + raise ValueError("The pixel and array shape on the WCS object " + "do not match the given array.") if headers is not None and not isinstance(headers, astropy.table.Table): raise ValueError("The headers keyword argument must be an Astropy Table instance.") diff --git a/dkist/io/asdf/generator/generator.py b/dkist/io/asdf/generator/generator.py index 20e56c2d..8eb9b671 100644 --- a/dkist/io/asdf/generator/generator.py +++ b/dkist/io/asdf/generator/generator.py @@ -8,8 +8,8 @@ from dkist.dataset import Dataset from dkist.io import AstropyFITSLoader, DaskFITSArrayContainer -from dkist.io.asdf.generator.helpers import headers_from_filenames, preprocess_headers -from dkist.io.asdf.generator.simulated_data import generate_datset_inventory_from_headers +from dkist.io.asdf.generator.helpers import (extract_inventory, headers_from_filenames, + preprocess_headers) from dkist.io.asdf.generator.transforms import TransformBuilder try: @@ -46,9 +46,9 @@ def references_from_filenames(filenames, headers, array_shape, hdu_index=0, rela Returns ------- `dkist.io.DaskFITSArrayContainer` - A container that represents a set of FITS files, and can generate a `dask.array.Array` from them. + A container that represents a set of FITS files, and can generate a + `dask.array.Array` from them. """ - filenames = np.asanyarray(filenames) filepaths = np.empty(array_shape, dtype=object) if filenames.size != filepaths.size: @@ -79,39 +79,53 @@ def references_from_filenames(filenames, headers, array_shape, hdu_index=0, rela raise ValueError("Not all the shapes of these files are the same") shape = list(shape)[0] - return DaskFITSArrayContainer(filepaths.tolist(), hdu_index, dtype, shape, loader=AstropyFITSLoader) + return DaskFITSArrayContainer(filepaths.tolist(), hdu_index, dtype, shape, + loader=AstropyFITSLoader) -def asdf_tree_from_filenames(filenames, asdf_filename, inventory=None, hdu=0, +def asdf_tree_from_filenames(filenames, headers=None, inventory=None, hdu=0, relative_to=None, extra_inventory=None): """ Build a DKIST asdf tree from a list of (unsorted) filenames. Parameters ---------- - filenames : `list` The filenames to process into a DKIST asdf dataset. + headers : `list` + The FITS headers if already known. If not specified will be read from + filenames. + + inventory: `dict` + The frame inventory to put in the tree, if not specified a dummy one + will be generated. + hdu : `int` - The HDU to read from the FITS files. + The HDU to read the headers from and reference the data to. + + relative_to : `pathlib.Path` + The path to reference the FITS files to inside the asdf. If not + specified will be local to the asdf (i.e. ``./``). + + extra_inventory : `dict` + An extra set of inventory to override the generated one. """ # In case filenames is a generator we cast to list. filenames = list(filenames) # headers is an iterator - headers = headers_from_filenames(filenames, hdu=hdu) + if not headers: + headers = headers_from_filenames(filenames, hdu=hdu) table_headers, sorted_filenames, sorted_headers = preprocess_headers(headers, filenames) - if not inventory: - inventory = generate_datset_inventory_from_headers(table_headers, asdf_filename) - if extra_inventory: - inventory.update(extra_inventory) - - ds_wcs = TransformBuilder(sorted_headers).gwcs + if extra_inventory is None: + extra_inventory = {} + inventory = extract_inventory(table_headers, ds_wcs, **extra_inventory) + # Get the array shape shape = tuple((headers[0][f'DNAXIS{n}'] for n in range(headers[0]['DNAXIS'], headers[0]['DAAXES'], -1))) @@ -158,7 +172,7 @@ def dataset_from_fits(path, asdf_filename, inventory=None, hdu=0, relative_to=No files = path.glob("*fits") - tree = asdf_tree_from_filenames(list(files), asdf_filename, inventory=inventory, + tree = asdf_tree_from_filenames(list(files), inventory=inventory, hdu=hdu, relative_to=relative_to) with resources.path("dkist.io", "level_1_dataset_schema.yaml") as schema_path: diff --git a/dkist/io/asdf/generator/helpers.py b/dkist/io/asdf/generator/helpers.py index 621d530b..7b35e553 100644 --- a/dkist/io/asdf/generator/helpers.py +++ b/dkist/io/asdf/generator/helpers.py @@ -1,9 +1,13 @@ """ Helper functions for parsing files and processing headers. """ +import re +from functools import partial import numpy as np +import scipy.stats +import gwcs.coordinate_frames as cf from astropy.io import fits from astropy.table import Table @@ -13,7 +17,7 @@ def headers_from_filenames(filenames, hdu=0): """ - A generator to get the headers from filenames. + Generator to get the headers from filenames. """ return [dict(fits.getheader(fname, ext=hdu)) for fname in filenames] @@ -24,12 +28,13 @@ def table_from_headers(headers): def validate_headers(table_headers): """ - Given a bunch of headers, validate that they form a coherent set. This - function also adds the headers to a list as they are read from the file. + Given a bunch of headers, validate that they form a coherent set. + + This function also adds the headers to a list as they are read from the + file. Parameters ---------- - headers : iterator An iterator of headers. @@ -40,9 +45,8 @@ def validate_headers(table_headers): """ t = table_headers - """ - Let's do roughly the minimal amount of verification here. - """ + # Let's do roughly the minimal amount of verification here for construction + # of the WCS. Validation for inventory records is done independently. # For some keys all the values must be the same same_keys = ['NAXIS', 'DNAXIS'] @@ -92,3 +96,164 @@ def preprocess_headers(headers, filenames): table_headers.remove_columns(["headers", "filenames"]) return table_headers, sorted_filenames, sorted_headers + + +def _inventory_from_wcs(wcs): + """ + Parse the gWCS and extract any inventory keys needed. + + This assumes all WCSes have a celestial and temporal component. + + Keys for wavelength will not be added if there is no spectral component, + stokes keys are always added (defaulting to just I if not in the WCS). + """ + if not isinstance(wcs.output_frame, cf.CompositeFrame): + raise TypeError("Can't parse this WCS as expected.") # pragma: no cover + + bottom_left_array = [0] * wcs.pixel_n_dim + top_right_array = np.array(wcs.pixel_shape) - 1 + + bottom_left_world = wcs.array_index_to_world_values(*bottom_left_array) + top_right_world = wcs.array_index_to_world_values(*top_right_array) + + time_frame = list(filter(lambda f: isinstance(f, cf.TemporalFrame), wcs.output_frame.frames))[0] + temporal_axes = time_frame.axes_order[0] - wcs.pixel_n_dim + temporal_unit = time_frame.unit[0] + start_time = time_frame.reference_frame + bottom_left_world[temporal_axes] * temporal_unit + end_time = time_frame.reference_frame + top_right_world[temporal_axes] * temporal_unit + celestial_frame = list(filter(lambda f: isinstance(f, cf.CelestialFrame), wcs.output_frame.frames))[0] + lon_axes = celestial_frame.axes_order[0] - wcs.pixel_n_dim + lat_axes = celestial_frame.axes_order[1] - wcs.pixel_n_dim + + bounding_box = ((bottom_left_world[lon_axes], bottom_left_world[lat_axes]), + (top_right_world[lon_axes], top_right_world[lat_axes])) + + inventory = {'bounding_box': bounding_box, + 'start_time': start_time, + 'end_time': end_time} + + spec_frame = list(filter(lambda f: isinstance(f, cf.SpectralFrame), wcs.output_frame.frames)) + if spec_frame: + spectral_axes = spec_frame[0].axes_order[0] - wcs.pixel_n_dim + inventory["wavelength_min"] = bottom_left_world[spectral_axes] + inventory["wavelength_max"] = top_right_world[spectral_axes] + + stokes_frame = list(filter(lambda f: isinstance(f, cf.StokesFrame), wcs.output_frame.frames)) + if stokes_frame: + stokes_axes = stokes_frame[0].axes_order[0] + pixel_coords = [0] * wcs.pixel_n_dim + pixel_coords[stokes_axes] = (0, 1, 2, 3) + all_stokes = wcs.pixel_to_world(*np.broadcast_arrays(*pixel_coords)) + stokes_components = all_stokes[stokes_axes - 1] + + inventory["stokes_parameters"] = list(map(str, stokes_components)) + inventory["has_all_stokes"] = len(stokes_components) > 1 + + else: + inventory["stokes_parameters"] = ['I'] + inventory["has_all_stokes"] = False + + return inventory + + +def _get_unique(column, singular=False): + uniq = list(set(column)) + if singular: + if len(uniq) == 1: + if isinstance(uniq[0], np.str_): + return str(uniq[0]) + return uniq[0] + else: + raise ValueError("Column does not result in a singular unique value") + + return uniq + + +def _get_number_apply(column, func): + return func(column) + + +def _get_keys_matching(headers, pattern): + """ + Get all the values from all the keys matching the given re pattern. + + Assumes that each matching column is singular (all values are the same) + + Parameters + ---------- + headers : `astropy.table.Table` + All the headers + + pattern : `str` + A regex pattern + """ + results = [] + + prog = re.compile(pattern) + for key in headers.colnames: + if prog.match(key): + results.append(_get_unique(headers[key], singular=True)) + return list(set(results)) + + +def _inventory_from_headers(headers): + inventory = {} + + mode = partial(scipy.stats.mode, axis=None, nan_policy="raise") + + # These keys might get updated by parsing the gwcs object. + inventory["wavelength_min"] = inventory["wavelength_max"] = _get_unique(headers['LINEWAV'])[0] + + inventory["exposure_time"] = _get_number_apply(headers['FPA_EXPO'], mode).mode[0] + inventory["filter_wavelengths"] = _get_unique(headers['LINEWAV']) + inventory["instrument_name"] = _get_unique(headers['INSTRUME'], singular=True) + inventory["observables"] = [] # _get_unique(headers['']) + inventory["recipe_id"] = int(_get_unique(headers['RECIPEID'], singular=True)) + inventory["recipe_instance_id"] = int(_get_unique(headers['RINSTID'], singular=True)) + inventory["recipe_run_id"] = int(_get_unique(headers['RRUNID'], singular=True)) + inventory["target_type"] = list(map(str, _get_unique(headers['OBJECT']))) + inventory["primary_proposal_id"] = _get_unique(headers['PROPID'], singular=True) + inventory["primary_experiment_id"] = _get_unique(headers['EXPERID'], singular=True) + inventory["dataset_size"] = _get_number_apply(headers['FRAMEVOL'], np.sum) + inventory["contributing_experiment_ids"] = list(map(str, (_get_keys_matching(headers, r"EXPERID\d\d$") + + [_get_unique(headers["EXPERID"], singular=True)]))) + inventory["contributing_proposal_ids"] = list(map(str, (_get_keys_matching(headers, r"PROPID\d\d$") + + [_get_unique(headers["PROPID"], singular=True)]))) + + friedval = np.nan + if 'FRIEDVAL' in headers.colnames: + friedval = _get_number_apply(headers['FRIEDVAL'], np.mean) + + inventory["quality_average_fried_parameter"] = friedval + + polacc = np.nan + if 'POL_ACC' in headers.colnames: + polacc = _get_number_apply(headers['POL_ACC'], np.mean) + inventory["quality_average_polarimetric_accuracy"] = polacc + + return inventory + + +def extract_inventory(headers, wcs, **inventory): + """ + Generate the inventory record for an asdf file from an asdf tree. + + Parameters + ---------- + tree : `dict` + The incomplete asdf tree. Needs to contain the dataset object. + + inventory : `dict` + Additional inventory keys that can not be computed from the headers or the WCS. + + + Returns + ------- + tree: `dict` + The updated tree with the inventory. + + """ + # The headers will populate passband info for VBI and then wcs will + # override it if there is a wavelength axis in the dataset, + # any supplied kwargs override things extracted from dataset. + return {**_inventory_from_headers(headers), **_inventory_from_wcs(wcs), **inventory} diff --git a/dkist/io/asdf/generator/simulated_data.py b/dkist/io/asdf/generator/simulated_data.py deleted file mode 100644 index 60d356f3..00000000 --- a/dkist/io/asdf/generator/simulated_data.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Functions and helpers relating to working with simulated data. -""" -import random -import string - -from astropy.time import Time -from sunpy.time import parse_time - -__all__ = ['generate_datset_inventory_from_headers'] - - -def _gen_type(gen_type, max_int=1e6, max_float=1e6, len_str=30): - if gen_type is bool: - return bool(random.randint(0, 1)) - elif gen_type is int: - return random.randint(0, max_int) - elif gen_type is float: - return random.random() * max_float - elif gen_type is list: - return [_gen_type(str)] - elif gen_type is Time: - return parse_time("now") - elif gen_type is str: - return ''.join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(len_str)) - else: - raise ValueError("Type {} is not supported".format(gen_type)) # pragma: no cover - - -def generate_datset_inventory_from_headers(headers, asdf_name): - """ - Generate a dummy dataset inventory from headers. - - .. note:: - This is just for test data, it should not be used on real data. - - Parameters - ---------- - - headers: `astropy.table.Table` - asdf_name: `str` - - """ - - schema = [ - ('asdf_object_key', str), - ('browse_movie_object_key', str), - ('browse_movie_url', str), - ('bucket', str), - ('contributing_experiment_ids', list), - ('contributing_proposal_ids', list), - ('dataset_id', str), - ('dataset_inventory_id', int), - ('dataset_size', int), - ('end_time', Time), - ('exposure_time', float), - ('filter_wavelengths', list), - ('frame_count', int), - ('has_all_stokes', bool), - ('instrument_name', str), - ('observables', list), - ('original_frame_count', int), - ('primary_experiment_id', str), - ('primary_proposal_id', str), - ('quality_average_fried_parameter', float), - ('quality_average_polarimetric_accuracy', float), - ('recipe_id', int), - ('recipe_instance_id', int), - ('recipe_run_id', int), - ('start_time', Time), - # ('stokes_parameters', str), - ('target_type', str), - ('wavelength_max', float), - ('wavelength_min', float) - ] - - header_mapping = { - 'start_time': 'DATE-BGN', - 'end_time': 'DATE-END', - 'filter_wavelengths': 'WAVELNGTH'} - - constants = { - 'frame_count': len(headers), - 'bucket': 'data', - 'asdf_object_key': str(asdf_name) - } - - output = {} - - for key, ktype in schema: - if key in header_mapping: - hdict = dict(zip(headers.colnames, headers[0])) - output[key] = ktype(hdict.get(header_mapping[key], _gen_type(ktype))) - else: - output[key] = _gen_type(ktype) - - output.update(constants) - return output diff --git a/dkist/io/asdf/generator/tests/test_generator.py b/dkist/io/asdf/generator/tests/test_generator.py index 2896515f..5791866a 100644 --- a/dkist/io/asdf/generator/tests/test_generator.py +++ b/dkist/io/asdf/generator/tests/test_generator.py @@ -28,7 +28,7 @@ def test_array_container_shape(header_filenames): def test_asdf_tree(header_filenames): - tree = asdf_tree_from_filenames(header_filenames, "test_file.asdf") + tree = asdf_tree_from_filenames(header_filenames) assert isinstance(tree, dict) diff --git a/dkist/io/asdf/generator/tests/test_helpers.py b/dkist/io/asdf/generator/tests/test_helpers.py index db2689ee..4ab01ef5 100644 --- a/dkist/io/asdf/generator/tests/test_helpers.py +++ b/dkist/io/asdf/generator/tests/test_helpers.py @@ -3,8 +3,14 @@ import numpy as np import pytest +import astropy.units as u +import gwcs.coordinate_frames as cf +from astropy.table import Table + from dkist.io.asdf.generator.generator import references_from_filenames -from dkist.io.asdf.generator.helpers import headers_from_filenames +from dkist.io.asdf.generator.helpers import (_get_unique, _inventory_from_headers, + _inventory_from_wcs, extract_inventory, + headers_from_filenames) def test_references_from_filesnames_shape_error(header_filenames): @@ -25,3 +31,159 @@ def test_references_from_filenames(header_filenames): for ref in refs.fileuris: assert base not in ref + + +@pytest.fixture +def headers_inventory_214(): + """A minimal collection of headers to test inventory creation.""" # noqa + return Table({ + 'LINEWAV': [550, 550, 550], + 'FPA_EXPO': [10, 20, 30], + 'INSTRUME': ["VBI", "VBI", "VBI"], + 'FRIEDVAL': [1, 2, 3], + 'POL_ACC': [500, 500, 500], + 'RECIPEID': [10, 10, 10], + 'RINSTID': [20, 20, 20], + 'RRUNID': [30, 30, 30], + 'OBJECT': ["A", "B", "C"], + 'FRAMEVOL': [100, 120, 130], + 'EXPERID': ["00", "00", "00"], + 'EXPERID01': ["10", "10", "10"], + 'EXPERID02': ["20", "20", "20"], + 'PROPID': ["001", "001", "001"], + 'PROPID01': ["30", "30", "30"] + }) + + +def test_valid_inventory(headers_inventory_214): + inv = _inventory_from_headers(headers_inventory_214) + assert isinstance(inv, dict) + + assert inv["wavelength_min"] == inv["wavelength_max"] == 550 + assert inv["filter_wavelengths"] == [550] + assert inv["instrument_name"] == "VBI" + assert inv["observables"] == [] + assert inv["quality_average_fried_parameter"] == np.mean([1, 2, 3]) + assert inv["quality_average_polarimetric_accuracy"] == 500 + assert inv["recipe_id"] == 10 + assert inv["recipe_instance_id"] == 20 + assert inv["recipe_run_id"] == 30 + assert set(inv["target_type"]) == {"A", "B", "C"} + assert inv["primary_proposal_id"] == "001" + assert inv["primary_experiment_id"] == "00" + assert set(inv["contributing_experiment_ids"]) == {"10", "20", "00"} + assert set(inv["contributing_proposal_ids"]) == {"30", "001"} + + +def test_inventory_from_wcs(identity_gwcs_4d): + inv = _inventory_from_wcs(identity_gwcs_4d) + time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame), + identity_gwcs_4d.output_frame.frames))[0] + shape = identity_gwcs_4d.array_shape + + # This test transform is just 0 - n_pixel in all dimensions + assert inv["wavelength_min"] == 0 + assert inv["wavelength_max"] == shape[2] - 1 + assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1)) + assert inv["start_time"] == time_frame.reference_frame + assert inv["end_time"] == (time_frame.reference_frame + (shape[3] - 1) * u.s) + assert inv["stokes_parameters"] == ["I"] + assert inv["has_all_stokes"] is False + + +def test_inventory_from_wcs_stokes(identity_gwcs_5d_stokes): + inv = _inventory_from_wcs(identity_gwcs_5d_stokes) + time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame), + identity_gwcs_5d_stokes.output_frame.frames))[0] + shape = identity_gwcs_5d_stokes.array_shape + + # This test transform is just 0 - n_pixel in all dimensions + assert inv["wavelength_min"] == 0 + assert inv["wavelength_max"] == shape[2] - 1 + assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1)) + assert inv["start_time"] == time_frame.reference_frame + assert inv["end_time"] == (time_frame.reference_frame + (shape[3] - 1) * u.s) + assert inv["stokes_parameters"] == ["I", "Q", "U", "V"] + assert inv["has_all_stokes"] is True + + +def test_inventory_from_wcs_2d(identity_gwcs_3d_temporal): + inv = _inventory_from_wcs(identity_gwcs_3d_temporal) + time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame), + identity_gwcs_3d_temporal.output_frame.frames))[0] + shape = identity_gwcs_3d_temporal.array_shape + + # This test transform is just 0 - n_pixel in all dimensions + assert "wavelength_min" not in inv + assert "wavelength_max" not in inv + assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1)) + assert inv["start_time"] == time_frame.reference_frame + assert inv["end_time"] == (time_frame.reference_frame + (shape[2] - 1) * u.s) + assert inv["stokes_parameters"] == ["I"] + assert inv["has_all_stokes"] is False + + +def test_unique_error(): + with pytest.raises(ValueError): + _get_unique([1, 2, 3], singular=True) + + assert _get_unique([1, 2, 3], singular=False) + + +def test_extract_inventory(headers_inventory_214, identity_gwcs_4d): + inv = extract_inventory(headers_inventory_214, identity_gwcs_4d) + + time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame), + identity_gwcs_4d.output_frame.frames))[0] + shape = identity_gwcs_4d.array_shape + + # This test transform is just 0 - n_pixel in all dimensions + assert inv["wavelength_min"] == 0 + assert inv["wavelength_max"] == shape[2] - 1 + assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1)) + assert inv["start_time"] == time_frame.reference_frame + assert inv["end_time"] == (time_frame.reference_frame + (shape[3] - 1) * u.s) + assert inv["stokes_parameters"] == ["I"] + assert inv["has_all_stokes"] is False + assert inv["filter_wavelengths"] == [550] + assert inv["instrument_name"] == "VBI" + assert inv["observables"] == [] + assert inv["quality_average_fried_parameter"] == np.mean([1, 2, 3]) + assert inv["quality_average_polarimetric_accuracy"] == 500 + assert inv["recipe_id"] == 10 + assert inv["recipe_instance_id"] == 20 + assert inv["recipe_run_id"] == 30 + assert set(inv["target_type"]) == {"A", "B", "C"} + assert inv["primary_proposal_id"] == "001" + assert inv["primary_experiment_id"] == "00" + assert set(inv["contributing_experiment_ids"]) == {"10", "20", "00"} + assert set(inv["contributing_proposal_ids"]) == {"30", "001"} + + +def test_extract_inventory_no_wave(headers_inventory_214, identity_gwcs_3d_temporal): + inv = extract_inventory(headers_inventory_214, identity_gwcs_3d_temporal) + + time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame), + identity_gwcs_3d_temporal.output_frame.frames))[0] + shape = identity_gwcs_3d_temporal.array_shape + + # This test transform is just 0 - n_pixel in all dimensions + assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1)) + assert inv["wavelength_min"] == inv["wavelength_max"] == 550 + assert inv["start_time"] == time_frame.reference_frame + assert inv["end_time"] == (time_frame.reference_frame + (shape[2] - 1) * u.s) + assert inv["stokes_parameters"] == ["I"] + assert inv["has_all_stokes"] is False + assert inv["filter_wavelengths"] == [550] + assert inv["instrument_name"] == "VBI" + assert inv["observables"] == [] + assert inv["quality_average_fried_parameter"] == np.mean([1, 2, 3]) + assert inv["quality_average_polarimetric_accuracy"] == 500 + assert inv["recipe_id"] == 10 + assert inv["recipe_instance_id"] == 20 + assert inv["recipe_run_id"] == 30 + assert set(inv["target_type"]) == {"A", "B", "C"} + assert inv["primary_proposal_id"] == "001" + assert inv["primary_experiment_id"] == "00" + assert set(inv["contributing_experiment_ids"]) == {"10", "20", "00"} + assert set(inv["contributing_proposal_ids"]) == {"30", "001"} diff --git a/dkist/io/asdf/generator/tests/test_transforms.py b/dkist/io/asdf/generator/tests/test_transforms.py index 62b6e40a..21c650cd 100644 --- a/dkist/io/asdf/generator/tests/test_transforms.py +++ b/dkist/io/asdf/generator/tests/test_transforms.py @@ -42,7 +42,7 @@ def test_input_name_ordering(wcs): def test_output_name_ordering(wcs): - allowed_world_names = (('latitude', 'longitude', 'wavelength', 'time', 'stokes'), + allowed_world_names = (('longitude', 'latitude', 'wavelength', 'time', 'stokes'), ('wavelength', 'latitude', 'longitude', 'time', 'stokes')) assert wcs.output_frame.axes_names in allowed_world_names diff --git a/dkist/io/asdf/generator/transforms.py b/dkist/io/asdf/generator/transforms.py index 6851d3f8..e31e0ae7 100644 --- a/dkist/io/asdf/generator/transforms.py +++ b/dkist/io/asdf/generator/transforms.py @@ -17,8 +17,8 @@ 'spatial_model_from_quantity', 'spatial_model_from_header'] -def spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, crval2, - projection='TAN'): +def spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, + crval2, lon_pole, projection='TAN'): """ Given quantity representations of a HPLx FITS WCS return a model for the spatial transform. @@ -33,7 +33,7 @@ def spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, crva scale = Multiply(cdelt1) & Multiply(cdelt2) rotu = AffineTransformation2D(pc, translation=(0, 0)*u.arcsec) tanu = projections[projection] - skyrotu = RotateNative2Celestial(crval1, crval2, 180*u.deg) + skyrotu = RotateNative2Celestial(crval1, crval2, lon_pole) return shiftu | scale | rotu | tanu | skyrotu @@ -70,22 +70,30 @@ def spatial_model_from_header(header): pc = np.matrix([[header[f'PC{lonind}_{lonind}'], header[f'PC{lonind}_{latind}']], [header[f'PC{latind}_{lonind}'], header[f'PC{latind}_{latind}']]]) * cunit1 - return spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, crval2, + lonpole = header.get("LONPOLE") + if not lonpole and latproj == "TAN": + lonpole = 180 + + if not lonpole: + raise ValueError("LONPOLE not specified and not known for projection {latproj}") + + return spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, + crval1, crval2, lonpole * u.deg, projection=latproj) @u.quantity_input def linear_spectral_model(spectral_width: u.nm, reference_val: u.nm): """ - A linear model in a spectral dimension. The reference pixel is always 0. + Linear model in a spectral dimension. The reference pixel is always 0. """ - return Linear1D(slope=spectral_width/(1*u.pix), intercept=reference_val) + return Linear1D(slope=spectral_width / (1 * u.pix), intercept=reference_val) @u.quantity_input def linear_time_model(cadence: u.s, reference_val: u.s = 0*u.s): """ - A linear model in a temporal dimension. The reference pixel is always 0. + Linear model in a temporal dimension. The reference pixel is always 0. """ if not reference_val: reference_val = 0 * cadence.unit @@ -165,12 +173,13 @@ def __init__(self, headers): self.header = headers[0] # Reshape the headers to match the Dataset shape, so we can extract headers along various axes. - shape = tuple((self.header[f'DNAXIS{n}'] for n in range(self.header['DNAXIS'], - self.header['DAAXES'], -1))) + shape = tuple(self.header[f'DNAXIS{n}'] for n in range(self.header['DNAXIS'], + self.header['DAAXES'], -1)) arr_headers = np.empty(shape, dtype=object) for i in range(arr_headers.size): arr_headers.flat[i] = headers[i] + self.pixel_shape = tuple(self.header[f'DNAXIS{n}'] for n in range(1, self.header['DNAXIS'] + 1)) self.headers = arr_headers self.reset() self._build() @@ -190,14 +199,17 @@ def pixel_frame(self): @property def gwcs(self): """ - A `gwcs.WCS` object representing these headers. + `gwcs.WCS` object representing these headers. """ world_frame = cf.CompositeFrame(self.frames) - return gwcs.WCS(forward_transform=self.transform, - input_frame=self.pixel_frame, - output_frame=world_frame) + out_wcs = gwcs.WCS(forward_transform=self.transform, + input_frame=self.pixel_frame, + output_frame=world_frame) + out_wcs.pixel_shape = self.pixel_shape + out_wcs.array_shape = self.pixel_shape[::-1] + return out_wcs @property def frames(self): diff --git a/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml b/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml index b6421e6d..a2ebf789 100644 --- a/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml +++ b/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml @@ -30,10 +30,12 @@ properties: properties: asdf_object_key: type: string - browse_movie_object_key: - type: string - browse_movie_url: - type: string + bounding_box: + type: array + items: + type: array + items: + type: number bucket: type: string contributing_experiment_ids: @@ -44,12 +46,14 @@ properties: type: array items: type: string + create_date: + $ref: "tag:stsci.edu:asdf/time/time-1.1.0" dataset_id: type: string dataset_inventory_id: type: integer dataset_size: - type: integer + type: number end_time: $ref: "tag:stsci.edu:asdf/time/time-1.1.0" exposure_time: @@ -57,9 +61,7 @@ properties: filter_wavelengths: type: array items: - type: string - frame_count: - type: integer + type: number has_all_stokes: type: boolean instrument_name: @@ -86,10 +88,14 @@ properties: type: integer start_time: $ref: "tag:stsci.edu:asdf/time/time-1.1.0" - #stokes_parameters: - # type: string + stokes_parameters: + type: array + items: + type: string target_type: - type: string + type: array + items: + type: string wavelength_max: type: number wavelength_min: @@ -97,18 +103,17 @@ properties: additionalProperties: false # required: # - asdf_object_key - # - browse_movie_object_key - # - browse_movie_url + # - bounding_box # - bucket # - contributing_experiment_ids # - contributing_proposal_ids + # - create_date # - dataset_id # - dataset_inventory_id # - dataset_size # - end_time # - exposure_time # - filter_wavelengths - # - frame_count # - has_all_stokes # - instrument_name # - observables @@ -121,6 +126,7 @@ properties: # - recipe_instance_id # - recipe_run_id # - start_time + # - stokes_parameters # - target_type # - wavelength_max # - wavelength_min @@ -131,10 +137,6 @@ properties: unit: $ref: "tag:stsci.edu:asdf/unit/unit-1.0.0" - # uncertainty: - # type: object - - required: [data, headers, wcs] allowAdditionalProperties: true ... diff --git a/tox.ini b/tox.ini index e519653c..c6fd8e8f 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ setenv = PYTEST_COMMAND = pytest --cov=dkist --cov-config={toxinidir}/setup.cfg --verbose deps = git+https://github.com/Cadair/ndcube@wcsaxes_only_plotting - git+https://github.com/Cadair/sunpy@attrs_namespace_registration#egg=sunpy[all] + git+https://github.com/sunpy/sunpy#egg=sunpy[all] extras = tests commands = {env:PYTEST_COMMAND} {posargs}