diff --git a/changelog/76.feature.rst b/changelog/76.feature.rst
new file mode 100644
index 00000000..b19f3633
--- /dev/null
+++ b/changelog/76.feature.rst
@@ -0,0 +1,2 @@
+Implement correct extraction of dataset inventory from headers and gwcs. Also
+updates some data to be closer to the in progress outgoing header spec (214)
diff --git a/dkist/conftest.py b/dkist/conftest.py
index 24b9865d..03b0d96b 100644
--- a/dkist/conftest.py
+++ b/dkist/conftest.py
@@ -12,6 +12,7 @@
 
 from dkist.dataset import Dataset
 from dkist.io import AstropyFITSLoader, DaskFITSArrayContainer
+from dkist.io.asdf.generator.transforms import generate_lookup_table
 
 
 @pytest.fixture
@@ -28,13 +29,17 @@ def identity_gwcs():
     """
     identity = m.Multiply(1*u.arcsec/u.pixel) & m.Multiply(1*u.arcsec/u.pixel)
     sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
-                                  reference_frame=Helioprojective(obstime="2018-01-01"))
+                                  reference_frame=Helioprojective(obstime="2018-01-01"),
+                                  unit=(u.arcsec, u.arcsec))
     detector_frame = cf.CoordinateFrame(name="detector", naxes=2,
                                         axes_order=(0, 1),
                                         axes_type=("pixel", "pixel"),
                                         axes_names=("x", "y"),
                                         unit=(u.pix, u.pix))
-    return gwcs.wcs.WCS(forward_transform=identity, output_frame=sky_frame, input_frame=detector_frame)
+    wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=sky_frame, input_frame=detector_frame)
+    wcs.pixel_shape = (10, 20)
+    wcs.array_shape = wcs.pixel_shape[::-1]
+    return wcs
 
 
 @pytest.fixture
@@ -48,7 +53,8 @@ def identity_gwcs_3d():
 
     sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
                                   reference_frame=Helioprojective(obstime="2018-01-01"),
-                                  axes_names=("longitude", "latitude"))
+                                  axes_names=("longitude", "latitude"),
+                                  unit=(u.arcsec, u.arcsec))
     wave_frame = cf.SpectralFrame(axes_order=(2, ), unit=u.nm, axes_names=("wavelength",))
 
     frame = cf.CompositeFrame([sky_frame, wave_frame])
@@ -58,7 +64,39 @@ def identity_gwcs_3d():
                                         axes_type=("pixel", "pixel", "pixel"),
                                         axes_names=("x", "y", "z"), unit=(u.pix, u.pix, u.pix))
 
-    return gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
+    wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
+    wcs.pixel_shape = (10, 20, 30)
+    wcs.array_shape = wcs.pixel_shape[::-1]
+
+    return wcs
+
+
+@pytest.fixture
+def identity_gwcs_3d_temporal():
+    """
+    A simple 1-1 gwcs that converts from pixels to arcseconds
+    """
+    identity = (m.Multiply(1 * u.arcsec / u.pixel) &
+                m.Multiply(1 * u.arcsec / u.pixel) &
+                m.Multiply(1 * u.s / u.pixel))
+
+    sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
+                                  reference_frame=Helioprojective(obstime="2018-01-01"),
+                                  axes_names=("longitude", "latitude"),
+                                  unit=(u.arcsec, u.arcsec))
+    time_frame = cf.TemporalFrame(Time("2020-01-01T00:00", format="isot", scale="utc"),
+                                  axes_order=(2,), unit=u.s)
+
+    frame = cf.CompositeFrame([sky_frame, time_frame])
+
+    detector_frame = cf.CoordinateFrame(name="detector", naxes=3,
+                                        axes_order=(0, 1, 2),
+                                        axes_type=("pixel", "pixel", "pixel"),
+                                        axes_names=("x", "y", "z"), unit=(u.pix, u.pix, u.pix))
+    wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
+    wcs.pixel_shape = (10, 20, 30)
+    wcs.array_shape = wcs.pixel_shape[::-1]
+    return wcs
 
 
 @pytest.fixture
@@ -66,21 +104,48 @@ def identity_gwcs_4d():
     """
     A simple 1-1 gwcs that converts from pixels to arcseconds
     """
-    identity = (m.Multiply(1*u.arcsec/u.pixel) & m.Multiply(1*u.arcsec/u.pixel) &
-                m.Multiply(1*u.nm/u.pixel) & m.Multiply(1*u.nm/u.pixel))
+    identity = (m.Multiply(1 * u.arcsec/u.pixel) & m.Multiply(1 * u.arcsec/u.pixel) &
+                m.Multiply(1 * u.nm/u.pixel) & m.Multiply(1 * u.s/u.pixel))
     sky_frame = cf.CelestialFrame(axes_order=(0, 1), name='helioprojective',
-                                  reference_frame=Helioprojective(obstime="2018-01-01"))
+                                  reference_frame=Helioprojective(obstime="2018-01-01"),
+                                  unit=(u.arcsec, u.arcsec))
     wave_frame = cf.SpectralFrame(axes_order=(2, ), unit=u.nm)
-    time_frame = cf.TemporalFrame(Time([], format="isot", scale="utc"), axes_order=(3, ), unit=u.s)
+    time_frame = cf.TemporalFrame(Time("2020-01-01T00:00", format="isot", scale="utc"), axes_order=(3, ), unit=u.s)
 
     frame = cf.CompositeFrame([sky_frame, wave_frame, time_frame])
 
     detector_frame = cf.CoordinateFrame(name="detector", naxes=4,
                                         axes_order=(0, 1, 2, 3),
                                         axes_type=("pixel", "pixel", "pixel", "pixel"),
-                                        axes_names=("x", "y", "z", "s"), unit=(u.pix, u.pix, u.pix, u.pix))
+                                        axes_names=("x", "y", "z", "s"),
+                                        unit=(u.pix, u.pix, u.pix, u.pix))
+
+    wcs = gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
+    wcs.pixel_shape = (10, 20, 30, 40)
+    wcs.array_shape = wcs.pixel_shape[::-1]
 
-    return gwcs.wcs.WCS(forward_transform=identity, output_frame=frame, input_frame=detector_frame)
+    return wcs
+
+
+@pytest.fixture
+def identity_gwcs_5d_stokes(identity_gwcs_4d):
+    stokes_frame = cf.StokesFrame(axes_order=(4,))
+    stokes_model = generate_lookup_table([0, 1, 2, 3] * u.one, interpolation='nearest')
+    transform = identity_gwcs_4d.forward_transform
+    frame = cf.CompositeFrame(identity_gwcs_4d.output_frame.frames + [stokes_frame])
+
+    detector_frame = cf.CoordinateFrame(name="detector", naxes=5,
+                                        axes_order=(0, 1, 2, 3, 4),
+                                        axes_type=("pixel", "pixel", "pixel", "pixel", "pixel"),
+                                        axes_names=("x", "y", "z", "t", "s"),
+                                        unit=(u.pix, u.pix, u.pix, u.pix, u.pix))
+
+    wcs = gwcs.wcs.WCS(forward_transform=transform & stokes_model, output_frame=frame,
+                       input_frame=detector_frame)
+    wcs.pixel_shape = (10, 20, 30, 40, 4)
+    wcs.array_shape = wcs.pixel_shape[::-1]
+
+    return wcs
 
 
 @pytest.fixture
@@ -88,13 +153,16 @@ def dataset(array, identity_gwcs):
     meta = {'bucket': 'data',
             'dataset_id': 'test_dataset',
             'asdf_object_key': 'test_dataset.asdf'}
+
+    identity_gwcs.array_shape = array.shape
+    identity_gwcs.pixel_shape = array.shape[::-1]
     ds = Dataset(array, wcs=identity_gwcs, meta=meta, headers=Table())
     # Sanity checks
     assert ds.data is array
     assert ds.wcs is identity_gwcs
 
     ds._array_container = DaskFITSArrayContainer(['test1.fits'], 0, 'float', array.shape,
-                                                loader=AstropyFITSLoader)
+                                                 loader=AstropyFITSLoader)
 
     return ds
 
@@ -105,6 +173,9 @@ def dataset_3d(identity_gwcs_3d):
     x = np.ones(shape)
     array = da.from_array(x, tuple(shape))
 
+    identity_gwcs_3d.pixel_shape = array.shape[::-1]
+    identity_gwcs_3d.array_shape = array.shape
+
     return Dataset(array, wcs=identity_gwcs_3d)
 
 
@@ -114,4 +185,7 @@ def dataset_4d(identity_gwcs_4d):
     x = np.ones(shape)
     array = da.from_array(x, tuple(shape))
 
+    identity_gwcs_4d.pixel_shape = array.shape[::-1]
+    identity_gwcs_4d.array_shape = array.shape
+
     return Dataset(array, wcs=identity_gwcs_4d)
diff --git a/dkist/data/test/datasettestfiles/visp.zip b/dkist/data/test/datasettestfiles/visp.zip
index ad1aee1d..1fa76300 100644
Binary files a/dkist/data/test/datasettestfiles/visp.zip and b/dkist/data/test/datasettestfiles/visp.zip differ
diff --git a/dkist/data/test/datasettestfiles/vtf.zip b/dkist/data/test/datasettestfiles/vtf.zip
index 915708f9..d25ec169 100644
Binary files a/dkist/data/test/datasettestfiles/vtf.zip and b/dkist/data/test/datasettestfiles/vtf.zip differ
diff --git a/dkist/dataset/dataset.py b/dkist/dataset/dataset.py
index 894edf48..24d403db 100644
--- a/dkist/dataset/dataset.py
+++ b/dkist/dataset/dataset.py
@@ -80,8 +80,14 @@ def __init__(self, data, wcs, uncertainty=None, mask=None, meta=None,
 
         if isinstance(wcs, gwcs.WCS):
             # Set the array shape to be that of the data.
-            wcs.array_shape = data.shape
-            wcs.pixel_shape = data.shape[::-1]
+            if wcs.array_shape is None:
+                wcs.array_shape = data.shape
+            if wcs.pixel_shape is None:
+                wcs.pixel_shape = data.shape[::-1]
+
+            if (wcs.pixel_shape != data.shape[::-1] or wcs.array_shape != data.shape):
+                raise ValueError("The pixel and array shape on the WCS object "
+                                 "do not match the given array.")
 
         if headers is not None and not isinstance(headers, astropy.table.Table):
             raise ValueError("The headers keyword argument must be an Astropy Table instance.")
diff --git a/dkist/io/asdf/generator/generator.py b/dkist/io/asdf/generator/generator.py
index 20e56c2d..8eb9b671 100644
--- a/dkist/io/asdf/generator/generator.py
+++ b/dkist/io/asdf/generator/generator.py
@@ -8,8 +8,8 @@
 
 from dkist.dataset import Dataset
 from dkist.io import AstropyFITSLoader, DaskFITSArrayContainer
-from dkist.io.asdf.generator.helpers import headers_from_filenames, preprocess_headers
-from dkist.io.asdf.generator.simulated_data import generate_datset_inventory_from_headers
+from dkist.io.asdf.generator.helpers import (extract_inventory, headers_from_filenames,
+                                             preprocess_headers)
 from dkist.io.asdf.generator.transforms import TransformBuilder
 
 try:
@@ -46,9 +46,9 @@ def references_from_filenames(filenames, headers, array_shape, hdu_index=0, rela
     Returns
     -------
     `dkist.io.DaskFITSArrayContainer`
-        A container that represents a set of FITS files, and can generate a `dask.array.Array` from them.
+        A container that represents a set of FITS files, and can generate a
+        `dask.array.Array` from them.
     """
-
     filenames = np.asanyarray(filenames)
     filepaths = np.empty(array_shape, dtype=object)
     if filenames.size != filepaths.size:
@@ -79,39 +79,53 @@ def references_from_filenames(filenames, headers, array_shape, hdu_index=0, rela
         raise ValueError("Not all the shapes of these files are the same")
     shape = list(shape)[0]
 
-    return DaskFITSArrayContainer(filepaths.tolist(), hdu_index, dtype, shape, loader=AstropyFITSLoader)
+    return DaskFITSArrayContainer(filepaths.tolist(), hdu_index, dtype, shape,
+                                  loader=AstropyFITSLoader)
 
 
-def asdf_tree_from_filenames(filenames, asdf_filename, inventory=None, hdu=0,
+def asdf_tree_from_filenames(filenames, headers=None, inventory=None, hdu=0,
                              relative_to=None, extra_inventory=None):
     """
     Build a DKIST asdf tree from a list of (unsorted) filenames.
 
     Parameters
     ----------
-
     filenames : `list`
         The filenames to process into a DKIST asdf dataset.
 
+    headers : `list`
+        The FITS headers if already known. If not specified will be read from
+        filenames.
+
+    inventory: `dict`
+        The frame inventory to put in the tree, if not specified a dummy one
+        will be generated.
+
     hdu : `int`
-        The HDU to read from the FITS files.
+        The HDU to read the headers from and reference the data to.
+
+    relative_to : `pathlib.Path`
+        The path to reference the FITS files to inside the asdf. If not
+        specified will be local to the asdf (i.e. ``./``).
+
+    extra_inventory : `dict`
+        An extra set of inventory to override the generated one.
     """
     # In case filenames is a generator we cast to list.
     filenames = list(filenames)
 
     # headers is an iterator
-    headers = headers_from_filenames(filenames, hdu=hdu)
+    if not headers:
+        headers = headers_from_filenames(filenames, hdu=hdu)
 
     table_headers, sorted_filenames, sorted_headers = preprocess_headers(headers, filenames)
 
-    if not inventory:
-        inventory = generate_datset_inventory_from_headers(table_headers, asdf_filename)
-    if extra_inventory:
-        inventory.update(extra_inventory)
-
-
     ds_wcs = TransformBuilder(sorted_headers).gwcs
 
+    if extra_inventory is None:
+        extra_inventory = {}
+    inventory = extract_inventory(table_headers, ds_wcs, **extra_inventory)
+
     # Get the array shape
     shape = tuple((headers[0][f'DNAXIS{n}'] for n in range(headers[0]['DNAXIS'],
                                                            headers[0]['DAAXES'], -1)))
@@ -158,7 +172,7 @@ def dataset_from_fits(path, asdf_filename, inventory=None, hdu=0, relative_to=No
 
     files = path.glob("*fits")
 
-    tree = asdf_tree_from_filenames(list(files), asdf_filename, inventory=inventory,
+    tree = asdf_tree_from_filenames(list(files), inventory=inventory,
                                     hdu=hdu, relative_to=relative_to)
 
     with resources.path("dkist.io", "level_1_dataset_schema.yaml") as schema_path:
diff --git a/dkist/io/asdf/generator/helpers.py b/dkist/io/asdf/generator/helpers.py
index 621d530b..7b35e553 100644
--- a/dkist/io/asdf/generator/helpers.py
+++ b/dkist/io/asdf/generator/helpers.py
@@ -1,9 +1,13 @@
 """
 Helper functions for parsing files and processing headers.
 """
+import re
+from functools import partial
 
 import numpy as np
+import scipy.stats
 
+import gwcs.coordinate_frames as cf
 from astropy.io import fits
 from astropy.table import Table
 
@@ -13,7 +17,7 @@
 
 def headers_from_filenames(filenames, hdu=0):
     """
-    A generator to get the headers from filenames.
+    Generator to get the headers from filenames.
     """
     return [dict(fits.getheader(fname, ext=hdu)) for fname in filenames]
 
@@ -24,12 +28,13 @@ def table_from_headers(headers):
 
 def validate_headers(table_headers):
     """
-    Given a bunch of headers, validate that they form a coherent set. This
-    function also adds the headers to a list as they are read from the file.
+    Given a bunch of headers, validate that they form a coherent set.
+
+    This function also adds the headers to a list as they are read from the
+    file.
 
     Parameters
     ----------
-
     headers :  iterator
         An iterator of headers.
 
@@ -40,9 +45,8 @@ def validate_headers(table_headers):
     """
     t = table_headers
 
-    """
-    Let's do roughly the minimal amount of verification here.
-    """
+    # Let's do roughly the minimal amount of verification here for construction
+    # of the WCS. Validation for inventory records is done independently.
 
     # For some keys all the values must be the same
     same_keys = ['NAXIS', 'DNAXIS']
@@ -92,3 +96,164 @@ def preprocess_headers(headers, filenames):
     table_headers.remove_columns(["headers", "filenames"])
 
     return table_headers, sorted_filenames, sorted_headers
+
+
+def _inventory_from_wcs(wcs):
+    """
+    Parse the gWCS and extract any inventory keys needed.
+
+    This assumes all WCSes have a celestial and temporal component.
+
+    Keys for wavelength will not be added if there is no spectral component,
+    stokes keys are always added (defaulting to just I if not in the WCS).
+    """
+    if not isinstance(wcs.output_frame, cf.CompositeFrame):
+        raise TypeError("Can't parse this WCS as expected.")  # pragma: no cover
+
+    bottom_left_array = [0] * wcs.pixel_n_dim
+    top_right_array = np.array(wcs.pixel_shape) - 1
+
+    bottom_left_world = wcs.array_index_to_world_values(*bottom_left_array)
+    top_right_world = wcs.array_index_to_world_values(*top_right_array)
+
+    time_frame = list(filter(lambda f: isinstance(f, cf.TemporalFrame), wcs.output_frame.frames))[0]
+    temporal_axes = time_frame.axes_order[0] - wcs.pixel_n_dim
+    temporal_unit = time_frame.unit[0]
+    start_time = time_frame.reference_frame + bottom_left_world[temporal_axes] * temporal_unit
+    end_time = time_frame.reference_frame + top_right_world[temporal_axes] * temporal_unit
+    celestial_frame = list(filter(lambda f: isinstance(f, cf.CelestialFrame), wcs.output_frame.frames))[0]
+    lon_axes = celestial_frame.axes_order[0] - wcs.pixel_n_dim
+    lat_axes = celestial_frame.axes_order[1] - wcs.pixel_n_dim
+
+    bounding_box = ((bottom_left_world[lon_axes], bottom_left_world[lat_axes]),
+                    (top_right_world[lon_axes], top_right_world[lat_axes]))
+
+    inventory = {'bounding_box': bounding_box,
+                 'start_time': start_time,
+                 'end_time': end_time}
+
+    spec_frame = list(filter(lambda f: isinstance(f, cf.SpectralFrame), wcs.output_frame.frames))
+    if spec_frame:
+        spectral_axes = spec_frame[0].axes_order[0] - wcs.pixel_n_dim
+        inventory["wavelength_min"] = bottom_left_world[spectral_axes]
+        inventory["wavelength_max"] = top_right_world[spectral_axes]
+
+    stokes_frame = list(filter(lambda f: isinstance(f, cf.StokesFrame), wcs.output_frame.frames))
+    if stokes_frame:
+        stokes_axes = stokes_frame[0].axes_order[0]
+        pixel_coords = [0] * wcs.pixel_n_dim
+        pixel_coords[stokes_axes] = (0, 1, 2, 3)
+        all_stokes = wcs.pixel_to_world(*np.broadcast_arrays(*pixel_coords))
+        stokes_components = all_stokes[stokes_axes - 1]
+
+        inventory["stokes_parameters"] = list(map(str, stokes_components))
+        inventory["has_all_stokes"] = len(stokes_components) > 1
+
+    else:
+        inventory["stokes_parameters"] = ['I']
+        inventory["has_all_stokes"] = False
+
+    return inventory
+
+
+def _get_unique(column, singular=False):
+    uniq = list(set(column))
+    if singular:
+        if len(uniq) == 1:
+            if isinstance(uniq[0], np.str_):
+                return str(uniq[0])
+            return uniq[0]
+        else:
+            raise ValueError("Column does not result in a singular unique value")
+
+    return uniq
+
+
+def _get_number_apply(column, func):
+    return func(column)
+
+
+def _get_keys_matching(headers, pattern):
+    """
+    Get all the values from all the keys matching the given re pattern.
+
+    Assumes that each matching column is singular (all values are the same)
+
+    Parameters
+    ----------
+    headers : `astropy.table.Table`
+        All the headers
+
+    pattern : `str`
+        A regex pattern
+    """
+    results = []
+
+    prog = re.compile(pattern)
+    for key in headers.colnames:
+        if prog.match(key):
+            results.append(_get_unique(headers[key], singular=True))
+    return list(set(results))
+
+
+def _inventory_from_headers(headers):
+    inventory = {}
+
+    mode = partial(scipy.stats.mode, axis=None, nan_policy="raise")
+
+    # These keys might get updated by parsing the gwcs object.
+    inventory["wavelength_min"] = inventory["wavelength_max"] = _get_unique(headers['LINEWAV'])[0]
+
+    inventory["exposure_time"] = _get_number_apply(headers['FPA_EXPO'], mode).mode[0]
+    inventory["filter_wavelengths"] = _get_unique(headers['LINEWAV'])
+    inventory["instrument_name"] = _get_unique(headers['INSTRUME'], singular=True)
+    inventory["observables"] = []  # _get_unique(headers[''])
+    inventory["recipe_id"] = int(_get_unique(headers['RECIPEID'], singular=True))
+    inventory["recipe_instance_id"] = int(_get_unique(headers['RINSTID'], singular=True))
+    inventory["recipe_run_id"] = int(_get_unique(headers['RRUNID'], singular=True))
+    inventory["target_type"] = list(map(str, _get_unique(headers['OBJECT'])))
+    inventory["primary_proposal_id"] = _get_unique(headers['PROPID'], singular=True)
+    inventory["primary_experiment_id"] = _get_unique(headers['EXPERID'], singular=True)
+    inventory["dataset_size"] = _get_number_apply(headers['FRAMEVOL'], np.sum)
+    inventory["contributing_experiment_ids"] = list(map(str, (_get_keys_matching(headers, r"EXPERID\d\d$") +
+                                                              [_get_unique(headers["EXPERID"], singular=True)])))
+    inventory["contributing_proposal_ids"] = list(map(str, (_get_keys_matching(headers, r"PROPID\d\d$") +
+                                                            [_get_unique(headers["PROPID"], singular=True)])))
+
+    friedval = np.nan
+    if 'FRIEDVAL' in headers.colnames:
+        friedval = _get_number_apply(headers['FRIEDVAL'], np.mean)
+
+    inventory["quality_average_fried_parameter"] = friedval
+
+    polacc = np.nan
+    if 'POL_ACC' in headers.colnames:
+        polacc = _get_number_apply(headers['POL_ACC'], np.mean)
+    inventory["quality_average_polarimetric_accuracy"] = polacc
+
+    return inventory
+
+
+def extract_inventory(headers, wcs, **inventory):
+    """
+    Generate the inventory record for an asdf file from an asdf tree.
+
+    Parameters
+    ----------
+    tree : `dict`
+        The incomplete asdf tree. Needs to contain the dataset object.
+
+    inventory : `dict`
+        Additional inventory keys that can not be computed from the headers or the WCS.
+
+
+    Returns
+    -------
+    tree: `dict`
+        The updated tree with the inventory.
+
+    """
+    # The headers will populate passband info for VBI and then wcs will
+    # override it if there is a wavelength axis in the dataset,
+    # any supplied kwargs override things extracted from dataset.
+    return {**_inventory_from_headers(headers), **_inventory_from_wcs(wcs), **inventory}
diff --git a/dkist/io/asdf/generator/simulated_data.py b/dkist/io/asdf/generator/simulated_data.py
deleted file mode 100644
index 60d356f3..00000000
--- a/dkist/io/asdf/generator/simulated_data.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""
-Functions and helpers relating to working with simulated data.
-"""
-import random
-import string
-
-from astropy.time import Time
-from sunpy.time import parse_time
-
-__all__ = ['generate_datset_inventory_from_headers']
-
-
-def _gen_type(gen_type, max_int=1e6, max_float=1e6, len_str=30):
-    if gen_type is bool:
-        return bool(random.randint(0, 1))
-    elif gen_type is int:
-        return random.randint(0, max_int)
-    elif gen_type is float:
-        return random.random() * max_float
-    elif gen_type is list:
-        return [_gen_type(str)]
-    elif gen_type is Time:
-        return parse_time("now")
-    elif gen_type is str:
-        return ''.join(
-            random.choice(string.ascii_uppercase + string.digits) for _ in range(len_str))
-    else:
-        raise ValueError("Type {} is not supported".format(gen_type))  # pragma: no cover
-
-
-def generate_datset_inventory_from_headers(headers, asdf_name):
-    """
-    Generate a dummy dataset inventory from headers.
-
-    .. note::
-       This is just for test data, it should not be used on real data.
-
-    Parameters
-    ----------
-
-    headers: `astropy.table.Table`
-    asdf_name: `str`
-
-    """
-
-    schema = [
-        ('asdf_object_key', str),
-        ('browse_movie_object_key', str),
-        ('browse_movie_url', str),
-        ('bucket', str),
-        ('contributing_experiment_ids', list),
-        ('contributing_proposal_ids', list),
-        ('dataset_id', str),
-        ('dataset_inventory_id', int),
-        ('dataset_size', int),
-        ('end_time', Time),
-        ('exposure_time', float),
-        ('filter_wavelengths', list),
-        ('frame_count', int),
-        ('has_all_stokes', bool),
-        ('instrument_name', str),
-        ('observables', list),
-        ('original_frame_count', int),
-        ('primary_experiment_id', str),
-        ('primary_proposal_id', str),
-        ('quality_average_fried_parameter', float),
-        ('quality_average_polarimetric_accuracy', float),
-        ('recipe_id', int),
-        ('recipe_instance_id', int),
-        ('recipe_run_id', int),
-        ('start_time', Time),
-        # ('stokes_parameters', str),
-        ('target_type', str),
-        ('wavelength_max', float),
-        ('wavelength_min', float)
-    ]
-
-    header_mapping = {
-        'start_time': 'DATE-BGN',
-        'end_time': 'DATE-END',
-        'filter_wavelengths': 'WAVELNGTH'}
-
-    constants = {
-        'frame_count': len(headers),
-        'bucket': 'data',
-        'asdf_object_key': str(asdf_name)
-        }
-
-    output = {}
-
-    for key, ktype in schema:
-        if key in header_mapping:
-            hdict = dict(zip(headers.colnames, headers[0]))
-            output[key] = ktype(hdict.get(header_mapping[key], _gen_type(ktype)))
-        else:
-            output[key] = _gen_type(ktype)
-
-    output.update(constants)
-    return output
diff --git a/dkist/io/asdf/generator/tests/test_generator.py b/dkist/io/asdf/generator/tests/test_generator.py
index 2896515f..5791866a 100644
--- a/dkist/io/asdf/generator/tests/test_generator.py
+++ b/dkist/io/asdf/generator/tests/test_generator.py
@@ -28,7 +28,7 @@ def test_array_container_shape(header_filenames):
 
 
 def test_asdf_tree(header_filenames):
-    tree = asdf_tree_from_filenames(header_filenames, "test_file.asdf")
+    tree = asdf_tree_from_filenames(header_filenames)
     assert isinstance(tree, dict)
 
 
diff --git a/dkist/io/asdf/generator/tests/test_helpers.py b/dkist/io/asdf/generator/tests/test_helpers.py
index db2689ee..4ab01ef5 100644
--- a/dkist/io/asdf/generator/tests/test_helpers.py
+++ b/dkist/io/asdf/generator/tests/test_helpers.py
@@ -3,8 +3,14 @@
 import numpy as np
 import pytest
 
+import astropy.units as u
+import gwcs.coordinate_frames as cf
+from astropy.table import Table
+
 from dkist.io.asdf.generator.generator import references_from_filenames
-from dkist.io.asdf.generator.helpers import headers_from_filenames
+from dkist.io.asdf.generator.helpers import (_get_unique, _inventory_from_headers,
+                                             _inventory_from_wcs, extract_inventory,
+                                             headers_from_filenames)
 
 
 def test_references_from_filesnames_shape_error(header_filenames):
@@ -25,3 +31,159 @@ def test_references_from_filenames(header_filenames):
 
     for ref in refs.fileuris:
         assert base not in ref
+
+
+@pytest.fixture
+def headers_inventory_214():
+    """A minimal collection of headers to test inventory creation."""  # noqa
+    return Table({
+        'LINEWAV': [550, 550, 550],
+        'FPA_EXPO': [10, 20, 30],
+        'INSTRUME': ["VBI", "VBI", "VBI"],
+        'FRIEDVAL': [1, 2, 3],
+        'POL_ACC': [500, 500, 500],
+        'RECIPEID': [10, 10, 10],
+        'RINSTID': [20, 20, 20],
+        'RRUNID': [30, 30, 30],
+        'OBJECT': ["A", "B", "C"],
+        'FRAMEVOL': [100, 120, 130],
+        'EXPERID': ["00", "00", "00"],
+        'EXPERID01': ["10", "10", "10"],
+        'EXPERID02': ["20", "20", "20"],
+        'PROPID': ["001", "001", "001"],
+        'PROPID01': ["30", "30", "30"]
+    })
+
+
+def test_valid_inventory(headers_inventory_214):
+    inv = _inventory_from_headers(headers_inventory_214)
+    assert isinstance(inv, dict)
+
+    assert inv["wavelength_min"] == inv["wavelength_max"] == 550
+    assert inv["filter_wavelengths"] == [550]
+    assert inv["instrument_name"] == "VBI"
+    assert inv["observables"] == []
+    assert inv["quality_average_fried_parameter"] == np.mean([1, 2, 3])
+    assert inv["quality_average_polarimetric_accuracy"] == 500
+    assert inv["recipe_id"] == 10
+    assert inv["recipe_instance_id"] == 20
+    assert inv["recipe_run_id"] == 30
+    assert set(inv["target_type"]) == {"A", "B", "C"}
+    assert inv["primary_proposal_id"] == "001"
+    assert inv["primary_experiment_id"] == "00"
+    assert set(inv["contributing_experiment_ids"]) == {"10", "20", "00"}
+    assert set(inv["contributing_proposal_ids"]) == {"30", "001"}
+
+
+def test_inventory_from_wcs(identity_gwcs_4d):
+    inv = _inventory_from_wcs(identity_gwcs_4d)
+    time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame),
+                             identity_gwcs_4d.output_frame.frames))[0]
+    shape = identity_gwcs_4d.array_shape
+
+    # This test transform is just 0 - n_pixel in all dimensions
+    assert inv["wavelength_min"] == 0
+    assert inv["wavelength_max"] == shape[2] - 1
+    assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1))
+    assert inv["start_time"] == time_frame.reference_frame
+    assert inv["end_time"] == (time_frame.reference_frame + (shape[3] - 1) * u.s)
+    assert inv["stokes_parameters"] == ["I"]
+    assert inv["has_all_stokes"] is False
+
+
+def test_inventory_from_wcs_stokes(identity_gwcs_5d_stokes):
+    inv = _inventory_from_wcs(identity_gwcs_5d_stokes)
+    time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame),
+                             identity_gwcs_5d_stokes.output_frame.frames))[0]
+    shape = identity_gwcs_5d_stokes.array_shape
+
+    # This test transform is just 0 - n_pixel in all dimensions
+    assert inv["wavelength_min"] == 0
+    assert inv["wavelength_max"] == shape[2] - 1
+    assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1))
+    assert inv["start_time"] == time_frame.reference_frame
+    assert inv["end_time"] == (time_frame.reference_frame + (shape[3] - 1) * u.s)
+    assert inv["stokes_parameters"] == ["I", "Q", "U", "V"]
+    assert inv["has_all_stokes"] is True
+
+
+def test_inventory_from_wcs_2d(identity_gwcs_3d_temporal):
+    inv = _inventory_from_wcs(identity_gwcs_3d_temporal)
+    time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame),
+                             identity_gwcs_3d_temporal.output_frame.frames))[0]
+    shape = identity_gwcs_3d_temporal.array_shape
+
+    # This test transform is just 0 - n_pixel in all dimensions
+    assert "wavelength_min" not in inv
+    assert "wavelength_max" not in inv
+    assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1))
+    assert inv["start_time"] == time_frame.reference_frame
+    assert inv["end_time"] == (time_frame.reference_frame + (shape[2] - 1) * u.s)
+    assert inv["stokes_parameters"] == ["I"]
+    assert inv["has_all_stokes"] is False
+
+
+def test_unique_error():
+    with pytest.raises(ValueError):
+        _get_unique([1, 2, 3], singular=True)
+
+    assert _get_unique([1, 2, 3], singular=False)
+
+
+def test_extract_inventory(headers_inventory_214, identity_gwcs_4d):
+    inv = extract_inventory(headers_inventory_214, identity_gwcs_4d)
+
+    time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame),
+                             identity_gwcs_4d.output_frame.frames))[0]
+    shape = identity_gwcs_4d.array_shape
+
+    # This test transform is just 0 - n_pixel in all dimensions
+    assert inv["wavelength_min"] == 0
+    assert inv["wavelength_max"] == shape[2] - 1
+    assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1))
+    assert inv["start_time"] == time_frame.reference_frame
+    assert inv["end_time"] == (time_frame.reference_frame + (shape[3] - 1) * u.s)
+    assert inv["stokes_parameters"] == ["I"]
+    assert inv["has_all_stokes"] is False
+    assert inv["filter_wavelengths"] == [550]
+    assert inv["instrument_name"] == "VBI"
+    assert inv["observables"] == []
+    assert inv["quality_average_fried_parameter"] == np.mean([1, 2, 3])
+    assert inv["quality_average_polarimetric_accuracy"] == 500
+    assert inv["recipe_id"] == 10
+    assert inv["recipe_instance_id"] == 20
+    assert inv["recipe_run_id"] == 30
+    assert set(inv["target_type"]) == {"A", "B", "C"}
+    assert inv["primary_proposal_id"] == "001"
+    assert inv["primary_experiment_id"] == "00"
+    assert set(inv["contributing_experiment_ids"]) == {"10", "20", "00"}
+    assert set(inv["contributing_proposal_ids"]) == {"30", "001"}
+
+
+def test_extract_inventory_no_wave(headers_inventory_214, identity_gwcs_3d_temporal):
+    inv = extract_inventory(headers_inventory_214, identity_gwcs_3d_temporal)
+
+    time_frame = list(filter(lambda x: isinstance(x, cf.TemporalFrame),
+                             identity_gwcs_3d_temporal.output_frame.frames))[0]
+    shape = identity_gwcs_3d_temporal.array_shape
+
+    # This test transform is just 0 - n_pixel in all dimensions
+    assert inv["bounding_box"] == ((0, 0), (shape[0] - 1, shape[1] - 1))
+    assert inv["wavelength_min"] == inv["wavelength_max"] == 550
+    assert inv["start_time"] == time_frame.reference_frame
+    assert inv["end_time"] == (time_frame.reference_frame + (shape[2] - 1) * u.s)
+    assert inv["stokes_parameters"] == ["I"]
+    assert inv["has_all_stokes"] is False
+    assert inv["filter_wavelengths"] == [550]
+    assert inv["instrument_name"] == "VBI"
+    assert inv["observables"] == []
+    assert inv["quality_average_fried_parameter"] == np.mean([1, 2, 3])
+    assert inv["quality_average_polarimetric_accuracy"] == 500
+    assert inv["recipe_id"] == 10
+    assert inv["recipe_instance_id"] == 20
+    assert inv["recipe_run_id"] == 30
+    assert set(inv["target_type"]) == {"A", "B", "C"}
+    assert inv["primary_proposal_id"] == "001"
+    assert inv["primary_experiment_id"] == "00"
+    assert set(inv["contributing_experiment_ids"]) == {"10", "20", "00"}
+    assert set(inv["contributing_proposal_ids"]) == {"30", "001"}
diff --git a/dkist/io/asdf/generator/tests/test_transforms.py b/dkist/io/asdf/generator/tests/test_transforms.py
index 62b6e40a..21c650cd 100644
--- a/dkist/io/asdf/generator/tests/test_transforms.py
+++ b/dkist/io/asdf/generator/tests/test_transforms.py
@@ -42,7 +42,7 @@ def test_input_name_ordering(wcs):
 
 
 def test_output_name_ordering(wcs):
-    allowed_world_names = (('latitude', 'longitude', 'wavelength', 'time', 'stokes'),
+    allowed_world_names = (('longitude', 'latitude', 'wavelength', 'time', 'stokes'),
                            ('wavelength', 'latitude', 'longitude', 'time', 'stokes'))
     assert wcs.output_frame.axes_names in allowed_world_names
 
diff --git a/dkist/io/asdf/generator/transforms.py b/dkist/io/asdf/generator/transforms.py
index 6851d3f8..e31e0ae7 100644
--- a/dkist/io/asdf/generator/transforms.py
+++ b/dkist/io/asdf/generator/transforms.py
@@ -17,8 +17,8 @@
            'spatial_model_from_quantity', 'spatial_model_from_header']
 
 
-def spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, crval2,
-                                projection='TAN'):
+def spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1,
+                                crval2, lon_pole, projection='TAN'):
     """
     Given quantity representations of a HPLx FITS WCS return a model for the
     spatial transform.
@@ -33,7 +33,7 @@ def spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, crva
     scale = Multiply(cdelt1) & Multiply(cdelt2)
     rotu = AffineTransformation2D(pc, translation=(0, 0)*u.arcsec)
     tanu = projections[projection]
-    skyrotu = RotateNative2Celestial(crval1, crval2, 180*u.deg)
+    skyrotu = RotateNative2Celestial(crval1, crval2, lon_pole)
     return shiftu | scale | rotu | tanu | skyrotu
 
 
@@ -70,22 +70,30 @@ def spatial_model_from_header(header):
     pc = np.matrix([[header[f'PC{lonind}_{lonind}'], header[f'PC{lonind}_{latind}']],
                     [header[f'PC{latind}_{lonind}'], header[f'PC{latind}_{latind}']]]) * cunit1
 
-    return spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc, crval1, crval2,
+    lonpole = header.get("LONPOLE")
+    if not lonpole and latproj == "TAN":
+        lonpole = 180
+
+    if not lonpole:
+        raise ValueError("LONPOLE not specified and not known for projection {latproj}")
+
+    return spatial_model_from_quantity(crpix1, crpix2, cdelt1, cdelt2, pc,
+                                       crval1, crval2, lonpole * u.deg,
                                        projection=latproj)
 
 
 @u.quantity_input
 def linear_spectral_model(spectral_width: u.nm, reference_val: u.nm):
     """
-    A linear model in a spectral dimension. The reference pixel is always 0.
+    Linear model in a spectral dimension. The reference pixel is always 0.
     """
-    return Linear1D(slope=spectral_width/(1*u.pix), intercept=reference_val)
+    return Linear1D(slope=spectral_width / (1 * u.pix), intercept=reference_val)
 
 
 @u.quantity_input
 def linear_time_model(cadence: u.s, reference_val: u.s = 0*u.s):
     """
-    A linear model in a temporal dimension. The reference pixel is always 0.
+    Linear model in a temporal dimension. The reference pixel is always 0.
     """
     if not reference_val:
         reference_val = 0 * cadence.unit
@@ -165,12 +173,13 @@ def __init__(self, headers):
         self.header = headers[0]
 
         # Reshape the headers to match the Dataset shape, so we can extract headers along various axes.
-        shape = tuple((self.header[f'DNAXIS{n}'] for n in range(self.header['DNAXIS'],
-                                                                self.header['DAAXES'], -1)))
+        shape = tuple(self.header[f'DNAXIS{n}'] for n in range(self.header['DNAXIS'],
+                                                               self.header['DAAXES'], -1))
         arr_headers = np.empty(shape, dtype=object)
         for i in range(arr_headers.size):
             arr_headers.flat[i] = headers[i]
 
+        self.pixel_shape = tuple(self.header[f'DNAXIS{n}'] for n in range(1, self.header['DNAXIS'] + 1))
         self.headers = arr_headers
         self.reset()
         self._build()
@@ -190,14 +199,17 @@ def pixel_frame(self):
     @property
     def gwcs(self):
         """
-        A `gwcs.WCS` object representing these headers.
+        `gwcs.WCS` object representing these headers.
         """
         world_frame = cf.CompositeFrame(self.frames)
 
-        return gwcs.WCS(forward_transform=self.transform,
-                        input_frame=self.pixel_frame,
-                        output_frame=world_frame)
+        out_wcs = gwcs.WCS(forward_transform=self.transform,
+                           input_frame=self.pixel_frame,
+                           output_frame=world_frame)
+        out_wcs.pixel_shape = self.pixel_shape
+        out_wcs.array_shape = self.pixel_shape[::-1]
 
+        return out_wcs
 
     @property
     def frames(self):
diff --git a/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml b/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml
index b6421e6d..a2ebf789 100644
--- a/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml
+++ b/dkist/io/asdf/schemas/dkist.nso.edu/dkist/dataset-0.1.0.yaml
@@ -30,10 +30,12 @@ properties:
     properties:
         asdf_object_key:
           type: string
-        browse_movie_object_key:
-          type: string
-        browse_movie_url:
-          type: string
+        bounding_box:
+          type: array
+          items:
+            type: array
+            items:
+              type: number
         bucket:
           type: string
         contributing_experiment_ids:
@@ -44,12 +46,14 @@ properties:
           type: array
           items:
             type: string
+        create_date:
+          $ref: "tag:stsci.edu:asdf/time/time-1.1.0"
         dataset_id:
           type: string
         dataset_inventory_id:
           type: integer
         dataset_size:
-          type: integer
+          type: number
         end_time:
           $ref: "tag:stsci.edu:asdf/time/time-1.1.0"
         exposure_time:
@@ -57,9 +61,7 @@ properties:
         filter_wavelengths:
           type: array
           items:
-            type: string
-        frame_count:
-          type: integer
+            type: number
         has_all_stokes:
           type: boolean
         instrument_name:
@@ -86,10 +88,14 @@ properties:
           type: integer
         start_time:
           $ref: "tag:stsci.edu:asdf/time/time-1.1.0"
-        #stokes_parameters:
-        #  type: string
+        stokes_parameters:
+          type: array
+          items:
+            type: string
         target_type:
-          type: string
+          type: array
+          items:
+            type: string
         wavelength_max:
           type: number
         wavelength_min:
@@ -97,18 +103,17 @@ properties:
     additionalProperties: false
     # required:
     #     - asdf_object_key
-    #     - browse_movie_object_key
-    #     - browse_movie_url
+    #     - bounding_box
     #     - bucket
     #     - contributing_experiment_ids
     #     - contributing_proposal_ids
+    #     - create_date
     #     - dataset_id
     #     - dataset_inventory_id
     #     - dataset_size
     #     - end_time
     #     - exposure_time
     #     - filter_wavelengths
-    #     - frame_count
     #     - has_all_stokes
     #     - instrument_name
     #     - observables
@@ -121,6 +126,7 @@ properties:
     #     - recipe_instance_id
     #     - recipe_run_id
     #     - start_time
+    #     - stokes_parameters
     #     - target_type
     #     - wavelength_max
     #     - wavelength_min
@@ -131,10 +137,6 @@ properties:
   unit:
     $ref: "tag:stsci.edu:asdf/unit/unit-1.0.0"
 
-  # uncertainty:
-  #   type: object
-
-
 required: [data, headers, wcs]
 allowAdditionalProperties: true
 ...
diff --git a/tox.ini b/tox.ini
index e519653c..c6fd8e8f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,7 +10,7 @@ setenv =
     PYTEST_COMMAND = pytest --cov=dkist --cov-config={toxinidir}/setup.cfg --verbose
 deps =
     git+https://github.com/Cadair/ndcube@wcsaxes_only_plotting
-    git+https://github.com/Cadair/sunpy@attrs_namespace_registration#egg=sunpy[all]
+    git+https://github.com/sunpy/sunpy#egg=sunpy[all]
 extras = tests
 commands = {env:PYTEST_COMMAND} {posargs}