Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to set chunksize when making the dask array #232

Merged
merged 7 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
apt:
- graphviz
- linux: py311-devdeps
- linux: py38-oldestdeps
- linux: py39-oldestdeps
publish:
needs: [tests]
Expand Down
1 change: 1 addition & 0 deletions changelog/232.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Drop support for Python 3.8 in line with `NEP 29 <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_.
2 changes: 2 additions & 0 deletions changelog/232.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Internal improvements to how the data are loaded from the collection of FITS files.
This should have no user facing effects, but provides a foundation for future performance work.
5 changes: 3 additions & 2 deletions dkist/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@

@pytest.fixture
def array():
    """
    A 2D dask array of a random shape, backed by a single chunk.

    Each dimension is an independent power of two between 4 (2**2) and
    64 (2**6), and every element has the value 11 (``ones + 10``).
    """
    # Power-of-two sizes so that power-of-two chunk sizes used elsewhere in
    # the tests divide the array evenly.
    shape = 2**np.random.randint(2, 7, size=2)
    x = np.ones(np.prod(shape)) + 10
    x = x.reshape(shape)
    # One chunk covering the whole array.
    return da.from_array(x, tuple(shape))


Expand Down
7 changes: 1 addition & 6 deletions dkist/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import sys
import importlib.resources as importlib_resources
from pathlib import Path
from textwrap import dedent

if sys.version_info < (3, 9):
import importlib_resources
else:
import importlib.resources as importlib_resources

from jsonschema.exceptions import ValidationError

import asdf
Expand Down
5 changes: 4 additions & 1 deletion dkist/io/asdf/converters/file_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ def from_yaml_tree(self, node, tag, ctx):
node["target"],
node["datatype"],
node["shape"],
chunksize=node.get("chunksize", None),
loader=AstropyFITSLoader,
basepath=base_path)
return file_manager

def to_yaml_tree(self, obj, tag, ctx):
    """
    Serialize a ``FileManager`` into its asdf tree representation.

    Parameters
    ----------
    obj
        The ``FileManager`` instance being serialized; its
        ``_striped_external_array`` holds the file URIs and array metadata.
    tag, ctx
        Standard asdf converter arguments (unused here).

    Returns
    -------
    dict
        The tree node; ``chunksize`` is only written when it has been set.
    """
    sea = obj._striped_external_array
    node = {}
    node["fileuris"] = sea.fileuri_array.tolist()
    node["target"] = sea.target
    node["datatype"] = sea.dtype
    node["shape"] = sea.shape
    # The walrus assignment must be parenthesised: without the parentheses
    # ``chunksize := (... is not None)`` binds the *boolean* comparison
    # result, so ``node["chunksize"]`` would be written as ``True``.
    if (chunksize := sea.chunksize) is not None:
        node["chunksize"] = chunksize
    return node
8 changes: 1 addition & 7 deletions dkist/io/asdf/entry_points.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
This file contains the entry points for asdf.
"""
import sys
import importlib.resources as importlib_resources

from asdf.extension import ManifestExtension
from asdf.resource import DirectoryResourceMapping
Expand All @@ -10,12 +10,6 @@
FileManagerConverter, RavelConverter, TiledDatasetConverter,
VaryingCelestialConverter)

if sys.version_info < (3, 9):
import importlib_resources
else:
import importlib.resources as importlib_resources



def get_resource_mappings():
"""
Expand Down
2 changes: 2 additions & 0 deletions dkist/io/asdf/resources/schemas/file_manager-1.0.0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ properties:
anyOf:
- type: integer
minimum: 0
chunksize:
type: array

required: [fileuris, target, datatype, shape]
additionalProperties: false
Expand Down
47 changes: 39 additions & 8 deletions dkist/io/asdf/tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import sys
import importlib.resources as importlib_resources
from pathlib import Path

if sys.version_info < (3, 9):
import importlib_resources
else:
import importlib.resources as importlib_resources

import numpy as np
import pytest

Expand Down Expand Up @@ -49,8 +44,8 @@ def assert_dataset_equal(new, old):
new.meta["headers"] = new_headers
assert old.wcs.name == new.wcs.name
assert len(old.wcs.available_frames) == len(new.wcs.available_frames)
ac_new = new.files.external_array_references
ac_old = old.files.external_array_references
ac_new = new.files.fileuri_array
ac_old = old.files.fileuri_array
assert ac_new == ac_old
assert old.unit == new.unit
assert old.mask == new.mask
Expand Down Expand Up @@ -140,3 +135,39 @@ def test_read_all_schema_versions(eit_dataset_asdf_path):
assert isinstance(dataset.wcs, gwcs.WCS)
assert dataset.wcs.world_n_dim == 3
assert dataset.wcs.pixel_n_dim == 3


@pytest.fixture
def wrap_object(mocker):
    """
    Return a helper that spies on an attribute of a target object.

    The helper replaces ``attribute`` on ``target`` with a wrapper which
    records every invocation on a ``MagicMock`` before delegating to the
    original attribute, and returns that mock so the test can assert on
    the recorded calls.
    """
    def _wrap(target, attribute):
        recorder = mocker.MagicMock()
        original = getattr(target, attribute)

        def _spy(self, *args, **kwargs):
            # Record the call, then defer to the real implementation so
            # behavior is unchanged.
            recorder.__call__(*args, **kwargs)
            return original(self, *args, **kwargs)

        mocker.patch.object(target, attribute, _spy)
        return recorder

    return _wrap


def test_loader_getitem_with_chunksize(eit_dataset_asdf_path, wrap_object):
    """
    Setting ``chunksize`` on the striped external array must make the file
    loader be indexed with slices of exactly that chunk shape.
    """
    # Import this here to prevent hitting https://bugs.python.org/issue35753
    # on Python <3.10 — importing ``call`` at module scope is enough to
    # trigger a doctest error.
    from unittest.mock import call

    requested_chunks = (32, 16)

    with asdf.open(eit_dataset_asdf_path) as tree:
        ds = tree["dataset"]
        ds.files.basepath = rootdir / "EIT"
        ds.files._striped_external_array.chunksize = requested_chunks
        spy = wrap_object(ds.files._striped_external_array._loader, "__getitem__")
        ds._data = ds.files._generate_array()
        ds.data.compute()

    first_chunk = (slice(0, requested_chunks[0], None),
                   slice(0, requested_chunks[1], None))
    assert call(first_chunk) in spy.mock_calls
7 changes: 1 addition & 6 deletions dkist/io/asdf/tests/test_tiled_dataset.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
import sys

if sys.version_info < (3, 9):
import importlib_resources
else:
import importlib.resources as importlib_resources
import importlib.resources as importlib_resources

import asdf

Expand Down
13 changes: 8 additions & 5 deletions dkist/io/dask_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
__all__ = ['stack_loader_array']


def stack_loader_array(loader_array, chunksize):
    """
    Stack a loader array along each of its dimensions.

    Parameters
    ----------
    loader_array : `numpy.ndarray`
        An n-dimensional array of loader objects, one per file; each
        element is presumably expected to expose a ``shape`` attribute
        (see the ``.flat[0].shape`` access below).
    chunksize : tuple or None
        The dask chunk size to use for each file's array.  If `None`,
        the full shape of a single file's array is used, i.e. one chunk
        per file.

    Returns
    -------
    array : `dask.array.Array`
    """
    # If the chunksize isn't specified then use the whole array shape
    chunksize = chunksize or loader_array.flat[0].shape

    if len(loader_array.shape) == 1:
        return da.stack(loader_to_dask(loader_array, chunksize))
    # Recurse over the leading dimension, stacking the results.
    stacks = []
    for i in range(loader_array.shape[0]):
        stacks.append(stack_loader_array(loader_array[i], chunksize))
    return da.stack(stacks)


def loader_to_dask(loader_array):
def loader_to_dask(loader_array, chunksize):
"""
Map a call to `dask.array.from_array` onto all the elements in ``loader_array``.
Expand All @@ -44,6 +47,6 @@ def loader_to_dask(loader_array):
# trying to auto calculate it by reading from the actual array on disk.
meta = np.zeros((0,), dtype=loader_array[0].dtype)

to_array = partial(da.from_array, meta=meta)
to_array = partial(da.from_array, meta=meta, chunks=chunksize)

return map(to_array, loader_array)
Loading