Fixes related to sample data updates (#523)

* Fixes related to sample data updates * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * What is going on * Add an option to force re-download sample files * Temporarily ignore asdf warning so I can debug things * Update pyproject.toml Co-authored-by: Stuart Mumford <[email protected]> * Make that actually work * Add remote_data mark to figure tests with sample data * Download sample data in tests if necessary instead of in tox * Same again for benchmarks * Fix error ignoring * Update TiledDataset plot test figure hashes * Don't need these apparently * Run benchmarks with remote-data * Skip failing Windows test until we can fix it * Add note for later on warning ignore * You get a changelog! And you get a changelog! * Fix devdeps figure hashes as well * Update pytest.ini * Rename changelog * Update .github/workflows/main.yml --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Drew Leonard <[email protected]>
DKISTDC · Feb 18, 2025 · 9f3ff52 · 9f3ff52
1 parent f4f12ab
commit 9f3ff52
Show file tree

Hide file tree

Showing 14 changed files with 35 additions and 22 deletions.
diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
@@ -16,9 +16,8 @@ jobs:
         with:
           python-version: '3.12'
       - run: python -m pip install .[tests] pytest-codspeed 'numpy<2'
-      - run: python -c "from dkist.data.sample import download_all_sample_data; download_all_sample_data()"
       - name: Run benchmarks
         uses: CodspeedHQ/action@v3
         with:
           token: ${{ secrets.CODSPEED_TOKEN }}
-          run: "pytest -vvv -r fEs --pyargs dkist --codspeed"
+          run: "pytest -vvv -r fEs --pyargs dkist --codspeed --remote-data=any"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -92,7 +92,7 @@ jobs:
     with:
       python-version: '3.13'
       test_extras: tests
-      test_command: pytest --pyargs dkist -k "not test_fail"
+      test_command: pytest --pyargs dkist -k "not test_fail" --remote-data=none --benchmark-skip
       # We have to work around a github runner bug here: https://github.com/actions/runner/issues/2788#issuecomment-2145922705
       upload_to_pypi: ${{ startsWith(github.ref || format('{0}{1}', 'refs/tags/', github.event.release.tag_name), 'refs/tags/v') && !endsWith(github.ref || format('{0}{1}', 'refs/tags/', github.event.release.tag_name), '.dev') }}
     secrets:

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -19,7 +19,7 @@ Backwards Incompatible Changes
   * asdf-astropy >= 0.5
   * asdf-coordinate-schemas >= 0.3
   * asdf-transform-schemas >= 0.5
-  * asdf-wcs-schemas >= 0.4 
+  * asdf-wcs-schemas >= 0.4
 
 
 Features

diff --git a/changelog/523.feature.rst b/changelog/523.feature.rst
@@ -0,0 +1 @@
+Adds an `overwrite` keyword to `download_all_sample_data()` to force downloading new data.
diff --git a/changelog/523.trivial.rst b/changelog/523.trivial.rst
@@ -0,0 +1 @@
+Update hashes for `TiledDataset.plot()` figure tests.
diff --git a/dkist/data/sample.py b/dkist/data/sample.py
@@ -20,8 +20,13 @@ def __getattr__(name):
     raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
 
 
-def download_all_sample_data():
+def download_all_sample_data(overwrite=False):
     """
     Download all sample data at once that has not already been downloaded.
+
+    Parameters
+    ----------
+    overwrite : `bool`, optional
+        If `True`, re-download and overwrite any existing files.
+        Defaults to `False`.
     """
-    return _get_sample_datasets(_SAMPLE_DATASETS.keys())
+    return _get_sample_datasets(_SAMPLE_DATASETS.keys(), force_download=overwrite)
diff --git a/dkist/data/tests/test_sample.py b/dkist/data/tests/test_sample.py
@@ -1,4 +1,5 @@
 import os
+import platform
 from unittest.mock import call
 
 import pytest
@@ -29,6 +30,7 @@ def test_module_getattr(mocker, attrname):
     mock.assert_has_calls([call(attrname), call().__getitem__(0)])
 
 
+@pytest.mark.skipif(platform.system() == "Windows", reason="Internet not properly disabled on Windows")
 @pytest.mark.internet_off
 def test_fail(tmp_sample_dir):
     """

diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py
@@ -77,6 +77,7 @@ def test_tiled_dataset_from_components(dataset):
 
 
 @figure_test
+@pytest.mark.remote_data
 @pytest.mark.parametrize("share_zscale", [True, False], ids=["share_zscale", "indpendent_zscale"])
 def test_tileddataset_plot(share_zscale):
     from dkist.data.sample import VBI_AJQWW
@@ -101,6 +102,7 @@ def test_tileddataset_plot(share_zscale):
     return plt.gcf()
 
 @figure_test
+@pytest.mark.remote_data
 @pytest.mark.parametrize("swap_tile_limits", ["x", "y", "xy", None])
 def test_tileddataset_plot_limit_swapping(swap_tile_limits):
     # Also test that row/column sizes are correct

diff --git a/dkist/tests/figure_hashes_mpl_391_ft_261_astropy_611_animators_111_ndcube_222.json b/dkist/tests/figure_hashes_mpl_391_ft_261_astropy_611_animators_111_ndcube_222.json
@@ -6,10 +6,10 @@
   "dkist.dataset.tests.test_plotting.test_2d_plot[aslice1]": "cbb84fbae51d8238803f8f0d6820c575f024fe54b1656f1b181dc4ec645e9ff9",
   "dkist.dataset.tests.test_plotting.test_2d_plot[aslice2]": "132c5615832daff457dacb4cb770498f1fbb4460a5b90b5d4d01d224c70eeb28",
   "dkist.dataset.tests.test_plotting.test_2d_plot2": "409b5a10ad8ccf005331261505e63ce8febdc38eb8b5a34f8863e567e3cccb9c",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[share_zscale]": "40298abbc680c82de029b02c4e543a60eac1b2d71e06b22c53a1d43194491ac3",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[indpendent_zscale]": "b6f2dd9fdeb79bf25ad43a591d8dec242f32e0ba3a521e15791058d51e0ecbaf",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[x]": "0f2fa941c020f9853eff0eaf2f575be193372d7042731349d166a4b3645d78b0",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[y]": "ae3a81c58bf55afed01c90cac9ce6227cddf430c0741d9c2f7b2d4c3ca350a6f",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[xy]": "9098876ebd47e11e2aca7460c29ac1614e383a2386868995ca3b57c61ace0162",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[None]": "0159e3fcd0f7109e216888ea337e8eb9861dbc951ab9cfba5d14cc6c8b501132"
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[share_zscale]": "bc1561c95587a12245dc201eb25487f53b8759f9faf361089308ff95cc21fab5",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[indpendent_zscale]": "b0a77981f035b9ac5ec8f231a54986f3e1973cb95a68f333d6c7fcba6bb8fc4c",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[x]": "56548cd44d6b68c7586fd1b37b16dea46cc6b50ce37aa6d19367a92bc8402555",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[y]": "debd24cf94d88a4526974ce4e1cf073715b2f1d7da8b558f16fd0f97047e477d",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[xy]": "3e130694abf6e1a2fc31f9d4c2bdd65d73c87c58645430ebd590bf95f0aed27d",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[None]": "6aeac69c6b5f1376e8e0334590250c7127fe847031642f05cf82d3cf1b22661f"
 }
diff --git a/dkist/tests/figure_hashes_mpl_dev_ft_261_astropy_dev_animators_dev_ndcube_dev.json b/dkist/tests/figure_hashes_mpl_dev_ft_261_astropy_dev_animators_dev_ndcube_dev.json
@@ -6,10 +6,10 @@
   "dkist.dataset.tests.test_plotting.test_2d_plot[aslice1]": "cbb84fbae51d8238803f8f0d6820c575f024fe54b1656f1b181dc4ec645e9ff9",
   "dkist.dataset.tests.test_plotting.test_2d_plot[aslice2]": "4b5be9cf1883d0ebd15ff091f52cea2822068e8238a8df7b0f594d69fba27597",
   "dkist.dataset.tests.test_plotting.test_2d_plot2": "1c10e9db44b0b694a6bb1b493c4c2193278541df7c1302bb11fe3f6372682e35",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[share_zscale]": "bd0cfadd99f9d3d416f011184f2e9a7971df226879c8786e8ab2349e13909b5c",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[indpendent_zscale]": "2d6afac3f582846f4be95b23b524bb670895b0885519d8c13623307d07a3b39e",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[x]": "b35593deb273b02ff1f2384810c4cf825ef5017ecad4d020543c53ad6361cd9e",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[y]": "78de0395df62edd8626014d7b8924b5f3d1d66b27be9c1a328fac7b7639e702b",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[xy]": "05219c0c450825fa7bd555ff27a9a111066082b15e2dde83ac2f3ac43dba5102",
-  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[None]": "64a247b0b54b7de8a8a7c636d60bdceb7d7581a5429c9e8d813b8d81912a2c10"
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[share_zscale]": "2a1ac0d42d08c09a0d6633e4261f709558838069820562e46e08348a619bb7e3",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot[indpendent_zscale]": "43ee8674dd9930bfba1b9aeb6c2f2f4e9bd314e4d3867d34a1d754df5ab1494e",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[x]": "362e816e9a97dc15ecb4ef77c148bd5b41a769283748ff60bdeb05ecb93b2acd",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[y]": "f427624e4d6de447a2233543fe785eaf9d8757037654eb6a9352237de19211d6",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[xy]": "d36b703c6f52e01184c7bdcff04817fbabe662f22dd856145b26d2283f40c03a",
+  "dkist.dataset.tests.test_tiled_dataset.test_tileddataset_plot_limit_swapping[None]": "f953957907b9315ccdba36d130ec88cd41f4cd26554f07bdc7a1b284909e86a2"
 }
diff --git a/dkist/tests/test_benchmarks.py b/dkist/tests/test_benchmarks.py
@@ -41,6 +41,7 @@ def plot_and_save_fig(ds=visp_dataset_no_headers, axes=axes):
 
 
 @pytest.mark.benchmark
+@pytest.mark.remote_data
 def test_dataset_compute_data_full_files(benchmark):
     """
     Note that although this will load all the files to compute the data, the
@@ -56,6 +57,7 @@ def test_dataset_compute_data_full_files(benchmark):
 
 
 @pytest.mark.benchmark
+@pytest.mark.remote_data
 def test_dataset_compute_data_partial_files(benchmark):
     from dkist.data.sample import VISP_BKPLX
     ds = load_dataset(VISP_BKPLX)[0, :15, :100, :100]

diff --git a/docs/examples/reproject_vbi_mosaic.md b/docs/examples/reproject_vbi_mosaic.md
@@ -44,7 +44,7 @@ If you want to replace this dataset with your own dataset, see {ref}`dkist:howto
 Let's load the data with {obj}`dkist.load_dataset`:
 
 ```{code-cell} ipython3
-ds = dkist.load_dataset(VBI_AJQWW / "VBI_L1_20231016T184519_AJQWW.asdf")
+ds = dkist.load_dataset(VBI_AJQWW)
 ds
 ```
 

diff --git a/tools/update_sample_data.py b/tools/update_sample_data.py
@@ -45,7 +45,7 @@ def main(datasets, working_directory, destination_path="/user_tools_tutorial_dat
 
     for did, props in datasets.items():
         res = Fido.search(a.dkist.Dataset(did))
-        asdf_file = Fido.fetch(res, path=working_directory / "{dataset_id}", progress=False, overwrite=False)
+        asdf_file = Fido.fetch(res, path=working_directory / "{dataset_id}", progress=False, overwrite=True)
 
         ds = dkist.load_dataset(asdf_file)
         if "slice" in props:
@@ -64,10 +64,12 @@ def main(datasets, working_directory, destination_path="/user_tools_tutorial_dat
         [f.unlink() for f in dataset_path.glob("*.mp4")]
         [f.unlink() for f in dataset_path.glob("*.pdf")]
         assert len(list(dataset_path.glob("*.asdf"))) == 1
+        dataset_files = tuple(dataset_path.glob("*"))
 
         sample_filename = working_directory / props["filename"]
         with tarfile.open(sample_filename, mode="w") as tfile:
-            tfile.add(dataset_path, recursive=True)
+            for dfile in dataset_files:
+                tfile.add(dfile, arcname=dfile.name, recursive=False)
 
         sample_files_for_upload.append(sample_filename)
 

diff --git a/tox.ini b/tox.ini
@@ -61,7 +61,6 @@ commands_pre =
     oldestdeps: minimum_dependencies dkist --filename requirements-min.txt
     # We need to pin down the cryptography transitive dependency because of globus
     oldestdeps: pip install -r requirements-min.txt cryptography<42
-    figure: python -c "from dkist.data.sample import download_all_sample_data; download_all_sample_data()"
     pip freeze --all --no-input
 commands =
     figure: /bin/sh -c "mkdir -p ./figure_test_images; python -c 'import matplotlib as mpl; print(mpl.ft2font.__file__, mpl.ft2font.__freetype_version__, mpl.ft2font.__freetype_build_type__)' > ./figure_test_images/figure_version_info.txt"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Adds an `overwrite` keyword to `download_all_sample_data()` to force downloading new data.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Update hashes for `TiledDataset.plot()` figure tests.