Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes related to sample data updates #523

Merged
merged 22 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/codspeed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
with:
python-version: '3.12'
- run: python -m pip install .[tests] pytest-codspeed 'numpy<2'
- run: python -c "from dkist.data.sample import download_all_sample_data; download_all_sample_data()"
- name: Run benchmarks
uses: CodspeedHQ/action@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
with:
python-version: '3.13'
test_extras: tests
test_command: pytest --pyargs dkist -k "not test_fail"
test_command: pytest --pyargs dkist -k "not test_fail" --remote-data=none
# We have to work around a github runner bug here: https://github.com/actions/runner/issues/2788#issuecomment-2145922705
upload_to_pypi: ${{ startsWith(github.ref || format('{0}{1}', 'refs/tags/', github.event.release.tag_name), 'refs/tags/v') && !endsWith(github.ref || format('{0}{1}', 'refs/tags/', github.event.release.tag_name), '.dev') }}
secrets:
Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Backwards Incompatible Changes
* asdf-astropy >= 0.5
* asdf-coordinate-schemas >= 0.3
* asdf-transform-schemas >= 0.5
* asdf-wcs-schemas >= 0.4
* asdf-wcs-schemas >= 0.4


Features
Expand Down
9 changes: 7 additions & 2 deletions dkist/data/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,13 @@
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")


def download_all_sample_data():
def download_all_sample_data(overwrite=False):
"""
Download all sample data at once that has not already been downloaded.

Parameters
----------
overwrite : `bool`
Re-download and overwrite any existing files.
"""
return _get_sample_datasets(_SAMPLE_DATASETS.keys())
return _get_sample_datasets(_SAMPLE_DATASETS.keys(), force_download=not overwrite)

Check warning on line 32 in dkist/data/sample.py

View check run for this annotation

Codecov / codecov/patch

dkist/data/sample.py#L32

Added line #L32 was not covered by tests
11 changes: 8 additions & 3 deletions dkist/dataset/tests/test_tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,11 @@


@figure_test
@pytest.mark.remote_data
@pytest.mark.parametrize("share_zscale", [True, False], ids=["share_zscale", "indpendent_zscale"])
def test_tileddataset_plot(share_zscale):
from dkist.data.sample import VBI_AJQWW
from dkist.data.sample import VBI_AJQWW, download_all_sample_data
download_all_sample_data()

Check warning on line 84 in dkist/dataset/tests/test_tiled_dataset.py

View check run for this annotation

Codecov / codecov/patch

dkist/dataset/tests/test_tiled_dataset.py#L83-L84

Added lines #L83 - L84 were not covered by tests
ori_ds = load_dataset(VBI_AJQWW)

newtiles = []
Expand All @@ -101,11 +103,13 @@
return plt.gcf()

@figure_test
@pytest.mark.remote_data
@pytest.mark.parametrize("swap_tile_limits", ["x", "y", "xy", None])
def test_tileddataset_plot_limit_swapping(swap_tile_limits):
# Also test that row/column sizes are correct

from dkist.data.sample import VBI_AJQWW
from dkist.data.sample import VBI_AJQWW, download_all_sample_data
download_all_sample_data()

Check warning on line 112 in dkist/dataset/tests/test_tiled_dataset.py

View check run for this annotation

Codecov / codecov/patch

dkist/dataset/tests/test_tiled_dataset.py#L111-L112

Added lines #L111 - L112 were not covered by tests
ori_ds = load_dataset(VBI_AJQWW)

# Swap WCS to make the `swap_tile_limits` option more natural
Expand Down Expand Up @@ -148,7 +152,8 @@

@pytest.mark.remote_data
def test_tileddataset_plot_non2d_sliceindex():
from dkist.data.sample import VBI_AJQWW
from dkist.data.sample import VBI_AJQWW, download_all_sample_data
download_all_sample_data()

Check warning on line 156 in dkist/dataset/tests/test_tiled_dataset.py

View check run for this annotation

Codecov / codecov/patch

dkist/dataset/tests/test_tiled_dataset.py#L156

Added line #L156 was not covered by tests
ds = load_dataset(VBI_AJQWW)

newtiles = []
Expand Down
8 changes: 6 additions & 2 deletions dkist/tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,27 @@


@pytest.mark.benchmark
@pytest.mark.remote_data
def test_dataset_compute_data_full_files(benchmark):
"""
Note that although this will load all the files to compute the data, the
file IO overhead is *not* included in codspeed's timing of the benchmark,
because it doesn't support that. This test therefore only assesses the
performance of the compute step.
"""
from dkist.data.sample import VISP_BKPLX
from dkist.data.sample import VISP_BKPLX, download_all_sample_data
download_all_sample_data()

Check warning on line 53 in dkist/tests/test_benchmarks.py

View check run for this annotation

Codecov / codecov/patch

dkist/tests/test_benchmarks.py#L52-L53

Added lines #L52 - L53 were not covered by tests
ds = load_dataset(VISP_BKPLX)[0, :15]
benchmark(ds.data.compute)

assert not np.isnan(ds.data.compute()).any()


@pytest.mark.benchmark
@pytest.mark.remote_data
def test_dataset_compute_data_partial_files(benchmark):
from dkist.data.sample import VISP_BKPLX
from dkist.data.sample import VISP_BKPLX, download_all_sample_data
download_all_sample_data()

Check warning on line 64 in dkist/tests/test_benchmarks.py

View check run for this annotation

Codecov / codecov/patch

dkist/tests/test_benchmarks.py#L63-L64

Added lines #L63 - L64 were not covered by tests
ds = load_dataset(VISP_BKPLX)[0, :15, :100, :100]
benchmark(ds.data.compute)

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/reproject_vbi_mosaic.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ If you want to replace this dataset with your own dataset, see {ref}`dkist:howto
Let's load the data with {obj}`dkist.load_dataset`:

```{code-cell} ipython3
ds = dkist.load_dataset(VBI_AJQWW / "VBI_L1_20231016T184519_AJQWW.asdf")
ds = dkist.load_dataset(VBI_AJQWW)
ds
```

Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,4 @@ filterwarnings =
ignore:pkg_resources is deprecated as an API.*:DeprecationWarning
ignore:Deprecated call to .*pkg_resources\.declare_namespace.*mpl_toolkits.*:DeprecationWarning
ignore:Extension .*sunpy-1\.0\.0:asdf.exceptions.AsdfManifestURIMismatchWarning
ignore:(from package asdf-astropy==0.5.0), which is not currently installed
6 changes: 4 additions & 2 deletions tools/update_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def main(datasets, working_directory, destination_path="/user_tools_tutorial_dat

for did, props in datasets.items():
res = Fido.search(a.dkist.Dataset(did))
asdf_file = Fido.fetch(res, path=working_directory / "{dataset_id}", progress=False, overwrite=False)
asdf_file = Fido.fetch(res, path=working_directory / "{dataset_id}", progress=False, overwrite=True)

ds = dkist.load_dataset(asdf_file)
if "slice" in props:
Expand All @@ -64,10 +64,12 @@ def main(datasets, working_directory, destination_path="/user_tools_tutorial_dat
[f.unlink() for f in dataset_path.glob("*.mp4")]
[f.unlink() for f in dataset_path.glob("*.pdf")]
assert len(list(dataset_path.glob("*.asdf"))) == 1
dataset_files = tuple(dataset_path.glob("*"))

sample_filename = working_directory / props["filename"]
with tarfile.open(sample_filename, mode="w") as tfile:
tfile.add(dataset_path, recursive=True)
for dfile in dataset_files:
tfile.add(dfile, arcname=dfile.name, recursive=False)

sample_files_for_upload.append(sample_filename)

Expand Down
1 change: 0 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ commands_pre =
oldestdeps: minimum_dependencies dkist --filename requirements-min.txt
# We need to pin down the cryptography transitive dependency because of globus
oldestdeps: pip install -r requirements-min.txt cryptography<42
figure: python -c "from dkist.data.sample import download_all_sample_data; download_all_sample_data()"
pip freeze --all --no-input
commands =
figure: /bin/sh -c "mkdir -p ./figure_test_images; python -c 'import matplotlib as mpl; print(mpl.ft2font.__file__, mpl.ft2font.__freetype_version__, mpl.ft2font.__freetype_build_type__)' > ./figure_test_images/figure_version_info.txt"
Expand Down
Loading