diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 798bb8ac..419484ec 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,17 +3,16 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "mambaforge-4.10" + python: "mambaforge-latest" jobs: post_checkout: - git fetch --unshallow || true pre_install: - git update-index --assume-unchanged .rtd-environment.yml docs/conf.py pre_build: - - parfive https://g-a36282.cd214.a567.data.globus.org/user_tools_tutorial_data/BKPLX_stokesI.tar https://g-a36282.cd214.a567.data.globus.org/user_tools_tutorial_data/AJQWW_single_mosaic.tar - - mkdir -p $HOME/dkist_data/BKPLX $HOME/dkist_data/AJQWW - - tar -xv -f BKPLX_stokesI.tar --directory $HOME/dkist_data/BKPLX - - tar -xv -f AJQWW_single_mosaic.tar --directory $HOME/dkist_data/AJQWW + # Note $DKIST_SAMPLE_DIR is set by RTD to ~/dkist_data + - python -c "from dkist.data.sample import download_all_sample_data; download_all_sample_data()" + - python -c "from dkist.data.sample import VISP_BKPLX; print(VISP_BKPLX)" conda: environment: .rtd-environment.yml diff --git a/changelog/421.trivial.rst b/changelog/421.trivial.rst new file mode 100644 index 00000000..43261b73 --- /dev/null +++ b/changelog/421.trivial.rst @@ -0,0 +1 @@ +Added two partial datasets to `dkist.data.sample` for documentation and testing. 
diff --git a/dkist/__init__.py b/dkist/__init__.py index afc2c99f..fb2a3a47 100644 --- a/dkist/__init__.py +++ b/dkist/__init__.py @@ -4,7 +4,10 @@ from importlib.metadata import PackageNotFoundError from importlib.metadata import version as _version +import platformdirs as _platformdirs + from .logger import setup_default_dkist_logger as _setup_log +import dkist.config as _config log = _setup_log(__name__) @@ -14,7 +17,7 @@ __version__ = "unknown" -__all__ = ["TiledDataset", "Dataset", "load_dataset", "system_info"] +__all__ = ["TiledDataset", "Dataset", "load_dataset", "system_info", "conf"] def write_default_config(overwrite=False): @@ -29,6 +32,19 @@ def write_default_config(overwrite=False): return _config.create_config_file("dkist", "dkist", overwrite=overwrite) +class Conf(_config.ConfigNamespace): + """ + Configuration Parameters for the `dkist` Package. + """ + sample_data_directory = _config.ConfigItem( + _platformdirs.user_data_dir(appname="dkist"), + "Location to download sample data to." 
+ ) + + +conf = Conf() + + # Do internal imports last (so logger etc is initialised) from dkist.dataset import Dataset, TiledDataset, load_dataset from dkist.utils.sysinfo import system_info diff --git a/dkist/data/_sample.py b/dkist/data/_sample.py new file mode 100644 index 00000000..7fce0d02 --- /dev/null +++ b/dkist/data/_sample.py @@ -0,0 +1,110 @@ +import os +import tarfile +from pathlib import Path +from urllib.parse import urljoin + +from parfive import Downloader, Results + +from astropy.io import fits + +from dkist import conf + +VISP_HEADER = fits.Header.fromtextfile(Path(__file__).parent / "VISP_HEADER.hdr") +_SAMPLE_DATASETS = { + "VISP_BKPLX": ("https://g-a36282.cd214.a567.data.globus.org/user_tools_tutorial_data/", "BKPLX_stokesI.tar"), + "VBI_AJQWW": ("https://g-a36282.cd214.a567.data.globus.org/user_tools_tutorial_data/", "AJQWW_single_mosaic.tar"), +} + + +def _download_and_extract_sample_data(names, overwrite, path): + """ + Downloads a list of files. + + Parameters + ---------- + names : list[str] + The names of the datasets to download and extract + overwrite : bool + Will overwrite a file on disk if True. + path : `pathlib.Path` + The sample data path to save the tar files + + Returns + ------- + `parfive.Results` + Download results. Will behave like a list of files. 
+ """ + dl = Downloader(overwrite=overwrite, progress=True) + + existing_files = [] + + for name in names: + base_url, filename = _SAMPLE_DATASETS[name] + if (filepath := path / filename).exists(): + existing_files.append(filepath) + continue + + url = urljoin(base_url, filename) + dl.enqueue_file(url, path=path) + + results = Results() + if dl.queued_downloads: + results = dl.download() + results += existing_files + + file_folder = {filename: name for name, (_, filename) in _SAMPLE_DATASETS.items() if name in names} + + for i, tarpath in enumerate(results): + output_path = path / file_folder[Path(tarpath).name] + with tarfile.open(tarpath, "r:*") as tar: + tar.extractall(path=output_path, filter="data") + results[i] = output_path + + return results + + +def _get_sample_datasets(dataset_names, no_download=False, force_download=False): + """ + Returns a list of disk locations corresponding to a list of filenames for + sample data, downloading the sample data files as necessary. + + Parameters + ---------- + no_download : `bool` + If ``True``, do not download any files, even if they are not present. + Default is ``False``. + force_download : `bool` + If ``True``, download all files, and overwrite any existing ones. + Default is ``False``. + + Returns + ------- + `list` of `pathlib.Path` + List of disk locations corresponding to the list of filenames. An entry + will be ``None`` if ``no_download == True`` and the file is not present. + + Raises + ------ + RuntimeError + Raised if any of the files cannot be downloaded from any of the mirrors. + """ + sampledata_dir = Path(conf.sample_data_directory) + if env_override := os.environ.get("DKIST_SAMPLE_DIR"): + # For some reason, RTD adds ' around the path in the env var. 
+ sampledata_dir = Path(env_override.strip("'")) + sampledata_dir = sampledata_dir.expanduser() + + datasets = dict((k,v) for k, v in _SAMPLE_DATASETS.items() if k in dataset_names) # noqa: C402 + download_paths = [sampledata_dir / fn for _, fn in datasets.values()] + + if no_download: + return [sampledata_dir / name for name in datasets.keys() if (sampledata_dir / name).exists()] + + results = _download_and_extract_sample_data(datasets.keys(), overwrite=force_download, path=sampledata_dir) + + if results.errors: + raise RuntimeError( + f"{len(results.errors)} sample data files failed " + "to download, the first error is above.") from results.errors[0].exception + + return list(results) diff --git a/dkist/data/sample.py b/dkist/data/sample.py index 74b9dab2..efcf4f61 100644 --- a/dkist/data/sample.py +++ b/dkist/data/sample.py @@ -1,9 +1,27 @@ -import pathlib +""" +This module provides some (partial) sample datasets. +""" -from astropy.io import fits +from ._sample import _SAMPLE_DATASETS, VISP_HEADER, _get_sample_datasets -__all__ = ["VISP_HEADER"] +__all__ = ["download_all_sample_data", *sorted(_SAMPLE_DATASETS.keys()), "VISP_HEADER"] -_data_dir = pathlib.Path(__file__).parent -VISP_HEADER = fits.Header.fromtextfile(_data_dir / "VISP_HEADER.hdr") +# See PEP 562 (https://peps.python.org/pep-0562/) for module-level __dir__() +def __dir__(): + return __all__ + + +# See PEP 562 (https://peps.python.org/pep-0562/) for module-level __getattr__() +def __getattr__(name): + if name in _SAMPLE_DATASETS: + return _get_sample_datasets(name)[0] + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + + +def download_all_sample_data(): + """ + Download all sample data at once that has not already been downloaded. 
+ """ + return _get_sample_datasets(_SAMPLE_DATASETS.keys()) diff --git a/dkist/data/tests/__init__.py b/dkist/data/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dkist/data/tests/test_sample.py b/dkist/data/tests/test_sample.py new file mode 100644 index 00000000..10ba7046 --- /dev/null +++ b/dkist/data/tests/test_sample.py @@ -0,0 +1,38 @@ +import os +from unittest.mock import call + +import pytest + + +@pytest.fixture +def tmp_sample_dir(tmp_path): + old_path = os.environ.get("DKIST_SAMPLE_DIR", "") + os.environ["DKIST_SAMPLE_DIR"] = str(tmp_path) + yield tmp_path + os.environ["DKIST_SAMPLE_DIR"] = old_path + + +def test_module_dir(): + import dkist.data.sample + + assert "VBI_AJQWW" in dir(dkist.data.sample) + assert "VISP_BKPLX" in dir(dkist.data.sample) + + +@pytest.mark.parametrize("attrname", ["VBI_AJQWW", "VISP_BKPLX"]) +def test_module_getattr(mocker, attrname): + mock = mocker.patch("dkist.data.sample._get_sample_datasets") + import dkist.data.sample + + getattr(dkist.data.sample, attrname) + + mock.assert_has_calls([call(attrname), call().__getitem__(0)]) + + +@pytest.mark.internet_off +def test_fail(tmp_sample_dir): + """ + No remote data means this test should fail. 
+ """ + with pytest.raises(RuntimeError, match="1 sample data files failed"): + from dkist.data.sample import VISP_BKPLX # noqa: F401 diff --git a/dkist/tests/test_benchmarks.py b/dkist/tests/test_benchmarks.py index 11dfbfbe..f8a20228 100644 --- a/dkist/tests/test_benchmarks.py +++ b/dkist/tests/test_benchmarks.py @@ -32,11 +32,11 @@ def test_pixel_to_world(benchmark, visp_dataset_no_headers): @pytest.mark.parametrize("axes", [ ["y", None, None, "x"], ]) -def test_plot_dataset(benchmark, axes, visp_dataset_no_headers): +def test_plot_dataset(benchmark, axes, visp_dataset_no_headers, tmp_path): @benchmark def plot_and_save_fig(ds=visp_dataset_no_headers, axes=axes): ds.plot(plot_axes=axes) - plt.savefig("tmpplot") + plt.savefig(tmp_path / "tmpplot.png") plt.close() diff --git a/docs/tutorial/2_search_and_asdf_download.md b/docs/tutorial/2_search_and_asdf_download.md index 444fca57..acbb6fcd 100644 --- a/docs/tutorial/2_search_and_asdf_download.md +++ b/docs/tutorial/2_search_and_asdf_download.md @@ -107,7 +107,7 @@ Note that you can also pass more than one result to be downloaded. A simple example of both of these is: ```{code-cell} python -Fido.fetch(visp[:3], path="~/dkist_data/{instrument}/{dataset_id}/") +Fido.fetch(visp[:3], path="~/dkist_data/{instrument}_{dataset_id}/") ``` This will put each of our ASDF files in a directory named with the corresponding Dataset ID and Instrument. 
diff --git a/docs/tutorial/3_the_dataset.md b/docs/tutorial/3_the_dataset.md index 1e5900da..f042152d 100644 --- a/docs/tutorial/3_the_dataset.md +++ b/docs/tutorial/3_the_dataset.md @@ -43,7 +43,7 @@ from sunpy.net import Fido, attrs as a ```{code-cell} ipython3 res = Fido.search(a.dkist.Dataset('BKPLX')) -files = Fido.fetch(res, path="~/dkist_data/{dataset_id}") +files = Fido.fetch(res, path="~/dkist_data/{instrument}_{dataset_id}") files ``` diff --git a/docs/tutorial/4_more_dataset.md b/docs/tutorial/4_more_dataset.md index 27aacefb..8df494fb 100644 --- a/docs/tutorial/4_more_dataset.md +++ b/docs/tutorial/4_more_dataset.md @@ -23,7 +23,7 @@ import dkist import dkist.net res = Fido.search(a.dkist.Dataset('BKPLX')) -files = Fido.fetch(res, path="~/dkist_data/{dataset_id}") +files = Fido.fetch(res, path="~/dkist_data/{instrument}_{dataset_id}") ds = dkist.load_dataset(files) ``` diff --git a/docs/tutorial/5_downloading_data.md b/docs/tutorial/5_downloading_data.md index fd6066e3..02caf0a7 100644 --- a/docs/tutorial/5_downloading_data.md +++ b/docs/tutorial/5_downloading_data.md @@ -42,7 +42,7 @@ import dkist.net from sunpy.net import Fido, attrs as a res = Fido.search(a.dkist.Dataset('BKPLX')) -files = Fido.fetch(res, path="~/dkist_data/{dataset_id}") +files = Fido.fetch(res, path="~/dkist_data/{instrument}_{dataset_id}") ds = dkist.load_dataset(files) ``` @@ -84,7 +84,7 @@ So for example: ```{code-cell} ipython3 :tags: [skip-execution] -ds[0, 0].files.download(path="~/dkist_data/{dataset_id}") +ds[0, 0].files.download(path="~/dkist_data/{instrument}_{dataset_id}") ``` -would save the file to `~/dkist_data/BKPLX/VISP_2023_10_16T18_21_47_508_00630200_I_BKPLX_L1.fits`. +would save the file to `~/dkist_data/VISP_BKPLX/VISP_2023_10_16T18_21_47_508_00630200_I_BKPLX_L1.fits`. 
diff --git a/docs/tutorial/6_visualization.md b/docs/tutorial/6_visualization.md index ea0d26b1..92517a64 100644 --- a/docs/tutorial/6_visualization.md +++ b/docs/tutorial/6_visualization.md @@ -29,7 +29,7 @@ import dkist.net ```{code-cell} ipython3 res = Fido.search(a.dkist.Dataset("BKPLX")) -asdf_file = Fido.fetch(res, path="~/dkist_data/{dataset_id}") +asdf_file = Fido.fetch(res, path="~/dkist_data/{instrument}_{dataset_id}") ds = dkist.load_dataset(asdf_file) ``` @@ -106,7 +106,7 @@ For the next few examples we'll go back to using some VBI data. ```{code-cell} ipython3 res = Fido.search(a.dkist.Dataset("AJQWW")) -asdf_file = Fido.fetch(res, path="~/dkist_data/{dataset_id}") +asdf_file = Fido.fetch(res, path="~/dkist_data/{instrument}_{dataset_id}") # We extract the top left tile of the VBI mosaic ds = dkist.load_dataset(asdf_file)[0, 0]