diff --git a/.cruft.json b/.cruft.json index 72ad7fdd..e71c986b 100644 --- a/.cruft.json +++ b/.cruft.json @@ -1,6 +1,6 @@ { "template": "https://github.com/sunpy/package-template", - "commit": "112d7d4adf0fa168bbb9ddb1886ad4f1e595b8be", + "commit": "bd61f1c29296ec3923d3562019d690dbd75366ba", "checkout": null, "context": { "cookiecutter": { diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index ea87f95c..068f779f 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -16,6 +16,7 @@ jobs: with: python-version: '3.12' - run: python -m pip install .[tests] pytest-codspeed 'numpy<2' + - run: python -c "from dkist.data.sample import download_all_sample_data; download_all_sample_data()" - name: Run benchmarks uses: CodspeedHQ/action@v3 with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4b9954cf..145c2b11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: # This should be before any formatting hooks like isort - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.5.2" + rev: "v0.6.1" hooks: - id: ruff args: ["--fix"] diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 419484ec..29475caa 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,7 +1,7 @@ version: 2 build: - os: ubuntu-22.04 + os: ubuntu-lts-latest tools: python: "mambaforge-latest" jobs: diff --git a/.rtd-environment.yml b/.rtd-environment.yml index f3d2fdbe..2dc187de 100644 --- a/.rtd-environment.yml +++ b/.rtd-environment.yml @@ -2,6 +2,6 @@ name: dkist channels: - conda-forge dependencies: - - python=3.10 + - python=3.12 - pip - graphviz!=2.42.*,!=2.43.* diff --git a/changelog/394.trivial.rst b/changelog/394.trivial.rst new file mode 100644 index 00000000..fc52bf9e --- /dev/null +++ b/changelog/394.trivial.rst @@ -0,0 +1 @@ +Add benchmarks for slicing and computing datasets. 
def asdf_open_memory_mapping_kwarg(memmap: bool) -> dict:
    """
    Build the memory-mapping keyword argument to pass to ``asdf.open``.

    ``asdf>=3.1.0`` uses the ``memmap`` keyword, whereas older releases use
    ``copy_arrays``, which has the inverse meaning. This helper returns
    whichever one matches the installed ``asdf``.

    Parameters
    ----------
    memmap
        `True` if array data should be memory mapped, `False` otherwise.

    Returns
    -------
    dict
        ``{"memmap": memmap}`` for ``asdf>=3.1.0``, otherwise
        ``{"copy_arrays": not memmap}``. Intended to be ``**``-unpacked into
        the ``asdf.open`` call.
    """
    # Local import so this helper does not depend on the module's import block.
    import re

    # Compare the numeric release segment rather than the raw strings: a
    # lexicographic comparison misorders versions such as "10.0.0" vs "3.1.0",
    # and a strict ">" would wrongly exclude 3.1.0 itself.
    release = re.match(r"(\d+)(?:\.(\d+))?(?:\.(\d+))?", asdf.__version__)
    if release is None:
        # Unparseable version string (e.g. a non-standard local build):
        # assume a modern asdf and use the non-deprecated keyword.
        return {"memmap": memmap}

    installed = tuple(int(part or 0) for part in release.groups())
    if installed >= (3, 1, 0):
        return {"memmap": memmap}
    return {"copy_arrays": not memmap}
@pytest.mark.benchmark
def test_dataset_compute_data_partial_files(benchmark):
    """
    Benchmark computing a spatial cutout of the VISP sample dataset.

    The dataset is sliced to ``[0, :15, :100, :100]`` before computing, so
    (as the test name suggests) only part of each underlying file should be
    needed -- NOTE(review): confirm against the sample data's array layout.
    """
    from dkist.data.sample import VISP_BKPLX

    full_dataset = load_dataset(VISP_BKPLX)
    cutout = full_dataset[0, :15, :100, :100]

    # Time only the compute step of the lazily-loaded array.
    benchmark(cutout.data.compute)

    # Sanity check: the computed cutout must not contain any NaNs.
    computed = cutout.data.compute()
    has_nans = np.isnan(computed).any()
    assert not has_nans
ignore:the imp module is deprecated in favour of importlib:DeprecationWarning:ipykernel.iostream # Ignore warnings about asdf versions - ignore:File.*asdf.extension.BuiltinExtension.*:asdf.exceptions.AsdfWarning + ignore:File.*asdf.extension.BuiltinExtension.* # pytest / asdf interaction ignore:The .* argument to AsdfSchemaFile is deprecated # gwcs main has the wrong version number: https://github.com/spacetelescope/gwcs/issues/399 @@ -80,7 +80,7 @@ filterwarnings = ignore:The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. ignore:Subclassing validator classes is not intended to be part of their public API # Ignore warning so gwcs 0.18.3 can load - ignore:Converter handles multiple tags for this extension:asdf.exceptions.AsdfWarning + ignore:Converter handles multiple tags for this extension # https://github.com/pandas-dev/pandas/issues/54466 ignore:\nPyarrow will become a required dependency of pandas in the next major release of pandas:DeprecationWarning # This seems to be coming out of pandas