From cc3f3528f688089a5153dbff4911279b0123c491 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:13:00 +0000 Subject: [PATCH 1/6] First attempt at fix --- pyaerocom/helpers.py | 24 ++++++++++++------------ pyaerocom/stats/mda8/mda8.py | 5 +---- pyaerocom/time_config.py | 10 +++++++++- tests/test_helpers.py | 6 +++--- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/pyaerocom/helpers.py b/pyaerocom/helpers.py index ef663d44c..05cb4872c 100644 --- a/pyaerocom/helpers.py +++ b/pyaerocom/helpers.py @@ -945,14 +945,14 @@ def merge_station_data( return merged -def _get_pandas_freq_and_loffset(freq): +def _get_pandas_freq_and_offset(freq): """Helper to convert resampling info""" if freq in TS_TYPE_TO_PANDAS_FREQ: freq = TS_TYPE_TO_PANDAS_FREQ[freq] - loffset = None + offset = None if freq in PANDAS_RESAMPLE_OFFSETS: - loffset = PANDAS_RESAMPLE_OFFSETS[freq] - return (freq, loffset) + offset = PANDAS_RESAMPLE_OFFSETS[freq] + return (freq, offset) def make_datetime_index(start, stop, freq): @@ -983,10 +983,10 @@ def make_datetime_index(start, stop, freq): if not isinstance(stop, pd.Timestamp): stop = to_pandas_timestamp(stop) - freq, loffset = _get_pandas_freq_and_loffset(freq) + freq, offset = _get_pandas_freq_and_offset(freq) idx = pd.date_range(start=start, end=stop, freq=freq) - if loffset is not None: - idx = idx + pd.Timedelta(loffset) + if offset is not None: + idx = idx + pd.Timedelta(offset) return idx @@ -1096,7 +1096,7 @@ def resample_timeseries(ts, freq, how=None, min_num_obs=None): p = int(how.split("percentile")[0]) how = lambda x: np.nanpercentile(x, p) # noqa: E731 - freq, loffset = _get_pandas_freq_and_loffset(freq) + freq, offset = _get_pandas_freq_and_offset(freq) resampler = ts.resample(freq) data = resampler.agg(how) @@ -1106,8 +1106,8 @@ def resample_timeseries(ts, freq, how=None, min_num_obs=None): invalid = numobs < min_num_obs if np.any(invalid): data.values[invalid] = np.nan - if loffset is not None: - data.index = data.index + pd.Timedelta(loffset) + if offset is not None: + data.index = data.index + offset return data @@ -1165,8 +1165,8 @@ def resample_time_dataarray(arr, freq, how=None, min_num_obs=None): if min_num_obs is not None: invalid = arr.resample(time=pd_freq).count(dim="time") < min_num_obs - freq, loffset = _get_pandas_freq_and_loffset(freq) - resampler = arr.resample(time=pd_freq, loffset=loffset) + freq, offset = _get_pandas_freq_and_offset(freq) + resampler = arr.resample(time=pd_freq, offset=offset) try: aggfun = getattr(resampler, how) except AttributeError: diff --git a/pyaerocom/stats/mda8/mda8.py b/pyaerocom/stats/mda8/mda8.py index 5fe6daf1e..54b031c34 100644 --- a/pyaerocom/stats/mda8/mda8.py +++ b/pyaerocom/stats/mda8/mda8.py @@ -104,9 +104,6 @@ def _rolling_average_8hr(arr: xr.DataArray) -> xr.DataArray: def _daily_max(arr: xr.DataArray) -> xr.DataArray: - # TODO: Base is deprecated, and using offset="1h" is the proper way to do this. - # However this currently breaks the old-dependencies test in CI. Should be - # changed in the future. - return arr.resample(time="24H", base=1).reduce( + return arr.resample(time="24H", offset="1h").reduce( lambda x, axis: np.apply_along_axis(min_periods_max, 1, x, min_periods=18) ) diff --git a/pyaerocom/time_config.py b/pyaerocom/time_config.py index 00a110786..bb47da97e 100644 --- a/pyaerocom/time_config.py +++ b/pyaerocom/time_config.py @@ -4,6 +4,7 @@ from datetime import datetime +import pandas as pd from iris import coord_categorisation TS_TYPES = ["minutely", "hourly", "daily", "weekly", "monthly", "yearly", "native", "coarsest"] @@ -40,8 +41,15 @@ "season": "Q", "yearly": "YS", } +PANDAS_RESAMPLE_OFFSETS = { + "YS": pd.Timedelta(181, "d"), + "MS": pd.Timedelta(14, "d"), + "D": pd.Timedelta(12, "h"), + "h": pd.Timedelta(30, "m"), +} + -PANDAS_RESAMPLE_OFFSETS = {"YS": "181D", "MS": "14D", "D": "12h", "h": "30min"} +# PANDAS_RESAMPLE_OFFSETS = {"YS": pd.Timestamp("181D"), "MS": pd.Timestamp("14D"), "D": pd.Timestamp("12h"), "h": pd.Timestamp("30min")} PANDAS_FREQ_TO_TS_TYPE = {v: k for k, v in TS_TYPE_TO_PANDAS_FREQ.items()} diff --git a/tests/test_helpers.py b/tests/test_helpers.py index ac69ec283..db80db874 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -90,9 +90,9 @@ def test_merge_station_data_error(statlist, use, exception, error): assert str(e.value).startswith(error) -def test__get_pandas_freq_and_loffset(): - val = helpers._get_pandas_freq_and_loffset("monthly") - assert val == ("MS", "14D") +def test__get_pandas_freq_and_offset(): + val = helpers._get_pandas_freq_and_offset("monthly") + assert val == ("MS", pd.Timedelta(14, "d")) @pytest.fixture(scope="module") From 238923476eac73fffb0b52694a6dc344faca8ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:15:59 +0000 Subject: [PATCH 2/6] Remove upper limit on xarray --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8dc25f591..906c7df2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ requires-python = ">=3.10" dependencies = [ "aerovaldb@git+https://github.com/metno/aerovaldb.git@v0.0.14", "scitools-iris>=3.8.1", - "xarray>=2022.10.0, <2024.7.0", + "xarray>=2022.10.0", "cartopy>=0.21.1", "matplotlib>=3.7.1", "scipy>=1.10.1", From 7beeb59d27b30e352db177e49ed4be930a05de85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:25:46 +0000 Subject: [PATCH 3/6] Bump xarray supported versions --- pyaerocom/helpers.py | 2 +- pyaerocom_env.yml | 2 +- pyproject.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyaerocom/helpers.py b/pyaerocom/helpers.py index 05cb4872c..0540fa0a5 100644 --- a/pyaerocom/helpers.py +++ b/pyaerocom/helpers.py @@ -986,7 +986,7 @@ def make_datetime_index(start, stop, freq): freq, offset = _get_pandas_freq_and_offset(freq) idx = pd.date_range(start=start, end=stop, freq=freq) if offset is not None: - idx = idx + pd.Timedelta(offset) + idx = idx + offset return idx diff --git a/pyaerocom_env.yml b/pyaerocom_env.yml index c92368886..0cbdee7c0 100644 --- a/pyaerocom_env.yml +++ b/pyaerocom_env.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - iris >=3.8.1 - - xarray >=2022.10.0, <2024.7.0 + - xarray >=2022.12.0 - cartopy >=0.21.1 - matplotlib-base >=3.7.1 - scipy >=1.10.1 diff --git a/pyproject.toml b/pyproject.toml index 906c7df2d..7fb10fa2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ requires-python = ">=3.10" dependencies = [ "aerovaldb@git+https://github.com/metno/aerovaldb.git@v0.0.14", "scitools-iris>=3.8.1", - "xarray>=2022.10.0", + "xarray>=2022.12.0", "cartopy>=0.21.1", "matplotlib>=3.7.1", "scipy>=1.10.1", @@ -230,7 +230,7 @@ deps = pydantic ==2.7.1; python_version < "3.11" pyaro == 0.0.10; python_version < "3.11" pooch ==1.7.0; python_version < "3.11" - xarray ==2022.10.0; python_version < "3.11" + xarray ==2022.12.0; python_version < "3.11" pandas ==1.5.3; python_version < "3.11" [testenv:lint] From 744e87f219a4ddd37fdf175f6835ff97662f526f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:34:20 +0000 Subject: [PATCH 4/6] Type hints --- pyaerocom/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/helpers.py b/pyaerocom/helpers.py index 0540fa0a5..d1fa62bc6 100644 --- a/pyaerocom/helpers.py +++ b/pyaerocom/helpers.py @@ -945,7 +945,7 @@ def merge_station_data( return merged -def _get_pandas_freq_and_offset(freq): +def _get_pandas_freq_and_offset(freq: str) -> tuple[str, pd.Timedelta]: """Helper to convert resampling info""" if freq in TS_TYPE_TO_PANDAS_FREQ: freq = TS_TYPE_TO_PANDAS_FREQ[freq] From c04a66752dd75de624200481be5a57379e2f1187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:56:07 +0000 Subject: [PATCH 5/6] fix type-hint --- pyaerocom/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/helpers.py b/pyaerocom/helpers.py index d1fa62bc6..16b72b21b 100644 --- a/pyaerocom/helpers.py +++ b/pyaerocom/helpers.py @@ -945,7 +945,7 @@ def merge_station_data( return merged -def _get_pandas_freq_and_offset(freq: str) -> tuple[str, pd.Timedelta]: +def _get_pandas_freq_and_offset(freq: str) -> tuple[str, pd.Timedelta | None]: """Helper to convert resampling info""" if freq in TS_TYPE_TO_PANDAS_FREQ: freq = TS_TYPE_TO_PANDAS_FREQ[freq] From c4b993ddad02e09e0738f9479fd90bc883c9ceae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?thorbjoernl=20=28Thorbj=C3=B8rn=29?= <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:58:07 +0000 Subject: [PATCH 6/6] Remove commented code --- pyaerocom/time_config.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyaerocom/time_config.py b/pyaerocom/time_config.py index bb47da97e..107060f00 100644 --- a/pyaerocom/time_config.py +++ b/pyaerocom/time_config.py @@ -48,9 +48,6 @@ "h": pd.Timedelta(30, "m"), } - -# PANDAS_RESAMPLE_OFFSETS = {"YS": pd.Timestamp("181D"), "MS": pd.Timestamp("14D"), "D": pd.Timestamp("12h"), "h": pd.Timestamp("30min")} - PANDAS_FREQ_TO_TS_TYPE = {v: k for k, v in TS_TYPE_TO_PANDAS_FREQ.items()} # frequency strings