Skip to content

Commit

Permalink
Add FutureWarning for change in decode_timedelta behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
spencerkclark committed Jan 29, 2025
1 parent 714e17d commit 3a96c8a
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 16 deletions.
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ Breaking changes

Deprecations
~~~~~~~~~~~~
- In a future version of xarray decoding of variables into
:py:class:`numpy.timedelta64` values will be disabled by default. To silence
warnings associated with this, set ``decode_timedelta`` to ``True``,
``False``, or a :py:class:`coders.CFTimedeltaCoder` instance when opening
data (:issue:`1621`, :pull:`9966`). By `Spencer Clark
<https://github.com/spencerkclark>`_.


Bug fixes
Expand Down
9 changes: 9 additions & 0 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,7 @@ def __init__(
time_unit: PDDatetimeUnitOptions = "ns",
) -> None:
self.time_unit = time_unit
self._emit_decode_timedelta_future_warning = False

def encode(self, variable: Variable, name: T_Name = None) -> Variable:
if np.issubdtype(variable.data.dtype, np.timedelta64):
Expand All @@ -1373,6 +1374,14 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
def decode(self, variable: Variable, name: T_Name = None) -> Variable:
units = variable.attrs.get("units", None)
if isinstance(units, str) and units in TIME_UNITS:
if self._emit_decode_timedelta_future_warning:
emit_user_level_warning(
"In a future version of xarray decode_timedelta will "
"default to False rather than None. To silence this "
"warning, set decode_timedelta to True, False, or a "
"'CFTimedeltaCoder' instance.",
FutureWarning,
)
dims, data, attrs, encoding = unpack_for_decoding(variable)

units = pop_to(attrs, encoding, "units")
Expand Down
9 changes: 9 additions & 0 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import itertools
import warnings
from collections import defaultdict
from collections.abc import Hashable, Iterable, Mapping, MutableMapping
from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union
Expand Down Expand Up @@ -172,6 +173,7 @@ def decode_cf_variable(

original_dtype = var.dtype

decode_timedelta_was_none = decode_timedelta is None
if decode_timedelta is None:
if isinstance(decode_times, CFDatetimeCoder):
decode_timedelta = CFTimedeltaCoder(time_unit=decode_times.time_unit)
Expand Down Expand Up @@ -200,6 +202,9 @@ def decode_cf_variable(
if decode_timedelta:
if not isinstance(decode_timedelta, CFTimedeltaCoder):
decode_timedelta = CFTimedeltaCoder()
decode_timedelta._emit_decode_timedelta_future_warning = (
decode_timedelta_was_none
)
var = decode_timedelta.decode(var, name=name)
if decode_times:
# remove checks after end of deprecation cycle
Expand Down Expand Up @@ -352,6 +357,10 @@ def decode_cf_variables(
See: decode_cf_variable
"""
# Only emit once instance of the decode_timedelta default change
# FutureWarning. This can be removed once this change is made.
warnings.filterwarnings("once", "decode_timedelta", FutureWarning)

dimensions_used_by = defaultdict(list)
for v in variables.values():
for d in v.dims:
Expand Down
14 changes: 11 additions & 3 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from xarray.backends.pydap_ import PydapDataStore
from xarray.backends.scipy_ import ScipyBackendEntrypoint
from xarray.backends.zarr import ZarrStore
from xarray.coders import CFDatetimeCoder
from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
from xarray.coding.cftime_offsets import cftime_range
from xarray.coding.strings import check_vlen_dtype, create_vlen_dtype
from xarray.coding.variables import SerializationWarning
Expand Down Expand Up @@ -639,7 +639,9 @@ def test_roundtrip_timedelta_data(self) -> None:
# to support large ranges
time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s") # type: ignore[arg-type, unused-ignore]
expected = Dataset({"td": ("td", time_deltas), "td0": time_deltas[0]})
with self.roundtrip(expected) as actual:
with self.roundtrip(
expected, open_kwargs={"decode_timedelta": CFTimedeltaCoder(time_unit="ns")}
) as actual:
assert_identical(expected, actual)

def test_roundtrip_float64_data(self) -> None:
Expand Down Expand Up @@ -3267,7 +3269,13 @@ def test_attributes(self, obj) -> None:
def test_chunked_datetime64_or_timedelta64(self, dtype) -> None:
# Generalized from @malmans2's test in PR #8253
original = create_test_data().astype(dtype).chunk(1)
with self.roundtrip(original, open_kwargs={"chunks": {}}) as actual:
with self.roundtrip(
original,
open_kwargs={
"chunks": {},
"decode_timedelta": CFTimedeltaCoder(time_unit="ns"),
},
) as actual:
for name, actual_var in actual.variables.items():
assert original[name].chunks == actual_var.chunks
assert original.chunks == actual.chunks
Expand Down
46 changes: 35 additions & 11 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1542,7 +1542,10 @@ def test_roundtrip_timedelta64_nanosecond_precision(

encoded_var = conventions.encode_cf_variable(var)
decoded_var = conventions.decode_cf_variable(
"foo", encoded_var, decode_times=CFDatetimeCoder(time_unit=time_unit)
"foo",
encoded_var,
decode_times=CFDatetimeCoder(time_unit=time_unit),
decode_timedelta=CFTimedeltaCoder(time_unit=time_unit),
)

assert_identical(var, decoded_var)
Expand All @@ -1569,7 +1572,9 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
assert encoded_var.dtype == np.int64
assert encoded_var.attrs["units"] == needed_units
assert encoded_var.attrs["_FillValue"] == 20
decoded_var = conventions.decode_cf_variable("foo", encoded_var)
decoded_var = conventions.decode_cf_variable(
"foo", encoded_var, decode_timedelta=CFTimedeltaCoder(time_unit="ns")
)
assert_identical(var, decoded_var)
assert decoded_var.encoding["dtype"] == np.int64

Expand Down Expand Up @@ -1617,7 +1622,9 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None
assert encoded_var.attrs["units"] == units
assert encoded_var.attrs["_FillValue"] == fill_value

decoded_var = conventions.decode_cf_variable("foo", encoded_var)
decoded_var = conventions.decode_cf_variable(
"foo", encoded_var, decode_timedelta=CFTimedeltaCoder(time_unit="ns")
)
assert_identical(var, decoded_var)
assert decoded_var.encoding["units"] == units
assert decoded_var.encoding["_FillValue"] == fill_value
Expand Down Expand Up @@ -1808,7 +1815,9 @@ def test_encode_cf_timedelta_casting_value_error(use_dask) -> None:
with pytest.warns(UserWarning, match="Timedeltas can't be serialized"):
encoded = conventions.encode_cf_variable(variable)
assert encoded.attrs["units"] == "hours"
decoded = conventions.decode_cf_variable("name", encoded)
decoded = conventions.decode_cf_variable(
"name", encoded, decode_timedelta=CFTimedeltaCoder(time_unit="ns")
)
assert_equal(variable, decoded)
else:
with pytest.raises(ValueError, match="Not possible"):
Expand All @@ -1832,43 +1841,58 @@ def test_encode_cf_timedelta_casting_overflow_error(use_dask, dtype) -> None:


_DECODE_TIMEDELTA_TESTS = {
"default": (True, None, np.dtype("timedelta64[ns]")),
"decode_timdelta=False": (True, False, np.dtype("int64")),
"default": (True, None, np.dtype("timedelta64[ns]"), True),
"decode_timdelta=False": (True, False, np.dtype("int64"), False),
"inherit-time_unit-from-decode_times": (
CFDatetimeCoder(time_unit="s"),
None,
np.dtype("timedelta64[s]"),
True,
),
"set-time_unit-via-CFTimedeltaCoder-decode_times=True": (
True,
CFTimedeltaCoder(time_unit="s"),
np.dtype("timedelta64[s]"),
False,
),
"set-time_unit-via-CFTimedeltaCoder-decode_times=False": (
False,
CFTimedeltaCoder(time_unit="s"),
np.dtype("timedelta64[s]"),
False,
),
"override-time_unit-from-decode_times": (
CFDatetimeCoder(time_unit="ns"),
CFTimedeltaCoder(time_unit="s"),
np.dtype("timedelta64[s]"),
False,
),
}


@pytest.mark.parametrize(
("decode_times", "decode_timedelta", "expected_dtype"),
("decode_times", "decode_timedelta", "expected_dtype", "warns"),
list(_DECODE_TIMEDELTA_TESTS.values()),
ids=list(_DECODE_TIMEDELTA_TESTS.keys()),
)
def test_decode_timedelta(decode_times, decode_timedelta, expected_dtype) -> None:
def test_decode_timedelta(
decode_times, decode_timedelta, expected_dtype, warns
) -> None:
timedeltas = pd.timedelta_range(0, freq="d", periods=3)
var = Variable(["time"], timedeltas)
encoded = conventions.encode_cf_variable(var)
decoded = conventions.decode_cf_variable(
"foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta
)
if warns:
with pytest.warns(FutureWarning, match="decode_timedelta"):
decoded = conventions.decode_cf_variable(
"foo",
encoded,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
)
else:
decoded = conventions.decode_cf_variable(
"foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta
)
if decode_timedelta is False:
assert_equal(encoded, decoded)
else:
Expand Down
18 changes: 16 additions & 2 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
)
from xarray.backends.common import WritableCFDataStore
from xarray.backends.memory import InMemoryDataStore
from xarray.coders import CFDatetimeCoder
from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
from xarray.conventions import decode_cf
from xarray.testing import assert_identical
from xarray.tests import (
Expand Down Expand Up @@ -536,7 +536,9 @@ def test_decode_cf_time_kwargs(self, time_unit) -> None:
)

dsc = conventions.decode_cf(
ds, decode_times=CFDatetimeCoder(time_unit=time_unit)
ds,
decode_times=CFDatetimeCoder(time_unit=time_unit),
decode_timedelta=CFTimedeltaCoder(time_unit=time_unit),
)
assert dsc.timedelta.dtype == np.dtype(f"m8[{time_unit}]")
assert dsc.time.dtype == np.dtype(f"M8[{time_unit}]")
Expand Down Expand Up @@ -655,3 +657,15 @@ def test_encode_cf_variable_with_vlen_dtype() -> None:
encoded_v = conventions.encode_cf_variable(v)
assert encoded_v.data.dtype.kind == "O"
assert coding.strings.check_vlen_dtype(encoded_v.data.dtype) is str


def test_decode_cf_variables_decode_timedelta_warning() -> None:
v = Variable(["time"], [1, 2], attrs={"units": "seconds"})
variables = {"a": v}

with warnings.catch_warnings():
warnings.filterwarnings("error", "decode_timedelta", FutureWarning)
conventions.decode_cf_variables(variables, {}, decode_timedelta=True)

with pytest.warns(FutureWarning, match="decode_timedelta"):
conventions.decode_cf_variables(variables, {})

0 comments on commit 3a96c8a

Please sign in to comment.