Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix upstream dev issues #9953

Merged
merged 2 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ Deprecations

Bug fixes
~~~~~~~~~
- Fix issues related to Pandas v3 ("us" vs. "ns" for python datetime, copy on write) and handling of 0d-numpy arrays in datetime/timedelta decoding (:pull:`9953`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.


Documentation
Expand Down
12 changes: 8 additions & 4 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,20 +579,23 @@ def _numbers_to_timedelta(
) -> np.ndarray:
"""Transform numbers to np.timedelta64."""
# keep NaT/nan mask
nan = np.isnan(flat_num) | (flat_num == np.iinfo(np.int64).min)
if flat_num.dtype.kind == "f":
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit more explicit. The `np.asarray` wrapping is needed on upstream-dev so that 0d arrays remain arrays.

nan = np.asarray(np.isnan(flat_num))
elif flat_num.dtype.kind == "i":
nan = np.asarray(flat_num == np.iinfo(np.int64).min)

# in case we need to change the unit, we fix the numbers here
# this should be safe, as errors would have been raised above
ns_time_unit = _NS_PER_TIME_DELTA[time_unit]
ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit]
if ns_time_unit > ns_ref_date_unit:
flat_num *= np.int64(ns_time_unit / ns_ref_date_unit)
flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With pandas 3.0 copy-on-write (CoW), this array might be flagged read-only, so we create a copy here instead of multiplying in place. The `np.asarray` wrapping is needed on upstream-dev so that 0d arrays remain arrays.

time_unit = ref_unit

# estimate fitting resolution for floating point values
# this iterates until all floats are fractionless or time_unit == "ns"
if flat_num.dtype.kind == "f" and time_unit != "ns":
flat_num_dates, new_time_unit = _check_higher_resolution(flat_num, time_unit) # type: ignore[arg-type]
flat_num, new_time_unit = _check_higher_resolution(flat_num, time_unit)
if time_unit != new_time_unit:
msg = (
f"Can't decode floating point {datatype} to {time_unit!r} without "
Expand All @@ -608,7 +611,8 @@ def _numbers_to_timedelta(
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
flat_num = flat_num.astype(np.int64)
flat_num[nan] = np.iinfo(np.int64).min
if nan.any():
flat_num[nan] = np.iinfo(np.int64).min

# cast to wanted type
return flat_num.astype(f"timedelta64[{time_unit}]")
Expand Down
43 changes: 33 additions & 10 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
assert_identical,
assert_no_warnings,
has_dask_ge_2024_11_0,
has_pandas_3,
raise_if_dask_computes,
requires_bottleneck,
requires_cupy,
Expand Down Expand Up @@ -208,8 +209,11 @@ def test_index_0d_datetime(self):
x = self.cls(["x"], [np.datetime64(d)])
self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[us]")

expected_unit = "us" if has_pandas_3 else "ns"
x = self.cls(["x"], pd.DatetimeIndex([d]))
self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[ns]")
self._assertIndexedLikeNDArray(
x, np.datetime64(d), f"datetime64[{expected_unit}]"
)

def test_index_0d_timedelta64(self):
td = timedelta(hours=1)
Expand Down Expand Up @@ -283,7 +287,10 @@ def test_0d_time_data(self):
(dt64_data.values.astype("datetime64[m]"), "s"),
(dt64_data.values.astype("datetime64[s]"), "s"),
(dt64_data.values.astype("datetime64[ps]"), "ns"),
(dt64_data.to_pydatetime(), "ns"),
(
dt64_data.to_pydatetime(),
"us" if has_pandas_3 else "ns",
),
],
)
def test_datetime64_conversion(self, values, unit):
Expand Down Expand Up @@ -1071,8 +1078,14 @@ def test_numpy_same_methods(self):
"values, unit",
[
(np.datetime64("2000-01-01"), "s"),
(pd.Timestamp("2000-01-01T00"), "ns"),
(datetime(2000, 1, 1), "ns"),
(
pd.Timestamp("2000-01-01T00"),
"s" if has_pandas_3 else "ns",
),
(
datetime(2000, 1, 1),
"us" if has_pandas_3 else "ns",
),
(np.datetime64("2000-01-01T00:00:00.1234567891"), "ns"),
],
)
Expand Down Expand Up @@ -1109,8 +1122,9 @@ def test_0d_str(self):

def test_0d_datetime(self):
v = Variable([], pd.Timestamp("2000-01-01"))
assert v.dtype == np.dtype("datetime64[ns]")
assert v.values == np.datetime64("2000-01-01", "ns")
expected_unit = "s" if has_pandas_3 else "ns"
assert v.dtype == np.dtype(f"datetime64[{expected_unit}]")
assert v.values == np.datetime64("2000-01-01", expected_unit)

@pytest.mark.parametrize(
"values, unit", [(pd.to_timedelta("1s"), "ns"), (np.timedelta64(1, "s"), "s")]
Expand Down Expand Up @@ -2654,11 +2668,14 @@ def test_datetime(self):
assert np.dtype("datetime64[ns]") == actual.dtype
assert expected is source_ndarray(np.asarray(actual))

expected = np.datetime64("2000-01-01", "ns")
expected = np.datetime64(
"2000-01-01",
"us" if has_pandas_3 else "ns",
)
actual = as_compatible_data(datetime(2000, 1, 1))
assert np.asarray(expected) == actual
assert np.ndarray is type(actual)
assert np.dtype("datetime64[ns]") == actual.dtype
assert expected.dtype == actual.dtype

def test_tz_datetime(self) -> None:
tz = pytz.timezone("America/New_York")
Expand Down Expand Up @@ -2980,8 +2997,14 @@ def test_from_pint_wrapping_dask(self, Var):
(np.array([np.datetime64("2000-01-01", "ns")]), "ns"),
(np.array([np.datetime64("2000-01-01", "s")]), "s"),
(pd.date_range("2000", periods=1), "ns"),
(datetime(2000, 1, 1), "ns"),
(np.array([datetime(2000, 1, 1)]), "ns"),
(
datetime(2000, 1, 1),
"us" if has_pandas_3 else "ns",
),
(
np.array([datetime(2000, 1, 1)]),
"us" if has_pandas_3 else "ns",
),
(pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), "ns"),
(
pd.Series(
Expand Down
Loading