From 5761de6812015e3a1deb1cee86e6905a30575155 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?=
Date: Fri, 17 Jan 2025 14:16:47 +0100
Subject: [PATCH] fix upstream dev issues (#9953)

* fix pandas dev issues

* add whats-new.rst entry
---
 doc/whats-new.rst             |  2 ++
 xarray/coding/times.py        | 12 ++++++----
 xarray/tests/test_variable.py | 43 +++++++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 95aa5a57438..467ef536a08 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -63,6 +63,8 @@ Deprecations
 
 Bug fixes
 ~~~~~~~~~
+- Fix issues related to Pandas v3 ("us" vs. "ns" for python datetime, copy on write) and handling of 0d-numpy arrays in datetime/timedelta decoding (:pull:`9953`).
+  By `Kai Mühlbauer `_.
 
 
 Documentation
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index adbec3b9063..0f9a7a48ef8 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -579,20 +579,23 @@ def _numbers_to_timedelta(
 ) -> np.ndarray:
     """Transform numbers to np.timedelta64."""
     # keep NaT/nan mask
-    nan = np.isnan(flat_num) | (flat_num == np.iinfo(np.int64).min)
+    if flat_num.dtype.kind == "f":
+        nan = np.asarray(np.isnan(flat_num))
+    elif flat_num.dtype.kind == "i":
+        nan = np.asarray(flat_num == np.iinfo(np.int64).min)
 
     # in case we need to change the unit, we fix the numbers here
     # this should be safe, as errors would have been raised above
     ns_time_unit = _NS_PER_TIME_DELTA[time_unit]
     ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit]
     if ns_time_unit > ns_ref_date_unit:
-        flat_num *= np.int64(ns_time_unit / ns_ref_date_unit)
+        flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit))
         time_unit = ref_unit
 
     # estimate fitting resolution for floating point values
     # this iterates until all floats are fractionless or time_unit == "ns"
     if flat_num.dtype.kind == "f" and time_unit != "ns":
-        flat_num_dates, new_time_unit = _check_higher_resolution(flat_num, time_unit)  # type: ignore[arg-type]
+        flat_num, new_time_unit = _check_higher_resolution(flat_num, time_unit)
         if time_unit != new_time_unit:
             msg = (
                 f"Can't decode floating point {datatype} to {time_unit!r} without "
@@ -608,7 +611,8 @@ def _numbers_to_timedelta(
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", RuntimeWarning)
         flat_num = flat_num.astype(np.int64)
-        flat_num[nan] = np.iinfo(np.int64).min
+        if nan.any():
+            flat_num[nan] = np.iinfo(np.int64).min
 
     # cast to wanted type
     return flat_num.astype(f"timedelta64[{time_unit}]")
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 4cf4204649d..c283797bd08 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -37,6 +37,7 @@
     assert_identical,
     assert_no_warnings,
     has_dask_ge_2024_11_0,
+    has_pandas_3,
     raise_if_dask_computes,
     requires_bottleneck,
     requires_cupy,
@@ -208,8 +209,11 @@ def test_index_0d_datetime(self):
         x = self.cls(["x"], [np.datetime64(d)])
         self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[us]")
 
+        expected_unit = "us" if has_pandas_3 else "ns"
         x = self.cls(["x"], pd.DatetimeIndex([d]))
-        self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[ns]")
+        self._assertIndexedLikeNDArray(
+            x, np.datetime64(d), f"datetime64[{expected_unit}]"
+        )
 
     def test_index_0d_timedelta64(self):
         td = timedelta(hours=1)
@@ -283,7 +287,10 @@ def test_0d_time_data(self):
             (dt64_data.values.astype("datetime64[m]"), "s"),
             (dt64_data.values.astype("datetime64[s]"), "s"),
             (dt64_data.values.astype("datetime64[ps]"), "ns"),
-            (dt64_data.to_pydatetime(), "ns"),
+            (
+                dt64_data.to_pydatetime(),
+                "us" if has_pandas_3 else "ns",
+            ),
         ],
     )
     def test_datetime64_conversion(self, values, unit):
@@ -1071,8 +1078,14 @@ def test_numpy_same_methods(self):
         "values, unit",
         [
             (np.datetime64("2000-01-01"), "s"),
-            (pd.Timestamp("2000-01-01T00"), "ns"),
-            (datetime(2000, 1, 1), "ns"),
+            (
+                pd.Timestamp("2000-01-01T00"),
+                "s" if has_pandas_3 else "ns",
+            ),
+            (
+                datetime(2000, 1, 1),
+                "us" if has_pandas_3 else "ns",
+            ),
             (np.datetime64("2000-01-01T00:00:00.1234567891"), "ns"),
         ],
     )
@@ -1109,8 +1122,9 @@ def test_0d_str(self):
 
     def test_0d_datetime(self):
         v = Variable([], pd.Timestamp("2000-01-01"))
-        assert v.dtype == np.dtype("datetime64[ns]")
-        assert v.values == np.datetime64("2000-01-01", "ns")
+        expected_unit = "s" if has_pandas_3 else "ns"
+        assert v.dtype == np.dtype(f"datetime64[{expected_unit}]")
+        assert v.values == np.datetime64("2000-01-01", expected_unit)
 
     @pytest.mark.parametrize(
         "values, unit", [(pd.to_timedelta("1s"), "ns"), (np.timedelta64(1, "s"), "s")]
@@ -2654,11 +2668,14 @@ def test_datetime(self):
         assert np.dtype("datetime64[ns]") == actual.dtype
         assert expected is source_ndarray(np.asarray(actual))
 
-        expected = np.datetime64("2000-01-01", "ns")
+        expected = np.datetime64(
+            "2000-01-01",
+            "us" if has_pandas_3 else "ns",
+        )
         actual = as_compatible_data(datetime(2000, 1, 1))
         assert np.asarray(expected) == actual
         assert np.ndarray is type(actual)
-        assert np.dtype("datetime64[ns]") == actual.dtype
+        assert expected.dtype == actual.dtype
 
     def test_tz_datetime(self) -> None:
         tz = pytz.timezone("America/New_York")
@@ -2980,8 +2997,14 @@ def test_from_pint_wrapping_dask(self, Var):
         (np.array([np.datetime64("2000-01-01", "ns")]), "ns"),
         (np.array([np.datetime64("2000-01-01", "s")]), "s"),
         (pd.date_range("2000", periods=1), "ns"),
-        (datetime(2000, 1, 1), "ns"),
-        (np.array([datetime(2000, 1, 1)]), "ns"),
+        (
+            datetime(2000, 1, 1),
+            "us" if has_pandas_3 else "ns",
+        ),
+        (
+            np.array([datetime(2000, 1, 1)]),
+            "us" if has_pandas_3 else "ns",
+        ),
         (pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), "ns"),
         (
             pd.Series(