From 35ed378f201ecf7c8cd80a7a6180583296781ed7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 1 May 2022 10:57:36 -0600 Subject: [PATCH] Support dask arrays in datetime_to_numeric --- xarray/core/duck_array_ops.py | 14 +++++++-- xarray/tests/test_duck_array_ops.py | 49 +++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index b85d0e1645e..b41170b2444 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -517,10 +517,20 @@ def pd_timedelta_to_float(value, datetime_unit): return np_timedelta64_to_float(value, datetime_unit) +def _timedelta_to_seconds(array): + return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + + def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution.""" - array = np.asarray(array) - array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + if not is_duck_array(array): + array = np.asarray(array) + if is_duck_dask_array(array): + array = array.map_blocks( + _timedelta_to_seconds, meta=np.array([], dtype=np.float64) + ) + else: + array = _timedelta_to_seconds(array) conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c329bc50c56..392f1b91914 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -675,39 +675,68 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_datetime_to_numeric_datetime64(): +@pytest.mark.parametrize("dask", [True, False]) +def test_datetime_to_numeric_datetime64(dask): + if dask and not has_dask: + pytest.skip("requires dask") + times = pd.date_range("2000", periods=5, freq="7D").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") + if dask: + import dask.array + + times = dask.array.from_array(times, chunks=-1) + + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h") + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h" + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) dtype = np.float32 - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit="h", dtype=dtype + ) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) @requires_cftime -def test_datetime_to_numeric_cftime(): +@pytest.mark.parametrize("dask", [True, False]) +def test_datetime_to_numeric_cftime(dask): + if dask and not has_dask: + pytest.skip("requires dask") + times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) + if dask: + import dask.array + + times = dask.array.from_array(times, chunks=-1) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric( - times, offset=offset, datetime_unit="h", dtype=int - ) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h", dtype=int + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) dtype = np.float32 - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit="h", dtype=dtype + ) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected)