Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[skip-ci] Add cftime groupby, resample benchmarks #7795

Merged
merged 11 commits into from
May 2, 2023
1 change: 1 addition & 0 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",
"conda_channels": ["conda-forge"],

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
Expand Down
67 changes: 54 additions & 13 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,29 @@ def setup(self, *args, **kwargs):
"c": xr.DataArray(np.arange(2 * self.n)),
}
)
self.ds2d = self.ds1d.expand_dims(z=10)
self.ds2d = self.ds1d.expand_dims(z=10).copy()
self.ds1d_mean = self.ds1d.groupby("b").mean()
self.ds2d_mean = self.ds2d.groupby("b").mean()

@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
    """Time building a groupby object over "b" on the ``ds{ndim}d`` dataset.

    Only the construction of the groupby object is timed; no aggregation
    is performed on it.
    """
    dataset = getattr(self, f"ds{ndim}d")
    dataset.groupby("b")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_small_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("a"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.groupby("a"), method)().compute()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_large_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("b"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.groupby("b"), method)().compute()

def time_binary_op_1d(self):
    """Time subtracting ``self.ds1d_mean`` from ``self.ds1d`` grouped by "b"."""
    difference = self.ds1d.groupby("b") - self.ds1d_mean
    # Call compute() so the subtraction result is actually evaluated.
    difference.compute()
Expand Down Expand Up @@ -115,15 +121,21 @@ def setup(self, *args, **kwargs):
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").resample(time="D")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_small_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="3M"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.resample(time="3M"), method)().compute()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_large_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="48H"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.resample(time="48H"), method)().compute()


class ResampleDask(Resample):
Expand All @@ -132,3 +144,32 @@ def setup(self, *args, **kwargs):
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"time": 50})
self.ds2d = self.ds2d.chunk({"time": 50, "z": 4})


class ResampleCFTime(Resample):
    """``Resample`` benchmarks on a time axis built with ``calendar="noleap"``.

    Only ``setup`` is overridden; the timed methods come from ``Resample``.
    """

    def setup(self, *args, **kwargs):
        """Create the hourly 1-D and 2-D datasets and their 48-hourly means.

        Positional and keyword arguments are accepted but not used.
        """
        n_hours = 365 * 24
        hourly = xr.date_range(
            "2001-01-01", freq="H", periods=n_hours, calendar="noleap"
        )
        # Float-valued ramp with one sample per time step.
        values = np.arange(float(n_hours))

        one_d = xr.Dataset({"b": ("time", values)}, coords={"time": hourly})
        two_d = one_d.expand_dims(z=10)

        self.ds1d = one_d
        self.ds2d = two_d
        self.ds1d_mean = one_d.resample(time="48H").mean()
        self.ds2d_mean = two_d.resample(time="48H").mean()


@parameterized(["use_cftime", "use_flox"], [[True, False], [True, False]])
class GroupByLongTime:
    """Time a ``"time.year"`` groupby-mean over a long time axis.

    Parameterized over ``use_cftime`` (how the time coordinate is built)
    and ``use_flox`` (the xarray option active while aggregating).
    """

    def setup(self, use_cftime, use_flox):
        """Build a random (10, 10, 30 * 365) array with a time coordinate.

        ``use_flox`` is part of the parameter grid but is not needed here.
        """
        n_steps = 30 * 365
        values = np.random.randn(10, 10, n_steps)
        time_coord = xr.date_range("2000", periods=n_steps, use_cftime=use_cftime)
        self.da = xr.DataArray(
            values,
            dims=("y", "x", "time"),
            coords={"time": time_coord},
        )

    def time_mean(self, use_cftime, use_flox):
        """Group ``self.da`` by ``"time.year"`` and take the mean."""
        with xr.set_options(use_flox=use_flox):
            by_year = self.da.groupby("time.year")
            by_year.mean()