Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[skip-ci] Add cftime groupby, resample benchmarks #7795

Merged
11 commits merged on May 2, 2023 (the source and target branch names are missing from this capture)
1 change: 1 addition & 0 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",
"conda_channels": ["conda-forge"],

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
Expand Down
60 changes: 50 additions & 10 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,29 @@ def setup(self, *args, **kwargs):
"c": xr.DataArray(np.arange(2 * self.n)),
}
)
self.ds2d = self.ds1d.expand_dims(z=10)
self.ds2d = self.ds1d.expand_dims(z=10).copy()
self.ds1d_mean = self.ds1d.groupby("b").mean()
self.ds2d_mean = self.ds2d.groupby("b").mean()

@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").groupby("b")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_small_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("a"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.groupby("a"), method)().compute()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_large_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("b"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.groupby("b"), method)().compute()

def time_binary_op_1d(self):
(self.ds1d.groupby("b") - self.ds1d_mean).compute()
Expand Down Expand Up @@ -115,10 +121,13 @@ def setup(self, *args, **kwargs):
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").resample(time="D")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
@parameterized(
["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
)
def time_agg_small_num_groups(self, method, ndim, use_flox):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="3M"), method)().compute()
with xr.set_options(use_flox=use_flox):
getattr(ds.resample(time="3M"), method)().compute()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
Expand All @@ -132,3 +141,34 @@ def setup(self, *args, **kwargs):
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"time": 50})
self.ds2d = self.ds2d.chunk({"time": 50, "z": 4})


class ResampleCFTime:
    """Resampling fixture built on an hourly time axis in the "noleap" calendar.

    NOTE(review): as written, this class defines only ``setup`` and no
    ``time_*`` methods, so on its own it provides no timed benchmarks.
    Presumably it is meant to subclass the resample benchmark class defined
    earlier in this module so that those methods run against this data;
    confirm against the full file.
    """

    def setup(self, *args, **kwargs):
        """Create the 1-D and 2-D datasets and their 48-hourly means.

        Positional and keyword arguments are accepted but not used here.
        """
        # 365 days of hourly samples; the values are simply 0.0, 1.0, 2.0, ...
        n_hours = 365 * 24
        values = np.arange(float(n_hours))
        hourly_axis = xr.date_range(
            "2001-01-01", freq="H", periods=n_hours, calendar="noleap"
        )

        self.ds1d = xr.Dataset(
            {"b": ("time", values)},
            coords={"time": hourly_axis},
        )
        # Same data with an extra length-10 "z" dimension added.
        self.ds2d = self.ds1d.expand_dims(z=10)

        # Reference results of a 48-hour resample-mean for each dataset.
        self.ds1d_mean = self.ds1d.resample(time="48H").mean()
        self.ds2d_mean = self.ds2d.resample(time="48H").mean()


class GroupByLongTime:
    """Benchmark a yearly groupby-mean over a 30 * 365 step time axis.

    Parameterized over ``use_cftime`` (forwarded to ``xr.date_range``) and
    ``use_flox`` (forwarded to ``xr.set_options``).
    """

    params = [[True, False], [True, False]]
    param_names = ["use_cftime", "use_flox"]

    def setup(self, use_cftime, use_flox):
        """Build a random (10, 10, 30 * 365) array indexed along "time".

        ``use_flox`` is not used here; it only affects ``time_mean``.
        """
        n_steps = 30 * 365
        values = np.random.randn(10, 10, n_steps)
        time_axis = xr.date_range("2000", periods=n_steps, use_cftime=use_cftime)
        self.da = xr.DataArray(
            values,
            dims=("y", "x", "time"),
            coords={"time": time_axis},
        )

    def time_mean(self, use_cftime, use_flox):
        """Time ``groupby("time.year").mean()`` with ``use_flox`` set as given."""
        with xr.set_options(use_flox=use_flox):
            yearly = self.da.groupby("time.year")
            yearly.mean()