From 0cccb903b4195a172fab95e4674abb56e5027ba1 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Sat, 20 Jan 2024 14:20:53 -0700
Subject: [PATCH] Optimize bitmask finding some more.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

```
| Before [e26fb39e]          | After [9753dfea]                    |   Ratio | Benchmark (Parameter)                                      |
|----------------------------|-------------------------------------|---------|------------------------------------------------------------|
| 4.10±0.05ms                | 3.64±0.04ms                         |    0.89 | cohorts.ERA5MonthHour.time_find_group_cohorts              |
| 4.56±0.05ms                | 4.00±0.06ms                         |    0.88 | cohorts.ERA5MonthHourRechunked.time_find_group_cohorts     |
| 3.31±0.08ms                | 2.79±0.01ms                         |    0.84 | cohorts.ERA5DayOfYear.time_find_group_cohorts              |
| 8.15±0.05ms                | 6.78±0.02ms                         |    0.83 | cohorts.OISST.time_find_group_cohorts                      |
| 659±10μs                   | 519±6μs                             |    0.79 | cohorts.PerfectBlockwiseResampling.time_find_group_cohorts |
| 663±20μs                   | 487±5μs                             |    0.73 | cohorts.PerfectMonthly.time_find_group_cohorts             |
| 2.75±0.03ms                | 1.93±0.03ms                         |     0.7 | cohorts.ERA5Google.time_find_group_cohorts                 |
```
---
 flox/core.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/flox/core.py b/flox/core.py
index a734a3a42..094a8867c 100644
--- a/flox/core.py
+++ b/flox/core.py
@@ -244,12 +244,11 @@ def _compute_label_chunk_bitmask(labels, chunks, nlabels):
 
     labels = np.broadcast_to(labels, shape[-labels.ndim :])
 
-    rows = []
     cols = []
     # Add one to handle the -1 sentinel value
     label_is_present = np.zeros((nlabels + 1,), dtype=bool)
     ilabels = np.arange(nlabels)
-    for idx, region in enumerate(slices_from_chunks(chunks)):
+    for region in slices_from_chunks(chunks):
         # This is a quite fast way to find unique integers, when we know how many there are
         # inspired by a similar idea in numpy_groupies for first, last
         # instead of explicitly finding uniques, repeatedly write True to the same location
@@ -259,10 +258,9 @@ def _compute_label_chunk_bitmask(labels, chunks, nlabels):
         # skip the -1 sentinel by slicing
         # Faster than np.argwhere by a lot
         uniques = ilabels[label_is_present[:-1]]
-        rows.append(np.full_like(uniques, idx))
         cols.append(uniques)
         label_is_present[:] = False
-    rows_array = np.concatenate(rows)
+    rows_array = np.repeat(np.arange(nchunks), tuple(len(col) for col in cols))
     cols_array = np.concatenate(cols)
     data = np.broadcast_to(np.array(1, dtype=np.uint8), rows_array.shape)
     bitmask = csc_array((data, (rows_array, cols_array)), dtype=bool, shape=(nchunks, nlabels))