diff --git a/numpy_groupies/aggregate_numpy.py b/numpy_groupies/aggregate_numpy.py index a6dae81..dd77fbb 100644 --- a/numpy_groupies/aggregate_numpy.py +++ b/numpy_groupies/aggregate_numpy.py @@ -24,7 +24,7 @@ def _sum(group_idx, a, size, fill_value, dtype=None): dtype = minimum_dtype_scalar(fill_value, dtype, a) if np.ndim(a) == 0: - ret = np.bincount(group_idx, minlength=size).astype(dtype) + ret = np.bincount(group_idx, minlength=size).astype(dtype, copy=False) if a != 1: ret *= a else: @@ -33,7 +33,9 @@ def _sum(group_idx, a, size, fill_value, dtype=None): ret.real = np.bincount(group_idx, weights=a.real, minlength=size) ret.imag = np.bincount(group_idx, weights=a.imag, minlength=size) else: - ret = np.bincount(group_idx, weights=a, minlength=size).astype(dtype) + ret = np.bincount(group_idx, weights=a, minlength=size).astype( + dtype, copy=False + ) if fill_value != 0: _fill_untouched(group_idx, ret, fill_value) @@ -146,10 +148,10 @@ def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)): sums.real = np.bincount(group_idx, weights=a.real, minlength=size) sums.imag = np.bincount(group_idx, weights=a.imag, minlength=size) else: - sums = np.bincount(group_idx, weights=a, minlength=size).astype(dtype) + sums = np.bincount(group_idx, weights=a, minlength=size).astype(dtype, copy=False) with np.errstate(divide="ignore", invalid="ignore"): - ret = sums.astype(dtype) / counts + ret = sums.astype(dtype, copy=False) / counts if not np.isnan(fill_value): ret[counts == 0] = fill_value return ret @@ -157,8 +159,8 @@ def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)): def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)): ret = np.bincount(group_idx, weights=a * a, minlength=size) - counts = np.bincount(group_idx, minlength=size) if fill_value != 0: + counts = np.bincount(group_idx, minlength=size) ret[counts == 0] = fill_value return ret @@ -171,7 +173,7 @@ def _var( counts = np.bincount(group_idx, minlength=size) sums = np.bincount(group_idx, weights=a, minlength=size) with np.errstate(divide="ignore", invalid="ignore"): - means = sums.astype(dtype) / counts + means = sums.astype(dtype, copy=False) / counts counts = np.where(counts > ddof, counts - ddof, 0) ret = ( np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts @@ -299,6 +301,7 @@ def _aggregate_base( dtype=None, axis=None, _impl_dict=_impl_dict, + is_pandas=False, **kwargs ): iv = input_validation(group_idx, a, size=size, order=order, axis=axis, func=func) @@ -324,7 +327,9 @@ def _aggregate_base( kwargs["_nansqueeze"] = True else: good = ~np.isnan(a) - a = a[good] + if "len" not in func or is_pandas: + # a is not needed for len, nanlen! + a = a[good] group_idx = group_idx[good] dtype = check_dtype(dtype, func, a, flat_size) diff --git a/numpy_groupies/aggregate_pandas.py b/numpy_groupies/aggregate_pandas.py index 5be2c97..4770fc0 100644 --- a/numpy_groupies/aggregate_pandas.py +++ b/numpy_groupies/aggregate_pandas.py @@ -69,6 +69,7 @@ def aggregate( func=func, axis=axis, _impl_dict=_impl_dict, + is_pandas=True, **kwargs )