diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 3b70ceb209..73699d2ec7 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -488,7 +488,7 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs): combined = self.clone(aggregated, kdims=kdims, new_type=Dataset) for i, d in enumerate(vdims): dim = d('_'.join([d.name, spread_name])) - dvals = error.dimension_values(d, False, False) + dvals = error.dimension_values(d, flat=False) combined = combined.add_dimension(dim, ndims+i, dvals, True) return combined.clone(new_type=Dataset if generic_type else type(self)) diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index ecf082b494..cee7e6e2fe 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -139,11 +139,17 @@ def aggregate(cls, columns, dimensions, function, **kwargs): cols = [d.name for d in columns.kdims if d in dimensions] vdims = columns.dimensions('value', label='name') reindexed = data[cols+vdims] + if function in [np.std, np.var]: + # Fix for consistency with other backend + # pandas uses ddof=1 for std and var + fn = lambda x: function(x, ddof=0) + else: + fn = function if len(dimensions): grouped = reindexed.groupby(cols, sort=False) - return grouped.aggregate(function, **kwargs).reset_index() + return grouped.aggregate(fn, **kwargs).reset_index() else: - agg = reindexed.apply(function, **kwargs) + agg = reindexed.apply(fn, **kwargs) return pd.DataFrame.from_items([(col, [v]) for col, v in zip(agg.index, agg.values)]) diff --git a/tests/testdataset.py b/tests/testdataset.py index 0db16c8805..92e5aeb0c7 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -397,6 +397,15 @@ def test_dataset_2D_partial_reduce_ht(self): kdims=['x'], vdims=['z']) self.assertEqual(dataset.reduce(['y'], np.mean), reduced) + def test_dataset_2D_aggregate_spread_fn_with_duplicates(self): + dataset = Dataset({'x': np.array([0, 0, 1, 1]), 'y': np.array([0, 1, 2, 3]), + 'z': np.array([1, 2, 3, 4])}, + kdims=['x', 'y'], vdims=['z']) + agg = dataset.aggregate('x', function=np.mean, spreadfn=np.var) + self.assertEqual(agg, Dataset({'x': np.array([0, 1]), 'z': np.array([1.5, 3.5]), + 'z_var': np.array([0.25, 0.25])}, + kdims=['x'], vdims=['z', 'z_var'])) + def test_dataset_aggregate_ht(self): aggregated = Dataset({'Gender':['M', 'F'], 'Weight':[16.5, 10], 'Height':[0.7, 0.8]}, kdims=self.kdims[:1], vdims=self.vdims) @@ -584,6 +593,9 @@ def test_dataset_add_dimensions_values_hm(self): def test_dataset_add_dimensions_values_ht(self): raise SkipTest("Not supported") + def test_dataset_2D_aggregate_spread_fn_with_duplicates(self): + raise SkipTest("Not supported") + def test_dataset_sort_hm(self): raise SkipTest("Not supported")