From dc81dbc0c0ea647164a5165c5f8a26c793947185 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 31 Aug 2015 18:52:18 +0100 Subject: [PATCH 001/212] Initial dataframe integration on Charts --- holoviews/element/chart.py | 57 +++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 3605a7086a..76d61947da 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -1,4 +1,8 @@ import numpy as np +try: + import pandas as pd +except ImportError: + pd = None import param @@ -32,8 +36,7 @@ class Chart(Element2D): _null_value = np.array([[], []]).T # For when data is None def __init__(self, data, **kwargs): - data, params = self._process_data(data) - params.update(kwargs) + data, params = self._process_data(data, kwargs) super(Chart, self).__init__(data, **params) self.data = self._validate_data(self.data) @@ -48,9 +51,22 @@ def _convert_element(self, element): return super(Chart, self)._convert_element(element) - def _process_data(self, data): + def _process_data(self, data, kwargs): + self._pandas = False params = {} - if isinstance(data, UniformNdMapping) or (isinstance(data, list) and data + if pd is not None and isinstance(data, pd.DataFrame): + self._pandas = True + if 'kdims' in params or 'vdims' in params: + columns = params.get('kdims', []) + params.get('vdims', []) + col_labels = [c.name if isinstance(d, Dimension) else c + for c in columns] + if not all(c in data.columns for c in col_labels): + raise ValueError("Supplied dimensions don't match columns" + "in the dataframe.") + else: + params['kdims'] = list(data.columns[:len(self.kdims)]) + params['vdims'] = list(data.columns[len(self.kdims):]) + elif isinstance(data, UniformNdMapping) or (isinstance(data, list) and data and isinstance(data[0], Element2D)): params = dict([v for v in data][0].get_param_values(onlychanged=True)) data = np.concatenate([v.data for v in data]) @@ -63,7 +79,7 @@ def _process_data(self, data): data = self._null_value if (data is None) or (len(data) == 0) else data if len(data): data = np.array(data) - + params.update(kwargs) return data, params @@ -135,7 +151,6 @@ def __getitem__(self, slices): @classmethod def collapse_data(cls, data, function, **kwargs): - new_data = [arr[:, 1:] for arr in data] if isinstance(function, np.ufunc): collapsed = function.reduce(new_data) else: @@ -186,7 +201,14 @@ def __len__(self): def dimension_values(self, dim): index = self.get_dimension_index(dim) if index < len(self.dimensions()): - return self.data[:, index] + if self._pandas: + return self.data[self.data.columns[index]] + else: + if self.data.ndim == 1: + data = np.atleast_2d(self.data).T + else: + data = self.data + return data[:, index] else: return super(Chart, self).dimension_values(dim) @@ -197,12 +219,11 @@ def range(self, dim, data_range=True): if dim.range != (None, None): return dim.range elif dim_idx < len(self.dimensions()): - if self.data.ndim == 1: - data = np.atleast_2d(self.data).T - else: - data = self.data - if len(data): - data = data[:, dim_idx] + if self._pandas: + data = self.data[dim.name] + data_range = data.min(), data.max() + elif len(self.data): + data = self.dimension_values(dim_idx) data_range = np.nanmin(data), np.nanmax(data) else: data_range = (np.NaN, np.NaN) @@ -213,9 +234,13 @@ def range(self, dim, data_range=True): def dframe(self): - import pandas as pd - columns = [d.name for d in self.dimensions()] - return pd.DataFrame(self.data, columns=columns) + if 
self._pandas: + return self.data.copy() + elif pd: + columns = [d.name for d in self.dimensions()] + return pd.DataFrame(self.data, columns=columns) + else: + raise ImportError("Pandas not found.") From 7ed11749ec9d21a23415b28e7ad2eb99a4042f89 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 31 Aug 2015 18:53:51 +0100 Subject: [PATCH 002/212] Initial support for plotting Charts with dataframes --- holoviews/plotting/bokeh/chart.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index 455515b978..4eaef473c1 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -50,17 +50,17 @@ def get_data(self, element, ranges=None): if self.color_index < len(dims) and cmap: mapping['fill_color'] = dims[self.color_index] cmap = get_cmap(cmap) - colors = element.data[:, self.color_index] + colors = element.dimension_values(self.color_index) data[dims[self.color_index]] = map_colors(colors, ranges, cmap) if self.size_index < len(dims): mapping['size'] = dims[self.size_index] val_dim = dims[self.size_index] ms = style.get('size', 1) - sizes = element.data[:, self.size_index] + sizes = element.dimension_values(self.size_index) data[dims[self.size_index]] = compute_sizes(sizes, self.size_fn, self.scaling_factor, ms) - data[dims[0]] = element.data[:, 0] - data[dims[1]] = element.data[:, 1] + data[dims[0]] = element.dimension_values(0) + data[dims[1]] = element.dimension_values(1) return data, mapping @@ -71,7 +71,8 @@ class CurvePlot(ElementPlot): _plot_method = 'line' def get_data(self, element, ranges=None): - return (dict(x=element.data[:, 0], y=element.data[:, 1]), + return (dict(x=element.dimension_values(0), + y=element.dimension_values(1)), dict(x='x', y='y')) @@ -83,9 +84,9 @@ def __init__(self, *args, **kwargs): super(SpreadPlot, self).__init__(*args, **kwargs) def get_data(self, element, ranges=None): - lower = element.data[:, 1] - element.data[:, 2] - upper = element.data[:, 1] + element.data[:, 3] - band_x = np.append(element.data[:, 0], element.data[::-1, 0]) + lower = element.dimension_values(1) - element.dimension_values(2) + upper = element.dimension_values(1) + element.dimension_values(3) + band_x = np.append(element.dimension_values(0), element.dimension_values(0)[::-1]) band_y = np.append(lower, upper[::-1]) return dict(xs=[band_x], ys=[band_y]), self._mapping @@ -109,7 +110,8 @@ class ErrorPlot(PathPlot): style_opts = ['color'] + line_properties def get_data(self, element, ranges=None): - data = element.data + data = [element.dimension_values(i) + for i in range(element.dimensions())] err_xs = [] err_ys = [] for x, y, neg, pos in data: From 281d9d74c5686056ec998aa275dc3cfb86bc77a8 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 31 Aug 2015 20:14:34 +0100 Subject: [PATCH 003/212] Added support for dask dataframes on Charts --- holoviews/element/chart.py | 41 +++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 76d61947da..519dc748d8 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -4,6 +4,11 @@ except ImportError: pd = None +try: + import dask.dataframe as dd +except: + dd = None + import param from ..core import util @@ -52,14 +57,16 @@ def _convert_element(self, element): def _process_data(self, data, kwargs): - self._pandas = False + self._dataframe = False params = {} - if pd is not None and 
isinstance(data, pd.DataFrame): - self._pandas = True - if 'kdims' in params or 'vdims' in params: - columns = params.get('kdims', []) + params.get('vdims', []) - col_labels = [c.name if isinstance(d, Dimension) else c + if ((pd is not None and isinstance(data, pd.DataFrame)) or + (dd is not None and isinstance(data, dd.DataFrame))): + self._dataframe = True + if 'kdims' in kwargs or 'vdims' in kwargs: + columns = kwargs.get('kdims', self.kdims) + kwargs.get('vdims', self.vdims) + col_labels = [c.name if isinstance(c, Dimension) else c for c in columns] + data = data[col_labels] if not all(c in data.columns for c in col_labels): raise ValueError("Supplied dimensions don't match columns" "in the dataframe.") @@ -84,6 +91,8 @@ def _process_data(self, data, kwargs): def _validate_data(self, data): + if self._dataframe: + return data if data.ndim == 1: data = np.array(list(zip(range(len(data)), data))) if not data.shape[1] == len(self.dimensions()): @@ -201,8 +210,11 @@ def __len__(self): def dimension_values(self, dim): index = self.get_dimension_index(dim) if index < len(self.dimensions()): - if self._pandas: - return self.data[self.data.columns[index]] + if self._dataframe: + data = self.data[self.data.columns[index]] + if isinstance(data, dd.Series): + data = data.compute() + return data else: if self.data.ndim == 1: data = np.atleast_2d(self.data).T @@ -219,12 +231,13 @@ def range(self, dim, data_range=True): if dim.range != (None, None): return dim.range elif dim_idx < len(self.dimensions()): - if self._pandas: - data = self.data[dim.name] - data_range = data.min(), data.max() - elif len(self.data): + data = self.data[dim.name] + if len(self.data): data = self.dimension_values(dim_idx) - data_range = np.nanmin(data), np.nanmax(data) + if self._dataframe: + data_range = data.min(), data.max() + else: + data_range = np.nanmin(data), np.nanmax(data) else: data_range = (np.NaN, np.NaN) if data_range: @@ -234,7 +247,7 @@ def range(self, dim, data_range=True): def dframe(self): - if self._pandas: + if self._dataframe: return self.data.copy() elif pd: columns = [d.name for d in self.dimensions()] From 68d8e126a68a3d0b20deecaa674dce23d404b01f Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 31 Aug 2015 23:24:28 +0100 Subject: [PATCH 004/212] Fixed bug in Chart.range --- holoviews/element/chart.py | 1 - 1 file changed, 1 deletion(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 519dc748d8..9573a8a521 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -231,7 +231,6 @@ def range(self, dim, data_range=True): if dim.range != (None, None): return dim.range elif dim_idx < len(self.dimensions()): - data = self.data[dim.name] if len(self.data): data = self.dimension_values(dim_idx) if self._dataframe: From e8694dca4db44cb07611648c922d35d081dcd769 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Tue, 1 Sep 2015 02:13:25 +0100 Subject: [PATCH 005/212] Fix for Points dataframe len support --- holoviews/element/chart.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 9573a8a521..9510fc1299 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -519,9 +519,6 @@ class Points(Chart): _min_dims = 2 # Minimum number of columns - def __len__(self): - return self.data.shape[0] - def __iter__(self): i = 0 while i < len(self): From 6a5ed27189853f53993828637644c522ed8a4f6e Mon Sep 17 00:00:00 2001 From: philippjfr Date: Tue, 1 Sep 2015 16:09:46 +0100 Subject: [PATCH 006/212] 
Added support for Blaze data sources in Charts --- holoviews/element/chart.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 93173c0f4a..88caae4f89 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -6,9 +6,14 @@ try: import dask.dataframe as dd -except: +except ImportError: dd = None +try: + from blaze import bz +except ImportError: + bz = None + import param from ..core import util @@ -60,7 +65,8 @@ def _process_data(self, data, kwargs): self._dataframe = False params = {} if ((pd is not None and isinstance(data, pd.DataFrame)) or - (dd is not None and isinstance(data, dd.DataFrame))): + (dd is not None and isinstance(data, dd.DataFrame)), + (bz is not None and isinstance(data, bz.Data))): self._dataframe = True if 'kdims' in kwargs or 'vdims' in kwargs: columns = kwargs.get('kdims', self.kdims) + kwargs.get('vdims', self.vdims) From 45e7b312c35022ab2b72fd03d43b477bce29f75b Mon Sep 17 00:00:00 2001 From: jlstevens Date: Tue, 1 Sep 2015 19:03:47 +0100 Subject: [PATCH 007/212] Fixed bug in conditional logic handling numpy arrays and dataframes --- holoviews/element/chart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 88caae4f89..41f4907a70 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -65,7 +65,7 @@ def _process_data(self, data, kwargs): self._dataframe = False params = {} if ((pd is not None and isinstance(data, pd.DataFrame)) or - (dd is not None and isinstance(data, dd.DataFrame)), + (dd is not None and isinstance(data, dd.DataFrame)) or (bz is not None and isinstance(data, bz.Data))): self._dataframe = True if 'kdims' in kwargs or 'vdims' in kwargs: From 8db885e3c5ec9779c386a7233cb0cf1640dfd451 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 00:12:43 +0100 Subject: [PATCH 008/212] Simplified the dimension_values method on Raster --- holoviews/element/raster.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 60f7b8c180..97bdca8b3b 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -159,13 +159,13 @@ def dimension_values(self, dim, unique=False): The set of samples available along a particular dimension. 
""" dim_idx = self.get_dimension_index(dim) - if dim_idx in [0, 1]: - shape = self.data.shape[abs(dim_idx)] - dim_max = self.data.shape[abs(dim_idx-1)] - coords = list(range(0, dim_max)) - if not unique: - coords = coords * shape - return coords if dim_idx else sorted(coords) + if unique and dim_idx == 0: + return np.array(range(self.data.shape[1])) + elif unique and dim_idx == 1: + return np.array(range(self.data.shape[0])) + elif dim_idx in [0, 1]: + D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]] + return D1.flatten() if dim_idx == 0 else D2.flatten() elif dim_idx == 2: return self.data.T.flatten() else: From d9ab3405fc90745518ad29221af5fd3eff61f3de Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 00:13:47 +0100 Subject: [PATCH 009/212] Added the toarray utility to support dask Arrays --- holoviews/element/util.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/holoviews/element/util.py b/holoviews/element/util.py index 65e2529531..bca8a9ce7d 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -1,5 +1,20 @@ import numpy as np +try: + import dask +except: + dask = None + +def toarray(v): + """ + Interface helper function to turn dask Arrays into numpy arrays as + necessary. + """ + if dask and isinstance(v, dask.array.Array): + return v.compute() + else: + return v + def compute_edges(edges): """ Computes edges from a number of bin centers, From 4130d7e58c5ce56456e75a72550200db2117c9c9 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 00:15:37 +0100 Subject: [PATCH 010/212] Initial support for dask Array objects in Raster --- holoviews/element/raster.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 97bdca8b3b..d5dc31d980 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -9,17 +9,16 @@ from ..core.sheetcoords import SheetCoordinateSystem, Slice from .chart import Curve from .tabular import Table -from .util import compute_edges - +from .util import compute_edges, toarray class Raster(Element2D): """ - Raster is a basic 2D element type for presenting numpy arrays as - two dimensional raster images. + Raster is a basic 2D element type for presenting either numpy or + dask arrays as two dimensional raster images. - Arrays with a shape of (N,M) are valid inputs for Raster wheras - subclasses of Raster (e.g. RGB) may also accept 3D arrays - containing channel information. + Arrays with a shape of (N,M) are valid inputs for Raster wheras + subclasses of Raster (e.g. RGB) may also accept 3D arrays + containing channel information. 
Raster does not support slicing like the Image or RGB subclasses and the extents are in matrix coordinates if not explicitly @@ -167,7 +166,7 @@ def dimension_values(self, dim, unique=False): D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]] return D1.flatten() if dim_idx == 0 else D2.flatten() elif dim_idx == 2: - return self.data.T.flatten() + return toarray(self.data.T).flatten() else: return super(Raster, self).dimension_values(dim) From 795b52e81d08474256874c7c92c23018f6ea75a1 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 01:04:56 +0100 Subject: [PATCH 011/212] Fixed accidental removal of line from collapse_data method --- holoviews/element/chart.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 41f4907a70..f0ff190458 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -166,6 +166,7 @@ def __getitem__(self, slices): @classmethod def collapse_data(cls, data, function, **kwargs): + new_data = [arr[:, 1:] for arr in data] if isinstance(function, np.ufunc): collapsed = function.reduce(new_data) else: From 5e1a577164c819deff39d48d1b2ec77fd2346bef Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 01:06:34 +0100 Subject: [PATCH 012/212] Fixed isinstance check in dimension_values method of Chart --- holoviews/element/chart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index f0ff190458..796aa6b39c 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -219,7 +219,7 @@ def dimension_values(self, dim): if index < len(self.dimensions()): if self._dataframe: data = self.data[self.data.columns[index]] - if isinstance(data, dd.Series): + if dd and isinstance(data, dd.Series): data = data.compute() return data else: From 7c6ec45808da5f87c7c1645fbcb80bee11a81668 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 01:47:13 +0100 Subject: [PATCH 013/212] Added index_value option to toarray utility function --- holoviews/element/util.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/holoviews/element/util.py b/holoviews/element/util.py index bca8a9ce7d..7d6d828c99 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -5,13 +5,15 @@ except: dask = None -def toarray(v): +def toarray(v, index_value=False): """ Interface helper function to turn dask Arrays into numpy arrays as - necessary. + necessary. If index_value is True, a value is returned instead of + an array holding a single value. 
""" if dask and isinstance(v, dask.array.Array): - return v.compute() + arr = v.compute() + return arr[()] if index_value else arr else: return v From 0270bd09432b334820a16a003e33a32e161b7050 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Wed, 2 Sep 2015 01:48:10 +0100 Subject: [PATCH 014/212] Value indexing now works corrects using dask arrays --- holoviews/element/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index d5dc31d980..ed28306997 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -54,7 +54,7 @@ def __getitem__(self, slices): if all(slc_types): return self.clone(data, extents=None) elif not any(slc_types): - return data + return toarray(data, index_value=True) else: return self.clone(np.expand_dims(data, axis=slc_types.index(True)), extents=None) From c9d2c4efd9bf30d288f7f0acb90968523fbdb5a5 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 10 Sep 2015 19:10:29 +0100 Subject: [PATCH 015/212] Added is_dataframe utiltity --- holoviews/core/util.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 87ee6da278..3e01fc7f79 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -8,6 +8,21 @@ import numpy as np import param +try: + import pandas as pd +except ImportError: + pd = None + +try: + import dask.dataframe as dd +except ImportError: + dd = None + +try: + from blaze import bz +except ImportError: + bz = None + # Python3 compatibility basestring = str if sys.version_info.major == 3 else basestring @@ -570,3 +585,12 @@ def find_file(folder, filename): for filename in fnmatch.filter(filenames, filename): matches.append(os.path.join(root, filename)) return matches[-1] + + +def is_dataframe(data): + """ + Checks whether the supplied data is DatFrame type. 
+ """ + return((pd is not None and isinstance(data, pd.DataFrame)) or + (dd is not None and isinstance(data, dd.DataFrame)) or + (bz is not None and isinstance(data, bz.Data))) From 88d08182cd572bbd7638f4c0a6316a1304a9a646 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 10 Sep 2015 19:11:39 +0100 Subject: [PATCH 016/212] Moved dataframe support from Chart to Element --- holoviews/core/element.py | 56 ++++++++++++++++++++++++++++++-------- holoviews/element/chart.py | 35 ++---------------------- 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 2d7ebf1700..7a55257549 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -10,7 +10,8 @@ from .ndmapping import OrderedDict, UniformNdMapping, NdMapping, item_check from .overlay import Overlayable, NdOverlay, Overlay, CompositeOverlay from .tree import AttrTree -from .util import sanitize_identifier +from .util import sanitize_identifier, is_dataframe + class Element(ViewableElement, Composable, Overlayable): """ @@ -42,17 +43,42 @@ def hist(self, dimension=None, num_bins=20, bin_range=None, #======================# def __init__(self, data, **params): + self._dataframe = False convert = isinstance(data, Element) if convert: params = dict(data.get_param_values(onlychanged=True), **params) element = data - data = [] + data = [] if is_dataframe(element.data) else element.data + if is_dataframe(data): + self._dataframe = True + kdims, vdims = self._process_df_dims(data, params) + params['kdims'] = kdims + params['vdims'] = vdims + element = data + data = None super(Element, self).__init__(data, **params) - if convert: + if self._dataframe: + self.data = element + elif convert: self.data = self._convert_element(element) + def _process_df_dims(self, data, kwargs): + if 'kdims' in kwargs or 'vdims' in kwargs: + columns = kwargs.get('kdims', self.kdims) + kwargs.get('vdims', self.vdims) + col_labels = [c.name if isinstance(c, Dimension) else c + for c in columns] + if not all(c in data.columns for c in col_labels): + raise ValueError("Supplied dimensions don't match columns" + "in the dataframe.") + kdims, vdims = kwargs['kdims'], kwargs['vdims'] + else: + kdims = list(data.columns[:len(self.kdims)]) + vdims = list(data.columns[len(self.kdims):]) + return kdims, vdims + + def _convert_element(self, element): type_str = self.__class__.__name__ type_name = type_str.lower() @@ -143,10 +169,16 @@ def table(self, **kwargs): as long as it implements a dimension_values method. 
""" from ..element import Table - keys = zip(*[self.dimension_values(dim.name) - for dim in self.kdims]) - values = zip(*[self.dimension_values(dim.name) - for dim in self.vdims]) + if self._dataframe: + data = self.dframe() + else: + keys = zip(*[self.dimension_values(dim.name) + for dim in self.kdims]) + values = zip(*[self.dimension_values(dim.name) + for dim in self.vdims]) + if not keys: keys = [()]*len(values) + if not values: [()]*len(keys) + data = zip(keys, values) kwargs = {'label': self.label for k, v in self.get_param_values(onlychanged=True) if k in ['group', 'label']} @@ -155,16 +187,16 @@ def table(self, **kwargs): label=self.label) if not self.params()['group'].default == self.group: params['group'] = self.group - if not keys: keys = [()]*len(values) - if not values: [()]*len(keys) - return Table(zip(keys, values), **dict(params, **kwargs)) + return Table(data, **dict(params, **kwargs)) def dframe(self): - import pandas + import pandas as pd + if self._dataframe: + return self.data column_names = self.dimensions(label=True) dim_vals = np.vstack([self.dimension_values(dim) for dim in column_names]).T - return pandas.DataFrame(dim_vals, columns=column_names) + return pd.DataFrame(dim_vals, columns=column_names) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 796aa6b39c..4f1c01e75c 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -1,18 +1,4 @@ import numpy as np -try: - import pandas as pd -except ImportError: - pd = None - -try: - import dask.dataframe as dd -except ImportError: - dd = None - -try: - from blaze import bz -except ImportError: - bz = None import param @@ -62,28 +48,12 @@ def _convert_element(self, element): def _process_data(self, data, kwargs): - self._dataframe = False params = {} - if ((pd is not None and isinstance(data, pd.DataFrame)) or - (dd is not None and isinstance(data, dd.DataFrame)) or - (bz is not None and isinstance(data, bz.Data))): - self._dataframe = True - if 'kdims' in kwargs or 'vdims' in kwargs: - columns = kwargs.get('kdims', self.kdims) + kwargs.get('vdims', self.vdims) - col_labels = [c.name if isinstance(c, Dimension) else c - for c in columns] - data = data[col_labels] - if not all(c in data.columns for c in col_labels): - raise ValueError("Supplied dimensions don't match columns" - "in the dataframe.") - else: - params['kdims'] = list(data.columns[:len(self.kdims)]) - params['vdims'] = list(data.columns[len(self.kdims):]) - elif isinstance(data, UniformNdMapping) or (isinstance(data, list) and data + if isinstance(data, UniformNdMapping) or (isinstance(data, list) and data and isinstance(data[0], Element2D)): params = dict([v for v in data][0].get_param_values(onlychanged=True)) data = np.concatenate([v.data for v in data]) - elif isinstance(data, Element): + elif isinstance(data, Element) or util.is_dataframe(data): pass elif isinstance(data, tuple): data = np.column_stack(data) @@ -253,6 +223,7 @@ def range(self, dim, data_range=True): def dframe(self): + import pandas as pd if self._dataframe: return self.data.copy() elif pd: From dfa6764610b1d9eafcaba949d853d87f78072004 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 10 Sep 2015 19:12:06 +0100 Subject: [PATCH 017/212] Made Tabular.pprint_cell use general API --- holoviews/core/element.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 7a55257549..bbda86d202 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ 
-235,16 +235,8 @@ def pprint_cell(self, row, col): return str(self.kdims[col]) else: dim = self.get_dimension(col) - if col >= ndims: - row_values = self.values()[row-1] - if self.vdims: - val = row_values[col - ndims] - else: - val = row_values - else: - row_data = list(self.data.keys())[row-1] - val = row_data[col] - return dim.pprint_value(val) + values = self.dimension_values(dim.name) + return dim.pprint_value(values[row-1]) def cell_type(self, row, col): From 2878e8dca45da88f76f64704b9b1645b2bc271ab Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 10 Sep 2015 19:12:54 +0100 Subject: [PATCH 018/212] Added initial support for dataframes on NdElement types --- holoviews/core/element.py | 63 +++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index bbda86d202..aa724904a0 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -302,6 +302,26 @@ def _convert_element(self, element): else: return element + def groupby(self, dimensions, container_type=NdMapping): + if self._dataframe: + invalid_dims = list(set(dimensions) - set(self._cached_index_names)) + if invalid_dims: + raise Exception('Following dimensions could not be found %s.' + % invalid_dims) + + index_dims = [self.get_dimension(d) for d in dimensions] + mapping = container_type(None, kdims=index_dims) + view_dims = set(self._cached_index_names) - set(dimensions) + view_dims = [self.get_dimension(d) for d in view_dims] + for k, v in self.data.groupby(dimensions): + data = v.drop(dimensions, axis=1) + mapping[k] = self.clone(data, kdims=[self.get_dimension(d) + for d in data.columns]) + return mapping + else: + return super(NdElement, self).groupby(dimensions, container_type) + + def reindex(self, kdims=None, vdims=None, force=False): """ Create a new object with a re-ordered set of dimensions. @@ -318,6 +338,11 @@ def reindex(self, kdims=None, vdims=None, force=False): if d not in vdims] key_dims = [self.get_dimension(k) for k in kdims] val_dims = [self.get_dimension(v) for v in vdims] + + # DataFrame based tables don't need to be reindexed + if self._dataframe: + return self.clone(kdims=key_dims, vdims=val_dims) + kidxs = [(i, k in self._cached_index_names, self.get_dimension_index(k)) for i, k in enumerate(kdims)] vidxs = [(i, v in self._cached_index_names, self.get_dimension_index(v)) @@ -398,6 +423,9 @@ def __getitem__(self, args): In addition to usual NdMapping indexing, NdElements can be indexed by column name (or a slice over column names) """ + if self._dataframe: + if not isinstance(args, tuple): args = (args,) + return self.select(**dict(zip(self.dimensions(label=True), args))) ndmap_index = args[:self.ndims] if isinstance(args, tuple) else args subtable = NdMapping.__getitem__(self, ndmap_index) @@ -417,6 +445,26 @@ def __getitem__(self, args): return self._filter_data(subtable, args[-1]) + def select(self, selection_specs=None, **select): + """ + Allows slice and select individual values along the DataFrameView + dimensions. Supply the dimensions and values or slices as + keyword arguments. 
+ """ + if self._dataframe: + df = self.data + for dim, k in select.items(): + if isinstance(k, tuple): + k = slice(*k) + if isinstance(k, slice): + df = df[(k.start < df[dim]) & (df[dim] < k.stop)] + else: + df = df[df[dim] == k] + return self.clone(df) + else: + return super(NdElement, self).select(selection_specs, **select) + + def sample(self, samples=[]): """ Allows sampling of the Table with a list of samples. @@ -478,14 +526,17 @@ def collapse_data(cls, data, function, **kwargs): def dimension_values(self, dim): - if isinstance(dim, Dimension): - raise Exception('Dimension to be specified by name') + dim = self.get_dimension(dim) value_dims = self.dimensions('value', label=True) - if dim in value_dims: - index = value_dims.index(dim) + if self._dataframe: + if dim.name in self.data.columns: + return np.array(self.data[dim.name]) + return None + elif dim.name in value_dims: + index = value_dims.index(dim.name) return [v[index] for v in self.values()] else: - return NdMapping.dimension_values(self, dim) + return NdMapping.dimension_values(self, dim.name) def dframe(self, value_label='data'): @@ -493,6 +544,8 @@ def dframe(self, value_label='data'): import pandas except ImportError: raise Exception("Cannot build a DataFrame without the pandas library.") + if self._dataframe: + return self.data labels = [d.name for d in self.dimensions()] return pandas.DataFrame( [dict(zip(labels, np.concatenate([np.array(k),v]))) From a2e6b3783e19d87866f84026b1d6a16af871462b Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 10 Sep 2015 19:13:36 +0100 Subject: [PATCH 019/212] Made DFrame inherit from NdElement --- holoviews/interface/pandas.py | 65 ++--------------------------------- 1 file changed, 2 insertions(+), 63 deletions(-) diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index 0156ebf2d5..ca088f7c72 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -19,11 +19,11 @@ import param from ..core import ViewableElement, NdMapping, NdOverlay,\ - NdLayout, GridSpace, Element, HoloMap + NdLayout, GridSpace, NdElement, HoloMap from ..element import Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface -class DataFrameView(Element): +class DataFrameView(NdElement): """ DataFrameView provides a convenient compatibility wrapper around Pandas DataFrames. It provides several core functions: @@ -87,42 +87,6 @@ def __init__(self, data, dimensions={}, kdims=None, clone_override=False, self.data.columns = self._cached_index_names - def __getitem__(self, key): - """ - Allows slicing and selecting along the DataFrameView dimensions. - """ - if key is (): - return self - else: - if len(key) <= self.ndims: - return self.select(**dict(zip(self._cached_index_names, key))) - else: - raise Exception('Selection contains %d dimensions, DataFrameView ' - 'only has %d index dimensions.' % (self.ndims, len(key))) - - - def select(self, selection_specs=None, **select): - """ - Allows slice and select individual values along the DataFrameView - dimensions. Supply the dimensions and values or slices as - keyword arguments. 
- """ - df = self.data - for dim, k in select.items(): - if isinstance(k, slice): - df = df[(k.start < df[dim]) & (df[dim] < k.stop)] - else: - df = df[df[dim] == k] - return self.clone(df) - - - def dimension_values(self, dim): - if dim in self.data.columns: - return np.array(self.data[dim]) - else: - return super(DataFrameView, self).dimension_values(dim) - - def apply(self, name, *args, **kwargs): """ Applies the Pandas dframe method corresponding to the supplied @@ -132,13 +96,6 @@ def apply(self, name, *args, **kwargs): clone_override=True) - def dframe(self): - """ - Returns a copy of the internal dframe. - """ - return self.data.copy() - - def aggregate(self, dimensions=[], function=None, **reductions): """ The aggregate function accepts either a list of Dimensions @@ -167,24 +124,6 @@ def aggregate(self, dimensions=[], function=None, **reductions): return self.clone(reduced, kdims=kdims) - def groupby(self, dimensions, container_type=NdMapping): - invalid_dims = list(set(dimensions) - set(self._cached_index_names)) - if invalid_dims: - raise Exception('Following dimensions could not be found %s.' - % invalid_dims) - - index_dims = [self.get_dimension(d) for d in dimensions] - mapping = container_type(None, kdims=index_dims) - view_dims = set(self._cached_index_names) - set(dimensions) - view_dims = [self.get_dimension(d) for d in view_dims] - for k, v in self.data.groupby(dimensions): - data = v.drop(dimensions, axis=1) - mapping[k] = self.clone(data, - kdims=[self.get_dimension(d) - for d in data.columns]) - return mapping - - def overlay(self, dimensions): return self.groupby(dimensions, NdOverlay) From 3e20bf2aa75ddc518e0e496faf690537a79351d7 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 01:45:10 +0100 Subject: [PATCH 020/212] Fixes to NdElement dataframe handling --- holoviews/core/element.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index aa724904a0..e7e025606d 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -66,13 +66,13 @@ def __init__(self, data, **params): def _process_df_dims(self, data, kwargs): if 'kdims' in kwargs or 'vdims' in kwargs: - columns = kwargs.get('kdims', self.kdims) + kwargs.get('vdims', self.vdims) + kdims = kwargs.get('kdims', []) + vdims = kwargs.get('vdims', []) col_labels = [c.name if isinstance(c, Dimension) else c - for c in columns] + for c in kdims+vdims] if not all(c in data.columns for c in col_labels): raise ValueError("Supplied dimensions don't match columns" "in the dataframe.") - kdims, vdims = kwargs['kdims'], kwargs['vdims'] else: kdims = list(data.columns[:len(self.kdims)]) vdims = list(data.columns[len(self.kdims):]) @@ -304,20 +304,22 @@ def _convert_element(self, element): def groupby(self, dimensions, container_type=NdMapping): if self._dataframe: - invalid_dims = list(set(dimensions) - set(self._cached_index_names)) + dim_labels = self.dimensions(label=True) + invalid_dims = list(set(dimensions) - set(dim_labels)) if invalid_dims: raise Exception('Following dimensions could not be found %s.' 
% invalid_dims) index_dims = [self.get_dimension(d) for d in dimensions] mapping = container_type(None, kdims=index_dims) - view_dims = set(self._cached_index_names) - set(dimensions) - view_dims = [self.get_dimension(d) for d in view_dims] + kdims = set(self._cached_index_names) - set(dimensions) + vdims = set(self._cached_value_names) - set(dimensions) + kdims = [self.get_dimension(d) for d in kdims] + vdims = [self.get_dimension(d) for d in vdims] for k, v in self.data.groupby(dimensions): data = v.drop(dimensions, axis=1) - mapping[k] = self.clone(data, kdims=[self.get_dimension(d) - for d in data.columns]) - return mapping + mapping[k] = self.clone(data, kdims=kdims, vdims=vdims) + return mapping else: return super(NdElement, self).groupby(dimensions, container_type) From 072a2e8d231cf42a53b472c6a3bdb98055dfc903 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 01:47:40 +0100 Subject: [PATCH 021/212] Improved bokeh Chart plots data source mapping --- holoviews/plotting/bokeh/chart.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index 93f3d300c3..75c157c3b1 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -49,16 +49,18 @@ def get_data(self, element, ranges=None): cmap = style.get('palette', style.get('cmap', None)) if self.color_index < len(dims) and cmap: - mapping['color'] = 'color' + map_key = 'color_' + str(id(element.data)) + mapping['color'] = map_key cmap = get_cmap(cmap) colors = element.dimension_values(self.color_index) - data['color'] = map_colors(colors, ranges, cmap) + data[map_key] = map_colors(colors, ranges, cmap) if self.size_index < len(dims): - mapping['size'] = 'size' + map_key = 'size_' + str(id(element.data)) + mapping['size'] = map_key ms = style.get('size', 1) sizes = element.dimension_values(self.size_index) - data['size'] = compute_sizes(sizes, self.size_fn, - self.scaling_factor, ms) + data[map_key] = compute_sizes(sizes, self.size_fn, + self.scaling_factor, ms) data[dims[0]] = element.dimension_values(0) data[dims[1]] = element.dimension_values(1) if 'hover' in self.tools: @@ -74,8 +76,11 @@ class CurvePlot(ElementPlot): _plot_method = 'line' def get_data(self, element, ranges=None): - return (dict(x=element.data[:, 0], y=element.data[:, 1]), - dict(x='x', y='y')) + x = element.get_dimension(0).name + y = element.get_dimension(1).name + return ({'x': element.dimension_values(0), + 'y': element.dimension_values(1)}, + dict(x=x, y=y)) class SpreadPlot(PolygonPlot): From 51c5477e119766d05090cf83e5997795e4a563cc Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 01:49:06 +0100 Subject: [PATCH 022/212] bokeh ElementPlot and TabularPlot now cache initial frame --- holoviews/plotting/bokeh/element.py | 3 +++ holoviews/plotting/bokeh/tabular.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index b60501b671..dc29969e14 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -297,6 +297,8 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): # Get element key and ranges for frame element = self.hmap.last key = self.keys[-1] + self.current_frame = element + self.current_key = key ranges = self.compute_ranges(self.hmap, key, ranges) ranges = util.match_spec(element, ranges) @@ -313,6 +315,7 @@ def initialize_plot(self, ranges=None, plot=None, 
plots=None, source=None): properties = self._glyph_properties(plot, element, source, ranges) self._init_glyph(plot, mapping, properties) glyph = plot.renderers[-1].glyph + self.handles['glyph_renderer'] = plot.renderers[-1] self.handles['glyph'] = glyph # Update plot, source and glyph diff --git a/holoviews/plotting/bokeh/tabular.py b/holoviews/plotting/bokeh/tabular.py index 1e4f074527..d6944ea424 100644 --- a/holoviews/plotting/bokeh/tabular.py +++ b/holoviews/plotting/bokeh/tabular.py @@ -29,6 +29,8 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): # Get element key and ranges for frame element = self.hmap.last key = self.keys[-1] + self.current_frame = element + self.current_key = key data, mapping = self.get_data(element, ranges) if source is None: @@ -41,6 +43,7 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): table = DataTable(source=source, columns=columns, height=self.height, width=self.width, **properties) self.handles['plot'] = table + self.handles['glyph_renderer'] = table self.drawn = True return table From 9ff59a8b570c97e53ba3e503fbde49ac05c17e75 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 03:13:16 +0100 Subject: [PATCH 023/212] Plots in tabs now correctly sized --- holoviews/plotting/bokeh/element.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index dc29969e14..0fa582d871 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -457,6 +457,7 @@ def initialize_plot(self, ranges=None, plot=None, plots=None): panels = [] for key, subplot in self.subplots.items(): + if self.tabs: subplot.overlaid = False child = subplot.initialize_plot(ranges, plot, plots) if self.tabs: if self.hmap.type is Overlay: From 49a59bc0d5e9e2b47e577b6e69e6c01c5376e63d Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 11:44:04 +0100 Subject: [PATCH 024/212] Added traverse method to DimensionedPlot --- holoviews/plotting/plot.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/holoviews/plotting/plot.py b/holoviews/plotting/plot.py index b134fb41b3..a89227e76b 100644 --- a/holoviews/plotting/plot.py +++ b/holoviews/plotting/plot.py @@ -182,6 +182,40 @@ def _get_frame(self, key): pass + def matches(self, spec): + """ + Matches a specification against the current Plot. + """ + if callable(spec) and not isinstance(spec, type): return spec(self) + elif isinstance(spec, type): return isinstance(self, spec) + else: + raise ValueError("Matching specs have to be either a type or a callable.") + + + def traverse(self, fn=None, specs=None, full_breadth=True): + """ + Traverses any nested DimensionedPlot returning a list + of all plots that match the specs. The specs should + be supplied as a list of either Plot types or callables, + which should return a boolean given the plot class. 
+ """ + accumulator = [] + matches = specs is None + if not matches: + for spec in specs: + matches = self.matches(spec) + if matches: break + if matches: + accumulator.append(fn(self) if fn else self) + + # Assumes composite objects are iterables + if hasattr(self, 'subplots') and self.subplots: + for el in self.subplots.values(): + accumulator += el.traverse(fn, specs, full_breadth) + if not full_breadth: break + return accumulator + + def _frame_title(self, key, group_size=2, separator='\n'): """ Returns the formatted dimension group strings From 2263a4bd78882c48603de391215c65c21b5f4178 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 11:44:27 +0100 Subject: [PATCH 025/212] Fix for Chart handling of dask dataframes --- holoviews/element/chart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 4f1c01e75c..ed46e4d70d 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -188,8 +188,8 @@ def dimension_values(self, dim): index = self.get_dimension_index(dim) if index < len(self.dimensions()): if self._dataframe: - data = self.data[self.data.columns[index]] - if dd and isinstance(data, dd.Series): + data = self.data[self.get_dimension(index).name] + if util.dd and isinstance(data, util.dd.Series): data = data.compute() return data else: From ab6f6e3756abcd4dc399fbf7b639580cb6e0e512 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 11 Sep 2015 11:46:12 +0100 Subject: [PATCH 026/212] Added shared_source functionality to bokeh LayoutPlot --- holoviews/plotting/bokeh/plot.py | 40 +++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index 96d5bee113..6906f50c30 100644 --- a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -1,20 +1,22 @@ +from collections import defaultdict +from itertools import groupby import numpy as np import param from bokeh.io import gridplot, vplot, hplot from bokeh.models import ColumnDataSource -from bokeh.models.widgets import Panel, Tabs +from bokeh.models.widgets import Panel, Tabs, DataTable from ...core import OrderedDict, CompositeOverlay, Element from ...core import Store, Layout, AdjointLayout, NdLayout, Empty, GridSpace, HoloMap from ...core.options import Compositor from ...core import traversal -from ..plot import Plot, GenericCompositePlot, GenericLayoutPlot +from ..plot import Plot, DimensionedPlot, GenericCompositePlot, GenericLayoutPlot from .renderer import BokehRenderer -class BokehPlot(Plot): +class BokehPlot(DimensionedPlot): """ Plotting baseclass for the Bokeh backends, implementing the basic plotting interface for Bokeh based plots. @@ -186,6 +188,10 @@ def update_frame(self, key, ranges=None): class LayoutPlot(BokehPlot, GenericLayoutPlot): + shared_source = param.Boolean(default=True, doc=""" + Whether to share data sources across plots in the Layout + allowing for linked selection tools to be used.""") + tabs = param.Boolean(default=False, doc=""" Whether to display overlaid plots in separate panes""") @@ -347,10 +353,38 @@ def initialize_plot(self, ranges=None): self.handles['plot'] = layout_plot self.handles['plots'] = plots self.drawn = True + if self.shared_source: + self._share_source() return self.handles['plot'] + def _share_source(self): + """ + Traverses all plots to find any with shared data, + then sources are grouped, combined and replaced + on the original plots and glyphs. 
+ """ + source_getter = lambda x: (id(x.current_frame.data), x, + x.handles.get('glyph_renderer'), + x.handles['source']) + element_filter = lambda x: (hasattr(x, 'hmap') and + x.current_frame and 'source' in x.handles) + plots = self.traverse(source_getter, specs=[element_filter]) + source_data = defaultdict(dict) + for pid, group in groupby(sorted(plots), lambda x: x[0]): + for (pid, _, _, source) in group: + source_data[pid].update(source.data) + sources = {pid: ColumnDataSource(data=data) for pid, data in source_data.items()} + for pid, group in groupby(sorted(plots), lambda x: x[0]): + for (pid, plot, glyph, source) in group: + if isinstance(glyph, DataTable): + glyph.source = sources[pid] + else: + glyph.data_source = sources[pid] + plot.handles['source'] = sources[pid] + + def update_frame(self, key, ranges=None): """ Update the internal state of the Plot to represent the given From 4162625d93744bba5c4a1b57deaf070603b90391 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 14 Sep 2015 13:19:29 +0100 Subject: [PATCH 027/212] Support for specifying unselected point styles --- holoviews/plotting/bokeh/chart.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index 75c157c3b1..6d10fbf0c1 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -1,5 +1,5 @@ import numpy as np - +from bokeh.models import Circle import param from ...core import Dimension @@ -8,7 +8,7 @@ from ..util import compute_sizes from .element import ElementPlot, line_properties, fill_properties from .path import PathPlot, PolygonPlot -from .util import map_colors, get_cmap +from .util import map_colors, get_cmap, mpl_to_bokeh class PointPlot(ElementPlot): @@ -34,7 +34,8 @@ class PointPlot(ElementPlot): Function applied to size values before applying scaling, to remove values lower than zero.""") - style_opts = (['cmap', 'palette', 'marker', 'size', 's', 'alpha', 'color'] + + style_opts = (['cmap', 'palette', 'marker', 'size', 's', 'alpha', 'color', + 'unselected_color'] + line_properties + fill_properties) _plot_method = 'scatter' @@ -69,6 +70,26 @@ def get_data(self, element, ranges=None): return data, mapping + def _init_glyph(self, plot, mapping, properties): + """ + Returns a Bokeh glyph object. 
+ """ + properties = mpl_to_bokeh(properties) + unselect_color = properties.pop('unselected_color', None) + if (any(t in self.tools for t in ['box_select', 'lasso_select']) + and unselect_color is not None): + source = properties.pop('source') + color = properties.pop('color', None) + color = mapping.pop('color', color) + properties.pop('legend', None) + unselected = Circle(**dict(properties, fill_color=unselect_color, **mapping)) + selected = Circle(**dict(properties, fill_color=color, **mapping)) + plot.add_glyph(source, selected, selection_glyph=selected, + nonselection_glyph=unselected) + else: + getattr(plot, self._plot_method)(**dict(properties, **mapping)) + + class CurvePlot(ElementPlot): From a8dff85020469f173c1c37417de50c9d6f26ae3a Mon Sep 17 00:00:00 2001 From: philippjfr Date: Tue, 29 Sep 2015 16:48:09 +0100 Subject: [PATCH 028/212] Fix to reduce method dimension handling --- holoviews/core/element.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 637b498e42..243ab86d90 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -164,11 +164,11 @@ def reduce(self, dimensions=[], function=None, **reduce_map): def _reduce_map(self, dimensions, function, reduce_map): - dimensions = self._valid_dimensions(dimensions) if dimensions and reduce_map: raise Exception("Pass reduced dimensions either as an argument" "or as part of the kwargs not both.") - elif dimensions: + dimensions = self._valid_dimensions(dimensions) + if dimensions: reduce_map = {d: function for d in dimensions} elif not reduce_map: reduce_map = {d: function for d in self._cached_index_names} From f5267cf25b1823319392d78cd3e6be0bda52322e Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 15 Oct 2015 02:06:49 +0100 Subject: [PATCH 029/212] Added Columns and ColumnarData classes starting Data API refactor --- holoviews/core/__init__.py | 1 + holoviews/core/data.py | 527 ++++++++++++++++++++++++++++++++++ holoviews/core/element.py | 186 +----------- holoviews/element/chart.py | 245 +--------------- holoviews/element/tabular.py | 5 +- holoviews/interface/pandas.py | 33 +-- 6 files changed, 559 insertions(+), 438 deletions(-) create mode 100644 holoviews/core/data.py diff --git a/holoviews/core/__init__.py b/holoviews/core/__init__.py index 69bb230619..484b50478b 100644 --- a/holoviews/core/__init__.py +++ b/holoviews/core/__init__.py @@ -1,4 +1,5 @@ from .boundingregion import * # pyflakes:ignore (API import) +from .data import * # pyflakes:ignore (API import) from .dimension import * # pyflakes:ignore (API import) from .element import * # pyflakes:ignore (API import) from .layout import * # pyflakes:ignore (API import) diff --git a/holoviews/core/data.py b/holoviews/core/data.py new file mode 100644 index 0000000000..c7d749a1cd --- /dev/null +++ b/holoviews/core/data.py @@ -0,0 +1,527 @@ +""" +The data module provides utility classes to interface with various +data backends. +""" + +from collections import defaultdict +from itertools import groupby + +import numpy as np +import param + +from .dimension import OrderedDict, Dimension +from .element import Element, NdElement +from .spaces import HoloMap +from . 
import util + + +class Columns(Element): + + def __init__(self, data, **kwargs): + if 'kdims' not in kwargs: + kwargs['kdims'] = self.kdims + if 'vdims' not in kwargs: + kwargs['vdims'] = self.vdims + data, params = ColumnarData._process_data(data, **kwargs) + super(Columns, self).__init__(data, **params) + self.data = self._validate_data(self.data) + + + def _validate_data(self, data): + if self.interface is None: + return data + else: + return self.interface.validate_data(data) + + + def select(self, selection_specs=None, **selection): + if self.interface is None: + data = self.data.select(**selection) + else: + data = self.interface.select(selection) + if np.isscalar(data): + return data + else: + return self.clone(data) + + + @property + def interface(self): + if util.is_dataframe(self.data): + return ColumnarDataFrame(self) + elif isinstance(self.data, np.ndarray): + return ColumnarArray(self) + + + def reindex(self, kdims=None, vdims=None): + """ + Create a new object with a re-ordered set of dimensions. + Allows converting key dimensions to value dimensions + and vice versa. + """ + if self.interface is None: + return self.data.reindex(kdims, vdims) + + if vdims is None: + vdims = self._cached_value_names + elif kdims is None: + dimensions = (self._cached_index_names + + self._cached_value_names) + kdims = [d for d in dimensions if d not in vdims] + key_dims = [self.get_dimension(k) for k in kdims] + val_dims = [self.get_dimension(v) for v in vdims] + data = self.interface.reindex(self.data, key_dims, val_dims) + return self.clone(data, key_dims, val_dims) + + + def __getitem__(self, slices): + """ + Implements slicing or indexing of the data by the data x-value. + If a single element is indexed reduces the Element2D to a single + Scatter object. + """ + if slices is (): return self + if not isinstance(slices, tuple): slices = (slices,) + selection = dict(zip(self.dimensions(label=True), slices)) + if self.interface is None: + data = self.data.select(**selection) + else: + data = self.interface.select(**selection) + return self.clone(data) + + + def sample(self, samples=[]): + """ + Allows sampling of Columns as an iterator of coordinates + matching the key dimensions. + """ + if self.interface is None: + return self.clone(self.data.sample(samples)) + else: + return self.clone(self.interface.sample(samples)) + + + def reduce(self, dimensions=[], function=None, **reduce_map): + """ + Allows collapsing of Chart objects using the supplied map of + dimensions and reduce functions. 
+ """ + reduce_map = self._reduce_map(dimensions, function, reduce_map) + + if len(reduce_map) > 1: + raise ValueError("Chart Elements may only be reduced to a point.") + dim, reduce_fn = list(reduce_map.items())[0] + if dim in self._cached_index_names: + reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) + else: + raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) + return self.clone(reduced_data) + + + def groupby(self, dimensions, container_type=HoloMap, **kwargs): + if self.interface is None: + return self.data.groupby(dimensions, container_type, **kwargs) + else: + return self.interface.groupby(dimensions, container_type, **kwargs) + + + def __len__(self): + if self.interface is None: + return len(self.data) + else: + return len(self.interface) + + + @property + def shape(self): + if self.interface is None: + return (len(self), len(self.dimensions())) + else: + return self.interface.shape + + + def dimension_values(self, dim): + if self.interface is None: + return self.data.dimension_values(dim) + else: + return self.interface.values(dim) + + + def dframe(self): + if self.interface is None: + return self.data.dframe() + else: + return self.interface.dframe() + + + def array(self): + if self.interface is None: + dims = self._cached_index_names + self._cached_value_names + return np.column_stack([self.dimension_values(d) for d in dims]) + else: + return self.interface.array() + + + + +class ColumnarData(param.Parameterized): + + def __init__(self, element, **params): + self.element = element + + + def array(self): + NotImplementedError + + @property + def ndims(self): + self.element.ndims + + + @property + def shape(self): + return self.element.data.shape + + + @classmethod + def _process_data(cls, data, **kwargs): + params = {} + if isinstance(data, NdElement): + params['kdims'] = data.kdims + params['vdims'] = data.vdims + params['label'] = data.label + elif isinstance(data, Element): + params = dict(data.get_param_values(onlychanged=True)) + data = data.data + elif util.is_dataframe(data): + kdims, vdims = cls._process_df_dims(data, params) + params['kdims'] = kdims + params['vdims'] = vdims + elif isinstance(data, tuple): + data = np.column_stack(data) + elif not isinstance(data, (np.ndarray, dict)): + data = np.array() if data is None else list(data) + if all(np.isscalar(d) for coord in data for d in coord): + data = np.array(data) + elif len(data): + data = OrderedDict(data) + params.update(kwargs) + if isinstance(data, dict): + data = NdElement(data, kdims=params['kdims'], + vdims=params['vdims']) + return data, params + + + @classmethod + def _process_df_dims(cls, data, kwargs): + if 'kdims' in kwargs or 'vdims' in kwargs: + kdims = kwargs.get('kdims', []) + vdims = kwargs.get('vdims', []) + col_labels = [c.name if isinstance(c, Dimension) else c + for c in kdims+vdims] + if not all(c in data.columns for c in col_labels): + raise ValueError("Supplied dimensions don't match columns" + "in the dataframe.") + else: + kdims = list(data.columns[:2]) + vdims = list(data.columns[2:]) + return kdims, vdims + + + @classmethod + def _datarange(cls, data): + """ + Should return minimum and maximum of data + returned by values method. 
+ """ + raise NotImplementedError + + + def range(self, dim, data_range=True): + dim_idx = self.get_dimension_index(dim) + if dim.range != (None, None): + return dim.range + elif dim_idx < len(self.dimensions()): + if len(self): + data = self.values(dim_idx) + data_range = self._datarange(data) + else: + data_range = (np.NaN, np.NaN) + if data_range: + return util.max_range([data_range, dim.soft_range]) + else: + return dim.soft_range + + + def as_ndelement(self, **kwargs): + """ + This method transforms any ViewableElement type into a Table + as long as it implements a dimension_values method. + """ + keys = zip(*[self.values(dim.name) + for dim in self.kdims]) + values = zip(*[self.values(dim.name) + for dim in self.vdims]) + if not keys: keys = [()]*len(values) + if not values: [()]*len(keys) + data = zip(keys, values) + kwargs = {'label': self.label + for k, v in self.get_param_values(onlychanged=True) + if k in ['group', 'label']} + params = dict(kdims=self.kdims, + vdims=self.vdims, + label=self.label) + if not self.params()['group'].default == self.group: + params['group'] = self.group + el_type = type(self.element) + return el_type(data, **dict(params, **kwargs)) + + + def __len__(self): + return len(self.element.data) + + + @classmethod + def validate_data(cls, data): + return data + + +class ColumnarDataFrame(ColumnarData): + + + def groupby(self, dimensions, container_type=HoloMap, **kwargs): + invalid_dims = list(set(dimensions) - set(self._cached_index_names)) + if invalid_dims: + raise Exception('Following dimensions could not be found:\n%s.' + % invalid_dims) + + index_dims = [self.get_dimension(d) for d in dimensions] + mapping = container_type(None, kdims=index_dims) + for k, v in self.data.groupby(dimensions): + data = v.drop(dimensions, axis=1) + mapping[k] = self.clone(data, + kdims=[self.get_dimension(d) + for d in data.columns], **kwargs) + return mapping + + + def reduce(self, dimensions=[], function=None, **reductions): + """ + The aggregate function accepts either a list of Dimensions + and a function to apply to find the aggregate across + those Dimensions or a list of dimension/function pairs + to apply one by one. + """ + if not dimensions and not reductions: + raise Exception("Supply either a list of Dimensions or" + "reductions as keyword arguments") + reduced = self.element.data + dfnumeric = reduced.applymap(np.isreal).all(axis=0) + unreducable = list(dfnumeric[dfnumeric == False].index) + if dimensions: + if not function: + raise Exception("Supply a function to reduce the Dimensions with.") + reductions.update({d: function for d in dimensions}) + if reductions: + reduce_ops = defaultdict(list) + for d, fn in reductions.items(): reduce_ops[fn].append(fn) + for fn, dims in reduce_ops.items(): + reduced = reduced.groupby(dims, as_index=True).aggregate(fn) + reduced_indexes = [reduced.index.names.index(d) for d in unreducable] + reduced = reduced.reset_index(level=reduced_indexes) + kdims = [self.element.get_dimension(d) for d in reduced.columns] + return self.element.clone(reduced, kdims=kdims) + + + def array(self): + return self.element.data.iloc + + def reindex(self, kdims=None, vdims=None): + # DataFrame based tables don't need to be reindexed + return self.element.data + + + @classmethod + def _datarange(cls, data): + return data.min(), data.max() + + + def select(self, selection_specs=None, **select): + """ + Allows slice and select individual values along the DataFrameView + dimensions. Supply the dimensions and values or slices as + keyword arguments. 
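The dataframe groupby above builds one element per unique key, dropping the grouped columns from each subset. Stripped of the HoloMap container, the pandas side of it looks roughly like this sketch, where a plain dict stands in for the container:

    import pandas as pd

    df = pd.DataFrame({'gender': ['M', 'M', 'F'],
                       'age':    [10, 16, 12],
                       'weight': [15, 18, 10]},
                      columns=['gender', 'age', 'weight'])

    groups = {}
    for key, subset in df.groupby('gender'):
        # drop the grouped dimension, as v.drop(dimensions, axis=1) does above
        groups[key] = subset.drop('gender', axis=1)

    print(sorted(groups))       # ['F', 'M']
    print(len(groups['M']))     # 2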
+ """ + df = self.element.data + for dim, k in select.items(): + if isinstance(k, tuple): + k = slice(*k) + if isinstance(k, slice): + df = df[(k.start < df[dim]) & (df[dim] < k.stop)] + else: + df = df[df[dim] == k] + return df + + + def values(self, dim): + data = self.element.data[dim] + if util.dd and isinstance(data, util.dd.Series): + data = data.compute() + return data + + + @classmethod + def add_dimension(cls, data, dimension, values): + data[dimension] = values + return data + + + def dframe(self): + return self.element.data + + +class ColumnarArray(ColumnarData): + + @classmethod + def validate_data(cls, data): + if data.ndim == 1: + data = np.column_stack([np.arange(len(data)), data]) + return data + + + def array(self): + return self.element.data + + @classmethod + def add_dimension(cls, data, dimension, values): + if np.isscalar(values): + values = [values]*len(data) + return np.column_stack([data, values]) + + + def closest(self, coords): + """ + Given single or multiple x-values, returns the list + of closest actual samples. + """ + if not isinstance(coords, list): coords = [coords] + xs = self.data[:, 0] + idxs = [np.argmin(np.abs(xs-coord)) for coord in coords] + return [xs[idx] for idx in idxs] + + + @classmethod + def _datarange(cls, data): + return np.nanmin(data), np.nanmax(data) + + + def values(self, dim): + data = self.element.data + dim_idx = self.element.get_dimension_index(dim) + if data.ndim == 1: + data = np.atleast_2d(data).T + return data[:, dim_idx] + + + def reindex(self, kdims=None, vdims=None): + # DataFrame based tables don't need to be reindexed + dims = kdims + vdims + data = [self.element.dimension_values(d) for d in dims] + return np.column_stack(data) + + + def groupby(self, dimensions, container_type=HoloMap, **kwargs): + data = self.element.data + + # Get dimension objects, labels, indexes and data + dimensions = [self.element.get_dimension(d) for d in dimensions] + dim_labels = [d.name for d in dimensions] + dim_idxs = [self.element.get_dimension_index(d) for d in dimensions] + dim_data = {d: self.element.dimension_values(d) for d in dim_labels} + + # Find unique entries along supplied dimensions + indices = data[:, dim_idxs] + view = indices.view(np.dtype((np.void, indices.dtype.itemsize * indices.shape[1]))) + _, idx = np.unique(view, return_index=True) + unique_indices = indices[idx] + + # Iterate over the unique entries building masks + # to apply the group selection + grouped_data = [] + for group in unique_indices: + mask = np.zeros(len(data), dtype=bool) + for d, v in zip(dim_labels, group): + mask = np.logical_or(mask, dim_data[d] == v) + group_element = self.element.clone(data[mask, :], **kwargs) + grouped_data.append((tuple(group), group_element)) + return container_type(grouped_data, kdims=dimensions) + + + def select(self, **selection): + data = self.element.data + for d, slc in selection.items(): + idx = self.element.get_dimension_index(d) + if isinstance(slc, slice): + start = -float("inf") if slc.start is None else slc.start + stop = float("inf") if slc.stop is None else slc.stop + clip_start = start <= data[:, idx] + clip_stop = data[:, idx] < stop + data = data[np.logical_and(clip_start, clip_stop), :] + elif isinstance(slc, (set, list)): + filt = np.in1d(data[:, idx], list(slc)) + data = data[filt, :] + else: + if self.element.ndims == 1: + data_index = np.argmin(np.abs(data[:, idx] - slc)) + data = data[data_index, :] + else: + data = data[data[:, idx] == slc, :] + return data + + + @classmethod + def collapse_data(cls, data, 
function, **kwargs): + new_data = [arr[:, self.ndim:] for arr in data] + if isinstance(function, np.ufunc): + collapsed = function.reduce(new_data) + else: + collapsed = function(np.dstack(new_data), axis=-1, **kwargs) + return np.hstack([data[0][:, self.ndims:, np.newaxis], collapsed]) + + + def sample(self, samples=[]): + """ + Sample the Element data with a list of samples. + """ + data = self.element.data + mask = np.zeros(len(self), dtype=bool) + for sample in samples: + if np.isscalar(sample): sample = [sample] + for i, v in enumerate(sample): + mask = np.logical_or(mask, data[:, i]==v) + return data[mask] + + + def reduce(self, dimensions=[], function=None, **reduce_map): + """ + Allows collapsing of Chart objects using the supplied map of + dimensions and reduce functions. + """ + reduce_map = self._reduce_map(dimensions, function, reduce_map) + + dim, reduce_fn = list(reduce_map.items())[0] + if dim in self._cached_index_names: + reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) + else: + raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) + params = dict(self.get_param_values(onlychanged=True), vdims=self.vdims, + kdims=[]) + params.pop('extents', None) + return ItemTable(reduced_data, **params) + + + def dframe(self): + import pandas as pd + column_names = self.dimensions(label=True) + dim_vals = np.vstack([self.dimension_values(dim) for dim in column_names]).T + return pd.DataFrame(dim_vals, columns=column_names) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 7bd6f75ab8..b0829e31bb 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -56,56 +56,6 @@ def hist(self, dimension=None, num_bins=20, bin_range=None, # Subclassable methods # #======================# - def __init__(self, data, **params): - convert = isinstance(data, Element) - if convert: - params = dict(data.get_param_values(onlychanged=True), - **params) - element = data - data = [] if is_dataframe(element.data) else element.data - if is_dataframe(data): - self._dataframe = True - kdims, vdims = self._process_df_dims(data, params) - params['kdims'] = kdims - params['vdims'] = vdims - element = data - data = None - super(Element, self).__init__(data, **params) - if self._dataframe: - self.data = element - elif convert: - self.data = self._convert_element(element) - - - def _process_df_dims(self, data, kwargs): - if 'kdims' in kwargs or 'vdims' in kwargs: - kdims = kwargs.get('kdims', []) - vdims = kwargs.get('vdims', []) - col_labels = [c.name if isinstance(c, Dimension) else c - for c in kdims+vdims] - if not all(c in data.columns for c in col_labels): - raise ValueError("Supplied dimensions don't match columns" - "in the dataframe.") - else: - kdims = list(data.columns[:len(self.kdims)]) - vdims = list(data.columns[len(self.kdims):]) - return kdims, vdims - - - def _convert_element(self, element): - type_str = self.__class__.__name__ - type_name = type_str.lower() - table = element.table() - conversion = getattr(table.to, type_name) - if conversion is None: - return element - try: - converted = conversion(self._cached_index_names, - self._cached_value_names) - except: - raise - return converted.data - def __getitem__(self, key): if key is (): @@ -177,44 +127,8 @@ def _reduce_map(self, dimensions, function, reduce_map): return {sanitized.get(d, d): fn for d, fn in reduce_map.items()} - def table(self, **kwargs): - """ - This method transforms any ViewableElement type into a Table - as long as it implements a 
dimension_values method. - """ - from ..element import Table - if self._dataframe: - data = self.dframe() - else: - keys = zip(*[self.dimension_values(dim.name) - for dim in self.kdims]) - values = zip(*[self.dimension_values(dim.name) - for dim in self.vdims]) - if not keys: keys = [()]*len(values) - if not values: [()]*len(keys) - data = zip(keys, values) - kwargs = {'label': self.label - for k, v in self.get_param_values(onlychanged=True) - if k in ['group', 'label']} - params = dict(kdims=self.kdims, - vdims=self.vdims, - label=self.label) - if not self.params()['group'].default == self.group: - params['group'] = self.group - return Table(data, **dict(params, **kwargs)) - - - def dframe(self): - import pandas as pd - if self._dataframe: - return self.data - column_names = self.dimensions(label=True) - dim_vals = np.vstack([self.dimension_values(dim) for dim in column_names]).T - return pd.DataFrame(dim_vals, columns=column_names) - - -class Tabular(NdMapping): +class Tabular(Element): """ Baseclass to give an NdMapping objects an API to generate a table representation. @@ -270,7 +184,7 @@ class Element2D(Element): defining the (left, bottom, right and top) edges.""") -class NdElement(Element, Tabular): +class NdElement(NdMapping, Tabular): """ An NdElement is an Element that stores the contained data as an NdMapping. In addition to the usual multi-dimensional keys @@ -301,43 +215,11 @@ class NdElement(Element, Tabular): _deep_indexable = False def __init__(self, data=None, **params): - if isinstance(data, Element): - data = data.table() - elif isinstance(data, list) and all(np.isscalar(el) for el in data): + if isinstance(data, list) and all(np.isscalar(el) for el in data): data = OrderedDict(list(((k,), v) for k, v in enumerate(data))) super(NdElement, self).__init__(data, **params) - def _convert_element(self, element): - if isinstance(element, NdElement): - return element.data - if isinstance(element, Element): - return element.table().data - else: return element - - - def groupby(self, dimensions, container_type=NdMapping): - if self._dataframe: - dim_labels = self.dimensions(label=True) - invalid_dims = list(set(dimensions) - set(dim_labels)) - if invalid_dims: - raise Exception('Following dimensions could not be found %s.' - % invalid_dims) - - index_dims = [self.get_dimension(d) for d in dimensions] - mapping = container_type(None, kdims=index_dims) - kdims = set(self._cached_index_names) - set(dimensions) - vdims = set(self._cached_value_names) - set(dimensions) - kdims = [self.get_dimension(d) for d in kdims] - vdims = [self.get_dimension(d) for d in vdims] - for k, v in self.data.groupby(dimensions): - data = v.drop(dimensions, axis=1) - mapping[k] = self.clone(data, kdims=kdims, vdims=vdims) - return mapping - else: - return super(NdElement, self).groupby(dimensions, container_type) - - def reindex(self, kdims=None, vdims=None, force=False): """ Create a new object with a re-ordered set of dimensions. 
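The NdElement constructor above turns a bare list of scalars into implicitly indexed rows; the transformation it applies is simply:

    from collections import OrderedDict

    data = [0.1, 0.5, 0.9]
    rows = OrderedDict(((k,), v) for k, v in enumerate(data))
    print(list(rows.items()))    # [((0,), 0.1), ((1,), 0.5), ((2,), 0.9)]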
@@ -355,10 +237,6 @@ def reindex(self, kdims=None, vdims=None, force=False): key_dims = [self.get_dimension(k) for k in kdims] val_dims = [self.get_dimension(v) for v in vdims] - # DataFrame based tables don't need to be reindexed - if self._dataframe: - return self.clone(kdims=key_dims, vdims=val_dims) - kidxs = [(i, k in self._cached_index_names, self.get_dimension_index(k)) for i, k in enumerate(kdims)] vidxs = [(i, v in self._cached_index_names, self.get_dimension_index(v)) @@ -419,19 +297,7 @@ def _filter_data(self, subtable, vdims): vdims = [self.vdims[i] for i in indices] items = [(k, tuple(v[i] for i in indices)) for (k,v) in subtable.items()] - if len(items) == 1: - data = items[0][1] - if len(vdims) == 1: - return data[0] - else: - from ..element.tabular import ItemTable - kwargs = {'label': self.label - for k, v in self.get_param_values(onlychanged=True) - if k in ['group', 'label']} - data = list(zip(vdims, data)) - return ItemTable(data, **kwargs) - else: - return subtable.clone(items, vdims=vdims) + return subtable.clone(items, vdims=vdims) def __getitem__(self, args): @@ -439,55 +305,29 @@ def __getitem__(self, args): In addition to usual NdMapping indexing, NdElements can be indexed by column name (or a slice over column names) """ - if self._dataframe: - if not isinstance(args, tuple): args = (args,) - return self.select(**dict(zip(self.dimensions(label=True), args))) ndmap_index = args[:self.ndims] if isinstance(args, tuple) else args subtable = NdMapping.__getitem__(self, ndmap_index) - if len(self.vdims) > 1 and not isinstance(subtable, NdElement): - subtable = self.__class__([((), subtable)], label=self.label, - kdims=[], vdims=self.vdims) + if not isinstance(subtable, NdElement): + subtable = self.__class__([(args, subtable)], label=self.label, + kdims=self.kdims, vdims=self.vdims) # If subtable is not a slice return as reduced type if not isinstance(args, tuple): args = (args,) shallow = len(args) <= self.ndims - slcs = any(isinstance(a, (slice, set)) for a in args[:self.ndims]) - if shallow and not (slcs or len(args) == 0): - args = list(args) + [self.dimensions('value', True)] - elif shallow: + if shallow: return subtable return self._filter_data(subtable, args[-1]) - def select(self, selection_specs=None, **select): - """ - Allows slice and select individual values along the DataFrameView - dimensions. Supply the dimensions and values or slices as - keyword arguments. - """ - if self._dataframe: - df = self.data - for dim, k in select.items(): - if isinstance(k, tuple): - k = slice(*k) - if isinstance(k, slice): - df = df[(k.start < df[dim]) & (df[dim] < k.stop)] - else: - df = df[df[dim] == k] - return self.clone(df) - else: - return super(NdElement, self).select(selection_specs, **select) - - def sample(self, samples=[]): """ Allows sampling of the Table with a list of samples. 
""" sample_data = OrderedDict() for sample in samples: - sample_data[sample] = self[sample] + sample_data[sample] = self[sample].values()[0] return self.__class__(sample_data, **dict(self.get_param_values(onlychanged=True))) @@ -544,11 +384,7 @@ def collapse_data(cls, data, function, **kwargs): def dimension_values(self, dim): dim = self.get_dimension(dim) value_dims = self.dimensions('value', label=True) - if self._dataframe: - if dim.name in self.data.columns: - return np.array(self.data[dim.name]) - return None - elif dim.name in value_dims: + if dim.name in value_dims: index = value_dims.index(dim.name) return [v[index] for v in self.values()] else: @@ -671,7 +507,7 @@ def __call__(self): def _add_item(self, key, value, sort=True): - Tabular._add_item(self, key, value, sort) + NdMapping._add_item(self, key, value, sort) @property diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index bab0d6cfbf..cbb8a9a74e 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -3,11 +3,11 @@ import param from ..core import util -from ..core import OrderedDict, Dimension, UniformNdMapping, Element, Element2D, NdElement, HoloMap +from ..core import OrderedDict, Dimension, UniformNdMapping, Element, Columns, Element2D, NdElement, HoloMap from .tabular import ItemTable, Table from .util import compute_edges -class Chart(Element2D): +class Chart(Columns, Element2D): """ The data held within Chart is a numpy array of shape (N, D), where N is the number of samples and D the number of dimensions. @@ -31,207 +31,6 @@ class Chart(Element2D): _null_value = np.array([[], []]).T # For when data is None - def __init__(self, data, **kwargs): - data, params = self._process_data(data, kwargs) - super(Chart, self).__init__(data, **params) - self.data = self._validate_data(self.data) - - - def _convert_element(self, element): - if isinstance(element, Chart): - return element.data - elif isinstance(element, NdElement): - return np.vstack([np.concatenate([key, vals]) - for key, vals in element.data.items()]).astype(np.float) - else: - return super(Chart, self)._convert_element(element) - - - def _process_data(self, data, kwargs): - params = {} - if isinstance(data, UniformNdMapping) or (isinstance(data, list) and data - and isinstance(data[0], Element2D)): - params = dict([v for v in data][0].get_param_values(onlychanged=True)) - data = np.concatenate([v.data for v in data]) - elif isinstance(data, Element) or util.is_dataframe(data): - pass - elif isinstance(data, tuple): - data = np.column_stack(data) - elif not isinstance(data, np.ndarray): - data = self._null_value if (data is None) else list(data) - if len(data): - data = np.array(data) - params.update(kwargs) - return data, params - - - def _validate_data(self, data): - if self._dataframe: - return data - if data.ndim == 1: - data = np.array(list(zip(range(len(data)), data))) - if not data.shape[1] == len(self.dimensions()): - raise ValueError("Data has to match number of key and value dimensions") - return data - - - def closest(self, coords): - """ - Given single or multiple x-values, returns the list - of closest actual samples. - """ - if not isinstance(coords, list): coords = [coords] - xs = self.data[:, 0] - idxs = [np.argmin(np.abs(xs-coord)) for coord in coords] - return [xs[idx] for idx in idxs] - - - def __getitem__(self, slices): - """ - Implements slicing or indexing of the data by the data x-value. - If a single element is indexed reduces the Element2D to a single - Scatter object. 
- """ - if slices is (): - return self - if not isinstance(slices, tuple): slices = (slices,) - if len(slices) > self.ndims: - raise Exception("Slice must match number of key dimensions.") - - data = self.data - lower_bounds, upper_bounds = [], [] - for idx, slc in enumerate(slices): - if isinstance(slc, slice): - start = -float("inf") if slc.start is None else slc.start - stop = float("inf") if slc.stop is None else slc.stop - - clip_start = start <= data[:, idx] - clip_stop = data[:, idx] < stop - data = data[np.logical_and(clip_start, clip_stop), :] - lbound = self.extents[idx] - ubound = self.extents[self.ndims:][idx] - lower_bounds.append(lbound if slc.start is None else slc.start) - upper_bounds.append(ubound if slc.stop is None else slc.stop) - else: - if self.ndims == 1: - data_index = np.argmin(np.abs(data[:, idx] - slc)) - data = data[data_index, :] - else: - raise KeyError("Only 1D Chart types may be indexed.") - if not any(isinstance(slc, slice) for slc in slices): - if data.ndim == 1: - data = data[self.ndims:] - dims = data.shape[0] - else: - data = data[:, self.ndims:] - dims = data.shape[1] - return data[0] if dims == 1 else data - if self.ndims == 1: - lower_bounds.append(None) - upper_bounds.append(None) - - return self.clone(data, extents=tuple(lower_bounds + upper_bounds)) - - - @classmethod - def collapse_data(cls, data, function, **kwargs): - new_data = [arr[:, 1:] for arr in data] - if isinstance(function, np.ufunc): - collapsed = function.reduce(new_data) - else: - collapsed = function(np.dstack(new_data), axis=-1, **kwargs) - return np.hstack([data[0][:, 0, np.newaxis], collapsed]) - - - def sample(self, samples=[]): - """ - Allows sampling of Chart Elements using the default - syntax of providing a map of dimensions and sample pairs. - """ - sample_data = OrderedDict() - for sample in samples: - data = self[sample] - data = data if np.isscalar(data) else tuple(data) - sample_data[sample] = data - params = dict(self.get_param_values(onlychanged=True)) - params.pop('extents', None) - return Table(sample_data, **dict(params, kdims=self.kdims, - vdims=self.vdims)) - - - def reduce(self, dimensions=[], function=None, **reduce_map): - """ - Allows collapsing of Chart objects using the supplied map of - dimensions and reduce functions. 
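The sample and reduce semantics these Chart methods implemented carry over to the Columns-based element; numerically the expected results are easy to check by hand, assuming the eleven-sample ramp used by the tests later in this series:

    import numpy as np

    xs = np.arange(11)
    ys = np.linspace(0, 1, 11)
    data = np.column_stack([xs, ys])

    # reduce(x=np.mean) collapses the chart to one value per value dimension
    print(np.mean(data[:, 1]))              # 0.5

    # sample([0, 5, 10]) keeps the rows whose key matches the samples
    mask = np.in1d(data[:, 0], [0, 5, 10])
    print(data[mask, 1])                    # [0.  0.5 1. ]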
- """ - reduce_map = self._reduce_map(dimensions, function, reduce_map) - - if len(reduce_map) > 1: - raise ValueError("Chart Elements may only be reduced to a point.") - dim, reduce_fn = list(reduce_map.items())[0] - if dim in self._cached_index_names: - reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) - else: - raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) - params = dict(self.get_param_values(onlychanged=True), vdims=self.vdims, - kdims=[]) - params.pop('extents', None) - return ItemTable(reduced_data, **params) - - - def __len__(self): - return len(self.data) - - - def dimension_values(self, dim): - index = self.get_dimension_index(dim) - if index < len(self.dimensions()): - if self._dataframe: - data = self.data[self.get_dimension(index).name] - if util.dd and isinstance(data, util.dd.Series): - data = data.compute() - return data - else: - if self.data.ndim == 1: - data = np.atleast_2d(self.data).T - else: - data = self.data - return data[:, index] - else: - return super(Chart, self).dimension_values(dim) - - - def range(self, dim, data_range=True): - dim_idx = dim if isinstance(dim, int) else self.get_dimension_index(dim) - dim = self.get_dimension(dim_idx) - if dim.range != (None, None): - return dim.range - elif dim_idx < len(self.dimensions()): - if len(self.data): - data = self.dimension_values(dim_idx) - if self._dataframe: - data_range = data.min(), data.max() - else: - data_range = np.nanmin(data), np.nanmax(data) - else: - data_range = (np.NaN, np.NaN) - if data_range: - return util.max_range([data_range, dim.soft_range]) - else: - return dim.soft_range - - - def dframe(self): - import pandas as pd - if self._dataframe: - return self.data.copy() - elif pd: - columns = [d.name for d in self.dimensions()] - return pd.DataFrame(self.data, columns=columns) - else: - raise ImportError("Pandas not found.") - - class Scatter(Chart): """ @@ -257,21 +56,6 @@ class Curve(Chart): group = param.String(default='Curve', constant=True) - def progressive(self): - """ - Create map indexed by Curve x-axis with progressively expanding number - of curve samples. 
- """ - vmap = HoloMap(None, kdims=self.kdims, - title=self.title+' {dims}') - for idx in range(len(self.data)): - x = self.data[0] - if x in vmap: - vmap[x].data.append(self.data[0:idx]) - else: - vmap[x] = self.clone(self.data[0:idx]) - return vmap - class ErrorBars(Chart): @@ -296,19 +80,23 @@ class ErrorBars(Chart): vdims = param.List(default=[Dimension('lerror'), Dimension('uerror')], bounds=(2,2), constant=True) + def _validate_data(self, data): - if data.shape[1] == 3: - return np.column_stack([data, data[:, 2]]) - else: - return data + if self.shape[1] == 3: + data = self.interface.add_dimension(data, self.vdims[1].name, + self.dimension_values(2)) + return super(ErrorBars, self)._validate_data(data) def range(self, dim, data_range=True): drange = super(ErrorBars, self).range(dim, data_range) didx = self.get_dimension_index(dim) if didx == 1 and data_range: - lower = np.nanmin(self.data[:, 1] - self.data[:, 2]) - upper = np.nanmax(self.data[:, 1] + self.data[:, 3]) + mean = self.dimension_values(1) + neg_error = self.dimension_values(2) + pos_error = self.dimension_values(3) + lower = np.nanmin(mean-neg_error) + upper = np.nanmax(mean+pos_error) return util.max_range([(lower, upper), drange]) else: return drange @@ -330,7 +118,7 @@ class Spread(ErrorBars): -class Bars(NdElement): +class Bars(Columns): """ Bars is an Element type, representing a number of stacked and grouped bars, depending the dimensionality of the key and value @@ -494,7 +282,6 @@ class Points(Chart): vdims = param.List(default=[]) - _min_dims = 2 # Minimum number of columns def __iter__(self): @@ -547,8 +334,6 @@ class VectorField(Points): _min_dims = 3 # Minimum number of columns def __init__(self, data, **params): - if not isinstance(data, np.ndarray): - data = np.array([ - [el for el in (col.flat if isinstance(col,np.ndarray) else col)] - for col in data]).T + if isinstance(data, list) and all(isinstance(d, np.ndarray) for d in data): + data = np.column_stack([d.flat if d.ndim > 1 else d for d in data]) super(VectorField, self).__init__(data, **params) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 2d8e533013..2264bc094e 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -2,7 +2,8 @@ import param -from ..core import OrderedDict, Dimension, Element, NdElement, HoloMap +from ..core import (OrderedDict, Dimension, Element, Columns, + Tabular, NdElement, HoloMap) class ItemTable(Element): @@ -141,7 +142,7 @@ def values(self): -class Table(NdElement): +class Table(Columns, Tabular): """ Table is an NdElement type, which gets displayed in a tabular format and is convertible to most other Element types. diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index ca088f7c72..070d059753 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -18,12 +18,12 @@ import param -from ..core import ViewableElement, NdMapping, NdOverlay,\ +from ..core import ViewableElement, NdMapping, Columns, NdOverlay,\ NdLayout, GridSpace, NdElement, HoloMap from ..element import Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface -class DataFrameView(NdElement): +class DataFrameView(Columns): """ DataFrameView provides a convenient compatibility wrapper around Pandas DataFrames. 
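The new ErrorBars.range computation above works purely on dimension_values: the y-range is widened by the negative and positive error columns. A small numeric check of that formula:

    import numpy as np

    # rows are (x, y, negative error, positive error)
    rows = np.array([[0, 1.0, 0.2, 0.4],
                     [1, 2.0, 0.1, 0.3]])
    mean, neg, pos = rows[:, 1], rows[:, 2], rows[:, 3]
    print(np.nanmin(mean - neg), np.nanmax(mean + pos))   # 0.8 2.3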
It provides several core functions: @@ -95,35 +95,6 @@ def apply(self, name, *args, **kwargs): return self.clone(getattr(self.data, name)(*args, **kwargs), clone_override=True) - - def aggregate(self, dimensions=[], function=None, **reductions): - """ - The aggregate function accepts either a list of Dimensions - and a function to apply to find the aggregate across - those Dimensions or a list of dimension/function pairs - to apply one by one. - """ - if not dimensions and not reductions: - raise Exception("Supply either a list of Dimensions or" - "reductions as keyword arguments") - reduced = self.data - dfnumeric = reduced.applymap(np.isreal).all(axis=0) - unreducable = list(dfnumeric[dfnumeric == False].index) - if dimensions: - if not function: - raise Exception("Supply a function to reduce the Dimensions with") - reduced = reduced.groupby(dimensions+unreducable, as_index=True).aggregate(function) - reduced_indexes = [reduced.index.names.index(d) for d in unreducable if d not in dimensions] - reduced = reduced.reset_index(level=reduced_indexes) - if reductions: - for dim, fn in reductions.items(): - reduced = reduced.groupby(dim, as_index=True).aggregate(fn) - reduced_indexes = [reduced.index.names.index(d) for d in unreducable] - reduced = reduced.reset_index(level=reduced_indexes) - kdims = [self.get_dimension(d) for d in reduced.columns] - return self.clone(reduced, kdims=kdims) - - def overlay(self, dimensions): return self.groupby(dimensions, NdOverlay) From f42eeb3feecda2bc9434ce86d771d6671b2b782d Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 15 Oct 2015 02:08:06 +0100 Subject: [PATCH 030/212] Handling scalar values in Dimensioned.select --- holoviews/core/dimension.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 458ddf63a9..707246a136 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -11,6 +11,7 @@ except: from collections import OrderedDict +import numpy as np import param from ..core.util import basestring, sanitize_identifier, max_range, find_range @@ -683,7 +684,9 @@ def select(self, selection_specs=None, **kwargs): else: selection = self - if type(selection) is not type(self): + if np.isscalar(selection): + return selection + elif type(selection) is not type(self): # Apply the selection on the selected object of a different type val_dim = ['value'] if selection.vdims else [] key_dims = selection.dimensions('key', label=True) + val_dim From 20122526836a0bbb301771ebd1d99200d2ef5b98 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 15 Oct 2015 02:09:41 +0100 Subject: [PATCH 031/212] Chart plots now work independent of the data backend --- holoviews/plotting/bokeh/chart.py | 4 +-- holoviews/plotting/mpl/chart.py | 46 ++++++++++++++++--------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index 1afe641998..bab190e3b5 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -60,8 +60,8 @@ def get_data(self, element, ranges=None): mapping['size'] = 'size' ms = style.get('size', 1) sizes = element.dimension_values(self.size_index) - data[map_key] = compute_sizes(sizes, self.size_fn, - self.scaling_factor, ms) + data['size'] = compute_sizes(sizes, self.size_fn, + self.scaling_factor, ms) data[dims[0]] = element.dimension_values(0) data[dims[1]] = element.dimension_values(1) if 'hover' in self.tools: diff --git 
a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index dbadc33c31..76ea28751d 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -53,8 +53,8 @@ def _cyclic_curves(self, curveview): """ Mutate the lines object to generate a rotated cyclic curves. """ - x_values = list(curveview.data[:, 0]) - y_values = list(curveview.data[:, 1]) + x_values = list(curveview.dimension_values(0)) + y_values = list(curveview.dimension_values(1)) if self.center_cyclic: rotate_n = self.peak_argmax+len(x_values)/2 y_values = self._rotate(y_values, n=rotate_n) @@ -127,7 +127,8 @@ def initialize_plot(self, ranges=None): # Create line segments and apply style style = self.style[self.cyclic_index] legend = element.label if self.show_legend else '' - line_segment = axis.plot(data[:, 0], data[:, 1], label=legend, + line_segment = axis.plot(element.dimension_values(0), + element.dimension_values(1), label=legend, zorder=self.zorder, **style)[0] self.handles['artist'] = line_segment @@ -135,12 +136,11 @@ def initialize_plot(self, ranges=None): def update_handles(self, axis, element, key, ranges=None): - data = element.data artist = self.handles['artist'] if self.cyclic_range is not None: data = self._cyclic_curves(element) - artist.set_xdata(data[:, 0]) - artist.set_ydata(data[:, 1]) + artist.set_xdata(element.dimension_values(0)) + artist.set_ydata(element.dimension_values(1)) @@ -171,8 +171,8 @@ def initialize_plot(self, ranges=None): error_kwargs = dict(self.style[self.cyclic_index], fmt='none', zorder=self.zorder) error_kwargs['yerr'] = element.data[:, 2:4].T - _, (bottoms, tops), verts = axis.errorbar(element.data[:, 0], - element.data[:, 1], + _, (bottoms, tops), verts = axis.errorbar(element.dimension_values(0), + element.dimension_values(1), **error_kwargs) self.handles['bottoms'] = bottoms self.handles['tops'] = tops @@ -237,9 +237,10 @@ def initialize_plot(self, ranges=None): def update_handles(self, axis, element, key, ranges=None): if 'paths' in self.handles: self.handles['paths'].remove() - paths = axis.fill_between(element.data[:, 0], - element.data[:, 1]-element.data[:, 2], - element.data[:, 1]+element.data[:, 3], + yvals = element.data[:, 1] + paths = axis.fill_between(element.dimension_values(0), + yvals-element.dimension_values(2), + yvals+element.dimension_values(3), zorder=self.zorder, label=element.label if self.show_legend else None, **self.style[self.cyclic_index]) @@ -547,9 +548,9 @@ def initialize_plot(self, ranges=None): ranges = match_spec(points, ranges) ndims = points.data.shape[1] - xs = points.data[:, 0] if len(points.data) else [] - ys = points.data[:, 1] if len(points.data) else [] - cs = points.data[:, self.color_index] if self.color_index < ndims else None + xs = points.dimension_values(0) if len(points.data) else [] + ys = points.dimension_values(1) if len(points.data) else [] + cs = points.dimension_values(self.color_index) if self.color_index < ndims else None style = self.style[self.cyclic_index] if self.size_index < ndims and self.scaling_factor > 1: @@ -575,7 +576,7 @@ def initialize_plot(self, ranges=None): def _compute_size(self, element, opts): - sizes = element.data[:, self.size_index] + sizes = element.dimension_values(self.size_index) ms = opts.pop('s') if 's' in opts else plt.rcParams['lines.markersize'] return compute_sizes(sizes, self.size_fn, self.scaling_factor, ms) @@ -648,10 +649,10 @@ def _get_map_info(self, vmap): def _get_info(self, vfield, input_scale, ranges): - xs = vfield.data[:, 0] if len(vfield.data) else 
[] - ys = vfield.data[:, 1] if len(vfield.data) else [] - radians = vfield.data[:, 2] if len(vfield.data) else [] - magnitudes = vfield.data[:, 3] if vfield.data.shape[1]>=4 else np.array([1.0] * len(xs)) + xs = vfield.dimension_values(0) if len(vfield.data) else [] + ys = vfield.dimension_values(1) if len(vfield.data) else [] + radians = vfield.dimension_values(2) if len(vfield.data) else [] + magnitudes = vfield.dimension_values(3) if vfield.data.shape[1]>=4 else np.array([1.0] * len(xs)) colors = magnitudes if self.color_dim == 'magnitude' else radians if vfield.data.shape[1] >= 4: @@ -690,7 +691,7 @@ def initialize_plot(self, ranges=None): ranges = match_spec(vfield, ranges) xs, ys, angles, lens, colors, scale = self._get_info(vfield, input_scale, ranges) - args = (xs, ys, lens, [0.0] * len(vfield.data)) + args = (xs, ys, lens, [0.0] * len(vfield)) args = args + (colors,) if colorized else args if not self.arrow_heads: @@ -922,11 +923,12 @@ def _create_bars(self, axis, element): label_key[idx] = stk style_key[idx] = stk_name val_key[si] = stk_name - val = element.get(tuple(val_key), (np.NaN,)) + vals = element.sample([tuple(val_key)]).dimension_values(element.vdims[0].name) + val = float(vals[0]) if vals else np.NaN label = ', '.join(label_key) style = dict(style_opts, label='' if label in labels else label, **dict(zip(sopts, color_groups[tuple(style_key)]))) - bar = axis.bar([xpos], val, width=width, bottom=prev, + bar = axis.bar([xpos], [val], width=width, bottom=prev, **style) # Update variables From 0c22fb61d8949946e086c00d65698e5367b6380b Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 15 Oct 2015 02:11:00 +0100 Subject: [PATCH 032/212] Updated Chart.sample test --- tests/testcharts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testcharts.py b/tests/testcharts.py index a1547d82f0..6691946c1f 100644 --- a/tests/testcharts.py +++ b/tests/testcharts.py @@ -38,5 +38,5 @@ def test_chart_reduce(self): self.assertEqual(mean, itable) def test_chart_sample(self): - samples = self.chart.sample([0, 5, 10]).values() - self.assertEqual(samples, [(0,), (0.5,), (1,)]) + samples = self.chart.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) From 07f14cefc5860ec0ca52983f6fd1f02adf57a133 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 15 Oct 2015 14:43:52 +0100 Subject: [PATCH 033/212] Updated comparisons of Columns based Elements --- holoviews/element/comparison.py | 65 +++++++++++++-------------------- 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index b25e92b978..3a0769b689 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -432,34 +432,39 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): #========# @classmethod - def compare_curve(cls, el1, el2, msg=None): + def compare_chart(cls, el1, el2, msg=None): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Curve data') + if isinstance(el1.data, np.ndarray): + cls.compare_arrays(el1.data, el2.data, msg) + elif isinstance(el1.data, NdElement): + cls.compare_ndmappings(el1.data, el2.data, msg) + else: + cls.compare_dframe(el1, el2, msg) + + + @classmethod + def compare_curve(cls, el1, el2, msg=None): + cls.compare_chart(el1, el2, msg='Curve data') @classmethod def compare_errorbars(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'ErrorBars data') + cls.compare_chart(el1, 
el2, msg='ErrorBars data') @classmethod def compare_spread(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Spread data') + cls.compare_chart(el1, el2, msg='Spread data') @classmethod def compare_scatter(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Scatter data') + cls.compare_chart(el1, el2, msg='Scatter data') @classmethod def compare_scatter3d(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Scatter3D data') + cls.compare_chart(el1, el2, msg='Scatter3D data') @classmethod def compare_trisurface(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Trisurface data') + cls.compare_chart(el1, el2, msg='Trisurface data') @classmethod def compare_histogram(cls, el1, el2, msg=None): @@ -470,24 +475,16 @@ def compare_histogram(cls, el1, el2, msg=None): @classmethod def compare_points(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - if len(el1) != len(el2): - raise cls.failureException("Points objects have different numbers of points.") + cls.compare_chart(el1, el2, msg='Points data') - cls.compare_arrays(el1.data, el2.data, 'Points data') @classmethod def compare_vectorfield(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - if len(el1) != len(el2): - raise cls.failureException("VectorField objects have different numbers of vectors.") - - cls.compare_arrays(el1.data, el2.data, 'VectorField data') + cls.compare_chart(el1, el2, msg='VectorField data') @classmethod def compare_bars(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_ndmappings(el1, el2, msg) + cls.compare_chart(el1, el2, msg='Bars data') #=========# # Rasters # @@ -553,15 +550,7 @@ def compare_itemtables(cls, el1, el2, msg=None): @classmethod def compare_tables(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - if el1.rows != el2.rows: - raise cls.failureException("Tables have different numbers of rows.") - - if el1.cols != el2.cols: - raise cls.failureException("Tables have different numbers of columns.") - - cls.compare_ndmappings(el1, el2, msg) - + cls.compare_chart(el1, el2, msg='Table data') #========# # Pandas # @@ -582,23 +571,19 @@ def compare_dframe(cls, el1, el2, msg=None): @classmethod def compare_distribution(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Distribution data') + cls.compare_chart(el1, el2, msg='Distribution data') @classmethod def compare_timeseries(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'TimeSeries data') + cls.compare_chart(el1, el2, msg='TimeSeries data') @classmethod def compare_bivariate(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Bivariate data') + cls.compare_chart(el1, el2, msg='Bivariate data') @classmethod def compare_regression(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Regression data') + cls.compare_chart(el1, el2, msg='Regression data') #=======# # Grids # From 37a841e70b59104e90948b7372de155b00be7389 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 15 Oct 2015 14:44:42 +0100 Subject: [PATCH 034/212] Array comparisons now support non-numeric data --- holoviews/element/comparison.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git 
a/holoviews/element/comparison.py b/holoviews/element/comparison.py index 3a0769b689..1263674423 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -20,7 +20,7 @@ import numpy as np from unittest.util import safe_repr from unittest import TestCase -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_equal, assert_array_almost_equal from . import * # pyflakes:ignore (All Elements need to support comparison) from ..core import Element, Empty, AdjointLayout, Overlay, Dimension, HoloMap, \ @@ -210,9 +210,12 @@ def compare_floats(cls, arr1, arr2, msg='Floats'): @classmethod def compare_arrays(cls, arr1, arr2, msg='Arrays'): try: - assert_array_almost_equal(arr1, arr2) - except AssertionError as e: - raise cls.failureException(msg + str(e)[11:]) + assert_array_equal(arr1, arr2) + except: + try: + assert_array_almost_equal(arr1, arr2) + except AssertionError as e: + raise cls.failureException(msg + str(e)[11:]) @classmethod def bounds_check(cls, el1, el2, msg=None): From 07f9fb53f30b4055f7df44177d0372c578a9cd15 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 16 Oct 2015 02:31:50 +0100 Subject: [PATCH 035/212] Added shape property to NdElement --- holoviews/core/element.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index b0829e31bb..d874081892 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -220,6 +220,11 @@ def __init__(self, data=None, **params): super(NdElement, self).__init__(data, **params) + @property + def shape(self): + return (len(self), len(self.dimensions())) + + def reindex(self, kdims=None, vdims=None, force=False): """ Create a new object with a re-ordered set of dimensions. 
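The updated compare_arrays tries exact equality first, which also covers non-numeric columns, and only falls back to approximate comparison for numeric data. The behaviour can be sketched as:

    import numpy as np
    from numpy.testing import assert_array_equal, assert_array_almost_equal

    def compare(arr1, arr2):
        try:
            assert_array_equal(arr1, arr2)         # exact; works for string/object arrays too
        except AssertionError:
            assert_array_almost_equal(arr1, arr2)  # approximate fallback for numeric data

    compare(np.array(['M', 'F']), np.array(['M', 'F']))   # exact path
    compare(np.array([0.1 + 0.2]), np.array([0.3]))       # passes via the approximate fallback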
From 04506242eb2d32d02321aa8f1b5e6c0f52ff1249 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 16 Oct 2015 02:34:41 +0100 Subject: [PATCH 036/212] Readded general Element.table and Element.dframe methods --- holoviews/core/data.py | 7 ------- holoviews/core/element.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index c7d749a1cd..ec047667e7 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -518,10 +518,3 @@ def reduce(self, dimensions=[], function=None, **reduce_map): kdims=[]) params.pop('extents', None) return ItemTable(reduced_data, **params) - - - def dframe(self): - import pandas as pd - column_names = self.dimensions(label=True) - dim_vals = np.vstack([self.dimension_values(dim) for dim in column_names]).T - return pd.DataFrame(dim_vals, columns=column_names) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index d874081892..f3901fb416 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -127,6 +127,18 @@ def _reduce_map(self, dimensions, function, reduce_map): return {sanitized.get(d, d): fn for d, fn in reduce_map.items()} + def table(self): + from ..element import Table + return Table(self) + + + def dframe(self): + import pandas as pd + column_names = self.dimensions(label=True) + dim_vals = OrderedDict([(d, self.dimension_values(dim)) for dim in column_names]) + return pd.DataFrame(dim_vals) + + class Tabular(Element): """ From 5e21c387369b00d1db513cef2e792643bf337cb3 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 16 Oct 2015 02:40:18 +0100 Subject: [PATCH 037/212] Added general mechanism for Element casting to Columns type --- holoviews/core/data.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index ec047667e7..4b35a0df7e 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -7,6 +7,11 @@ from itertools import groupby import numpy as np +try: + import pandas as pd +except ImportError: + pd = None + import param from .dimension import OrderedDict, Dimension @@ -18,11 +23,11 @@ class Columns(Element): def __init__(self, data, **kwargs): - if 'kdims' not in kwargs: - kwargs['kdims'] = self.kdims - if 'vdims' not in kwargs: - kwargs['vdims'] = self.vdims data, params = ColumnarData._process_data(data, **kwargs) + if 'kdims' not in params: + params['kdims'] = self.kdims + if 'vdims' not in params: + params['vdims'] = self.vdims super(Columns, self).__init__(data, **params) self.data = self._validate_data(self.data) @@ -186,13 +191,26 @@ def shape(self): @classmethod def _process_data(cls, data, **kwargs): params = {} - if isinstance(data, NdElement): + if isinstance(data, Element): params['kdims'] = data.kdims params['vdims'] = data.vdims params['label'] = data.label - elif isinstance(data, Element): - params = dict(data.get_param_values(onlychanged=True)) + if data.group != data.params()['group'].default: + params['group'] = data.group + + if isinstance(data, NdElement): + pass + elif isinstance(data, Columns): data = data.data + elif isinstance(data, Element): + dimensions = data.dimensions(label=True) + columns = OrderedDict([(dim, data.dimension_values(dim)) + for dim in dimensions]) + if pd: + data = pd.DataFrame(columns) + else: + data = OrderedDict([(row[:data.ndims], row[data.ndims:]) + for row in zip(*columns.values())]) elif util.is_dataframe(data): kdims, vdims = cls._process_df_dims(data, params) 
params['kdims'] = kdims From 8257144a62f898d3ca6af3ac96f663f75fb61d50 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 16 Oct 2015 02:40:52 +0100 Subject: [PATCH 038/212] Minor fix to Columns.dimension_values --- holoviews/core/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 4b35a0df7e..da80d8ab6e 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -149,6 +149,7 @@ def dimension_values(self, dim): if self.interface is None: return self.data.dimension_values(dim) else: + dim = self.get_dimension(dim).name return self.interface.values(dim) From 1fb6ca3a8baca4fa8dc2ee91959b6c7204cb343f Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 19:07:52 +0100 Subject: [PATCH 039/212] Various fixes and improvements to Columns interface --- holoviews/core/data.py | 110 ++++++++++++++++++++++++++++---------- holoviews/core/element.py | 7 ++- 2 files changed, 86 insertions(+), 31 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index da80d8ab6e..0f2bea609d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -6,6 +6,11 @@ from collections import defaultdict from itertools import groupby +try: + import itertools.izip as zip +except ImportError: + pass + import numpy as np try: import pandas as pd @@ -23,11 +28,8 @@ class Columns(Element): def __init__(self, data, **kwargs): - data, params = ColumnarData._process_data(data, **kwargs) - if 'kdims' not in params: - params['kdims'] = self.kdims - if 'vdims' not in params: - params['vdims'] = self.vdims + defaults = {'kdims': self.kdims, 'vdims': self.vdims} + data, params = ColumnarData._process_data(data, defaults, **kwargs) super(Columns, self).__init__(data, **params) self.data = self._validate_data(self.data) @@ -39,11 +41,24 @@ def _validate_data(self, data): return self.interface.validate_data(data) + def closest(self, coords): + if self.ndims > 1: + NotImplementedError("Closest method currently only " + "implemented for 1D Elements") + elif self.interface is None: + return self.data.closest(coords) + else: + return self.interface.closest(coords) + + def select(self, selection_specs=None, **selection): + if selection_specs and not self.matches(selection_specs): + return self + if self.interface is None: data = self.data.select(**selection) else: - data = self.interface.select(selection) + data = self.interface.select(**selection) if np.isscalar(data): return data else: @@ -87,12 +102,22 @@ def __getitem__(self, slices): """ if slices is (): return self if not isinstance(slices, tuple): slices = (slices,) - selection = dict(zip(self.dimensions(label=True), slices)) - if self.interface is None: - data = self.data.select(**selection) + value_select = None + if len(slices) == 1 and slices[0] in self.dimensions(): + return self.dimension_values(slices[0]) + elif len(slices) == self.ndims+1 and slices[self.ndims] in self.dimensions(): + selection = dict(zip(self.dimensions('key', label=True), slices)) + value_select = slices[self.ndims] else: - data = self.interface.select(**selection) - return self.clone(data) + selection = dict(zip(self.dimensions(label=True), slices)) + data = self.select(**selection) + if value_select: + values = data.dimension_values(value_select) + if len(values) > 1: + return values + else: + return values[0] + return data def sample(self, samples=[]): @@ -179,10 +204,6 @@ def __init__(self, element, **params): def array(self): NotImplementedError - @property - def ndims(self): - self.element.ndims - 
@property def shape(self): @@ -190,7 +211,7 @@ def shape(self): @classmethod - def _process_data(cls, data, **kwargs): + def _process_data(cls, data, defaults, **kwargs): params = {} if isinstance(data, Element): params['kdims'] = data.kdims @@ -206,7 +227,7 @@ def _process_data(cls, data, **kwargs): elif isinstance(data, Element): dimensions = data.dimensions(label=True) columns = OrderedDict([(dim, data.dimension_values(dim)) - for dim in dimensions]) + for dim in dimensions]) if pd: data = pd.DataFrame(columns) else: @@ -220,11 +241,28 @@ def _process_data(cls, data, **kwargs): data = np.column_stack(data) elif not isinstance(data, (np.ndarray, dict)): data = np.array() if data is None else list(data) - if all(np.isscalar(d) for coord in data for d in coord): - data = np.array(data) - elif len(data): - data = OrderedDict(data) + array = np.array(data) + # Check if data is of non-numeric type + if array.dtype.kind in ['S', 'U', 'O'] or array.ndim > 2: + # If data is in NdElement dictionary format or pandas + # is not available convert to OrderedDict + if ((isinstance(data[0], tuple) and len(data[0]) == 2 and + all(isinstance(data[0][i], tuple) for i in range(2))) + or not pd): + data = OrderedDict(data) + else: + dimensions = (params.get('kdims', defaults['kdims']) + + params.get('vdims', defaults['vdims'])) + columns = [d.name if isinstance(d, Dimension) else d + for d in dimensions] + data = pd.DataFrame(data, columns=columns) + else: + data = array params.update(kwargs) + if 'kdims' not in params: + params['kdims'] = defaults['kdims'] + if 'vdims' not in params: + params['vdims'] = defaults['vdims'] if isinstance(data, dict): data = NdElement(data, kdims=params['kdims'], vdims=params['vdims']) @@ -307,7 +345,6 @@ def validate_data(cls, data): class ColumnarDataFrame(ColumnarData): - def groupby(self, dimensions, container_type=HoloMap, **kwargs): invalid_dims = list(set(dimensions) - set(self._cached_index_names)) if invalid_dims: @@ -372,13 +409,18 @@ def select(self, selection_specs=None, **select): keyword arguments. """ df = self.element.data + selected_kdims = [] for dim, k in select.items(): if isinstance(k, tuple): k = slice(*k) if isinstance(k, slice): df = df[(k.start < df[dim]) & (df[dim] < k.stop)] else: + if dim in self.kdims: selected_kdims.append(dim) df = df[df[dim] == k] + if len(set(selected_kdims)) == self.element.ndims: + if len(df) and len(self.element.vdims) == 1: + df = df[self.element.vdims[0].name].iloc[0] return df @@ -418,15 +460,19 @@ def add_dimension(cls, data, dimension, values): return np.column_stack([data, values]) + def dframe(self): + return Element.dframe(self.element) + + def closest(self, coords): """ Given single or multiple x-values, returns the list of closest actual samples. 
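The dtype check in _process_data above (kind 'S', 'U' or 'O', or more than two array dimensions) is what decides whether list input can stay a homogeneous numpy array or has to fall back to dictionary/DataFrame storage. For example:

    import numpy as np

    numeric_rows = [(0, 1.5), (1, 2.5)]
    mixed_rows   = [('M', 10, 15), ('F', 12, 10)]

    print(np.array(numeric_rows).dtype.kind)   # 'f'  -> kept as a plain array
    print(np.array(mixed_rows).dtype.kind)     # 'U' (or 'S'/'O') -> dict/DataFrame storage instead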
""" if not isinstance(coords, list): coords = [coords] - xs = self.data[:, 0] + xs = self.element.data[:, 0] idxs = [np.argmin(np.abs(xs-coord)) for coord in coords] - return [xs[idx] for idx in idxs] + return [xs[idx] for idx in idxs] if len(coords) > 1 else xs[idxs[0]] @classmethod @@ -478,6 +524,8 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): def select(self, **selection): data = self.element.data + selected_kdims = [] + value = selection.pop('value', None) for d, slc in selection.items(): idx = self.element.get_dimension_index(d) if isinstance(slc, slice): @@ -490,22 +538,26 @@ def select(self, **selection): filt = np.in1d(data[:, idx], list(slc)) data = data[filt, :] else: + if d in self.element.kdims: selected_kdims.append(d) if self.element.ndims == 1: data_index = np.argmin(np.abs(data[:, idx] - slc)) - data = data[data_index, :] else: - data = data[data[:, idx] == slc, :] + data_index = data[:, idx] == slc + data = np.atleast_2d(data[data_index, :]) + if len(data) and len(set(selected_kdims)) == self.element.ndims: + if len(data) == 1 and len(self.element.vdims) == 1: + data = data[0, self.element.ndims] return data @classmethod def collapse_data(cls, data, function, **kwargs): - new_data = [arr[:, self.ndim:] for arr in data] + new_data = [arr[:, self.element.ndims:] for arr in data] if isinstance(function, np.ufunc): collapsed = function.reduce(new_data) else: collapsed = function(np.dstack(new_data), axis=-1, **kwargs) - return np.hstack([data[0][:, self.ndims:, np.newaxis], collapsed]) + return np.hstack([data[0][:, self.element.ndims:, np.newaxis], collapsed]) def sample(self, samples=[]): @@ -530,7 +582,7 @@ def reduce(self, dimensions=[], function=None, **reduce_map): dim, reduce_fn = list(reduce_map.items())[0] if dim in self._cached_index_names: - reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) + reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.element.ndims:], axis=0))) else: raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) params = dict(self.get_param_values(onlychanged=True), vdims=self.vdims, diff --git a/holoviews/core/element.py b/holoviews/core/element.py index f3901fb416..208be9044b 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -326,8 +326,11 @@ def __getitem__(self, args): subtable = NdMapping.__getitem__(self, ndmap_index) if not isinstance(subtable, NdElement): - subtable = self.__class__([(args, subtable)], label=self.label, - kdims=self.kdims, vdims=self.vdims) + if len(self.vdims) > 1: + subtable = self.__class__([(args, subtable)], label=self.label, + kdims=self.kdims, vdims=self.vdims) + else: + subtable = subtable[0] # If subtable is not a slice return as reduced type if not isinstance(args, tuple): args = (args,) From 3d53c8cd7f8cddc755481687d79cbc3ce9871031 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 19:08:22 +0100 Subject: [PATCH 040/212] Dimension comparison now includes sanitized version --- holoviews/core/dimension.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 707246a136..dde0d079bc 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -174,7 +174,8 @@ def __str__(self): def __eq__(self, other): "Dimensions are sorted alphanumerically by name" - return self.name == other.name if isinstance(other, Dimension) else self.name == other + dim_matches = [self.name, 
sanitize_identifier(self.name)] + return other.name in dim_matches if isinstance(other, Dimension) else other in dim_matches def __lt__(self, other): From a670ad8d3e058d3033102f1e4eea0e57eab98c37 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 19:08:52 +0100 Subject: [PATCH 041/212] Dimensioned.select can now handle non-Dimensioned return types --- holoviews/core/dimension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index dde0d079bc..bfd561c27e 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -687,13 +687,13 @@ def select(self, selection_specs=None, **kwargs): if np.isscalar(selection): return selection - elif type(selection) is not type(self): + elif type(selection) is not type(self) and isinstance(selection, Dimensioned): # Apply the selection on the selected object of a different type val_dim = ['value'] if selection.vdims else [] key_dims = selection.dimensions('key', label=True) + val_dim if any(kw in key_dims for kw in kwargs): selection = selection.select(selection_specs, **kwargs) - elif selection._deep_indexable: + elif isinstance(selection, Dimensioned) and selection._deep_indexable: # Apply the deep selection on each item in local selection items = [] for k, v in selection.items(): From f0597e1ed0a926159da40927de61fe1f479e7ca4 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 19:09:27 +0100 Subject: [PATCH 042/212] Fix for Element.dframe --- holoviews/core/element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 208be9044b..d8900771c0 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -135,7 +135,7 @@ def table(self): def dframe(self): import pandas as pd column_names = self.dimensions(label=True) - dim_vals = OrderedDict([(d, self.dimension_values(dim)) for dim in column_names]) + dim_vals = OrderedDict([(dim, self.dimension_values(dim)) for dim in column_names]) return pd.DataFrame(dim_vals) From 7e38cfb9a040449c5dae7061be1fa926f65d7f3d Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 19:10:32 +0100 Subject: [PATCH 043/212] Unified Tabular and Chart tests in testcolumns.py --- tests/testcharts.py | 42 ------------- tests/testcolumns.py | 142 +++++++++++++++++++++++++++++++++++++++++++ tests/testtabular.py | 68 --------------------- 3 files changed, 142 insertions(+), 110 deletions(-) delete mode 100644 tests/testcharts.py create mode 100644 tests/testcolumns.py delete mode 100644 tests/testtabular.py diff --git a/tests/testcharts.py b/tests/testcharts.py deleted file mode 100644 index 6691946c1f..0000000000 --- a/tests/testcharts.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Tests for the Chart Element types. -""" - -import numpy as np -from holoviews import OrderedDict, Chart, Curve, ItemTable -from holoviews.element.comparison import ComparisonTestCase - -class ChartTest(ComparisonTestCase): - """ - Test for the Chart baseclass methods. 
- """ - - def setUp(self): - self.xs = range(11) - self.ys = np.linspace(0, 1, 11) - self.chart = Chart(zip(self.xs, self.ys)) - self.curve = Curve(zip(self.xs, self.ys)) - - def test_yvalue_constructor(self): - ys = np.linspace(0, 1, 11) - Chart(ys) - - def test_chart_index(self): - self.assertEqual(self.chart[5], self.ys[5]) - - def test_chart_slice(self): - chart_slice = Curve(zip(range(5, 9), np.linspace(0.5,0.8, 4))) - self.assertEqual(self.curve[5:9], chart_slice) - - def test_chart_closest(self): - closest = self.chart.closest([0.51, 1, 9.9]) - self.assertEqual(closest, [1., 1., 10.]) - - def test_chart_reduce(self): - mean = self.chart.reduce(x=np.mean) - itable = ItemTable(OrderedDict([('y', np.mean(self.ys))])) - self.assertEqual(mean, itable) - - def test_chart_sample(self): - samples = self.chart.sample([0, 5, 10]).dimension_values('y') - self.assertEqual(samples, np.array([0, 0.5, 1])) diff --git a/tests/testcolumns.py b/tests/testcolumns.py new file mode 100644 index 0000000000..4dbdb0d714 --- /dev/null +++ b/tests/testcolumns.py @@ -0,0 +1,142 @@ +""" +Tests for the Columns Element types. +""" + +import pandas as pd + + +import numpy as np +from holoviews import OrderedDict, Columns, Curve, ItemTable, NdElement +from holoviews.element.comparison import ComparisonTestCase + + +class ColumnsNdElementTest(ComparisonTestCase): + """ + Test for the Chart baseclass methods. + """ + + def setUp(self): + self.xs = range(11) + self.ys = np.linspace(0, 1, 11) + self.keys1 = [('M',10), ('M',16), ('F',12)] + self.values1 = [(15, 0.8), (18, 0.6), (10, 0.8)] + self.key_dims1 = ['Gender', 'Age'] + self.val_dims1 = ['Weight', 'Height'] + + def test_columns_dict_construct(self): + columns = Columns(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B']) + self.assertTrue(isinstance(columns.data, NdElement)) + + def test_columns_tuple_list_construct(self): + columns = Columns(NdElement(zip(self.xs, self.ys))) + self.assertTrue(isinstance(columns.data, NdElement)) + + def test_table_init(self): + columns = Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + self.assertTrue(isinstance(columns.data, NdElement)) + + def test_columns_index_row_gender(self): + table =Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + row = table['F',:] + self.assertEquals(type(row), Columns) + self.assertEquals(row.data.data, OrderedDict([(('F', 12), (10, 0.8))])) + + def test_columns_index_rows_gender(self): + table =Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + row = table['M',:] + self.assertEquals(type(row), Columns) + self.assertEquals(row.data.data, + OrderedDict([(('M', 10), (15, 0.8)), (('M', 16), (18, 0.6))])) + + def test_columns_index_row_age(self): + table =Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + row = table[:, 12] + self.assertEquals(type(row), Columns) + self.assertEquals(row.data.data, OrderedDict([(('F', 12), (10, 0.8))])) + + def test_columns_index_item_table(self): + table =Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + itemtable = table['F', 12] + self.assertEquals(type(itemtable), Columns) + self.assertEquals(itemtable.data.data, OrderedDict([(('F', 12), (10, 0.8))])) + + + def test_columns_index_value1(self): + table =Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + self.assertEquals(table['F', 12, 'Weight'], 10) + + def 
test_columns_index_value2(self): + table =Columns(zip(self.keys1, self.values1), + kdims = self.key_dims1, + vdims = self.val_dims1) + self.assertEquals(table['F', 12, 'Height'], 0.8) + + + +class ColumnsNdArrayTest(ComparisonTestCase): + + def setUp(self): + self.xs = range(11) + self.ys = np.linspace(0, 1, 11) + self.columns = Columns((self.xs, self.ys), kdims=['x'], vdims=['y']) + + def test_columns_values_construct(self): + columns = Columns(self.ys) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_tuple_construct(self): + columns = Columns((self.xs, self.ys)) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_array_construct(self): + columns = Columns(np.column_stack([self.xs, self.ys])) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_tuple_list_construct(self): + columns = Columns(zip(self.xs, self.ys)) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_index(self): + self.assertEqual(self.columns[5], self.ys[5]) + + def test_columns_slice(self): + columns_slice = Columns(zip(range(5, 9), np.linspace(0.5,0.8, 4)), + kdims=['x'], vdims=['y']) + self.assertEqual(self.columns[5:9], columns_slice) + + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + + def test_columns_reduce(self): + mean = self.columns.reduce(x=np.mean) + itable = ItemTable(OrderedDict([('y', np.mean(self.ys))])) + self.assertEqual(mean, itable) + + def test_columns_sample(self): + samples = self.columns.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) + + + +class ColumnsDFrameTest(ComparisonTestCase): + + def setUp(self): + self.xs = range(11) + self.ys = np.linspace(0, 1, 11) + + def test_columns_df_construct(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys})) + self.assertTrue(isinstance(columns.data, pd.DataFrame)) diff --git a/tests/testtabular.py b/tests/testtabular.py deleted file mode 100644 index eb26af67ec..0000000000 --- a/tests/testtabular.py +++ /dev/null @@ -1,68 +0,0 @@ -""" -Unit tests of tabular elements -""" - -from collections import OrderedDict -from holoviews import Table, ItemTable -from holoviews.element.comparison import ComparisonTestCase - -class TestTable(ComparisonTestCase): - - - def setUp(self): - self.keys1 = [('M',10), ('M',16), ('F',12)] - self.values1 = [(15, 0.8), (18, 0.6), (10, 0.8)] - self.key_dims1 = ['Gender', 'Age'] - self.val_dims1 = ['Weight', 'Height'] - - def test_table_init(self): - self.table1 =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - - def test_table_index_row_gender(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - row = table['F',:] - self.assertEquals(type(row), Table) - self.assertEquals(row.data, OrderedDict([(('F', 12), (10, 0.8))])) - - def test_table_index_rows_gender(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - row = table['M',:] - self.assertEquals(type(row), Table) - self.assertEquals(row.data, - OrderedDict([(('M', 10), (15, 0.8)), (('M', 16), (18, 0.6))])) - - def test_table_index_row_age(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - row = table[:, 12] - self.assertEquals(type(row), Table) - self.assertEquals(row.data, OrderedDict([(('F', 12), (10, 0.8))])) - - def 
test_table_index_item_table(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - itemtable = table['F', 12] - self.assertEquals(type(itemtable), ItemTable) - self.assertEquals(itemtable.data, OrderedDict([('Weight', 10), ('Height', 0.8)])) - - - def test_table_index_value1(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - self.assertEquals(table['F', 12, 'Weight'], 10) - - def test_table_index_value2(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - self.assertEquals(table['F', 12, 'Height'], 0.8) - From fb340dc86772c30c1d89bdab78197cd8eebd38ab Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 19:10:59 +0100 Subject: [PATCH 044/212] Added Columns comparison method --- holoviews/element/comparison.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index 1263674423..eefae12876 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -145,6 +145,7 @@ def register(cls): cls.equality_type_funcs[HeatMap] = cls.compare_heatmap # Charts + cls.equality_type_funcs[Columns] = cls.compare_columns cls.equality_type_funcs[Curve] = cls.compare_curve cls.equality_type_funcs[ErrorBars] = cls.compare_errorbars cls.equality_type_funcs[Spread] = cls.compare_spread @@ -435,7 +436,7 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): #========# @classmethod - def compare_chart(cls, el1, el2, msg=None): + def compare_columns(cls, el1, el2, msg=None): cls.compare_dimensioned(el1, el2) if isinstance(el1.data, np.ndarray): cls.compare_arrays(el1.data, el2.data, msg) @@ -447,27 +448,27 @@ def compare_chart(cls, el1, el2, msg=None): @classmethod def compare_curve(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Curve data') + cls.compare_columns(el1, el2, msg='Curve data') @classmethod def compare_errorbars(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='ErrorBars data') + cls.compare_columns(el1, el2, msg='ErrorBars data') @classmethod def compare_spread(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Spread data') + cls.compare_columns(el1, el2, msg='Spread data') @classmethod def compare_scatter(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Scatter data') + cls.compare_columns(el1, el2, msg='Scatter data') @classmethod def compare_scatter3d(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Scatter3D data') + cls.compare_columns(el1, el2, msg='Scatter3D data') @classmethod def compare_trisurface(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Trisurface data') + cls.compare_columns(el1, el2, msg='Trisurface data') @classmethod def compare_histogram(cls, el1, el2, msg=None): @@ -478,16 +479,16 @@ def compare_histogram(cls, el1, el2, msg=None): @classmethod def compare_points(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Points data') + cls.compare_columns(el1, el2, msg='Points data') @classmethod def compare_vectorfield(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='VectorField data') + cls.compare_columns(el1, el2, msg='VectorField data') @classmethod def compare_bars(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Bars data') + cls.compare_columns(el1, el2, msg='Bars data') #=========# # Rasters # @@ -553,7 +554,7 @@ def compare_itemtables(cls, el1, el2, msg=None): 
@classmethod def compare_tables(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Table data') + cls.compare_columns(el1, el2, msg='Table data') #========# # Pandas # @@ -574,19 +575,19 @@ def compare_dframe(cls, el1, el2, msg=None): @classmethod def compare_distribution(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Distribution data') + cls.compare_columns(el1, el2, msg='Distribution data') @classmethod def compare_timeseries(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='TimeSeries data') + cls.compare_columns(el1, el2, msg='TimeSeries data') @classmethod def compare_bivariate(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Bivariate data') + cls.compare_columns(el1, el2, msg='Bivariate data') @classmethod def compare_regression(cls, el1, el2, msg=None): - cls.compare_chart(el1, el2, msg='Regression data') + cls.compare_columns(el1, el2, msg='Regression data') #=======# # Grids # From 19f2aa98b1937db132c03b55ec8e3d8f4d2d786b Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 20:14:24 +0100 Subject: [PATCH 045/212] Fixes and improvements to Columns Comparisons --- holoviews/element/comparison.py | 2 ++ tests/testcomparisonchart.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index eefae12876..a1e6f67bc4 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -438,6 +438,8 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): @classmethod def compare_columns(cls, el1, el2, msg=None): cls.compare_dimensioned(el1, el2) + if len(el1) != len(el2): + raise AssertionError("%s not of matching length." % msg) if isinstance(el1.data, np.ndarray): cls.compare_arrays(el1.data, el2.data, msg) elif isinstance(el1.data, NdElement): diff --git a/tests/testcomparisonchart.py b/tests/testcomparisonchart.py index cb6ae5080a..70b9ed3931 100644 --- a/tests/testcomparisonchart.py +++ b/tests/testcomparisonchart.py @@ -22,7 +22,7 @@ def test_curves_unequal(self): try: self.assertEqual(self.curve1, self.curve2) except AssertionError as e: - if not str(e).startswith("Curve data not almost equal to 6 decimals"): + if not str(e).startswith("Curve data not of matching length."): raise self.failureException("Curve data mismatch error not raised.") @@ -55,7 +55,7 @@ def test_bars_unequal_1(self): try: self.assertEqual(self.bars1, self.bars2) except AssertionError as e: - if not str(e) == '(16,) != (17,)': + if not 'values are different' in str(e): raise Exception('Bars mismatched data error not raised.') def test_bars_unequal_keydims(self): @@ -135,7 +135,7 @@ def test_scatter_unequal_data_shape(self): try: self.assertEqual(self.scatter1, self.scatter2) except AssertionError as e: - if not str(e).startswith("Scatter data not almost equal to 6 decimals"): + if not str(e).startswith("Scatter data not of matching length."): raise self.failureException("Scatter data mismatch error not raised.") def test_scatter_unequal_data_values(self): @@ -170,7 +170,7 @@ def test_points_unequal_data_shape(self): try: self.assertEqual(self.points1, self.points2) except AssertionError as e: - if not str(e).startswith("Points objects have different numbers of points."): + if not str(e).startswith("Points data not of matching length."): raise self.failureException("Points count mismatch error not raised.") def test_points_unequal_data_values(self): From eec737bcc9c41fc249c4a0c801081acb0570b8fc Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 
2015 20:52:59 +0100 Subject: [PATCH 046/212] Fixed add_dimension with array/list of values --- holoviews/core/ndmapping.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index d600704819..8f88b07459 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -304,11 +304,12 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): dimensions = self.kdims[:] dimensions.insert(dim_pos, dimension) - if isinstance(dim_val, list) and not len(dim_val) == len(self): - raise ValueError("Added dimension values must be same length" - "as existing keys.") - else: + if np.isscalar(dim_val): dim_val = cycle([dim_val]) + else: + if not len(dim_val) == len(self): + raise ValueError("Added dimension values must be same length" + "as existing keys.") items = OrderedDict() for dval, (key, val) in zip(dim_val, self.data.items()): From 0f2e560473b229f330c1c0eb02e86f5ec4530517 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 20:53:52 +0100 Subject: [PATCH 047/212] Added Columns.add_dimension method --- holoviews/core/data.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 0f2bea609d..e3c877ba11 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -51,6 +51,29 @@ def closest(self, coords): return self.interface.closest(coords) + def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): + """ + Create a new object with an additional key dimensions. + Requires the dimension name or object, the desired position + in the key dimensions and a key value scalar or sequence of + the same length as the existing keys. + """ + if isinstance(dimension, str): + dimension = Dimension(dimension) + + if dimension.name in self._cached_index_names: + raise Exception('{dim} dimension already defined'.format(dim=dimension.name)) + + dimensions = self.kdims[:] + dimensions.insert(dim_pos, dimension) + + if self.interface is None: + data = self.data.add_dimension(dimension, dim_pos, dim_val, **kwargs) + else: + data = self.interface.add_dimension(self.data, dimension, dim_pos, dim_val) + return self.clone(data, kdims=dimensions) + + def select(self, selection_specs=None, **selection): if selection_specs and not self.matches(selection_specs): return self @@ -432,8 +455,8 @@ def values(self, dim): @classmethod - def add_dimension(cls, data, dimension, values): - data[dimension] = values + def add_dimension(cls, data, dimension, dim_pos, values): + data[dimension.name] = values return data @@ -450,16 +473,14 @@ def validate_data(cls, data): return data - def array(self): - return self.element.data - @classmethod - def add_dimension(cls, data, dimension, values): - if np.isscalar(values): - values = [values]*len(data) - return np.column_stack([data, values]) + def add_dimension(cls, data, dimension, dim_pos, values): + return np.insert(data, dim_pos, values, axis=1) + def array(self): + return self.element.data + def dframe(self): return Element.dframe(self.element) From 0e3bad901a9e38d65dc0181ee64f0d111e56ced7 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 20:55:14 +0100 Subject: [PATCH 048/212] Added Columns.__setstate__ method for backwards compatibility --- holoviews/core/data.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index e3c877ba11..ebd4b2db59 100644 --- a/holoviews/core/data.py +++ 
b/holoviews/core/data.py @@ -41,6 +41,18 @@ def _validate_data(self, data): return self.interface.validate_data(data) + def __setstate__(self, state): + """ + Restores OrderedDict based Columns objects, converting + them to the up-to-date NdElement format. + """ + self.__dict__ = state + if isinstance(self.data, OrderedDict): + self.data = OrderedDict(self.data, kdims=self.kdims, + vdims=self.vdims, group=self.group, + label=self.label) + + def closest(self, coords): if self.ndims > 1: NotImplementedError("Closest method currently only " From 20c57fc917c855ef61373bc0906a17d2cbaa0d17 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 20:57:05 +0100 Subject: [PATCH 049/212] Fix to ErrorBars constructor --- holoviews/element/chart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index cbb8a9a74e..3df65df47a 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -84,7 +84,7 @@ class ErrorBars(Chart): def _validate_data(self, data): if self.shape[1] == 3: data = self.interface.add_dimension(data, self.vdims[1].name, - self.dimension_values(2)) + 3, self.dimension_values(2)) return super(ErrorBars, self)._validate_data(data) From 04fa691d95f2623f91cfe6dbf5c0f65fb31af720 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 21:22:09 +0100 Subject: [PATCH 050/212] Fixed NdElement.sample after changes to indexing --- holoviews/core/element.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index d8900771c0..a9d817c519 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -347,7 +347,8 @@ def sample(self, samples=[]): """ sample_data = OrderedDict() for sample in samples: - sample_data[sample] = self[sample].values()[0] + value = self[sample] + sample_data[sample] = value if np.isscalar(value) else value.values()[0] return self.__class__(sample_data, **dict(self.get_param_values(onlychanged=True))) From dfa50f97e2fc88b0f80dc69ac52ef724342461e9 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 21:22:28 +0100 Subject: [PATCH 051/212] Fixed minor bug in mpl.BarPlot --- holoviews/plotting/mpl/chart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 76ea28751d..15f94b3e49 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -924,7 +924,7 @@ def _create_bars(self, axis, element): style_key[idx] = stk_name val_key[si] = stk_name vals = element.sample([tuple(val_key)]).dimension_values(element.vdims[0].name) - val = float(vals[0]) if vals else np.NaN + val = float(vals[0]) if len(vals) else np.NaN label = ', '.join(label_key) style = dict(style_opts, label='' if label in labels else label, **dict(zip(sopts, color_groups[tuple(style_key)]))) From 400e2e3b7bd7581514fb0b0dd5aac3a01ac2cdd8 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 17 Oct 2015 21:23:30 +0100 Subject: [PATCH 052/212] Fixes and improvements to Columns DataFrame backend --- holoviews/core/data.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index ebd4b2db59..95b7820038 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -281,13 +281,13 @@ def _process_data(cls, data, defaults, **kwargs): if array.dtype.kind in ['S', 'U', 'O'] or array.ndim > 2: # If data is in NdElement dictionary 
format or pandas # is not available convert to OrderedDict - if ((isinstance(data[0], tuple) and len(data[0]) == 2 and - all(isinstance(data[0][i], tuple) for i in range(2))) + if ((not np.isscalar(data[0]) and len(data[0]) == 2 and + any(not np.isscalar(data[0][i]) for i in range(2))) or not pd): data = OrderedDict(data) else: - dimensions = (params.get('kdims', defaults['kdims']) + - params.get('vdims', defaults['vdims'])) + dimensions = (kwargs.get('kdims', defaults['kdims']) + + kwargs.get('vdims', defaults['vdims'])) columns = [d.name if isinstance(d, Dimension) else d for d in dimensions] data = pd.DataFrame(data, columns=columns) @@ -451,7 +451,7 @@ def select(self, selection_specs=None, **select): if isinstance(k, slice): df = df[(k.start < df[dim]) & (df[dim] < k.stop)] else: - if dim in self.kdims: selected_kdims.append(dim) + if dim in self.element.kdims: selected_kdims.append(dim) df = df[df[dim] == k] if len(set(selected_kdims)) == self.element.ndims: if len(df) and len(self.element.vdims) == 1: @@ -463,7 +463,20 @@ def values(self, dim): data = self.element.data[dim] if util.dd and isinstance(data, util.dd.Series): data = data.compute() - return data + return np.array(data) + + + def sample(self, samples=[]): + """ + Sample the Element data with a list of samples. + """ + data = self.element.data + mask = np.zeros(len(self), dtype=bool) + for sample in samples: + if np.isscalar(sample): sample = [sample] + for i, v in enumerate(sample): + mask = np.logical_or(mask, data.iloc[:, i]==v) + return data[mask] @classmethod From 3329ee00f4c1b669584f72c3655589142923fbcb Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 18 Oct 2015 03:05:56 +0100 Subject: [PATCH 053/212] Improved Columns dimension handling --- holoviews/core/data.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 95b7820038..66a0a8d53d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -28,8 +28,7 @@ class Columns(Element): def __init__(self, data, **kwargs): - defaults = {'kdims': self.kdims, 'vdims': self.vdims} - data, params = ColumnarData._process_data(data, defaults, **kwargs) + data, params = ColumnarData._process_data(data, self.params(), **kwargs) super(Columns, self).__init__(data, **params) self.data = self._validate_data(self.data) @@ -246,7 +245,7 @@ def shape(self): @classmethod - def _process_data(cls, data, defaults, **kwargs): + def _process_data(cls, data, paramobjs, **kwargs): params = {} if isinstance(data, Element): params['kdims'] = data.kdims @@ -269,7 +268,7 @@ def _process_data(cls, data, defaults, **kwargs): data = OrderedDict([(row[:data.ndims], row[data.ndims:]) for row in zip(*columns.values())]) elif util.is_dataframe(data): - kdims, vdims = cls._process_df_dims(data, params) + kdims, vdims = cls._process_df_dims(data, paramobjs, **params) params['kdims'] = kdims params['vdims'] = vdims elif isinstance(data, tuple): @@ -286,8 +285,8 @@ def _process_data(cls, data, defaults, **kwargs): or not pd): data = OrderedDict(data) else: - dimensions = (kwargs.get('kdims', defaults['kdims']) + - kwargs.get('vdims', defaults['vdims'])) + dimensions = (kwargs.get('kdims', ) + + kwargs.get('vdims', paramobjs['vdims'].default)) columns = [d.name if isinstance(d, Dimension) else d for d in dimensions] data = pd.DataFrame(data, columns=columns) @@ -295,9 +294,9 @@ def _process_data(cls, data, defaults, **kwargs): data = array params.update(kwargs) if 'kdims' not in params: - params['kdims'] 
= defaults['kdims'] + params['kdims'] = paramobjs['kdims'].default if 'vdims' not in params: - params['vdims'] = defaults['vdims'] + params['vdims'] = paramobjs['vdims'].default if isinstance(data, dict): data = NdElement(data, kdims=params['kdims'], vdims=params['vdims']) @@ -305,7 +304,7 @@ def _process_data(cls, data, defaults, **kwargs): @classmethod - def _process_df_dims(cls, data, kwargs): + def _process_df_dims(cls, data, paramobjs, **kwargs): if 'kdims' in kwargs or 'vdims' in kwargs: kdims = kwargs.get('kdims', []) vdims = kwargs.get('vdims', []) @@ -315,8 +314,9 @@ def _process_df_dims(cls, data, kwargs): raise ValueError("Supplied dimensions don't match columns" "in the dataframe.") else: - kdims = list(data.columns[:2]) - vdims = list(data.columns[2:]) + ndim = len(paramobjs['kdims'].default) + kdims = list(data.columns[:ndim]) + vdims = list(data.columns[ndim:]) return kdims, vdims From 66547bc434b3f5eaa6a12e0aa8fccc5fe3bc482e Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 19 Oct 2015 00:34:09 +0100 Subject: [PATCH 054/212] Implemented collapse_data and add_dimension for Columns --- holoviews/core/data.py | 47 ++++++++++++++++++---- holoviews/core/element.py | 28 ++++++++----- holoviews/core/overlay.py | 2 +- holoviews/core/spaces.py | 2 +- holoviews/element/chart.py | 10 ----- holoviews/element/path.py | 2 +- holoviews/element/raster.py | 4 +- tests/testcolumns.py | 80 ++++++++++++++++++++++++++++++++----- 8 files changed, 132 insertions(+), 43 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 66a0a8d53d..89a4b434c7 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -188,6 +188,15 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): else: return self.interface.groupby(dimensions, container_type, **kwargs) + @classmethod + def collapse_data(cls, data, function=None, kdims=None, **kwargs): + if isinstance(data[0], NdElement): + return data[0].collapse_data(data, function, kdims, **kwargs) + elif isinstance(data[0], np.ndarray): + return ColumnarArray.collapse_data(data, function, kdims, **kwargs) + elif util.is_dataframe(data[0]): + return ColumnarDataFrame.collapse_data(data, function, kdims, **kwargs) + def __len__(self): if self.interface is None: @@ -427,6 +436,7 @@ def reduce(self, dimensions=[], function=None, **reductions): def array(self): return self.element.data.iloc + def reindex(self, kdims=None, vdims=None): # DataFrame based tables don't need to be reindexed return self.element.data @@ -437,6 +447,11 @@ def _datarange(cls, data): return data.min(), data.max() + @classmethod + def collapse_data(cls, data, function, kdims, **kwargs): + return pd.concat(data).groupby([d.name for d in kdims]).agg(function).reset_index() + + def select(self, selection_specs=None, **select): """ Allows slice and select individual values along the DataFrameView @@ -481,7 +496,7 @@ def sample(self, samples=[]): @classmethod def add_dimension(cls, data, dimension, dim_pos, values): - data[dimension.name] = values + data.insert(dim_pos, dimension.name, values) return data @@ -597,13 +612,29 @@ def select(self, **selection): @classmethod - def collapse_data(cls, data, function, **kwargs): - new_data = [arr[:, self.element.ndims:] for arr in data] - if isinstance(function, np.ufunc): - collapsed = function.reduce(new_data) - else: - collapsed = function(np.dstack(new_data), axis=-1, **kwargs) - return np.hstack([data[0][:, self.element.ndims:, np.newaxis], collapsed]) + def collapse_data(cls, data, function, kdims=None, 
**kwargs): + """ + Applies a groupby operation along the supplied key dimensions + then aggregates across the groups with the supplied function. + """ + ndims = data[0].shape[1] + nkdims = len(kdims) + vdims = ['Value Dimension %s' % i for i in range(ndims-len(kdims))] + joined_data = Columns(np.concatenate(data), kdims=kdims, vdims=vdims) + + rows = [] + for k, group in joined_data.groupby(kdims).items(): + row = np.zeros(ndims) + row[:ndims] = np.array(k) + for i, vdim in enumerate(group.vdims): + group_data = group.dimension_values(vdim) + if isinstance(function, np.ufunc): + collapsed = function.reduce(group_data) + else: + collapsed = function(group_data, **kwargs) + row[nkdims+i] = collapsed + rows.append(row) + return np.array(rows) def sample(self, samples=[]): diff --git a/holoviews/core/element.py b/holoviews/core/element.py index a9d817c519..0cf11b2bd2 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -66,7 +66,7 @@ def __getitem__(self, key): @classmethod - def collapse_data(cls, data, function=None, **kwargs): + def collapse_data(cls, data, function=None, kdims=None, **kwargs): """ Class method to collapse a list of data matching the data format of the Element type. By implementing this @@ -74,8 +74,9 @@ def collapse_data(cls, data, function=None, **kwargs): same type. The kwargs are passed to the collapse function. The collapse function must support the numpy style axis selection. Valid function include: - np.mean, np.sum, np.product, np.std, - scipy.stats.kurtosis etc. + np.mean, np.sum, np.product, np.std, scipy.stats.kurtosis etc. + Some data backends also require the key dimensions + to aggregate over. """ raise NotImplementedError("Collapsing not implemented for %s." % cls.__name__) @@ -394,12 +395,21 @@ def _item_check(self, dim_vals, data): @classmethod - def collapse_data(cls, data, function, **kwargs): - groups = zip(*[(np.array(values) for values in odict.values()) for odict in data]) - return OrderedDict((key, np.squeeze(function(np.dstack(group), axis=-1, **kwargs), 0) - if group[0].shape[0] > 1 else - function(np.concatenate(group), **kwargs)) - for key, group in zip(data[0].keys(), groups)) + def collapse_data(cls, data, function, kdims=None, **kwargs): + index = 0 + joined_data = data[0].clone(shared_data=False, kdims=['Index']+data[0].kdims) + for d in data: + d = d.add_dimension('Index', 0, range(index, index+len(d))) + index += len(d) + joined_data.update(d) + + collapsed = joined_data.clone(shared_data=False, kdims=kdims) + for k, group in joined_data.groupby([d.name for d in kdims]).items(): + if isinstance(function, np.ufunc): + collapsed[k] = tuple(function.reduce(group[vdim.name]) for vdim in group.vdims) + else: + collapsed[k] = tuple(function(group[vdim.name], **kwargs) for vdim in group.vdims) + return collapsed def dimension_values(self, dim): diff --git a/holoviews/core/overlay.py b/holoviews/core/overlay.py index d3c4131dc0..f248de8ccc 100644 --- a/holoviews/core/overlay.py +++ b/holoviews/core/overlay.py @@ -155,7 +155,7 @@ def collapse(self, function): "and cannot be collapsed.") else: return elements[0].clone(types[0].collapse_data([el.data for el in elements], - function)) + function, self.kdims)) @property def group(self): diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index 7f2d6a46f6..b5f4b8e6bf 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -207,7 +207,7 @@ def collapse(self, dimensions=None, function=None, **kwargs): if isinstance(function, MapOperation): collapsed[key] = 
function(group, **kwargs) else: - data = group.type.collapse_data([el.data for el in group], function, **kwargs) + data = group.type.collapse_data([el.data for el in group], function, group.last.kdims, **kwargs) collapsed[key] = group.last.clone(data) return collapsed if self.ndims > 1 else collapsed.last diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 3df65df47a..273da7ef63 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -40,12 +40,6 @@ class Scatter(Chart): group = param.String(default='Scatter', constant=True) - @classmethod - def collapse_data(cls, data, function=None, **kwargs): - if function: - raise Exception("Scatter elements are inhomogenous and " - "cannot be collapsed with a function.") - return np.concatenate(data) class Curve(Chart): @@ -290,10 +284,6 @@ def __iter__(self): yield tuple(self.data[i, ...]) i += 1 - @classmethod - def collapse_data(cls, data, function, **kwargs): - return Scatter.collapse_data(data, function, **kwargs) - class VectorField(Points): diff --git a/holoviews/element/path.py b/holoviews/element/path.py index a76c7b3748..7f43fff1d4 100644 --- a/holoviews/element/path.py +++ b/holoviews/element/path.py @@ -65,7 +65,7 @@ def __getitem__(self, key): @classmethod - def collapse_data(cls, data_list, function=None, **kwargs): + def collapse_data(cls, data_list, function=None, kdims=None, **kwargs): if function is None: return [path for paths in data_list for path in paths] else: diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index cee4723c52..760a97fadb 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -65,7 +65,7 @@ def _coord2matrix(self, coord): @classmethod - def collapse_data(cls, data_list, function, **kwargs): + def collapse_data(cls, data_list, function, kdims=None, **kwargs): if isinstance(function, np.ufunc): return function.reduce(data_list) else: @@ -285,7 +285,7 @@ def __getitem__(self, slices): @classmethod - def collapse_data(cls, data_list, function, **kwargs): + def collapse_data(cls, data_list, function, kdims=None, **kwargs): """ Allows collapsing the data of a number of QuadMesh Elements with a function. 
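# [Editor's note] The collapse_data implementations introduced above all follow
# the same pattern: concatenate the per-element data, group the rows by the key
# dimensions, then aggregate each group's value columns with the supplied
# function (which, as elsewhere in this series, is assumed to accept a numpy
# style `axis` argument). Below is a minimal standalone sketch of that pattern;
# the names `collapse`, `arrays` and `nkdims` are hypothetical and this is an
# illustration only, not part of the HoloViews API or of these patches.
import numpy as np

def collapse(arrays, function, nkdims):
    # Stack the arrays, then split key columns from value columns.
    data = np.concatenate(arrays)
    keys = data[:, :nkdims]
    rows = []
    # One output row per unique key, aggregating that group's value columns.
    for key in np.unique(keys, axis=0):
        mask = np.all(keys == key, axis=1)
        vals = np.atleast_1d(function(data[mask, nkdims:], axis=0))
        rows.append(np.concatenate([key, vals]))
    return np.array(rows)

# Example: collapse([arr0, arr1], np.mean, nkdims=1) averages the value columns
# of rows sharing the same key, mirroring what HoloMap.collapse('z', np.mean)
# does for array-backed Columns in the tests that follow.
# [End editor's note]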
diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 4dbdb0d714..ff6178dfdf 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -4,9 +4,8 @@ import pandas as pd - import numpy as np -from holoviews import OrderedDict, Columns, Curve, ItemTable, NdElement +from holoviews import OrderedDict, Columns, Curve, ItemTable, NdElement, HoloMap from holoviews.element.comparison import ComparisonTestCase @@ -22,16 +21,21 @@ def setUp(self): self.values1 = [(15, 0.8), (18, 0.6), (10, 0.8)] self.key_dims1 = ['Gender', 'Age'] self.val_dims1 = ['Weight', 'Height'] + self.columns = Columns(dict(zip(self.xs, self.ys)), + kdims=['x'], vdims=['y']) - def test_columns_dict_construct(self): + def test_columns_odict_construct(self): columns = Columns(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B']) self.assertTrue(isinstance(columns.data, NdElement)) - def test_columns_tuple_list_construct(self): + def test_columns_dict_construct(self): + self.assertTrue(isinstance(self.columns.data, NdElement)) + + def test_columns_ndelement_construct(self): columns = Columns(NdElement(zip(self.xs, self.ys))) self.assertTrue(isinstance(columns.data, NdElement)) - def test_table_init(self): + def test_columns_items_construct(self): columns = Columns(zip(self.keys1, self.values1), kdims = self.key_dims1, vdims = self.val_dims1) @@ -83,6 +87,24 @@ def test_columns_index_value2(self): vdims = self.val_dims1) self.assertEquals(table['F', 12, 'Height'], 0.8) + def test_columns_getitem_column(self): + self.compare_arrays(self.columns['y'], self.ys) + + def test_columns_add_dimensions_value(self): + table = self.columns.add_dimension('z', 1, 0) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.zeros(len(table))) + + def test_columns_add_dimensions_values(self): + table = self.columns.add_dimension('z', 1, range(1,12)) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_collapse(self): + collapsed = HoloMap({i: Columns(dict(zip(self.xs, self.ys*i)), kdims=['x'], vdims=['y']) + for i in range(10)}, kdims=['z']).collapse('z', np.mean) + self.compare_columns(collapsed, Columns(dict(zip(self.xs, self.ys*4.5)), + kdims=['x'], vdims=['y'])) class ColumnsNdArrayTest(ComparisonTestCase): @@ -120,15 +142,32 @@ def test_columns_closest(self): closest = self.columns.closest([0.51, 1, 9.9]) self.assertEqual(closest, [1., 1., 10.]) - def test_columns_reduce(self): - mean = self.columns.reduce(x=np.mean) - itable = ItemTable(OrderedDict([('y', np.mean(self.ys))])) - self.assertEqual(mean, itable) + def test_columns_getitem_column(self): + self.compare_arrays(self.columns['y'], self.ys) def test_columns_sample(self): samples = self.columns.sample([0, 5, 10]).dimension_values('y') self.assertEqual(samples, np.array([0, 0.5, 1])) + def test_columns_add_dimensions_value(self): + table = Columns((self.xs, self.ys), + kdims=['x'], vdims=['y']) + table = table.add_dimension('z', 1, 0) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.zeros(len(table))) + + def test_columns_add_dimensions_values(self): + table = Columns((self.xs, self.ys), + kdims=['x'], vdims=['y']) + table = table.add_dimension('z', 1, range(1,12)) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_collapse(self): + collapsed = HoloMap({i: Columns((self.xs, self.ys*i), kdims=['x'], 
vdims=['y']) + for i in range(10)}, kdims=['z']).collapse('z', np.mean) + self.compare_columns(collapsed, Columns((self.xs, self.ys*4.5), kdims=['x'], vdims=['y'])) + class ColumnsDFrameTest(ComparisonTestCase): @@ -136,7 +175,26 @@ class ColumnsDFrameTest(ComparisonTestCase): def setUp(self): self.xs = range(11) self.ys = np.linspace(0, 1, 11) + self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}), + kdims=['x'], vdims=['y']) def test_columns_df_construct(self): - columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys})) - self.assertTrue(isinstance(columns.data, pd.DataFrame)) + self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) + + def test_columns_add_dimensions_value(self): + table = self.columns.add_dimension('z', 1, 0) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.zeros(len(table))) + + def test_columns_add_dimensions_values(self): + table = self.columns.add_dimension('z', 1, range(1,12)) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_getitem_column(self): + self.compare_arrays(self.columns['y'], self.ys) + + def test_columns_collapse(self): + collapsed = HoloMap({i: Columns(pd.DataFrame({'x': self.xs, 'y': self.ys*i}), kdims=['x'], vdims=['y']) + for i in range(10)}, kdims=['z']).collapse('z', np.mean) + self.compare_columns(collapsed, Columns(pd.DataFrame({'x': self.xs, 'y': self.ys*4.5}), kdims=['x'], vdims=['y'])) From 2e70509b0bff46cabf6f385d49c936c625439c46 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 19 Oct 2015 00:35:45 +0100 Subject: [PATCH 055/212] Minor fix for Columns constructor --- holoviews/core/data.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 89a4b434c7..415e5bff1c 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -280,11 +280,13 @@ def _process_data(cls, data, paramobjs, **kwargs): kdims, vdims = cls._process_df_dims(data, paramobjs, **params) params['kdims'] = kdims params['vdims'] = vdims - elif isinstance(data, tuple): - data = np.column_stack(data) elif not isinstance(data, (np.ndarray, dict)): - data = np.array() if data is None else list(data) - array = np.array(data) + if isinstance(data, tuple): + data = np.column_stack(data) + array = data + else: + data = np.array() if data is None else list(data) + array = np.array(data) # Check if data is of non-numeric type if array.dtype.kind in ['S', 'U', 'O'] or array.ndim > 2: # If data is in NdElement dictionary format or pandas From c932a374c3f46a79495938a45569068e11f3f9c0 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 19 Oct 2015 00:58:18 +0100 Subject: [PATCH 056/212] NdMappings now allow dropping values after slicing --- holoviews/core/ndmapping.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 8f88b07459..c161585e93 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -570,12 +570,17 @@ def __getitem__(self, indexslice): for cidx, (condition, dim) in enumerate(zip(conditions, self.kdims)): values = self._cached_index_values.get(dim.name, None) items = [(k, v) for k, v in items - if condition(values.index(k[cidx]) if values else k[cidx])] - items = [(k, self._dataslice(v, data_slice)) for k, v in items] - if len(items) == 0: + if condition(values.index(k[cidx]) + if values else k[cidx])] + sliced_items = 
[] + for k, v in items: + val_slice = self._dataslice(v, data_slice) + if val_slice: + sliced_items.append((k, v)) + if len(sliced_items) == 0: raise KeyError('No items within specified slice.') with item_check(False): - return self.clone(items) + return self.clone(sliced_items) def _expand_slice(self, indices): From 86315ec3b47cfdb8b96281d197614e986d920f45 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 19 Oct 2015 03:54:05 +0100 Subject: [PATCH 057/212] Updated code to use more idiomatic expressions --- holoviews/core/data.py | 26 ++++++++++++++------------ holoviews/core/element.py | 23 ++++++++++++----------- holoviews/core/layout.py | 2 +- holoviews/core/ndmapping.py | 10 ++++------ holoviews/core/spaces.py | 4 ++-- holoviews/element/chart.py | 4 ++-- holoviews/element/tabular.py | 4 ++-- holoviews/plotting/bokeh/tabular.py | 2 +- 8 files changed, 38 insertions(+), 37 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 415e5bff1c..b16b3373b8 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -117,13 +117,16 @@ def reindex(self, kdims=None, vdims=None): return self.data.reindex(kdims, vdims) if vdims is None: - vdims = self._cached_value_names - elif kdims is None: - dimensions = (self._cached_index_names + - self._cached_value_names) - kdims = [d for d in dimensions if d not in vdims] - key_dims = [self.get_dimension(k) for k in kdims] - val_dims = [self.get_dimension(v) for v in vdims] + vdims = self.vdims + else: + val_dims = [self.get_dimension(v) for v in vdims] + + if kdims is None: + key_dims = [d for d in self.dimensions() + if d not in vdims] + else: + key_dims = [self.get_dimension(k) for k in kdims] + data = self.interface.reindex(self.data, key_dims, val_dims) return self.clone(data, key_dims, val_dims) @@ -175,7 +178,7 @@ def reduce(self, dimensions=[], function=None, **reduce_map): if len(reduce_map) > 1: raise ValueError("Chart Elements may only be reduced to a point.") dim, reduce_fn = list(reduce_map.items())[0] - if dim in self._cached_index_names: + if dim in self.kdims: reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) else: raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) @@ -563,9 +566,8 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): # Get dimension objects, labels, indexes and data dimensions = [self.element.get_dimension(d) for d in dimensions] - dim_labels = [d.name for d in dimensions] dim_idxs = [self.element.get_dimension_index(d) for d in dimensions] - dim_data = {d: self.element.dimension_values(d) for d in dim_labels} + dim_data = {d: self.element.dimension_values(d) for d in dimensions} # Find unique entries along supplied dimensions indices = data[:, dim_idxs] @@ -578,7 +580,7 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): grouped_data = [] for group in unique_indices: mask = np.zeros(len(data), dtype=bool) - for d, v in zip(dim_labels, group): + for d, v in zip(dimensions, group): mask = np.logical_or(mask, dim_data[d] == v) group_element = self.element.clone(data[mask, :], **kwargs) grouped_data.append((tuple(group), group_element)) @@ -660,7 +662,7 @@ def reduce(self, dimensions=[], function=None, **reduce_map): reduce_map = self._reduce_map(dimensions, function, reduce_map) dim, reduce_fn = list(reduce_map.items())[0] - if dim in self._cached_index_names: + if dim in self.kdims: reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.element.ndims:], axis=0))) else: raise 
Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 0cf11b2bd2..eba5d1cc24 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -136,7 +136,7 @@ def table(self): def dframe(self): import pandas as pd column_names = self.dimensions(label=True) - dim_vals = OrderedDict([(dim, self.dimension_values(dim)) for dim in column_names]) + dim_vals = OrderedDict([(dim, self[dim]) for dim in column_names]) return pd.DataFrame(dim_vals) @@ -176,7 +176,7 @@ def pprint_cell(self, row, col): return str(self.kdims[col]) else: dim = self.get_dimension(col) - values = self.dimension_values(dim.name) + values = self[dim.name] return dim.pprint_value(values[row-1]) @@ -250,14 +250,13 @@ def reindex(self, kdims=None, vdims=None, force=False): else: vdims = self._cached_value_names elif kdims is None: - kdims = [d for d in (self._cached_index_names + self._cached_value_names) - if d not in vdims] + kdims = [d for d in self.dimensions if d not in vdims] key_dims = [self.get_dimension(k) for k in kdims] val_dims = [self.get_dimension(v) for v in vdims] - kidxs = [(i, k in self._cached_index_names, self.get_dimension_index(k)) + kidxs = [(i, k in self.kdims, self.get_dimension_index(k)) for i, k in enumerate(kdims)] - vidxs = [(i, v in self._cached_index_names, self.get_dimension_index(v)) + vidxs = [(i, v in self.kdims, self.get_dimension_index(v)) for i, v in enumerate(vdims)] getter = operator.itemgetter(0) items = [] @@ -323,6 +322,8 @@ def __getitem__(self, args): In addition to usual NdMapping indexing, NdElements can be indexed by column name (or a slice over column names) """ + if args in self.dimensions(): + return self.dimension_values(args) ndmap_index = args[:self.ndims] if isinstance(args, tuple) else args subtable = NdMapping.__getitem__(self, ndmap_index) @@ -350,7 +351,7 @@ def sample(self, samples=[]): for sample in samples: value = self[sample] sample_data[sample] = value if np.isscalar(value) else value.values()[0] - return self.__class__(sample_data, **dict(self.get_param_values(onlychanged=True))) + return self.clone(sample_data) def reduce(self, dimensions=None, function=None, **reduce_map): @@ -361,11 +362,11 @@ def reduce(self, dimensions=None, function=None, **reduce_map): """ reduce_map = self._reduce_map(dimensions, function, reduce_map) - dim_labels = self._cached_index_names reduced_table = self for reduce_fn, group in groupby(reduce_map.items(), lambda x: x[1]): dims = [dim for dim, _ in group] - split_dims = [self.get_dimension(d) for d in dim_labels if d not in dims] + split_dims = [self.get_dimension(d) for d in self.kdims + if d not in dims] if len(split_dims) and reduced_table.ndims > 1: split_map = reduced_table.groupby([d.name for d in split_dims], container_type=HoloMap, group_type=self.__class__) @@ -377,7 +378,7 @@ def reduce(self, dimensions=None, function=None, **reduce_map): reduced.append(reduce_fn(valtable.data.values())) reduced_table[k] = reduced else: - reduced = tuple(reduce_fn(self.dimension_values(vdim.name)) + reduced = tuple(reduce_fn(self[vdim.name]) for vdim in self.vdims) reduced_dims = [d for d in self.kdims if d.name not in reduce_map] params = dict(group=self.group) if self.group != type(self).__name__ else {} @@ -548,7 +549,7 @@ def static_dimensions(self): """ dimensions = [] for dim in self.kdims: - if len(set(self.dimension_values(dim.name))) == 1: + if len(set(self[dim.name])) == 1: dimensions.append(dim) return dimensions diff --git 
a/holoviews/core/layout.py b/holoviews/core/layout.py index eab761e329..6de3eef87a 100644 --- a/holoviews/core/layout.py +++ b/holoviews/core/layout.py @@ -127,7 +127,7 @@ def get(self, key, default=None): def dimension_values(self, dimension): dimension = self.get_dimension(dimension).name - if dimension in self._cached_index_names: + if dimension in self.kdims: return self.layout_order[:len(self.data)] else: return self.main.dimension_values(dimension) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index c161585e93..2908ecbe8a 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -325,9 +325,8 @@ def drop_dimension(self, dimensions): Returns a new mapping with the named dimension(s) removed. """ dimensions = [dimensions] if np.isscalar(dimensions) else dimensions - dim_labels = [d for d in self._cached_index_names if d not in dimensions] - dim_inds = [self.get_dimension_index(d) for d in dim_labels] - dims = [self.get_dimension(d) for d in dim_labels] + dims = [d for d in self.kdims if d not in dimensions] + dim_inds = [self.get_dimension_index(d) for d in dims] key_getter = itemgetter(*dim_inds) return self.clone([(key_getter(k), v) for k, v in self.data.items()], kdims=dims) @@ -336,7 +335,7 @@ def drop_dimension(self, dimensions): def dimension_values(self, dimension): "Returns the values along the specified dimension." dimension = self.get_dimension(dimension).name - if dimension in self._cached_index_names: + if dimension in self.kdims: return [k[self.get_dimension_index(dimension)] for k in self.data.keys()] if dimension in self.dimensions(label=True): values = [el.dimension_values(dimension) for el in self @@ -452,8 +451,7 @@ def update(self, other): unchanged after the update. """ if isinstance(other, NdMapping): - dims = [d for d in other._cached_index_names - if d not in self._cached_index_names] + dims = [d for d in other.kdims if d not in self.kdims] if len(dims) == other.ndims: raise KeyError("Cannot update with NdMapping that has" " a different set of key dimensions.") diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index b5f4b8e6bf..d6806a5228 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -131,10 +131,10 @@ def __mul__(self, other): # Generate keys for both subset and superset and sort them by the dimension index. 
self_key = tuple(k for p, k in sorted( [(self.get_dimension_index(dim), v) for dim, v in dim_keys - if dim in self._cached_index_names])) + if dim in self.kdims])) other_key = tuple(k for p, k in sorted( [(other.get_dimension_index(dim), v) for dim, v in dim_keys - if dim in other._cached_index_names])) + if dim in other.kdims])) new_key = self_key if other_in_self else other_key # Append SheetOverlay of combined items if (self_key in self) and (other_key in other): diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 273da7ef63..2fa779e3d3 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -228,9 +228,9 @@ def extents(self, extents): def dimension_values(self, dim): dim = self.get_dimension(dim).name - if dim in self._cached_value_names: + if dim in self.vdims: return self.values - elif dim in self._cached_index_names: + elif dim in self.kdims: return np.convolve(self.edges, np.ones((2,))/2, mode='valid') else: return super(Histogram, self).dimension_values(dim) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 2264bc094e..ce35ace397 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -220,7 +220,7 @@ def _conversion(self, kdims=None, vdims=None, new_type=None, **kwargs): vdims = [vdim.name if isinstance(vdim, Dimension) else vdim for vdim in vdims] if (any(kd in self._table._cached_value_names for kd in kdims) or any(vd in self._table._cached_index_names for vd in vdims)): - new_kdims = [kd for kd in self._table._cached_index_names + new_kdims = [kd for kd in self._table.kdims if kd not in kdims and kd not in vdims] + kdims selected = self._table.reindex(new_kdims, vdims) else: @@ -230,7 +230,7 @@ def _conversion(self, kdims=None, vdims=None, new_type=None, **kwargs): if invalid: raise Exception("Dimensions %r could not be found during conversion to %s new_type" % (invalid, new_type.__name__)) - group_dims = [dim for dim in selected._cached_index_names if not dim in kdims+vdims] + group_dims = [dim for dim in selected.kdims if not dim in kdims+vdims] params = dict({'kdims': [selected.get_dimension(kd) for kd in kdims], 'vdims': [selected.get_dimension(vd) for vd in vdims]}, diff --git a/holoviews/plotting/bokeh/tabular.py b/holoviews/plotting/bokeh/tabular.py index d6944ea424..9aadcd1c1c 100644 --- a/holoviews/plotting/bokeh/tabular.py +++ b/holoviews/plotting/bokeh/tabular.py @@ -18,7 +18,7 @@ class TablePlot(BokehPlot, GenericElementPlot): def get_data(self, element, ranges=None): dims = element.dimensions() - return ({d.name: element.dimension_values(d.name) for d in dims}, + return ({d.name: element[d] for d in dims}, {d.name: d.name for d in dims}) From 1e3963e4a8cb0104221d0839192ac0c075b58e1c Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 19 Oct 2015 03:58:03 +0100 Subject: [PATCH 058/212] Reimplemented Element and Columns dframe methods --- holoviews/core/data.py | 16 ++++++++++------ holoviews/core/element.py | 15 +++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index b16b3373b8..94a75f093e 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -224,11 +224,11 @@ def dimension_values(self, dim): return self.interface.values(dim) - def dframe(self): + def dframe(self, as_table=False): if self.interface is None: - return self.data.dframe() + return self.data.dframe(as_table) else: - return self.interface.dframe() + return self.interface.dframe(as_table) def array(self): @@ -505,7 
+505,11 @@ def add_dimension(cls, data, dimension, dim_pos, values):
         return data
 
-    def dframe(self):
+    def dframe(self, as_table=False):
+        if as_table:
+            from ..element import Table
+            params = self.element.get_param_values(onlychanged=True)
+            return Table(self.element.data, **params)
         return self.element.data
 
 
@@ -526,8 +530,8 @@ def add_dimension(cls, data, dimension, dim_pos, values):
     def array(self):
         return self.element.data
 
-    def dframe(self):
-        return Element.dframe(self.element)
+    def dframe(self, as_table=False):
+        return Element.dframe(self.element, as_table)
 
 
     def closest(self, coords):
diff --git a/holoviews/core/element.py b/holoviews/core/element.py
index eba5d1cc24..8a8a6830ae 100644
--- a/holoviews/core/element.py
+++ b/holoviews/core/element.py
@@ -423,18 +423,17 @@ def dimension_values(self, dim):
             return NdMapping.dimension_values(self, dim.name)
 
 
-    def dframe(self, value_label='data'):
+    def dframe(self, as_table=False):
         try:
             import pandas
         except ImportError:
             raise Exception("Cannot build a DataFrame without the pandas library.")
-        if self._dataframe:
-            return self.data
-        labels = [d.name for d in self.dimensions()]
-        return pandas.DataFrame(
-            [dict(zip(labels, np.concatenate([np.array(k),v])))
-             for (k, v) in self.data.items()])
-
+        columns = [d.name for d in self.dimensions()]
+        df = pandas.DataFrame((k+v for (k, v) in self.data.items()), columns=columns)
+        if as_table:
+            from ..element import Table
+            return Table(df, **self.get_param_values(onlychanged=True))
+        return df
 
 class Element3D(Element2D):
 

From bdf4539c106a75f2e45168798040f0e6cafb38e9 Mon Sep 17 00:00:00 2001
From: philippjfr
Date: Thu, 22 Oct 2015 22:37:41 +0100
Subject: [PATCH 059/212] Fixed Columns dataframe slicing and added tests

---
 holoviews/core/data.py | 14 ++++++++--
 tests/testcolumns.py   | 59 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/holoviews/core/data.py b/holoviews/core/data.py
index 94a75f093e..f9bcb51660 100644
--- a/holoviews/core/data.py
+++ b/holoviews/core/data.py
@@ -465,14 +465,24 @@ def select(self, selection_specs=None, **select):
         """
         df = self.element.data
         selected_kdims = []
+        slcs = []
         for dim, k in select.items():
             if isinstance(k, tuple):
                 k = slice(*k)
             if isinstance(k, slice):
-                df = df[(k.start < df[dim]) & (df[dim] < k.stop)]
+                if k.start is not None:
+                    slcs.append(k.start < df[dim])
+                if k.stop is not None:
+                    slcs.append(df[dim] < k.stop)
+            elif isinstance(k, (set, list)):
+                iter_slcs = []
+                for ik in k:
+                    iter_slcs.append(df[dim] == ik)
+                slcs.append(np.logical_or.reduce(iter_slcs))
             else:
                 if dim in self.element.kdims: selected_kdims.append(dim)
-                df = df[df[dim] == k]
+                slcs.append(df[dim] == k)
+        df = df[np.logical_and.reduce(slcs)]
         if len(set(selected_kdims)) == self.element.ndims:
             if len(df) and len(self.element.vdims) == 1:
                 df = df[self.element.vdims[0].name].iloc[0]
diff --git a/tests/testcolumns.py b/tests/testcolumns.py
index ff6178dfdf..b02219db4a 100644
--- a/tests/testcolumns.py
+++ b/tests/testcolumns.py
@@ -173,6 +173,10 @@ def test_columns_collapse(self):
 
 class ColumnsDFrameTest(ComparisonTestCase):
 
     def setUp(self):
+        self.column_data = [('M',10, 15, 0.8), ('M',16, 18, 0.6),
+                            ('F',12, 10, 0.8)]
+        self.kdims = ['Gender', 'Age']
+        self.vdims = ['Weight', 'Height']
         self.xs = range(11)
         self.ys = np.linspace(0, 1, 11)
         self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}),
@@ -181,6 +185,61 @@ def setUp(self):
 
     def test_columns_df_construct(self):
         self.assertTrue(isinstance(self.columns.data,
pd.DataFrame)) + def test_columns_tuple_list_construct(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) + + def test_columns_index_row_gender(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns['F',:] + self.assertEquals(type(row), Columns) + self.compare_columns(row.clone(row.data.reset_index(drop=True)), + Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_rows_gender(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns['M',:] + self.assertEquals(type(row), Columns) + self.compare_columns(row.clone(row.data.reset_index(drop=True)), + Columns(self.column_data[:2], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_row_age(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns[:, 12] + self.assertEquals(type(row), Columns) + self.compare_columns(row.clone(row.data.reset_index(drop=True)), + Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_single_row(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns['F', 12] + self.assertEquals(type(row), Columns) + self.compare_columns(row.clone(row.data.reset_index(drop=True)), + Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_value1(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + self.assertEquals(columns['F', 12, 'Weight'], 10) + + def test_columns_index_value2(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + self.assertEquals(columns['F', 12, 'Height'], 0.8) + def test_columns_add_dimensions_value(self): table = self.columns.add_dimension('z', 1, 0) self.assertEqual(table.kdims[1], 'z') From 68aaec9578dda47c4fb22f17aaa1312b71d881ec Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 22 Oct 2015 22:46:59 +0100 Subject: [PATCH 060/212] Made message handling in Comparison class consistent --- holoviews/element/comparison.py | 93 ++++++++++++++++----------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index a1e6f67bc4..b203c6ead6 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -436,7 +436,7 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): #========# @classmethod - def compare_columns(cls, el1, el2, msg=None): + def compare_columns(cls, el1, el2, msg='Columns'): cls.compare_dimensioned(el1, el2) if len(el1) != len(el2): raise AssertionError("%s not of matching length." 
% msg) @@ -449,90 +449,89 @@ def compare_columns(cls, el1, el2, msg=None): @classmethod - def compare_curve(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Curve data') + def compare_curve(cls, el1, el2, msg='Curve'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_errorbars(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='ErrorBars data') + def compare_errorbars(cls, el1, el2, msg='ErrorBars'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_spread(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Spread data') + def compare_spread(cls, el1, el2, msg='Spread'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_scatter(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Scatter data') + def compare_scatter(cls, el1, el2, msg='Scatter'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_scatter3d(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Scatter3D data') + def compare_scatter3d(cls, el1, el2, msg='Scatter3D'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_trisurface(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Trisurface data') + def compare_trisurface(cls, el1, el2, msg='Trisurface'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_histogram(cls, el1, el2, msg=None): + def compare_histogram(cls, el1, el2, msg='Histogram'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.edges, el2.edges, "Histogram edges") - cls.compare_arrays(el1.values, el2.values, "Histogram values") - + cls.compare_arrays(el1.edges, el2.edges, ' '.join([msg, 'edges'])) + cls.compare_arrays(el1.values, el2.values, ' '.join([msg, 'values'])) @classmethod - def compare_points(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Points data') + def compare_points(cls, el1, el2, msg='Points'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_vectorfield(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='VectorField data') + def compare_vectorfield(cls, el1, el2, msg='VectorField'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_bars(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Bars data') + def compare_bars(cls, el1, el2, msg='Bars'): + cls.compare_columns(el1, el2, msg) #=========# # Rasters # #=========# @classmethod - def compare_raster(cls, el1, el2, msg=None): + def compare_raster(cls, el1, el2, msg='Raster'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Raster data') + cls.compare_arrays(el1.data, el2.data, msg) @classmethod - def compare_quadmesh(cls, el1, el2, msg=None): + def compare_quadmesh(cls, el1, el2, msg='QuadMesh'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data[0], el2.data[0], 'QuadMesh x-data') - cls.compare_arrays(el1.data[1], el2.data[1], 'QuadMesh y-data') - cls.compare_arrays(el1.data[2], el2.data[2], 'QuadMesh z-data') + cls.compare_arrays(el1.data[0], el2.data[0], ' '.join([msg, 'x-data'])) + cls.compare_arrays(el1.data[1], el2.data[1], ' '.join([msg, 'y-data'])) + cls.compare_arrays(el1.data[2], el2.data[2], ' '.join([msg, 'z-data'])) @classmethod - def compare_heatmap(cls, el1, el2, msg=None): + def compare_heatmap(cls, el1, el2, msg='HeatMap'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'HeatMap data') + cls.compare_arrays(el1.data, el2.data, msg) @classmethod - def compare_image(cls, el1, el2, msg='Image data'): + def compare_image(cls, el1, el2, 
msg='Image'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, msg=msg) + cls.compare_arrays(el1.data, el2.data, msg) cls.bounds_check(el1,el2) @classmethod - def compare_rgb(cls, el1, el2, msg='RGB data'): + def compare_rgb(cls, el1, el2, msg='RGB'): cls.compare_dimensioned(el1, el2) cls.compare_arrays(el1.data, el2.data, msg=msg) cls.bounds_check(el1,el2) @classmethod - def compare_hsv(cls, el1, el2, msg='HSV data'): + def compare_hsv(cls, el1, el2, msg='HSV'): cls.compare_dimensioned(el1, el2) cls.compare_arrays(el1.data, el2.data, msg=msg) cls.bounds_check(el1,el2) @classmethod - def compare_surface(cls, el1, el2, msg='Surface data'): + def compare_surface(cls, el1, el2, msg='Surface'): cls.compare_dimensioned(el1, el2) cls.compare_arrays(el1.data, el2.data, msg=msg) @@ -555,15 +554,15 @@ def compare_itemtables(cls, el1, el2, msg=None): @classmethod - def compare_tables(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Table data') + def compare_tables(cls, el1, el2, msg='Table'): + cls.compare_columns(el1, el2, msg) #========# # Pandas # #========# @classmethod - def compare_dframe(cls, el1, el2, msg=None): + def compare_dframe(cls, el1, el2, msg='DFrame'): cls.compare_dimensioned(el1, el2) from pandas.util.testing import assert_frame_equal try: @@ -576,20 +575,20 @@ def compare_dframe(cls, el1, el2, msg=None): #=========# @classmethod - def compare_distribution(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Distribution data') + def compare_distribution(cls, el1, el2, msg='Distribution'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_timeseries(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='TimeSeries data') + def compare_timeseries(cls, el1, el2, msg='TimeSeries'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_bivariate(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Bivariate data') + def compare_bivariate(cls, el1, el2, msg='Bivariate'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_regression(cls, el1, el2, msg=None): - cls.compare_columns(el1, el2, msg='Regression data') + def compare_regression(cls, el1, el2, msg='Regression'): + cls.compare_columns(el1, el2, msg) #=======# # Grids # From 3aad4d1171b1e66c49d7c9651076422a99d28e53 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Thu, 22 Oct 2015 22:47:52 +0100 Subject: [PATCH 061/212] Added Columns dataframe add_dimension unit tests --- tests/testcolumns.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index b02219db4a..4fc3d42299 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -241,14 +241,14 @@ def test_columns_index_value2(self): self.assertEquals(columns['F', 12, 'Height'], 0.8) def test_columns_add_dimensions_value(self): - table = self.columns.add_dimension('z', 1, 0) - self.assertEqual(table.kdims[1], 'z') - self.compare_arrays(table.dimension_values('z'), np.zeros(len(table))) + columns = self.columns.add_dimension('z', 1, 0) + self.assertEqual(columns.kdims[1], 'z') + self.compare_arrays(columns.dimension_values('z'), np.zeros(len(columns))) def test_columns_add_dimensions_values(self): - table = self.columns.add_dimension('z', 1, range(1,12)) - self.assertEqual(table.kdims[1], 'z') - self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + columns = self.columns.add_dimension('z', 1, range(1,12)) + self.assertEqual(columns.kdims[1], 'z') + 
self.compare_arrays(columns.dimension_values('z'), np.array(list(range(1,12)))) def test_columns_getitem_column(self): self.compare_arrays(self.columns['y'], self.ys) From 3a035cbd5d482939657a04a318adacc8d9bc72ed Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 23 Oct 2015 13:10:36 +0100 Subject: [PATCH 062/212] Implemented Columns ndarray reduce and aggregate function --- holoviews/core/data.py | 64 +++++++++++++++++++++++++-------------- holoviews/core/element.py | 19 +++++++----- tests/testcolumns.py | 14 +++++++++ 3 files changed, 68 insertions(+), 29 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index f9bcb51660..7e0654bfa0 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -170,19 +170,35 @@ def sample(self, samples=[]): def reduce(self, dimensions=[], function=None, **reduce_map): """ - Allows collapsing of Chart objects using the supplied map of + Allows collapsing of Columns objects using the supplied map of dimensions and reduce functions. """ - reduce_map = self._reduce_map(dimensions, function, reduce_map) + if self.interface is None: + return self.data.reduce(dimensions, function, **reduce_map) + + reduce_dims, reduce_map = self._reduce_map(dimensions, function, reduce_map) + reduced = self + for reduce_fn, group in reduce_map: + reduced = self.interface.reduce(reduced, group, function) - if len(reduce_map) > 1: - raise ValueError("Chart Elements may only be reduced to a point.") - dim, reduce_fn = list(reduce_map.items())[0] - if dim in self.kdims: - reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) + if np.isscalar(reduced): + return reduced else: - raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) - return self.clone(reduced_data) + kdims = [kdim for kdim in self.kdims if kdim not in reduce_dims] + return self.clone(reduced, kdims=kdims) + + + + def aggregate(self, dimensions, function): + """ + Groups over the supplied dimensions and aggregates. + """ + if self.interface is None: + aggregated = self.data.aggregate(dimensions, function) + else: + aggregated = self.interface.aggregate(dimensions, function) + kdims = [self.get_dimension(d) for d in dimensions] + return self.clone(aggregated, kdims=kdims) def groupby(self, dimensions, container_type=HoloMap, **kwargs): @@ -668,19 +684,23 @@ def sample(self, samples=[]): return data[mask] - def reduce(self, dimensions=[], function=None, **reduce_map): + @classmethod + def reduce(cls, columns, reduce_dims, function): """ - Allows collapsing of Chart objects using the supplied map of - dimensions and reduce functions. + This implementation allows reducing dimensions by aggregating + over all the remaining key dimensions using the collapse_data + method. 
""" - reduce_map = self._reduce_map(dimensions, function, reduce_map) - - dim, reduce_fn = list(reduce_map.items())[0] - if dim in self.kdims: - reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.element.ndims:], axis=0))) + kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] + if len(kdims): + reindexed = columns.reindex(kdims) + reduced = reindexed.collapse_data([reindexed.data], function, kdims) else: - raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) - params = dict(self.get_param_values(onlychanged=True), vdims=self.vdims, - kdims=[]) - params.pop('extents', None) - return ItemTable(reduced_data, **params) + reduced = function(columns.data, axis=0)[columns.ndims:] + if reduced.ndim == 1: + if len(reduced) == 1: + return reduced[0] + else: + return np.atleast_2d(reduced) + return reduced + diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 8a8a6830ae..1efd7dd94c 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -117,15 +117,20 @@ def _reduce_map(self, dimensions, function, reduce_map): if dimensions and reduce_map: raise Exception("Pass reduced dimensions either as an argument" "or as part of the kwargs not both.") + sanitized_dict = {sanitize_identifier(kd): kd + for kd in self._cached_index_names} + if reduce_map: + reduce_map = reduce_map.items() if dimensions: - reduce_map = {d: function for d in dimensions} + reduce_map = [(d, function) for d in dimensions] elif not reduce_map: - reduce_map = {d: function for d in self._cached_index_names} - reduce_map = {(d if isinstance(d, Dimension) else d): fn - for d, fn in reduce_map.items()} - sanitized = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} - return {sanitized.get(d, d): fn for d, fn in reduce_map.items()} + reduce_map = [(d, function) for d in self._cached_index_names] + reduced = [(d.name if isinstance(d, Dimension) else d, fn) + for d, fn in reduce_map] + sanitized = [(sanitized_dict.get(d, d), fn) for d, fn in reduced] + grouped = [(fn, [dim for dim, _ in grp]) for fn, grp in groupby(sanitized, lambda x: x[1])] + dims = [d for grp in grouped for d in grp[1]] + return dims, grouped def table(self): diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 4fc3d42299..047e7f7892 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -107,11 +107,13 @@ def test_columns_collapse(self): kdims=['x'], vdims=['y'])) + class ColumnsNdArrayTest(ComparisonTestCase): def setUp(self): self.xs = range(11) self.ys = np.linspace(0, 1, 11) + self.zs = np.sin(self.xs) self.columns = Columns((self.xs, self.ys), kdims=['x'], vdims=['y']) def test_columns_values_construct(self): @@ -168,6 +170,18 @@ def test_columns_collapse(self): for i in range(10)}, kdims=['z']).collapse('z', np.mean) self.compare_columns(collapsed, Columns((self.xs, self.ys*4.5), kdims=['x'], vdims=['y'])) + def test_columns_1d_reduce(self): + columns = Columns((self.xs, self.ys), kdims=['x'], vdims=['y']) + self.assertEqual(columns.reduce('x', np.mean), np.float64(0.5)) + + def test_columns_2d_reduce(self): + columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891004) + + def test_columns_2d_partial_reduce(self): + columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['y'], np.mean), + Columns((self.xs, self.zs), kdims=['x'], vdims=['z'])) class 
ColumnsDFrameTest(ComparisonTestCase): From 148a15f91a10c73035603377d3a0cafb8da4f6b3 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 23 Oct 2015 13:11:29 +0100 Subject: [PATCH 063/212] Fixed handling of initial values in MultiDimensionalMapping --- holoviews/core/ndmapping.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 2908ecbe8a..06e787faeb 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -114,8 +114,10 @@ def __init__(self, initial_items=None, **params): if isinstance(initial_items, tuple): self._add_item(initial_items[0], initial_items[1]) elif not self._check_items and self._instantiated: - if isinstance(initial_items, (dict, MultiDimensionalMapping)): + if isinstance(initial_items, dict): initial_items = initial_items.items() + elif isinstance(initial_items, MultiDimensionalMapping): + initial_items = initial_items.data.items() self.data = OrderedDict((k if isinstance(k, tuple) else (k,), v) for k, v in initial_items) self._resort() From 13b8708a17e623cbf2eb09e25cfca18ced2d7d91 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 23 Oct 2015 13:12:23 +0100 Subject: [PATCH 064/212] Tuple unpacking of NdElement values --- holoviews/core/element.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 1efd7dd94c..156191cc5b 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -423,11 +423,19 @@ def dimension_values(self, dim): value_dims = self.dimensions('value', label=True) if dim.name in value_dims: index = value_dims.index(dim.name) - return [v[index] for v in self.values()] + return [v[index] for v in self.data.values()] else: return NdMapping.dimension_values(self, dim.name) + def values(self): + " Returns the values of all the elements." 
+ values = self.data.values() + if len(self.vdims) == 1: + return [v[0] for v in values] + return list(values) + + def dframe(self, as_table=False): try: import pandas From eef3c4176bff9b41e51795f6ac109491ecb1c8ec Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 23 Oct 2015 13:13:05 +0100 Subject: [PATCH 065/212] Minor fix to NdElement.collapse_data --- holoviews/core/element.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 156191cc5b..2c728265e0 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -409,8 +409,9 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): index += len(d) joined_data.update(d) + grouped = joined_data.groupby([d.name for d in kdims], container_type=HoloMap) collapsed = joined_data.clone(shared_data=False, kdims=kdims) - for k, group in joined_data.groupby([d.name for d in kdims]).items(): + for k, group in grouped.items(): if isinstance(function, np.ufunc): collapsed[k] = tuple(function.reduce(group[vdim.name]) for vdim in group.vdims) else: From 41247f31e79d8d210ee4b62e0d0854b5946880f2 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 23 Oct 2015 13:13:52 +0100 Subject: [PATCH 066/212] Fixed bug in NdElement.reindex --- holoviews/core/element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 2c728265e0..5c62350d8f 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -268,7 +268,7 @@ def reindex(self, kdims=None, vdims=None, force=False): for k, v in self.data.items(): _, key = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) for i, iskey, idx in kidxs), key=getter)) - _, val = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) + _, val = zip(*sorted(((i, v[idx] if iskey else v[idx-self.ndims]) for i, iskey, idx in vidxs), key=getter)) items.append((key, val)) reindexed = self.clone(items, kdims=key_dims, vdims=val_dims) From 52e72a15db48285978a2927f08f17e5446119ac3 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Fri, 23 Oct 2015 13:14:14 +0100 Subject: [PATCH 067/212] Fix to NdElement.__getitem__ --- holoviews/core/element.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 5c62350d8f..f0b3cf73ac 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -336,8 +336,6 @@ def __getitem__(self, args): if len(self.vdims) > 1: subtable = self.__class__([(args, subtable)], label=self.label, kdims=self.kdims, vdims=self.vdims) - else: - subtable = subtable[0] # If subtable is not a slice return as reduced type if not isinstance(args, tuple): args = (args,) From c0609e31b9470567c84c423bab0225fa9898492a Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 24 Oct 2015 18:26:11 +0100 Subject: [PATCH 068/212] Fixes to Columns.reduce implementation and improved unit tests --- holoviews/core/data.py | 51 +++++++-------- holoviews/core/element.py | 49 +++++++-------- tests/testcolumns.py | 128 +++++++++++++++++++++++++++----------- 3 files changed, 137 insertions(+), 91 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 7e0654bfa0..11ebe00d63 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -173,13 +173,13 @@ def reduce(self, dimensions=[], function=None, **reduce_map): Allows collapsing of Columns objects using the supplied map of dimensions and reduce functions. 
""" - if self.interface is None: - return self.data.reduce(dimensions, function, **reduce_map) - reduce_dims, reduce_map = self._reduce_map(dimensions, function, reduce_map) reduced = self for reduce_fn, group in reduce_map: - reduced = self.interface.reduce(reduced, group, function) + if self.interface is None: + reduced = self.data.reduce(reduced, group, function) + else: + reduced = self.interface.reduce(reduced, group, function) if np.isscalar(reduced): return reduced @@ -426,32 +426,29 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): return mapping - def reduce(self, dimensions=[], function=None, **reductions): + @classmethod + def reduce(cls, columns, reduce_dims, function=None): """ The aggregate function accepts either a list of Dimensions and a function to apply to find the aggregate across those Dimensions or a list of dimension/function pairs to apply one by one. """ - if not dimensions and not reductions: - raise Exception("Supply either a list of Dimensions or" - "reductions as keyword arguments") - reduced = self.element.data - dfnumeric = reduced.applymap(np.isreal).all(axis=0) - unreducable = list(dfnumeric[dfnumeric == False].index) - if dimensions: - if not function: - raise Exception("Supply a function to reduce the Dimensions with.") - reductions.update({d: function for d in dimensions}) - if reductions: - reduce_ops = defaultdict(list) - for d, fn in reductions.items(): reduce_ops[fn].append(fn) - for fn, dims in reduce_ops.items(): - reduced = reduced.groupby(dims, as_index=True).aggregate(fn) - reduced_indexes = [reduced.index.names.index(d) for d in unreducable] - reduced = reduced.reset_index(level=reduced_indexes) - kdims = [self.element.get_dimension(d) for d in reduced.columns] - return self.element.clone(reduced, kdims=kdims) + reduced = columns.data + kdims = [kdim.name for kdim in columns.kdims if kdim not in reduce_dims] + vdims = columns.dimensions('value', True) + if kdims: + reduced = reduced.reindex(columns=kdims+vdims).groupby(kdims).aggregate(function).reset_index() + else: + if isinstance(function, np.ufunc): + reduced = function.reduce(columns.data, axis=0) + else: + reduced = function(columns.data, axis=0)[vdims] + if len(reduced) == 1: + reduced = reduced[0] + else: + reduced = pd.DataFrame([reduced], columns=vdims) + return reduced def array(self): @@ -696,7 +693,11 @@ def reduce(cls, columns, reduce_dims, function): reindexed = columns.reindex(kdims) reduced = reindexed.collapse_data([reindexed.data], function, kdims) else: - reduced = function(columns.data, axis=0)[columns.ndims:] + if isinstance(function, np.ufunc): + reduced = function.reduce(columns.data, axis=0) + else: + reduced = function(columns.data, axis=0) + reduced = reduced[columns.ndims:] if reduced.ndim == 1: if len(reduced) == 1: return reduced[0] diff --git a/holoviews/core/element.py b/holoviews/core/element.py index f0b3cf73ac..6fe041c279 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -357,37 +357,30 @@ def sample(self, samples=[]): return self.clone(sample_data) - def reduce(self, dimensions=None, function=None, **reduce_map): + @classmethod + def reduce(cls, columns, reduce_dims, function): """ - Allows collapsing the Table down by dimension by passing - the dimension name and reduce_fn as kwargs. Reduces - dimensionality of Table until only an ItemTable is left. + This implementation allows reducing dimensions by aggregating + over all the remaining key dimensions using the collapse_data + method. 
""" - reduce_map = self._reduce_map(dimensions, function, reduce_map) - - reduced_table = self - for reduce_fn, group in groupby(reduce_map.items(), lambda x: x[1]): - dims = [dim for dim, _ in group] - split_dims = [self.get_dimension(d) for d in self.kdims - if d not in dims] - if len(split_dims) and reduced_table.ndims > 1: - split_map = reduced_table.groupby([d.name for d in split_dims], container_type=HoloMap, - group_type=self.__class__) - reduced_table = self.clone(shared_data=False, kdims=split_dims) - for k, table in split_map.items(): - reduced = [] - for vdim in self.vdims: - valtable = table.select(value=vdim.name) if len(self.vdims) > 1 else table - reduced.append(reduce_fn(valtable.data.values())) - reduced_table[k] = reduced + kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] + if len(kdims): + reindexed = columns.reindex(kdims) + reduced = reindexed.collapse_data([reindexed], function, kdims) + else: + reduced = [] + for vdim in columns.vdims: + data = columns[vdim.name] + if isinstance(function, np.ufunc): + reduced.append(function.reduce(data)) + else: + reduced.append(function(data)) + if len(reduced) == 1: + reduced = reduced[0] else: - reduced = tuple(reduce_fn(self[vdim.name]) - for vdim in self.vdims) - reduced_dims = [d for d in self.kdims if d.name not in reduce_map] - params = dict(group=self.group) if self.group != type(self).__name__ else {} - reduced_table = self.__class__([((), reduced)], label=self.label, kdims=reduced_dims, - vdims=self.vdims, **params) - return reduced_table + reduced = OrderedDict([((), tuple(reduced))]) + return reduced def _item_check(self, dim_vals, data): diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 047e7f7892..48745e866b 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -17,10 +17,11 @@ class ColumnsNdElementTest(ComparisonTestCase): def setUp(self): self.xs = range(11) self.ys = np.linspace(0, 1, 11) + self.zs = np.sin(self.xs) self.keys1 = [('M',10), ('M',16), ('F',12)] self.values1 = [(15, 0.8), (18, 0.6), (10, 0.8)] - self.key_dims1 = ['Gender', 'Age'] - self.val_dims1 = ['Weight', 'Height'] + self.kdims = ['Gender', 'Age'] + self.vdims = ['Weight', 'Height'] self.columns = Columns(dict(zip(self.xs, self.ys)), kdims=['x'], vdims=['y']) @@ -37,54 +38,47 @@ def test_columns_ndelement_construct(self): def test_columns_items_construct(self): columns = Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + kdims=self.kdims, vdims=self.vdims) self.assertTrue(isinstance(columns.data, NdElement)) def test_columns_index_row_gender(self): - table =Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) row = table['F',:] self.assertEquals(type(row), Columns) self.assertEquals(row.data.data, OrderedDict([(('F', 12), (10, 0.8))])) def test_columns_index_rows_gender(self): - table =Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) row = table['M',:] self.assertEquals(type(row), Columns) self.assertEquals(row.data.data, OrderedDict([(('M', 10), (15, 0.8)), (('M', 16), (18, 0.6))])) def test_columns_index_row_age(self): - table =Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) row = 
table[:, 12] self.assertEquals(type(row), Columns) self.assertEquals(row.data.data, OrderedDict([(('F', 12), (10, 0.8))])) def test_columns_index_item_table(self): - table =Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) itemtable = table['F', 12] self.assertEquals(type(itemtable), Columns) self.assertEquals(itemtable.data.data, OrderedDict([(('F', 12), (10, 0.8))])) def test_columns_index_value1(self): - table =Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) self.assertEquals(table['F', 12, 'Weight'], 10) def test_columns_index_value2(self): - table =Columns(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) self.assertEquals(table['F', 12, 'Height'], 0.8) def test_columns_getitem_column(self): @@ -106,6 +100,34 @@ def test_columns_collapse(self): self.compare_columns(collapsed, Columns(dict(zip(self.xs, self.ys*4.5)), kdims=['x'], vdims=['y'])) + def test_columns_1d_reduce(self): + self.assertEqual(self.columns.reduce('x', np.mean), np.float64(0.5)) + + def test_columns_2d_reduce(self): + columns = Columns(zip(zip(self.xs, self.ys), self.zs), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891004) + + def test_columns_2d_partial_reduce(self): + columns = Columns(zip(zip(self.xs, self.ys), self.zs), + kdims=['x', 'y'], vdims=['z']) + reduced = Columns(zip(zip(self.xs), self.zs), + kdims=['x'], vdims=['z']) + self.assertEqual(columns.reduce(['y'], np.mean), reduced) + + def test_columns_heterogeneous_reduce(self): + columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, + vdims=self.vdims) + reduced = Columns(zip([k[1:] for k in self.keys1], self.values1), + kdims=self.kdims[1:], vdims=self.vdims) + self.assertEqual(columns.reduce(['Gender'], np.mean), reduced) + + def test_columns_heterogeneous_reduce2d(self): + columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, + vdims=self.vdims) + reduced = Columns([((), (14.333333333333334, 0.73333333333333339))], kdims=[], vdims=self.vdims) + self.assertEqual(columns.reduce(function=np.mean), reduced) + class ColumnsNdArrayTest(ComparisonTestCase): @@ -193,6 +215,7 @@ def setUp(self): self.vdims = ['Weight', 'Height'] self.xs = range(11) self.ys = np.linspace(0, 1, 11) + self.zs = np.sin(self.xs) self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}), kdims=['x'], vdims=['y']) @@ -209,40 +232,36 @@ def test_columns_index_row_gender(self): vdims=self.vdims) row = columns['F',:] self.assertEquals(type(row), Columns) - self.compare_columns(row.clone(row.data.reset_index(drop=True)), - Columns(self.column_data[2:], - kdims=self.kdims, - vdims=self.vdims)) + self.compare_columns(row, Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) def test_columns_index_rows_gender(self): columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) row = columns['M',:] self.assertEquals(type(row), Columns) - self.compare_columns(row.clone(row.data.reset_index(drop=True)), - Columns(self.column_data[:2], - kdims=self.kdims, - vdims=self.vdims)) + self.compare_columns(row, Columns(self.column_data[:2], + kdims=self.kdims, + vdims=self.vdims)) def 
test_columns_index_row_age(self): columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) row = columns[:, 12] self.assertEquals(type(row), Columns) - self.compare_columns(row.clone(row.data.reset_index(drop=True)), - Columns(self.column_data[2:], - kdims=self.kdims, - vdims=self.vdims)) + self.compare_columns(row, Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) def test_columns_index_single_row(self): columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) row = columns['F', 12] self.assertEquals(type(row), Columns) - self.compare_columns(row.clone(row.data.reset_index(drop=True)), - Columns(self.column_data[2:], - kdims=self.kdims, - vdims=self.vdims)) + self.compare_columns(row, Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) def test_columns_index_value1(self): columns = Columns(self.column_data, kdims=self.kdims, @@ -271,3 +290,36 @@ def test_columns_collapse(self): collapsed = HoloMap({i: Columns(pd.DataFrame({'x': self.xs, 'y': self.ys*i}), kdims=['x'], vdims=['y']) for i in range(10)}, kdims=['z']).collapse('z', np.mean) self.compare_columns(collapsed, Columns(pd.DataFrame({'x': self.xs, 'y': self.ys*4.5}), kdims=['x'], vdims=['y'])) + + def test_columns_1d_reduce(self): + self.assertEqual(self.columns.reduce('x', np.mean), np.float64(0.5)) + + def test_columns_2d_reduce(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891004) + + def test_columns_2d_partial_reduce(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['y'], np.mean), + Columns(pd.DataFrame({'x': self.xs, 'z': self.zs}), + kdims=['x'], vdims=['z'])) + + def test_columns_heterogeneous_reduce(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + reduced_data = pd.DataFrame([d[1:] for d in self.column_data], + columns=columns.dimensions(label=True)[1:]) + reduced = Columns(reduced_data, kdims=self.kdims[1:], + vdims=self.vdims) + self.assertEqual(columns.reduce(['Gender'], np.mean), reduced) + + def test_columns_heterogeneous_reduce2d(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + reduced_data = pd.DataFrame([d[1:] for d in self.column_data], + columns=columns.dimensions(label=True)[1:]) + reduced = Columns(pd.DataFrame([(14.333333333333334, 0.73333333333333339)], columns=self.vdims), + kdims=[], vdims=self.vdims) + self.assertEqual(columns.reduce(function=np.mean), reduced) From 4ae2868f5f251a59f5173c04a619ce2ff85b802f Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 24 Oct 2015 18:26:38 +0100 Subject: [PATCH 069/212] Made Columns dataframe comparisons ignore indexes --- holoviews/element/comparison.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index b203c6ead6..b92b1cd2a1 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -566,7 +566,9 @@ def compare_dframe(cls, el1, el2, msg='DFrame'): cls.compare_dimensioned(el1, el2) from pandas.util.testing import assert_frame_equal try: - assert_frame_equal(el1.data, el2.data) + df1 = el1.data.reset_index(drop=True) + df2 = el2.data.reset_index(drop=True) + assert_frame_equal(df1, df2) except AssertionError as e: raise cls.failureException(msg+': '+str(e)) From 
e1a42e8f1ed8cc2869cdde432b0e28014b7a9a48 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sat, 24 Oct 2015 18:27:37 +0100 Subject: [PATCH 070/212] Various fixes to Columns interface --- holoviews/core/data.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 11ebe00d63..6ca3f2e256 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -117,7 +117,7 @@ def reindex(self, kdims=None, vdims=None): return self.data.reindex(kdims, vdims) if vdims is None: - vdims = self.vdims + val_dims = self.vdims else: val_dims = [self.get_dimension(v) for v in vdims] @@ -127,8 +127,8 @@ def reindex(self, kdims=None, vdims=None): else: key_dims = [self.get_dimension(k) for k in kdims] - data = self.interface.reindex(self.data, key_dims, val_dims) - return self.clone(data, key_dims, val_dims) + data = self.interface.reindex(key_dims, val_dims) + return self.clone(data, kdims=key_dims, vdims=val_dims) def __getitem__(self, slices): @@ -330,6 +330,9 @@ def _process_data(cls, data, paramobjs, **kwargs): if isinstance(data, dict): data = NdElement(data, kdims=params['kdims'], vdims=params['vdims']) + elif util.is_dataframe(data): + data = data.sort_values(by=[d.name if isinstance(d, Dimension) else d + for dims in ['kdims', 'vdims'] for d in params[dims]]) return data, params @@ -495,7 +498,7 @@ def select(self, selection_specs=None, **select): else: if dim in self.element.kdims: selected_kdims.append(dim) slcs.append(df[dim] == k) - df = df[np.logical_and.reduce(slcs)] + df = df.iloc[np.logical_and.reduce(slcs)] if len(set(selected_kdims)) == self.element.ndims: if len(df) and len(self.element.vdims) == 1: df = df[self.element.vdims[0].name].iloc[0] @@ -595,6 +598,9 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): dimensions = [self.element.get_dimension(d) for d in dimensions] dim_idxs = [self.element.get_dimension_index(d) for d in dimensions] dim_data = {d: self.element.dimension_values(d) for d in dimensions} + ndims = len(dimensions) + kwargs['kdims'] = [kdim for kdim in self.element.kdims + if kdim not in dimensions] # Find unique entries along supplied dimensions indices = data[:, dim_idxs] @@ -609,7 +615,7 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): mask = np.zeros(len(data), dtype=bool) for d, v in zip(dimensions, group): mask = np.logical_or(mask, dim_data[d] == v) - group_element = self.element.clone(data[mask, :], **kwargs) + group_element = self.element.clone(data[mask, ndims:], **kwargs) grouped_data.append((tuple(group), group_element)) return container_type(grouped_data, kdims=dimensions) @@ -662,7 +668,7 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): if isinstance(function, np.ufunc): collapsed = function.reduce(group_data) else: - collapsed = function(group_data, **kwargs) + collapsed = function(group_data, axis=0, **kwargs) row[nkdims+i] = collapsed rows.append(row) return np.array(rows) From 398a3ed54332cc6b83c685770c074560a64c64dd Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 17:35:45 +0000 Subject: [PATCH 071/212] Removed dimension caching attributes --- holoviews/core/data.py | 29 +++++++++++++++-------------- holoviews/core/dimension.py | 12 +++++------- holoviews/core/element.py | 11 +++++------ holoviews/core/ndmapping.py | 21 +++++++++++---------- holoviews/core/spaces.py | 11 +++++------ holoviews/element/raster.py | 4 ++-- holoviews/element/tabular.py | 4 ++-- holoviews/interface/pandas.py | 2 +- 8 files 
changed, 46 insertions(+), 48 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 6ca3f2e256..e117c50940 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -72,7 +72,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): if isinstance(dimension, str): dimension = Dimension(dimension) - if dimension.name in self._cached_index_names: + if dimension.name in self.kdims: raise Exception('{dim} dimension already defined'.format(dim=dimension.name)) dimensions = self.kdims[:] @@ -383,19 +383,20 @@ def as_ndelement(self, **kwargs): This method transforms any ViewableElement type into a Table as long as it implements a dimension_values method. """ - keys = zip(*[self.values(dim.name) - for dim in self.kdims]) - values = zip(*[self.values(dim.name) - for dim in self.vdims]) - if not keys: keys = [()]*len(values) - if not values: [()]*len(keys) + if self.kdims: + keys = zip(*[self.values(dim.name) + for dim in self.kdims]) + else: + keys = [()]*len(values) + + if self.vdims: + values = zip(*[self.values(dim.name) + for dim in self.vdims]) + else: + values = [()]*len(keys) + data = zip(keys, values) - kwargs = {'label': self.label - for k, v in self.get_param_values(onlychanged=True) - if k in ['group', 'label']} - params = dict(kdims=self.kdims, - vdims=self.vdims, - label=self.label) + params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) if not self.params()['group'].default == self.group: params['group'] = self.group el_type = type(self.element) @@ -414,7 +415,7 @@ def validate_data(cls, data): class ColumnarDataFrame(ColumnarData): def groupby(self, dimensions, container_type=HoloMap, **kwargs): - invalid_dims = list(set(dimensions) - set(self._cached_index_names)) + invalid_dims = list(set(dimensions) - set(self.element.dimensions('key', True))) if invalid_dims: raise Exception('Following dimensions could not be found:\n%s.' % invalid_dims) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index bfd561c27e..18a119da19 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -518,8 +518,6 @@ def __init__(self, data, **params): self.ndims = len(self.kdims) cdims = [(d.name, val) for d, val in self.cdims.items()] self._cached_constants = OrderedDict(cdims) - self._cached_index_names = [d.name for d in self.kdims] - self._cached_value_names = [d.name for d in self.vdims] self._settings = None @@ -535,7 +533,7 @@ def _valid_dimensions(self, dimensions): valid_dimensions = [] for dim in dimensions: if isinstance(dim, Dimension): dim = dim.name - if dim not in self._cached_index_names: + if dim not in self.kdims: raise Exception("Supplied dimensions %s not found." % dim) valid_dimensions.append(dim) return valid_dimensions @@ -601,7 +599,7 @@ def get_dimension_index(self, dim): return IndexError('Dimension index out of bounds') try: sanitized = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} + for kd in self.dimensions('key', True)} return [d.name for d in self.dimensions()].index(sanitized.get(dim, dim)) except ValueError: raise Exception("Dimension %s not found in %s." 
% @@ -654,10 +652,10 @@ def select(self, selection_specs=None, **kwargs): # Apply all indexes applying on this object val_dim = ['value'] if self.vdims else [] + kdims = self.dimensions('key', label=True) sanitized = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} - local_dims = (self._cached_index_names - + list(sanitized.keys()) + val_dim) + for kd in kdims} + local_dims = (kdims + list(sanitized.keys()) + val_dim) local_kwargs = {k: v for k, v in kwargs.items() if k in local_dims} diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 6fe041c279..a022dcdafa 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -118,13 +118,13 @@ def _reduce_map(self, dimensions, function, reduce_map): raise Exception("Pass reduced dimensions either as an argument" "or as part of the kwargs not both.") sanitized_dict = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} + for kd in self.dimensions('key', True)} if reduce_map: reduce_map = reduce_map.items() if dimensions: reduce_map = [(d, function) for d in dimensions] elif not reduce_map: - reduce_map = [(d, function) for d in self._cached_index_names] + reduce_map = [(d, function) for d in self.kdims] reduced = [(d.name if isinstance(d, Dimension) else d, fn) for d, fn in reduce_map] sanitized = [(sanitized_dict.get(d, d), fn) for d, fn in reduced] @@ -253,7 +253,7 @@ def reindex(self, kdims=None, vdims=None, force=False): if kdims is None: return super(NdElement, self).reindex(force=force) else: - vdims = self._cached_value_names + vdims = self.vdims elif kdims is None: kdims = [d for d in self.dimensions if d not in vdims] key_dims = [self.get_dimension(k) for k in kdims] @@ -522,11 +522,10 @@ def __call__(self): raise ValueError("Collator values must be Dimensioned objects " "before collation.") - dim_keys = zip(self._cached_index_names, key) + dim_keys = zip(self.kdims, key) varying_keys = [(d, k) for d, k in dim_keys if not self.drop_constant or (d not in constant_dims and d not in self.drop)] - constant_keys = [(d if isinstance(d, Dimension) else Dimension(d), k) - for d, k in dim_keys if d in constant_dims + constant_keys = [(d, k) for d, k in dim_keys if d in constant_dims and d not in self.drop and self.drop_constant] if varying_keys or constant_keys: data = self._add_dimensions(data, varying_keys, diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 06e787faeb..4335429d43 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -161,19 +161,19 @@ def _add_item(self, dim_vals, data, sort=True): # Check and validate for categorical dimensions if self._cached_categorical: - valid_vals = zip(self._cached_index_names, dim_vals) + valid_vals = zip(self.kdims, dim_vals) else: valid_vals = [] for dim, val in valid_vals: - vals = self._cached_index_values[dim] - if vals == 'initial': self._cached_index_values[dim] = [] + vals = self._cached_index_values[dim.name] + if vals == 'initial': self._cached_index_values[dim.name] = [] if not self._instantiated and self.get_dimension(dim).values == 'initial': if val not in vals: - self._cached_index_values[dim].append(val) + self._cached_index_values[dim.name].append(val) elif vals and val not in vals: - raise KeyError('%s Dimension value %s not in' - ' specified Dimension values.' % (dim, repr(val))) + raise KeyError('%s dimension value %s not in' + ' specified dimension values.' % (dim, repr(val))) # Updates nested data structures rather than simply overriding them. 
if ((dim_vals in self.data)
@@ -300,7 +300,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs):
         if isinstance(dimension, str):
             dimension = Dimension(dimension)
 
-        if dimension.name in self._cached_index_names:
+        if dimension in self.kdims:
             raise Exception('{dim} dimension already defined'.format(dim=dimension.name))
 
         dimensions = self.kdims[:]
@@ -357,8 +357,9 @@ def reindex(self, kdims=[], force=False):
         created object as the new labels must be sufficient to address each value
         uniquely.
         """
+        old_kdims = [d.name for d in self.kdims]
         if not len(kdims):
-            kdims = [d for d in self._cached_index_names
+            kdims = [d for d in old_kdims
                      if not len(set(self.dimension_values(d))) == 1]
         indices = [self.get_dimension_index(el) for el in kdims]
 
@@ -440,7 +441,7 @@ def dframe(self):
             import pandas
         except ImportError:
             raise Exception("Cannot build a DataFrame without the pandas library.")
-        labels = self._cached_index_names + [self.group]
+        labels = self.dimensions('key', True) + [self.group]
         return pandas.DataFrame(
             [dict(zip(labels, k + (v,))) for (k, v) in self.data.items()])
 
@@ -811,7 +812,7 @@ def dframe(self):
         dframes = []
         for key, view in self.data.items():
             view_frame = view.dframe()
-            key_dims = reversed(list(zip(key, self._cached_index_names)))
+            key_dims = reversed(list(zip(key, self.dimensions('key', True))))
             for val, dim in key_dims:
                 dimn = 1
                 while dim in view_frame:
diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py
index d6806a5228..f8bfa4642c 100644
--- a/holoviews/core/spaces.py
+++ b/holoviews/core/spaces.py
@@ -31,8 +31,7 @@ def overlay(self, dimensions=None, **kwargs):
             with item_check(False):
                 return NdOverlay(self, **kwargs)
         else:
-            dims = [d for d in self._cached_index_names
-                    if d not in dimensions]
+            dims = [d for d in self.kdims if d not in dimensions]
             return self.groupby(dims, group_type=NdOverlay, **kwargs)
 
 
@@ -108,8 +107,8 @@ def __mul__(self, other):
         with completely different dimensions aren't overlaid.
         """
         if isinstance(other, self.__class__):
-            self_set = set(self._cached_index_names)
-            other_set = set(other._cached_index_names)
+            self_set = {d.name for d in self.kdims}
+            other_set = {d.name for d in other.kdims}
 
             # Determine which is the subset, to generate list of keys and
             # dimension labels for the new view
@@ -195,9 +194,9 @@ def collapse(self, dimensions=None, function=None, **kwargs):
         """
         from .operation import MapOperation
         if not dimensions:
-            dimensions = self._cached_index_names
+            dimensions = self.kdims
         if self.ndims > 1 and len(dimensions) != self.ndims:
-            groups = self.groupby([dim for dim in self._cached_index_names
+            groups = self.groupby([dim for dim in self.kdims
                                    if dim not in dimensions])
         else:
             [self.get_dimension(dim) for dim in dimensions]
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index 760a97fadb..a8d162b68d 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -397,7 +397,7 @@ def __getitem__(self, coords):
         """
         Slice the underlying NdMapping.
""" - return self.clone(self._data.select(**dict(zip(self._data._cached_index_names, coords)))) + return self.clone(self._data.select(**dict(zip(self._data.kdims, coords)))) def dense_keys(self): @@ -411,7 +411,7 @@ def dense_keys(self): def dimension_values(self, dim, unique=True): dim = self.get_dimension(dim).name - if dim in self._cached_index_names: + if dim in self.kdims: idx = self.get_dimension_index(dim) return [k[idx] for k in self._data.keys()] elif dim in self._cached_value_names: diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index ce35ace397..5d325ba490 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -218,8 +218,8 @@ def _conversion(self, kdims=None, vdims=None, new_type=None, **kwargs): elif vdims and not isinstance(vdims, list): vdims = [vdims] kdims = [kdim.name if isinstance(kdim, Dimension) else kdim for kdim in kdims] vdims = [vdim.name if isinstance(vdim, Dimension) else vdim for vdim in vdims] - if (any(kd in self._table._cached_value_names for kd in kdims) or - any(vd in self._table._cached_index_names for vd in vdims)): + if (any(kd in self._table.vdims for kd in kdims) or + any(vd in self._table.kdims for vd in vdims)): new_kdims = [kd for kd in self._table.kdims if kd not in kdims and kd not in vdims] + kdims selected = self._table.reindex(new_kdims, vdims) diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index 070d059753..542bed67c6 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -84,7 +84,7 @@ def __init__(self, data, dimensions={}, kdims=None, clone_override=False, dims[list(data.columns).index(name)] = dim ViewableElement.__init__(self, data, kdims=dims, **params) - self.data.columns = self._cached_index_names + self.data.columns = self.dimensions('key', True) def apply(self, name, *args, **kwargs): From 468be0d80f22213c77c97ed2576c246b38252940 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 17:37:55 +0000 Subject: [PATCH 072/212] Improved array and dframe methods --- holoviews/core/data.py | 19 ++++++++++++------- holoviews/core/element.py | 26 ++++++++++++++++++++++++-- holoviews/core/util.py | 5 +++++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index e117c50940..2b7dd9ccc8 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -247,13 +247,18 @@ def dframe(self, as_table=False): return self.interface.dframe(as_table) - def array(self): + def array(self, as_table=False): if self.interface is None: - dims = self._cached_index_names + self._cached_value_names - return np.column_stack([self.dimension_values(d) for d in dims]) - else: - return self.interface.array() - + return super(Columns, self).array(as_table) + array = self.interface.array() + if as_table: + from ..element import Table + if array.dtype.kind in ['S', 'O', 'U']: + raise ValueError("%s data contains non-numeric type, " + "could not convert to array based " + "Element" % type(self).__name__) + return Table(array, **util.get_param_values(self, Table)) + return array @@ -456,7 +461,7 @@ def reduce(cls, columns, reduce_dims, function=None): def array(self): - return self.element.data.iloc + return self.element.data.values def reindex(self, kdims=None, vdims=None): diff --git a/holoviews/core/element.py b/holoviews/core/element.py index a022dcdafa..28d57e118c 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -11,7 +11,7 @@ from .overlay import Overlayable, 
NdOverlay, Overlay, CompositeOverlay
 from .spaces import HoloMap, GridSpace
 from .tree import AttrTree
-from .util import sanitize_identifier, is_dataframe
+from .util import sanitize_identifier, is_dataframe, dimension_sort, get_param_values
 
 
 class Element(ViewableElement, Composable, Overlayable):
@@ -428,6 +428,27 @@ def values(self):
         return list(values)
 
 
+    def array(self, as_table=False):
+        dims = self.kdims + self.vdims
+        columns, types = [], []
+        for dim in dims:
+            column = self.dimension_values(dim)
+            columns.append(column)
+            types.append(column.dtype.kind)
+        if len(set(types)) > 1:
+            columns = [c.astype('object') for c in columns]
+        array = np.column_stack(columns)
+        if as_table:
+            from ..element import Table
+            if array.dtype.kind in ['S', 'O', 'U']:
+                raise ValueError("%s data contains non-numeric type, "
+                                 "could not convert to array based "
+                                 "Element" % type(self).__name__)
+            return Table(array, **get_param_values(self, Table))
+        else:
+            return array
+
+
     def dframe(self, as_table=False):
         try:
             import pandas
@@ -437,10 +458,11 @@ def dframe(self, as_table=False):
         df = pandas.DataFrame((k+v for (k, v) in self.data.items()), columns=columns)
         if as_table:
             from ..element import Table
-            return Table(df, **self.get_param_values(onlychanged=True))
+            return Table(df, **get_param_values(self, Table))
         return df
 
+
 class Element3D(Element2D):
 
     extents = param.Tuple(default=(None, None, None,
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
index 3e01fc7f79..c10c81296a 100644
--- a/holoviews/core/util.py
+++ b/holoviews/core/util.py
@@ -594,3 +594,8 @@ def is_dataframe(data):
     return((pd is not None and isinstance(data, pd.DataFrame)) or
           (dd is not None and isinstance(data, dd.DataFrame)) or
           (bz is not None and isinstance(data, bz.Data)))
+
+
+def get_param_values(data, new_type):
+    return {k: v for k, v in data.get_param_values(onlychanged=True)
+            if k in new_type.params()}

From 3756705b7620fa7afcb3b8036606b1910099f61a Mon Sep 17 00:00:00 2001
From: philippjfr
Date: Sun, 25 Oct 2015 17:38:51 +0000
Subject: [PATCH 073/212] Added aggregate methods to Columns interface

---
 holoviews/core/data.py    | 22 ++++++++++++++++++++++
 holoviews/core/element.py | 12 ++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/holoviews/core/data.py b/holoviews/core/data.py
index 2b7dd9ccc8..7e729521cd 100644
--- a/holoviews/core/data.py
+++ b/holoviews/core/data.py
@@ -518,6 +518,17 @@ def values(self, dim):
         return np.array(data)
 
 
+    def aggregate(self, dimensions, function):
+        """
+        Allows aggregating.
+ """ + if not isinstance(dimensions, Iterable): dimensions = [dimensions] + rows = [] + for k, group in self.groupby(dimensions).data.items(): + reduced = group.reduce(function) + rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced])) + return np.array(rows) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 28d57e118c..4511369a7b 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -410,6 +410,18 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): return collapsed + def aggregate(self, dimensions, function): + """ + Allows aggregating. + """ + rows = [] + grouped = self.groupby(dimensions) + for k, group in grouped.data.items(): + reduced = group.reduce(group, group.kdims, function).values()[0] + rows.append((k, reduced)) + return self.clone(rows, kdims=grouped.kdims) + + def dimension_values(self, dim): dim = self.get_dimension(dim) value_dims = self.dimensions('value', label=True) From e997802a08a358969939e59e385263e9adff3cff Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 17:40:24 +0000 Subject: [PATCH 074/212] dimension_values method now consistently returns numpy arrays --- holoviews/core/dimension.py | 2 +- holoviews/core/element.py | 2 +- holoviews/core/ndmapping.py | 2 +- holoviews/core/overlay.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 18a119da19..3886ffbd71 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -715,7 +715,7 @@ def dimension_values(self, dimension): """ val = self._cached_constants.get(dimension, None) if val: - return val + return np.array([val]) else: raise Exception("Dimension %s not found in %s." % (dimension, self.__class__.__name__)) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 4511369a7b..83895fbe25 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -427,7 +427,7 @@ def dimension_values(self, dim): value_dims = self.dimensions('value', label=True) if dim.name in value_dims: index = value_dims.index(dim.name) - return [v[index] for v in self.data.values()] + return np.array([v[index] for v in self.data.values()]) else: return NdMapping.dimension_values(self, dim.name) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 4335429d43..af26638fa2 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -338,7 +338,7 @@ def dimension_values(self, dimension): "Returns the values along the specified dimension." 
dimension = self.get_dimension(dimension).name if dimension in self.kdims: - return [k[self.get_dimension_index(dimension)] for k in self.data.keys()] + return np.array([k[self.get_dimension_index(dimension)] for k in self.data.keys()]) if dimension in self.dimensions(label=True): values = [el.dimension_values(dimension) for el in self if dimension in el.dimensions()] diff --git a/holoviews/core/overlay.py b/holoviews/core/overlay.py index f248de8ccc..09d7c54000 100644 --- a/holoviews/core/overlay.py +++ b/holoviews/core/overlay.py @@ -75,7 +75,7 @@ def dimension_values(self, dimension): if not found: return super(CompositeOverlay, self).dimension_values(dimension) values = [v for v in values if v is not None and len(v)] - return np.concatenate(values) if len(values) else [] + return np.concatenate(values) if len(values) else np.array() From e24dcf5f33051f9352b324de6657673be0727361 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 18:00:40 +0000 Subject: [PATCH 075/212] Various fixes and improvements to Columns interface --- holoviews/core/data.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 7e729521cd..dbed7d0db9 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -335,9 +335,6 @@ def _process_data(cls, data, paramobjs, **kwargs): if isinstance(data, dict): data = NdElement(data, kdims=params['kdims'], vdims=params['vdims']) - elif util.is_dataframe(data): - data = data.sort_values(by=[d.name if isinstance(d, Dimension) else d - for dims in ['kdims', 'vdims'] for d in params[dims]]) return data, params @@ -425,13 +422,14 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): raise Exception('Following dimensions could not be found:\n%s.' % invalid_dims) - index_dims = [self.get_dimension(d) for d in dimensions] + index_dims = [self.element.get_dimension(d) for d in dimensions] + element_dims = [kdim for kdim in self.element.kdims + if kdim not in index_dims] mapping = container_type(None, kdims=index_dims) - for k, v in self.data.groupby(dimensions): + for k, v in self.element.data.groupby(dimensions): data = v.drop(dimensions, axis=1) - mapping[k] = self.clone(data, - kdims=[self.get_dimension(d) - for d in data.columns], **kwargs) + mapping[k] = self.element.clone(data, kdims=element_dims, + **kwargs) return mapping @@ -443,11 +441,11 @@ def reduce(cls, columns, reduce_dims, function=None): those Dimensions or a list of dimension/function pairs to apply one by one. 
""" - reduced = columns.data kdims = [kdim.name for kdim in columns.kdims if kdim not in reduce_dims] vdims = columns.dimensions('value', True) if kdims: - reduced = reduced.reindex(columns=kdims+vdims).groupby(kdims).aggregate(function).reset_index() + reduced = columns.data.reindex(columns=kdims+vdims).\ + groupby(kdims).aggregate(function).reset_index() else: if isinstance(function, np.ufunc): reduced = function.reduce(columns.data, axis=0) @@ -639,6 +637,7 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): def select(self, **selection): data = self.element.data + slices = [] selected_kdims = [] value = selection.pop('value', None) for d, slc in selection.items(): @@ -648,17 +647,21 @@ def select(self, **selection): stop = float("inf") if slc.stop is None else slc.stop clip_start = start <= data[:, idx] clip_stop = data[:, idx] < stop - data = data[np.logical_and(clip_start, clip_stop), :] + slices.append(np.logical_and(clip_start, clip_stop)) elif isinstance(slc, (set, list)): - filt = np.in1d(data[:, idx], list(slc)) - data = data[filt, :] + slices.append(np.in1d(data[:, idx], list(slc))) else: if d in self.element.kdims: selected_kdims.append(d) if self.element.ndims == 1: data_index = np.argmin(np.abs(data[:, idx] - slc)) + data = data[data_index, :] + break else: data_index = data[:, idx] == slc - data = np.atleast_2d(data[data_index, :]) + slices.append(data_index) + if slices: + data = data[np.logical_and.reduce(slices), :] + data = np.atleast_2d(data) if len(data) and len(set(selected_kdims)) == self.element.ndims: if len(data) == 1 and len(self.element.vdims) == 1: data = data[0, self.element.ndims] From d4533a5b4c2e6e7201aff3d5b7a24ac23a9b43a5 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 18:59:47 +0000 Subject: [PATCH 076/212] Updated Comparison unit tests --- tests/testcomparisonchart.py | 14 +++++++------- tests/testcomparisonraster.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/testcomparisonchart.py b/tests/testcomparisonchart.py index 70b9ed3931..d7c9b41a56 100644 --- a/tests/testcomparisonchart.py +++ b/tests/testcomparisonchart.py @@ -22,8 +22,8 @@ def test_curves_unequal(self): try: self.assertEqual(self.curve1, self.curve2) except AssertionError as e: - if not str(e).startswith("Curve data not of matching length."): - raise self.failureException("Curve data mismatch error not raised.") + if not str(e).startswith("Curve not of matching length."): + raise self.failureException("Curve mismatch error not raised.") @@ -135,14 +135,14 @@ def test_scatter_unequal_data_shape(self): try: self.assertEqual(self.scatter1, self.scatter2) except AssertionError as e: - if not str(e).startswith("Scatter data not of matching length."): + if not str(e).startswith("Scatter not of matching length."): raise self.failureException("Scatter data mismatch error not raised.") def test_scatter_unequal_data_values(self): try: self.assertEqual(self.scatter1, self.scatter3) except AssertionError as e: - if not str(e).startswith("Scatter data not almost equal to 6 decimals"): + if not str(e).startswith("Scatter not almost equal to 6 decimals"): raise self.failureException("Scatter data mismatch error not raised.") @@ -170,14 +170,14 @@ def test_points_unequal_data_shape(self): try: self.assertEqual(self.points1, self.points2) except AssertionError as e: - if not str(e).startswith("Points data not of matching length."): + if not str(e).startswith("Points not of matching length."): raise self.failureException("Points count 
mismatch error not raised.") def test_points_unequal_data_values(self): try: self.assertEqual(self.points1, self.points3) except AssertionError as e: - if not str(e).startswith("Points data not almost equal to 6 decimals"): + if not str(e).startswith("Points not almost equal to 6 decimals"): raise self.failureException("Points data mismatch error not raised.") @@ -206,6 +206,6 @@ def test_vfield_unequal_1(self): try: self.assertEqual(self.vfield1, self.vfield2) except AssertionError as e: - if not str(e).startswith("VectorField data not almost equal to 6 decimals"): + if not str(e).startswith("VectorField not almost equal to 6 decimals"): raise self.failureException("VectorField data mismatch error not raised.") diff --git a/tests/testcomparisonraster.py b/tests/testcomparisonraster.py index 319cc3b5c6..1b709b4c9f 100644 --- a/tests/testcomparisonraster.py +++ b/tests/testcomparisonraster.py @@ -101,7 +101,7 @@ def test_unequal_arrays(self): self.assertEqual(self.mat1, self.mat2) raise AssertionError("Array mismatch not raised") except AssertionError as e: - if not str(e).startswith('Image data not almost equal to 6 decimals\n'): + if not str(e).startswith('Image not almost equal to 6 decimals\n'): raise self.failureException("Image data mismatch error not raised.") def test_bounds_mismatch(self): @@ -124,8 +124,8 @@ def test_element_mismatch(self): try: self.assertEqual(self.overlay1_depth2, self.overlay2_depth2) except AssertionError as e: - if not str(e).startswith('Image data not almost equal to 6 decimals\n'): - raise self.failureException("Image data mismatch error not raised.") + if not str(e).startswith('Image not almost equal to 6 decimals\n'): + raise self.failureException("Image mismatch error not raised.") @@ -167,5 +167,5 @@ def test_element_mismatch(self): self.assertEqual(self.map1_1D, self.map4_1D) raise AssertionError("Pane mismatch in array data not raised.") except AssertionError as e: - if not str(e).startswith('Image data not almost equal to 6 decimals\n'): + if not str(e).startswith('Image not almost equal to 6 decimals\n'): raise self.failureException("Image mismatch error not raised.") From 85c5dbcac910ecc79e7b1f2dd492dec41c5cb9d6 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 19:00:12 +0000 Subject: [PATCH 077/212] Minor fixes to NdMapping --- holoviews/core/ndmapping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index af26638fa2..efff09928e 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -275,6 +275,7 @@ def groupby(self, dimensions, container_type=None, group_type=None, **kwargs): self.warning('Cannot split Map with only one dimension.') return self + dimensions = [self.get_dimension(d).name for d in dimensions] container_type = container_type if container_type else type(self) group_type = group_type if group_type else type(self) dims, inds = zip(*((self.get_dimension(dim), self.get_dimension_index(dim)) @@ -366,7 +367,7 @@ def reindex(self, kdims=[], force=False): keys = [tuple(k[i] for i in indices) for k in self.data.keys()] reindexed_items = OrderedDict( (k, v) for (k, v) in zip(keys, self.data.values())) - reduced_dims = set(self._cached_index_names).difference(kdims) + reduced_dims = set([d.name for d in self.kdims]).difference(kdims) dimensions = [self.get_dimension(d) for d in kdims if d not in reduced_dims] @@ -577,7 +578,7 @@ def __getitem__(self, indexslice): for k, v in items: val_slice = self._dataslice(v, data_slice) 
if val_slice: - sliced_items.append((k, v)) + sliced_items.append((k, val_slice)) if len(sliced_items) == 0: raise KeyError('No items within specified slice.') with item_check(False): From be47788183778771693943fd60542903d78a152f Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 19:00:31 +0000 Subject: [PATCH 078/212] Fixed NdElement.reduce --- holoviews/core/element.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 83895fbe25..e112ab1643 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -417,7 +417,11 @@ def aggregate(self, dimensions, function): rows = [] grouped = self.groupby(dimensions) for k, group in grouped.data.items(): - reduced = group.reduce(group, group.kdims, function).values()[0] + reduced = group.reduce(group, group.kdims, function) + if not np.isscalar(reduced): + reduced = reduced.values()[0] + else: + reduced = (reduced,) rows.append((k, reduced)) return self.clone(rows, kdims=grouped.kdims) From 947e982f8200221ce07050ad00cd8fddb360e311 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 19:01:16 +0000 Subject: [PATCH 079/212] Eliminated references to cached dimension attributes --- holoviews/core/dimension.py | 11 ++++------- holoviews/core/spaces.py | 4 ++-- holoviews/element/raster.py | 4 ++-- holoviews/element/tabular.py | 14 +++++++------- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 3886ffbd71..6fe5d6bc11 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -651,13 +651,10 @@ def select(self, selection_specs=None, **kwargs): """ # Apply all indexes applying on this object - val_dim = ['value'] if self.vdims else [] - kdims = self.dimensions('key', label=True) - sanitized = {sanitize_identifier(kd): kd - for kd in kdims} - local_dims = (kdims + list(sanitized.keys()) + val_dim) + vdims = self.vdims+['value'] if self.vdims else [] + kdims = self.kdims local_kwargs = {k: v for k, v in kwargs.items() - if k in local_dims} + if k in kdims+vdims} # Check selection_spec applies if selection_specs is not None: @@ -666,6 +663,7 @@ def select(self, selection_specs=None, **kwargs): else: matches = True + # Apply selection to self if local_kwargs and matches: select = [slice(None) for i in range(self.ndims)] for dim, val in local_kwargs.items(): @@ -673,7 +671,6 @@ def select(self, selection_specs=None, **kwargs): select += [val] else: if isinstance(val, tuple): val = slice(*val) - dim = sanitized.get(dim, dim) select[self.get_dimension_index(dim)] = val if self._deep_indexable: selection = self.get(tuple(select), diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index f8bfa4642c..202e29eb5b 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -91,7 +91,7 @@ def _dimension_keys(self): Helper for __mul__ that returns the list of keys together with the dimension labels. 
""" - return [tuple(zip(self._cached_index_names, [k] if self.ndims == 1 else k)) + return [tuple(zip([d.name for d in self.kdims], [k] if self.ndims == 1 else k)) for k in self.keys()] @@ -360,7 +360,7 @@ def _transform_indices(self, key): ndims = self.ndims if all(not isinstance(el, slice) for el in key): dim_inds = [] - for dim in self._cached_index_names: + for dim in self.kdims: dim_type = self.get_dimension_type(dim) if isinstance(dim_type, type) and issubclass(dim_type, Number): dim_inds.append(self.get_dimension_index(dim)) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index a8d162b68d..9c76be04d5 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -414,8 +414,8 @@ def dimension_values(self, dim, unique=True): if dim in self.kdims: idx = self.get_dimension_index(dim) return [k[idx] for k in self._data.keys()] - elif dim in self._cached_value_names: - idx = self._cached_value_names.index(dim) + elif dim in self.vdims: + idx = self.vdims.index(dim) return [v[idx] if isinstance(v, tuple) else v for v in self._data.values()] else: diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 5d325ba490..8847bef397 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -60,7 +60,7 @@ def __getitem__(self, heading): """ if heading is (): return self - if heading not in self._cached_value_names: + if heading not in self.vdims: raise IndexError("%r not in available headings." % heading) return self.data.get(heading, np.NaN) @@ -105,8 +105,8 @@ def pprint_cell(self, row, col): return str(self.dimensions('value')[row]) else: dim = self.get_dimension(row) - heading = self._cached_value_names[row] - return dim.pprint_value(self.data.get(heading, np.NaN)) + heading = self.vdims[row] + return dim.pprint_value(self.data.get(heading.name, np.NaN)) def hist(self, *args, **kwargs): @@ -137,8 +137,8 @@ def table(self): vdims=self.vdims) def values(self): - return tuple(self.data.get(k, np.NaN) - for k in self._cached_value_names) + return tuple(self.data.get(d.name, np.NaN) + for d in self.vdims) @@ -211,10 +211,10 @@ def __init__(self, table): def _conversion(self, kdims=None, vdims=None, new_type=None, **kwargs): if kdims is None: - kdims = self._table._cached_index_names + kdims = self._table.kdims elif kdims and not isinstance(kdims, list): kdims = [kdims] if vdims is None: - vdims = self._table._cached_value_names + vdims = self._table.vdims elif vdims and not isinstance(vdims, list): vdims = [vdims] kdims = [kdim.name if isinstance(kdim, Dimension) else kdim for kdim in kdims] vdims = [vdim.name if isinstance(vdim, Dimension) else vdim for vdim in vdims] From 8c5323483de38aca1c0ffe8744c7969225e0d15d Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 19:02:19 +0000 Subject: [PATCH 080/212] Fixed Columns aggregation and addded unit tests --- holoviews/core/data.py | 2 +- tests/testcolumns.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index dbed7d0db9..fbde1cf38a 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -739,6 +739,6 @@ def aggregate(self, dimensions, function): if not isinstance(dimensions, Iterable): dimensions = [dimensions] rows = [] for k, group in self.groupby(dimensions).data.items(): - reduced = group.reduce(function) + reduced = group.reduce(function=function) rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced])) return 
np.array(rows) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 48745e866b..b2c489df55 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -128,6 +128,20 @@ def test_columns_heterogeneous_reduce2d(self): reduced = Columns([((), (14.333333333333334, 0.73333333333333339))], kdims=[], vdims=self.vdims) self.assertEqual(columns.reduce(function=np.mean), reduced) + def test_column_heterogeneous_aggregate(self): + columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, + vdims=self.vdims) + aggregated = Columns(OrderedDict([('F', (10., 0.8)), ('M', (16.5, 0.7))]), + kdims=self.kdims[:1], vdims=self.vdims) + self.compare_columns(columns.aggregate(['Gender'], np.mean), aggregated) + + def test_columns_2d_aggregate_partial(self): + columns = Columns(zip(zip(self.xs, self.ys), self.zs), + kdims=['x', 'y'], vdims=['z']) + reduced = Columns(zip(zip(self.xs), self.zs), + kdims=['x'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), reduced) + class ColumnsNdArrayTest(ComparisonTestCase): @@ -205,6 +219,11 @@ def test_columns_2d_partial_reduce(self): self.assertEqual(columns.reduce(['y'], np.mean), Columns((self.xs, self.zs), kdims=['x'], vdims=['z'])) + def test_columns_2d_aggregate_partial(self): + columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), + Columns((self.xs, self.zs), kdims=['x'], vdims=['z'])) + class ColumnsDFrameTest(ComparisonTestCase): @@ -309,7 +328,7 @@ def test_columns_2d_partial_reduce(self): def test_columns_heterogeneous_reduce(self): columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) - reduced_data = pd.DataFrame([d[1:] for d in self.column_data], + reduced_data = pd.DataFrame([(10, 15, 0.8), (12, 10, 0.8), (16, 18, 0.6)], columns=columns.dimensions(label=True)[1:]) reduced = Columns(reduced_data, kdims=self.kdims[1:], vdims=self.vdims) @@ -323,3 +342,18 @@ def test_columns_heterogeneous_reduce2d(self): reduced = Columns(pd.DataFrame([(14.333333333333334, 0.73333333333333339)], columns=self.vdims), kdims=[], vdims=self.vdims) self.assertEqual(columns.reduce(function=np.mean), reduced) + + def test_column_heterogeneous_aggregate(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + aggregated = Columns(pd.DataFrame([('F', 10., 0.8), ('M', 16.5, 0.7)], + columns=['Gender']+self.vdims), + kdims=self.kdims[:1], vdims=self.vdims) + self.compare_columns(columns.aggregate(['Gender'], np.mean), aggregated) + + def test_columns_2d_partial_reduce(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), + Columns(pd.DataFrame({'x': self.xs, 'z': self.zs}), + kdims=['x'], vdims=['z'])) From f606fef302e62bf7aa4d8f63316ad3906699c7fa Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 20:17:14 +0000 Subject: [PATCH 081/212] Further optimized Columns indexing operations --- holoviews/core/data.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index fbde1cf38a..853f9869a0 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -485,24 +485,24 @@ def select(self, selection_specs=None, **select): """ df = self.element.data selected_kdims = [] - slcs = [] + mask = True for dim, k in select.items(): if isinstance(k, tuple): k = slice(*k) if isinstance(k, slice): if k.start 
is not None: - slcs.append(k.start < df[dim]) + mask &= k.start <= df[dim] if k.stop is not None: - slc.append(df[dim] < k.stop) + mask &= df[dim] < k.stop elif isinstance(k, (set, list)): iter_slcs = [] for ik in k: iter_slcs.append(df[dim] == ik) - slcs.append(np.logical_or.reduce(iter_slcs)) + mask &= np.logical_or.reduce(iter_slcs) else: if dim in self.element.kdims: selected_kdims.append(dim) - slcs.append(df[dim] == k) - df = df.iloc[np.logical_and.reduce(slcs)] + mask &= df[dim] == k + df = df.ix[mask] if len(set(selected_kdims)) == self.element.ndims: if len(df) and len(self.element.vdims) == 1: df = df[self.element.vdims[0].name].iloc[0] @@ -637,19 +637,18 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): def select(self, **selection): data = self.element.data - slices = [] + mask = True selected_kdims = [] value = selection.pop('value', None) for d, slc in selection.items(): idx = self.element.get_dimension_index(d) if isinstance(slc, slice): - start = -float("inf") if slc.start is None else slc.start - stop = float("inf") if slc.stop is None else slc.stop - clip_start = start <= data[:, idx] - clip_stop = data[:, idx] < stop - slices.append(np.logical_and(clip_start, clip_stop)) + if slc.start is not None: + mask &= slc.start <= data[:, idx] + if slc.stop is not None: + mask &= data[:, idx] < slc.stop elif isinstance(slc, (set, list)): - slices.append(np.in1d(data[:, idx], list(slc))) + mask &= np.in1d(data[:, idx], list(slc)) else: if d in self.element.kdims: selected_kdims.append(d) if self.element.ndims == 1: @@ -657,10 +656,9 @@ def select(self, **selection): data = data[data_index, :] break else: - data_index = data[:, idx] == slc - slices.append(data_index) - if slices: - data = data[np.logical_and.reduce(slices), :] + mask &= data[:, idx] == slc + if mask is not True: + data = data[mask, :] data = np.atleast_2d(data) if len(data) and len(set(selected_kdims)) == self.element.ndims: if len(data) == 1 and len(self.element.vdims) == 1: @@ -699,11 +697,11 @@ def sample(self, samples=[]): Sample the Element data with a list of samples. 
""" data = self.element.data - mask = np.zeros(len(self), dtype=bool) + mask = False for sample in samples: if np.isscalar(sample): sample = [sample] for i, v in enumerate(sample): - mask = np.logical_or(mask, data[:, i]==v) + mask |= data[:, i]==v return data[mask] From 62e5a9fbbf22469265316278775976b0d7c874f3 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Sun, 25 Oct 2015 21:51:08 +0000 Subject: [PATCH 082/212] Added Columns.shape unit tests --- tests/testcolumns.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index b2c489df55..36a5893b97 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -25,6 +25,10 @@ def setUp(self): self.columns = Columns(dict(zip(self.xs, self.ys)), kdims=['x'], vdims=['y']) + def test_columns_shape(self): + self.assertEqual(self.columns.shape, (11, 2)) + + def test_columns_odict_construct(self): columns = Columns(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B']) self.assertTrue(isinstance(columns.data, NdElement)) @@ -152,6 +156,9 @@ def setUp(self): self.zs = np.sin(self.xs) self.columns = Columns((self.xs, self.ys), kdims=['x'], vdims=['y']) + def test_columns_shape(self): + self.assertEqual(self.columns.shape, (11, 2)) + def test_columns_values_construct(self): columns = Columns(self.ys) self.assertTrue(isinstance(columns.data, np.ndarray)) @@ -238,6 +245,9 @@ def setUp(self): self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}), kdims=['x'], vdims=['y']) + def test_columns_shape(self): + self.assertEqual(self.columns.shape, (11, 2)) + def test_columns_df_construct(self): self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) From f9a6a0c5f7414a7e38fa4c38f72e2b8c81fe9987 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:13:11 +0000 Subject: [PATCH 083/212] Added general Columns.closest method --- holoviews/core/data.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 853f9869a0..d604d3bea4 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -53,13 +53,18 @@ def __setstate__(self, state): def closest(self, coords): + """ + Given single or multiple x-values, returns the list + of closest actual samples. 
+ """ if self.ndims > 1: NotImplementedError("Closest method currently only " "implemented for 1D Elements") - elif self.interface is None: - return self.data.closest(coords) - else: - return self.interface.closest(coords) + + if not isinstance(coords, list): coords = [coords] + xs = self.dimension_values(0) + idxs = [np.argmin(np.abs(xs-coord)) for coord in coords] + return [xs[idx] for idx in idxs] if len(coords) > 1 else xs[idxs[0]] def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): @@ -425,12 +430,12 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): index_dims = [self.element.get_dimension(d) for d in dimensions] element_dims = [kdim for kdim in self.element.kdims if kdim not in index_dims] - mapping = container_type(None, kdims=index_dims) + map_data = [] for k, v in self.element.data.groupby(dimensions): data = v.drop(dimensions, axis=1) - mapping[k] = self.element.clone(data, kdims=element_dims, - **kwargs) - return mapping + map_data.append((k, self.element.clone(data, kdims=element_dims, + **kwargs))) + return container_type(map_data, kdims=index_dims) @classmethod @@ -574,18 +579,6 @@ def array(self): def dframe(self, as_table=False): return Element.dframe(self.element, as_table) - - def closest(self, coords): - """ - Given single or multiple x-values, returns the list - of closest actual samples. - """ - if not isinstance(coords, list): coords = [coords] - xs = self.element.data[:, 0] - idxs = [np.argmin(np.abs(xs-coord)) for coord in coords] - return [xs[idx] for idx in idxs] if len(coords) > 1 else xs[idxs[0]] - - @classmethod def _datarange(cls, data): return np.nanmin(data), np.nanmax(data) From 8f8d3e343cf6a9574f16f70daa30c1a00e170fb1 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:25:15 +0000 Subject: [PATCH 084/212] Moved .array method from NdElement to Element --- holoviews/core/element.py | 44 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index e112ab1643..5463928128 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -145,6 +145,27 @@ def dframe(self): return pd.DataFrame(dim_vals) + def array(self, as_table=False): + dims = self.kdims + self.vdims + columns, types = [], [] + for dim in dims: + column = self.dimension_values(dim) + columns.append(column) + types.append(column.dtype.kind) + if len(set(types)) > 1: + columns = [c.astype('object') for c in columns] + array = np.column_stack(columns) + if as_table: + from ..element import Table + if array.dtype.kind in ['S', 'O', 'U']: + raise ValueError("%s data contains non-numeric type, " + "could not convert to array based " + "Element" % type(self).__name__) + return Table(array, **get_param_values(self, Table)) + else: + return array + + class Tabular(Element): """ @@ -443,28 +464,6 @@ def values(self): return [v[0] for v in values] return list(values) - - def array(self, as_table=False): - dims = self.kdims + self.vdims - columns, types = [], [] - for dim in dims: - column = self.dimension_values(d) - data.append(column) - types.append(column.dtype.kind) - if len(set(types)) > 1: - columns = [c.astype('object') for c in columns] - array = np.column_stack(columns) - if as_table: - from ..element import Table - if array.dtype.kind in ['S', 'O', 'U']: - raise ValueError("%s data contains non-numeric type, " - "could not convert to array based " - "Element" % type(self).__name__) - return Table(array, **get_param_values(self, Table)) - 
else: - return array - - def dframe(self, as_table=False): try: import pandas @@ -478,7 +477,6 @@ def dframe(self, as_table=False): return df - class Element3D(Element2D): extents = param.Tuple(default=(None, None, None, From 89a062b02a63f260cc212c6ee4875d38f1e25314 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:26:04 +0000 Subject: [PATCH 085/212] Added Columns unit tests --- tests/testcolumns.py | 48 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 36a5893b97..6ac5a6ac57 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -28,11 +28,17 @@ def setUp(self): def test_columns_shape(self): self.assertEqual(self.columns.shape, (11, 2)) + def test_columns_range(self): + self.assertEqual(self.columns.range('y'), (0., 1.)) def test_columns_odict_construct(self): columns = Columns(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B']) self.assertTrue(isinstance(columns.data, NdElement)) + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + def test_columns_dict_construct(self): self.assertTrue(isinstance(self.columns.data, NdElement)) @@ -146,6 +152,8 @@ def test_columns_2d_aggregate_partial(self): kdims=['x'], vdims=['z']) self.assertEqual(columns.aggregate(['x'], np.mean), reduced) + def test_columns_array(self): + self.assertEqual(self.columns.array(), np.column_stack([self.xs, self.ys])) class ColumnsNdArrayTest(ComparisonTestCase): @@ -159,6 +167,13 @@ def setUp(self): def test_columns_shape(self): self.assertEqual(self.columns.shape, (11, 2)) + def test_columns_range(self): + self.assertEqual(self.columns.range('y'), (0., 1.)) + + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + def test_columns_values_construct(self): columns = Columns(self.ys) self.assertTrue(isinstance(columns.data, np.ndarray)) @@ -231,6 +246,9 @@ def test_columns_2d_aggregate_partial(self): self.assertEqual(columns.aggregate(['x'], np.mean), Columns((self.xs, self.zs), kdims=['x'], vdims=['z'])) + def test_columns_array(self): + self.assertEqual(self.columns.array(), np.column_stack([self.xs, self.ys])) + class ColumnsDFrameTest(ComparisonTestCase): @@ -245,9 +263,16 @@ def setUp(self): self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}), kdims=['x'], vdims=['y']) + def test_columns_range(self): + self.assertEqual(self.columns.range('y'), (0., 1.)) + def test_columns_shape(self): self.assertEqual(self.columns.shape, (11, 2)) + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + def test_columns_df_construct(self): self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) @@ -256,6 +281,12 @@ def test_columns_tuple_list_construct(self): vdims=self.vdims) self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) + def test_columns_slice(self): + data = [('x', range(5, 9)), ('y', np.linspace(0.5, 0.8, 4))] + columns_slice = Columns(pd.DataFrame.from_items(data), + kdims=['x'], vdims=['y']) + self.assertEqual(self.columns[5:9], columns_slice) + def test_columns_index_row_gender(self): columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) @@ -353,7 +384,19 @@ def test_columns_heterogeneous_reduce2d(self): kdims=[], vdims=self.vdims) self.assertEqual(columns.reduce(function=np.mean), reduced) - def 
test_column_heterogeneous_aggregate(self): + + def test_columns_groupby(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + cols = self.kdims + self.vdims + group1 = pd.DataFrame(self.column_data[:2], columns=cols) + group2 = pd.DataFrame(self.column_data[2:], columns=cols) + grouped = HoloMap({'M': Columns(group1, kdims=['Age'], vdims=self.vdims), + 'F': Columns(group2, kdims=['Age'], vdims=self.vdims)}, + kdims=['Gender']) + self.assertEqual(columns.groupby(['Gender']), grouped) + + def test_columns_heterogeneous_aggregate(self): columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) aggregated = Columns(pd.DataFrame([('F', 10., 0.8), ('M', 16.5, 0.7)], @@ -367,3 +410,6 @@ def test_columns_2d_partial_reduce(self): self.assertEqual(columns.aggregate(['x'], np.mean), Columns(pd.DataFrame({'x': self.xs, 'z': self.zs}), kdims=['x'], vdims=['z'])) + + def test_columns_array(self): + self.assertEqual(self.columns.array(), np.column_stack([self.xs, self.ys])) From 577f1a00d75c5b23af6002658f1449765d6f69c8 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:26:40 +0000 Subject: [PATCH 086/212] Added range implementations to Columns interface --- holoviews/core/data.py | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index d604d3bea4..aa64da4339 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -67,6 +67,24 @@ def closest(self, coords): return [xs[idx] for idx in idxs] if len(coords) > 1 else xs[idxs[0]] + + def range(self, dim, data_range=True): + if self.interface is None: + return self.data.range(dim, data_range) + dim = self.get_dimension(dim) + if dim.range != (None, None): + return dim.range + elif dim in self.dimensions(): + if len(self): + drange = self.interface.range(dim) + else: + drange = (np.NaN, np.NaN) + if data_range: + return util.max_range([drange, dim.soft_range]) + else: + return dim.soft_range + + def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): """ Create a new object with an additional key dimensions. @@ -272,6 +290,9 @@ class ColumnarData(param.Parameterized): def __init__(self, element, **params): self.element = element + def range(self, dimension): + column = self.element.dimension_values(dimension) + return (np.nanmin(column), np.nanmax(column)) def array(self): NotImplementedError @@ -360,31 +381,6 @@ def _process_df_dims(cls, data, paramobjs, **kwargs): return kdims, vdims - @classmethod - def _datarange(cls, data): - """ - Should return minimum and maximum of data - returned by values method. 
- """ - raise NotImplementedError - - - def range(self, dim, data_range=True): - dim_idx = self.get_dimension_index(dim) - if dim.range != (None, None): - return dim.range - elif dim_idx < len(self.dimensions()): - if len(self): - data = self.values(dim_idx) - data_range = self._datarange(data) - else: - data_range = (np.NaN, np.NaN) - if data_range: - return util.max_range([data_range, dim.soft_range]) - else: - return dim.soft_range - - def as_ndelement(self, **kwargs): """ This method transforms any ViewableElement type into a Table @@ -421,6 +417,10 @@ def validate_data(cls, data): class ColumnarDataFrame(ColumnarData): + def range(self, dimension): + column = self.element.data[self.element.get_dimension(dimension).name] + return (column.min(), column.max()) + def groupby(self, dimensions, container_type=HoloMap, **kwargs): invalid_dims = list(set(dimensions) - set(self.element.dimensions('key', True))) if invalid_dims: From 13ebf23ae1ade9f86f6ea541b808d404157b42f7 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:29:26 +0000 Subject: [PATCH 087/212] Implemented column label based indexing for all Elements --- holoviews/element/annotation.py | 1 + holoviews/element/chart.py | 1 + holoviews/element/path.py | 1 + holoviews/element/raster.py | 5 +++++ 4 files changed, 8 insertions(+) diff --git a/holoviews/element/annotation.py b/holoviews/element/annotation.py index adaa388dbe..d925535af1 100644 --- a/holoviews/element/annotation.py +++ b/holoviews/element/annotation.py @@ -32,6 +32,7 @@ def __init__(self, data, **params): def __getitem__(self, key): + if key in self.dimensions(): return self.dimension_values(key) if not isinstance(key, tuple) or len(key) == 1: key = (key, slice(None)) elif len(key) == 0: return self.clone() diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 2fa779e3d3..eea2fe9d6c 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -156,6 +156,7 @@ def __getitem__(self, key): """ Implements slicing or indexing of the Histogram """ + if key in self.dimensions(): return self.dimension_values(key) if key is (): return self # May no longer be necessary if isinstance(key, tuple) and len(key) > self.ndims: raise Exception("Slice must match number of key dimensions.") diff --git a/holoviews/element/path.py b/holoviews/element/path.py index 7f43fff1d4..2142681f07 100644 --- a/holoviews/element/path.py +++ b/holoviews/element/path.py @@ -52,6 +52,7 @@ def __init__(self, data, **params): def __getitem__(self, key): + if key in self.dimensions(): return self.dimension_values(key) if not isinstance(key, tuple) or len(key) == 1: key = (key, slice(None)) elif len(key) == 0: return self.clone() diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 9c76be04d5..79183a8a42 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -48,6 +48,7 @@ def _zdata(self): def __getitem__(self, slices): + if slices in self.dimensions(): return self.dimension_values(slices) if not isinstance(slices, tuple): slices = (slices, slice(None)) slc_types = [isinstance(sl, slice) for sl in slices] data = self.data.__getitem__(slices[::-1]) @@ -253,6 +254,7 @@ def _validate_data(self, data): def __getitem__(self, slices): + if slices in self.dimensions(): return self.dimension_values(key) if not self._grid: raise IndexError("Indexing of non-grid based QuadMesh" "currently not supported") @@ -397,6 +399,7 @@ def __getitem__(self, coords): """ Slice the underlying NdMapping. 
""" + if coords in self.dimensions(): return self.dimension_values(coords) return self.clone(self._data.select(**dict(zip(self._data.kdims, coords)))) @@ -499,6 +502,7 @@ def __getitem__(self, coords): """ Slice the underlying numpy array in sheet coordinates. """ + if coords in self.dimensions(): return self.dimension_values(coords) if coords is () or coords == slice(None, None): return self @@ -689,6 +693,7 @@ def __getitem__(self, coords): """ Slice the underlying numpy array in sheet coordinates. """ + if coords in self.dimensions(): return self.dimension_values(coords) if not isinstance(coords, slice) and len(coords) > self.ndims: value = coords[self.ndims:] if len(value) > 1: From 6880628e488b2799428baa402e6985274a115757 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:29:57 +0000 Subject: [PATCH 088/212] Simplified implementation of Raster.reduce --- holoviews/element/raster.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 79183a8a42..afd2cc4abc 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -135,10 +135,7 @@ def reduce(self, dimensions=None, function=None, **reduce_map): """ reduce_map = self._reduce_map(dimensions, function, reduce_map) if len(reduce_map) == self.ndims: - reduced_view = self - for dim, reduce_fn in reduce_map.items(): - reduced_view = reduced_view.reduce(**{dim: reduce_fn}) - return reduced_view + return function(self.data) else: dimension, reduce_fn = list(reduce_map.items())[0] other_dimension = [d for d in self.kdims if d.name != dimension] From 8371a69c26ec4b60012879659d632763da23eca0 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 01:30:43 +0000 Subject: [PATCH 089/212] Optimized ColumnarDataFrame.groupby method --- holoviews/core/data.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index aa64da4339..e52e95087f 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -3,7 +3,7 @@ data backends. """ -from collections import defaultdict +from collections import defaultdict, Iterable from itertools import groupby try: @@ -21,6 +21,7 @@ from .dimension import OrderedDict, Dimension from .element import Element, NdElement +from .ndmapping import item_check from .spaces import HoloMap from . 
import util
 
 
@@ -432,10 +433,10 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs):
             if kdim not in index_dims]
         map_data = []
         for k, v in self.element.data.groupby(dimensions):
-            data = v.drop(dimensions, axis=1)
-            map_data.append((k, self.element.clone(data, kdims=element_dims,
+            map_data.append((k, self.element.clone(v, kdims=element_dims,
                                                    **kwargs)))
-        return container_type(map_data, kdims=index_dims)
+        with item_check(False):
+            return container_type(map_data, kdims=index_dims)
 
 
     @classmethod

From fd7e38a7a437b048cbf9e805f9ee5d19e513d84d Mon Sep 17 00:00:00 2001
From: philippjfr
Date: Mon, 26 Oct 2015 02:41:01 +0000
Subject: [PATCH 090/212] Unified TableConversion interface with DFrame
 conversions

---
 holoviews/element/tabular.py  |  58 ++++++--------
 holoviews/interface/pandas.py | 144 +---------------------------------
 2 files changed, 28 insertions(+), 174 deletions(-)

diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py
index 8847bef397..01125cf5ce 100644
--- a/holoviews/element/tabular.py
+++ b/holoviews/element/tabular.py
@@ -209,70 +209,62 @@ class TableConversion(object):
     def __init__(self, table):
         self._table = table
 
-    def _conversion(self, kdims=None, vdims=None, new_type=None, **kwargs):
+    def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, **kwargs):
         if kdims is None:
             kdims = self._table.kdims
         elif kdims and not isinstance(kdims, list): kdims = [kdims]
         if vdims is None:
             vdims = self._table.vdims
+        if mdims is None:
+            mdims = [d for d in self._table.kdims if d not in kdims]
         elif vdims and not isinstance(vdims, list): vdims = [vdims]
-        kdims = [kdim.name if isinstance(kdim, Dimension) else kdim for kdim in kdims]
-        vdims = [vdim.name if isinstance(vdim, Dimension) else vdim for vdim in vdims]
-        if (any(kd in self._table.vdims for kd in kdims) or
-            any(vd in self._table.kdims for vd in vdims)):
-            new_kdims = [kd for kd in self._table.kdims
-                         if kd not in kdims and kd not in vdims] + kdims
-            selected = self._table.reindex(new_kdims, vdims)
-        else:
-            selected = self._table.select(**{'value': vdims})
-        all_dims = selected.dimensions(label=True)
-        invalid = [dim for dim in kdims+vdims if dim not in all_dims]
-        if invalid:
-            raise Exception("Dimensions %r could not be found during conversion to %s new_type" %
-                            (invalid, new_type.__name__))
-        group_dims = [dim for dim in selected.kdims if not dim in kdims+vdims]
-
+        selected = self._table.reindex(mdims+kdims, vdims)
         params = dict({'kdims': [selected.get_dimension(kd) for kd in kdims],
                        'vdims': [selected.get_dimension(vd) for vd in vdims]},
                       **kwargs)
         if len(kdims) == selected.ndims:
             return new_type(selected, **params)
-        return selected.groupby(group_dims, container_type=HoloMap, group_type=new_type, **params)
+        return selected.groupby(mdims, container_type=HoloMap, group_type=new_type, **params)
 
-    def bars(self, kdims=None, vdims=None, **kwargs):
+    def bars(self, kdims=None, vdims=None, mdims=None, **kwargs):
        from .chart import Bars
-        return self._conversion(kdims, vdims, Bars, **kwargs)
+        return self._conversion(kdims, vdims, mdims, Bars, **kwargs)
 
-    def curve(self, kdims=None, vdims=None, **kwargs):
+    def curve(self, kdims=None, vdims=None, mdims=None, **kwargs):
         from .chart import Curve
-        return self._conversion(kdims, vdims, Curve, **kwargs)
+        return self._conversion(kdims, vdims, mdims, Curve, **kwargs)
 
-    def heatmap(self, kdims=None, vdims=None, **kwargs):
+    def heatmap(self, kdims=None, vdims=None, mdims=None, **kwargs):
         from .raster import HeatMap
-        return 
self._conversion(kdims, vdims, HeatMap, **kwargs) + return self._conversion(kdims, vdims, mdims, HeatMap, **kwargs) - def points(self, kdims=None, vdims=None, **kwargs): + def points(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Points - return self._conversion(kdims, vdims, Points, **kwargs) + return self._conversion(kdims, vdims, mdims, Points, **kwargs) - def scatter(self, kdims=None, vdims=None, **kwargs): + def scatter(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Scatter - return self._conversion(kdims, vdims, Scatter, **kwargs) + return self._conversion(kdims, vdims, mdims, Scatter, **kwargs) - def scatter3d(self, kdims=None, vdims=None, **kwargs): + def scatter3d(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Scatter3D - return self._conversion(kdims, vdims, Scatter3D, **kwargs) + return self._conversion(kdims, vdims, mdims, Scatter3D, **kwargs) - def raster(self, kdims=None, vdims=None, **kwargs): + def scatter3d(self, kdims=None, vdims=None, mdims=None, **kwargs): + from .chart3d import Trisurface + return self._conversion(kdims, vdims, mdims, Trisurface, **kwargs) + + def raster(self, kdims=None, vdims=None, mdims=None, **kwargs): from .raster import Raster heatmap = self.heatmap(kdims, vdims, **kwargs) return Raster(heatmap.data, **dict(self._table.get_param_values(onlychanged=True))) - def surface(self, kdims=None, vdims=None, **kwargs): + def surface(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Surface heatmap = self.heatmap(kdims, vdims, **kwargs) return Surface(heatmap.data, **dict(self._table.get_param_values(onlychanged=True))) - def vectorfield(self, kdims=None, vdims=None, **kwargs): + def vectorfield(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import VectorField return self._conversion(kdims, vdims, VectorField, **kwargs) + diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index 542bed67c6..717abf1a0a 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -20,7 +20,7 @@ from ..core import ViewableElement, NdMapping, Columns, NdOverlay,\ NdLayout, GridSpace, NdElement, HoloMap -from ..element import Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface +from ..element import Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface, TableConversion class DataFrameView(Columns): @@ -119,18 +119,6 @@ def holomap(self, kdims=[]): return self.groupby(kdims, HoloMap) -def is_type(df, baseType): - test = [issubclass(np.dtype(d).type, baseType) for d in df.dtypes] - return pd.DataFrame(data=test, index=df.columns, columns=["numeric"]) - - -def is_number(df): - try: - return is_type(df, np.number) - except: - return False - - class DFrame(DataFrameView): """ DFrame is a DataFrameView type, which additionally provides @@ -145,131 +133,5 @@ class DFrame(DataFrameView): * Optional map_dims (list of strings). """ - def _convert(self, kdims=[], vdims=[], mdims=[], reduce_fn=None, - view_type=None, dropna=False, **kwargs): - """ - Conversion method to generate HoloViews objects from a - DFrame. Accepts key, value and HoloMap dimensions. - If no HoloMap dimensions are supplied then non-numeric - dimensions are used. If a reduce_fn such as np.mean is - supplied the data is aggregated for each group along the - key_dimensions. Also supports a dropna option. 
- """ - if not isinstance(kdims, list): kdims = [kdims] - if not isinstance(vdims, list): vdims = [vdims] - - # Process dimensions - sel_dims = kdims + vdims + mdims - el_dims = kdims + vdims - if not mdims and not reduce_fn: - numeric = is_number(self.data) - mdims = [dim for dim in self.dimensions(label=True) - if dim not in sel_dims and not numeric.ix[dim][0]] - # Find leftover dimensions to reduce - if reduce_fn: - reduce_dims = kdims - else: - reduce_dims = [] - - key_dims = [self.get_dimension(d) for d in kdims] - val_dims = [self.get_dimension(d) for d in vdims] - if mdims: - groups = self.groupby(mdims, HoloMap) - mdims = [self.get_dimension(d) for d in mdims] - else: - groups = NdMapping({0: self}) - mdims = ['Default'] - create_kwargs = dict(kdims=key_dims, vdims=val_dims, - view_type=view_type) - create_kwargs.update(kwargs) - - # Convert each element in the HoloMap - hmap = HoloMap(kdims=mdims) - for k, v in groups.items(): - if reduce_dims: - v = v.aggregate(reduce_dims, function=reduce_fn) - v_indexes = [v.data.index.names.index(d) for d in kdims - if d in v.data.index.names] - v = v.apply('reset_index', level=v_indexes) - - vdata = v.data.filter(el_dims) - vdata = vdata.dropna() if dropna else vdata - if issubclass(view_type, Chart): - data = [np.array(vdata[d]) for d in el_dims] - hmap[k] = self._create_chart(data, **create_kwargs) - else: - data = [np.array(vdata[d]) for d in el_dims] - hmap[k] = self._create_table(data, **create_kwargs) - return hmap if mdims != ['Default'] else hmap.last - - - def _create_chart(self, data, kdims=None, vdims=None, - view_type=None, **kwargs): - inherited = dict(kdims=kdims, - vdims=vdims, label=self.label) - return view_type(np.vstack(data).T, **dict(inherited, **kwargs)) - - - def _create_table(self, data, kdims=None, vdims=None, - view_type=None, **kwargs): - ndims = len(kdims) - key_data, value_data = data[:ndims], data[ndims:] - keys = zip(*key_data) - values = zip(*value_data) - inherited = dict(kdims=kdims, - vdims=vdims, label=self.label) - return view_type(zip(keys, values), **dict(inherited, **kwargs)) - - - def curve(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=Curve, **kwargs) - - def points(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=Points, **kwargs) - - def scatter3d(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=Scatter3D, **kwargs) - - def scatter(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=Scatter, **kwargs) - - def vectorfield(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=VectorField, **kwargs) - - def bars(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=Bars, **kwargs) - - def table(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - return self._convert(kdims, vdims, mdims, reduce_fn, - view_type=Table, **kwargs) - - def heatmap(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - tables = self.table(kdims, vdims, mdims, reduce_fn, **kwargs) - - if isinstance(tables, HoloMap): - kwargs = dict(tables.last.get_param_values(onlychanged=True), - **kwargs) - return tables.map(lambda x: HeatMap(x, **kwargs), ['Table']) - else: - kwargs = 
dict(tables.get_param_values(onlychanged=True), - **kwargs) - return HeatMap(tables, **kwargs) - - def surface(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): - if not isinstance(kdims, list): kdims = [kdims] - if not isinstance(vdims, list): vdims = [vdims] - heatmap = self.heatmap(kdims, vdims, mdims, reduce_fn, **kwargs) - key_dims = [self.get_dimension(d) for d in kdims] - val_dims = [self.get_dimension(d) for d in vdims] - kwargs = dict(kwargs, kdims=key_dims, vdims=val_dims, - label=self.label) - if isinstance(heatmap, HoloMap): - return heatmap.map(lambda x: Surface(x.data, **kwargs), ['HeatMap']) - else: - return Surface(heatmap.data, **kwargs) + def to(self): + return TableConversion(self) From f1e48f1aa9bccd5307a07c1b99d531bebc08819d Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 26 Oct 2015 02:41:50 +0000 Subject: [PATCH 091/212] Fixes for Columns sampling and added unit tests --- holoviews/core/element.py | 4 +++- tests/testcolumns.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 5463928128..1d629a7206 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -357,6 +357,8 @@ def __getitem__(self, args): if len(self.vdims) > 1: subtable = self.__class__([(args, subtable)], label=self.label, kdims=self.kdims, vdims=self.vdims) + else: + return subtable[0] # If subtable is not a slice return as reduced type if not isinstance(args, tuple): args = (args,) @@ -374,7 +376,7 @@ def sample(self, samples=[]): sample_data = OrderedDict() for sample in samples: value = self[sample] - sample_data[sample] = value if np.isscalar(value) else value.values()[0] + sample_data[sample] = (value,) if len(self.vdims) == 1 else value.values()[0] return self.clone(sample_data) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 6ac5a6ac57..dcc3899dae 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -51,6 +51,10 @@ def test_columns_items_construct(self): kdims=self.kdims, vdims=self.vdims) self.assertTrue(isinstance(columns.data, NdElement)) + def test_columns_sample(self): + samples = self.columns.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) + def test_columns_index_row_gender(self): table = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) @@ -273,6 +277,10 @@ def test_columns_closest(self): closest = self.columns.closest([0.51, 1, 9.9]) self.assertEqual(closest, [1., 1., 10.]) + def test_columns_sample(self): + samples = self.columns.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) + def test_columns_df_construct(self): self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) From 24ed84053c25b116e80fbc4b390fbb54b0876a56 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 31 Oct 2015 11:47:33 +0000 Subject: [PATCH 092/212] Implemented sort method for Columns --- holoviews/core/data.py | 19 +++++++++++++++ holoviews/core/element.py | 9 ++++++++ holoviews/core/ndmapping.py | 3 ++- holoviews/core/util.py | 20 ++++++++++++---- tests/testcolumns.py | 46 +++++++++++++++++++++++++++++++++---- 5 files changed, 86 insertions(+), 11 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index e52e95087f..6a544719f2 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -68,6 +68,13 @@ def closest(self, coords): return [xs[idx] for idx in idxs] if len(coords) > 1 else xs[idxs[0]] + def sort(self, by=[]): + if 
self.interface is None: + sorted_columns = self.data.sort(by) + else: + sorted_columns = self.interface.sort(by) + return self.clone(sorted_columns) + def range(self, dim, data_range=True): if self.interface is None: @@ -483,6 +490,13 @@ def collapse_data(cls, data, function, kdims, **kwargs): return pd.concat(data).groupby([d.name for d in kdims]).agg(function).reset_index() + def sort(self, by=[]): + if not isinstance(by, list): by = [by] + if not by: by = range(self.element.ndims) + columns = [self.element.get_dimension(d).name for d in by] + return self.element.data.sort_values(columns) + + def select(self, selection_specs=None, **select): """ Allows slice and select individual values along the DataFrameView @@ -585,6 +599,11 @@ def _datarange(cls, data): return np.nanmin(data), np.nanmax(data) + def sort(self, by=[]): + data = self.element.data + idxs = [self.element.get_dimension_index(dim) for dim in by] + return data[np.lexsort(np.flipud(data[:, idxs].T))] + def values(self, dim): data = self.element.data dim_idx = self.element.get_dimension_index(dim) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 1d629a7206..3c4c134009 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -252,6 +252,7 @@ class NdElement(NdMapping, Tabular): will then be promoted to Dimension objects.""") _deep_indexable = False + _sorted = False def __init__(self, data=None, **params): if isinstance(data, list) and all(np.isscalar(el) for el in data): @@ -369,6 +370,14 @@ def __getitem__(self, args): return self._filter_data(subtable, args[-1]) + def sort(self, by=[]): + if not isinstance(by, list): by = [by] + if not by: by = range(self.ndims) + indexes = [self.get_dimension_index(d) for d in by] + return self.clone(dimension_sort(self.data, self.kdims, self.vdims, + False, indexes, self._cached_index_values)) + + def sample(self, samples=[]): """ Allows sampling of the Table with a list of samples. diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index efff09928e..6f7f007e2f 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -249,8 +249,9 @@ def _dataslice(self, data, indices): def _resort(self): if self._sorted: - resorted = dimension_sort(self.data, self.kdims, + resorted = dimension_sort(self.data, self.kdims, self.vdims, self._cached_categorical, + range(self.ndims), self._cached_index_values) self.data = OrderedDict(resorted) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index c10c81296a..fe07b7b2ad 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -405,16 +405,26 @@ def python2sort(x,key=None): return itertools.chain.from_iterable(sorted(group, key=key) for group in groups) -def dimension_sort(odict, dimensions, categorical, cached_values): +def dimension_sort(odict, kdims, vdims, categorical, key_index, cached_values): """ Sorts data by key using usual Python tuple sorting semantics or sorts in categorical order for any categorical Dimensions. 
""" sortkws = {} - if categorical: - sortkws['key'] = lambda x: tuple(cached_values[d.name].index(x[0][i]) - if d.values else x[0][i] - for i, d in enumerate(dimensions)) + ndims = len(kdims) + dimensions = kdims+vdims + indexes = [(dimensions[i], int(i not in range(ndims)), + i if i in range(ndims) else i-ndims) + for i in key_index] + + if len(set(key_index)) != len(key_index): + raise ValueError("Cannot sort on duplicated dimensions") + elif categorical: + sortkws['key'] = lambda x: tuple(cached_values[dim.name].index(x[t][d]) + if dim.values else x[t][d] + for i, (dim, t, d) in enumerate(indexes)) + elif key_index != list(range(len(kdims+vdims))): + sortkws['key'] = lambda x: tuple(x[t][d] for _, t, d in indexes) if sys.version_info.major == 3: return python2sort(odict.items(), **sortkws) else: diff --git a/tests/testcolumns.py b/tests/testcolumns.py index dcc3899dae..77b8f51d44 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -25,6 +25,22 @@ def setUp(self): self.columns = Columns(dict(zip(self.xs, self.ys)), kdims=['x'], vdims=['y']) + def test_columns_sort_vdim(self): + columns = Columns(OrderedDict(zip(self.xs, -self.ys)), + kdims=['x'], vdims=['y']) + columns_sorted = Columns(OrderedDict(zip(self.xs[::-1], -self.ys[::-1])), + kdims=['x'], vdims=['y']) + self.assertEqual(columns.sort('y'), columns_sorted) + + def test_columns_sort_heterogeneous_string(self): + columns = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + keys = [('F',12), ('M',10), ('M',16)] + values = [(10, 0.8), (15, 0.8), (18, 0.6)] + columns_sorted = Columns(zip(keys, values), + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(columns.sort(), columns_sorted) + def test_columns_shape(self): self.assertEqual(self.columns.shape, (11, 2)) @@ -120,7 +136,7 @@ def test_columns_1d_reduce(self): def test_columns_2d_reduce(self): columns = Columns(zip(zip(self.xs, self.ys), self.zs), kdims=['x', 'y'], vdims=['z']) - self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891004) + self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891001) def test_columns_2d_partial_reduce(self): columns = Columns(zip(zip(self.xs, self.ys), self.zs), @@ -132,7 +148,8 @@ def test_columns_2d_partial_reduce(self): def test_columns_heterogeneous_reduce(self): columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) - reduced = Columns(zip([k[1:] for k in self.keys1], self.values1), + reduced = Columns(zip([k[1:] for k in [self.keys1[i] for i in [0, 2, 1]]], + [self.values1[i] for i in [0, 2, 1]]), kdims=self.kdims[1:], vdims=self.vdims) self.assertEqual(columns.reduce(['Gender'], np.mean), reduced) @@ -145,7 +162,7 @@ def test_columns_heterogeneous_reduce2d(self): def test_column_heterogeneous_aggregate(self): columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) - aggregated = Columns(OrderedDict([('F', (10., 0.8)), ('M', (16.5, 0.7))]), + aggregated = Columns(OrderedDict([('M', (16.5, 0.7)), ('F', (10., 0.8))]), kdims=self.kdims[:1], vdims=self.vdims) self.compare_columns(columns.aggregate(['Gender'], np.mean), aggregated) @@ -194,6 +211,12 @@ def test_columns_tuple_list_construct(self): columns = Columns(zip(self.xs, self.ys)) self.assertTrue(isinstance(columns.data, np.ndarray)) + def test_columns_sort_vdim(self): + columns = Columns((self.xs, -self.ys), kdims=['x'], vdims=['y']) + columns_sorted = Columns((self.xs[::-1], -self.ys[::-1]), + kdims=['x'], vdims=['y']) + self.assertEqual(columns.sort('y'), 
columns_sorted) + def test_columns_index(self): self.assertEqual(self.columns[5], self.ys[5]) @@ -238,7 +261,7 @@ def test_columns_1d_reduce(self): def test_columns_2d_reduce(self): columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) - self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891004) + self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891001) def test_columns_2d_partial_reduce(self): columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) @@ -341,6 +364,19 @@ def test_columns_index_value2(self): vdims=self.vdims) self.assertEquals(columns['F', 12, 'Height'], 0.8) + def test_columns_sort_vdim(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': -self.ys}), + kdims=['x'], vdims=['y']) + columns_sorted = Columns(pd.DataFrame({'x': self.xs[::-1], 'y': -self.ys[::-1]}), + kdims=['x'], vdims=['y']) + self.assertEqual(columns.sort('y'), columns_sorted) + + def test_columns_sort_heterogeneous_string(self): + columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) + columns_sorted = Columns([self.column_data[i] for i in [2, 0, 1]], + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(columns.sort(), columns_sorted) + def test_columns_add_dimensions_value(self): columns = self.columns.add_dimension('z', 1, 0) self.assertEqual(columns.kdims[1], 'z') @@ -365,7 +401,7 @@ def test_columns_1d_reduce(self): def test_columns_2d_reduce(self): columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), kdims=['x', 'y'], vdims=['z']) - self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891004) + self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891001) def test_columns_2d_partial_reduce(self): columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), From 48d8d362aabed022ae72df8d41a25f45cb3b8880 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 31 Oct 2015 11:48:04 +0000 Subject: [PATCH 093/212] Made Columns comparison data backend agnostic --- holoviews/element/comparison.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index b92b1cd2a1..b977aa6c07 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -434,18 +434,23 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): #========# # Charts # #========# - + @classmethod def compare_columns(cls, el1, el2, msg='Columns'): cls.compare_dimensioned(el1, el2) if len(el1) != len(el2): raise AssertionError("%s not of matching length." % msg) - if isinstance(el1.data, np.ndarray): - cls.compare_arrays(el1.data, el2.data, msg) - elif isinstance(el1.data, NdElement): - cls.compare_ndmappings(el1.data, el2.data, msg) - else: - cls.compare_dframe(el1, el2, msg) + dimension_data = [(d, el1[d], el2[d]) for d in el1.dimensions()] + for dim, d1, d2 in dimension_data: + if d1.dtype != d2.dtype: + cls.failureException("%s %s columns have different type." % (msg, dim) + + " First has type %s, and second has type %s." + % (d1, d2)) + if d1.dtype.kind in 'SUOV': + if np.all(d1 != d2): + cls.failureException("Columns along dimension %s not equal." 
% dim) + else: + cls.compare_arrays(d1, d2, msg) @classmethod From dd07a39bf9420d782da166604951fe61d687523f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 31 Oct 2015 11:48:30 +0000 Subject: [PATCH 094/212] Fix to Table conversion --- holoviews/element/tabular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 01125cf5ce..57ffabaa70 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -216,7 +216,7 @@ def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, **kwarg if vdims is None: vdims = self._table.vdims if mdims is None: - mdims = [d for d in self._table.kdims if not in kdims] + mdims = [d for d in self._table.kdims if d not in kdims] elif vdims and not isinstance(vdims, list): vdims = [vdims] selected = self._table.reindex(mdims+kdims, vdims) params = dict({'kdims': [selected.get_dimension(kd) for kd in kdims], From 8e7bdbe83f405e758e9e5045a1895f132eb25099 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 31 Oct 2015 12:32:54 +0000 Subject: [PATCH 095/212] Refactored Columns interface as class and static methods --- holoviews/core/data.py | 271 +++++++++++++++++++++-------------------- 1 file changed, 142 insertions(+), 129 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 6a544719f2..9b2a119a82 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -38,7 +38,7 @@ def _validate_data(self, data): if self.interface is None: return data else: - return self.interface.validate_data(data) + return self.interface.validate_data(self, data) def __setstate__(self, state): @@ -72,7 +72,7 @@ def sort(self, by=[]): if self.interface is None: sorted_columns = self.data.sort(by) else: - sorted_columns = self.interface.sort(by) + sorted_columns = self.interface.sort(self, by) return self.clone(sorted_columns) @@ -84,7 +84,7 @@ def range(self, dim, data_range=True): return dim.range elif dim in self.dimensions(): if len(self): - drange = self.interface.range(dim) + drange = self.interface.range(self, dim) else: drange = (np.NaN, np.NaN) if data_range: @@ -112,7 +112,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): if self.interface is None: data = self.data.add_dimension(dimension, dim_pos, dim_val, **kwargs) else: - data = self.interface.add_dimension(self.data, dimension, dim_pos, dim_val) + data = self.interface.add_dimension(self, dimension, dim_pos, dim_val) return self.clone(data, kdims=dimensions) @@ -123,7 +123,7 @@ def select(self, selection_specs=None, **selection): if self.interface is None: data = self.data.select(**selection) else: - data = self.interface.select(**selection) + data = self.interface.select(self, **selection) if np.isscalar(data): return data else: @@ -133,9 +133,9 @@ def select(self, selection_specs=None, **selection): @property def interface(self): if util.is_dataframe(self.data): - return ColumnarDataFrame(self) + return ColumnarDataFrame elif isinstance(self.data, np.ndarray): - return ColumnarArray(self) + return ColumnarArray def reindex(self, kdims=None, vdims=None): @@ -158,7 +158,7 @@ def reindex(self, kdims=None, vdims=None): else: key_dims = [self.get_dimension(k) for k in kdims] - data = self.interface.reindex(key_dims, val_dims) + data = self.interface.reindex(self, key_dims, val_dims) return self.clone(data, kdims=key_dims, vdims=val_dims) @@ -196,7 +196,7 @@ def sample(self, samples=[]): if self.interface is None: return 
self.clone(self.data.sample(samples)) else: - return self.clone(self.interface.sample(samples)) + return self.clone(self.interface.sample(self, samples)) def reduce(self, dimensions=[], function=None, **reduce_map): @@ -227,7 +227,7 @@ def aggregate(self, dimensions, function): if self.interface is None: aggregated = self.data.aggregate(dimensions, function) else: - aggregated = self.interface.aggregate(dimensions, function) + aggregated = self.interface.aggregate(self, dimensions, function) kdims = [self.get_dimension(d) for d in dimensions] return self.clone(aggregated, kdims=kdims) @@ -236,7 +236,7 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): if self.interface is None: return self.data.groupby(dimensions, container_type, **kwargs) else: - return self.interface.groupby(dimensions, container_type, **kwargs) + return self.interface.groupby(self, dimensions, container_type, **kwargs) @classmethod def collapse_data(cls, data, function=None, kdims=None, **kwargs): @@ -252,7 +252,7 @@ def __len__(self): if self.interface is None: return len(self.data) else: - return len(self.interface) + return self.interface.length(self) @property @@ -260,7 +260,7 @@ def shape(self): if self.interface is None: return (len(self), len(self.dimensions())) else: - return self.interface.shape + return self.interface.shape(self) def dimension_values(self, dim): @@ -268,20 +268,20 @@ def dimension_values(self, dim): return self.data.dimension_values(dim) else: dim = self.get_dimension(dim).name - return self.interface.values(dim) + return self.interface.values(self, dim) def dframe(self, as_table=False): if self.interface is None: return self.data.dframe(as_table) else: - return self.interface.dframe(as_table) + return self.interface.dframe(self, as_table) def array(self, as_table=False): if self.interface is None: return super(Columns, self).array(as_table) - array = self.interface.array() + array = self.interface.array(self) if as_table: from ..element import Table if array.dtype.kind in ['S', 'O', 'U']: @@ -295,20 +295,15 @@ def array(self, as_table=False): class ColumnarData(param.Parameterized): - def __init__(self, element, **params): - self.element = element - - def range(self, dimension): - column = self.element.dimension_values(dimension) + @staticmethod + def range(columns, dimension): + column = columns.dimension_values(dimension) return (np.nanmin(column), np.nanmax(column)) - def array(self): - NotImplementedError - - @property - def shape(self): - return self.element.data.shape + @staticmethod + def shape(columns): + return columns.data.shape @classmethod @@ -372,8 +367,8 @@ def _process_data(cls, data, paramobjs, **kwargs): return data, params - @classmethod - def _process_df_dims(cls, data, paramobjs, **kwargs): + @staticmethod + def _process_df_dims(data, paramobjs, **kwargs): if 'kdims' in kwargs or 'vdims' in kwargs: kdims = kwargs.get('kdims', []) vdims = kwargs.get('vdims', []) @@ -389,65 +384,71 @@ def _process_df_dims(cls, data, paramobjs, **kwargs): return kdims, vdims - def as_ndelement(self, **kwargs): + @classmethod + def as_ndelement(cls, columns, **kwargs): """ This method transforms any ViewableElement type into a Table as long as it implements a dimension_values method. 
""" if self.kdims: - keys = zip(*[self.values(dim.name) + keys = zip(*[cls.values(columns, dim.name) for dim in self.kdims]) else: keys = [()]*len(values) if self.vdims: - values = zip(*[self.values(dim.name) + values = zip(*[cls.values(columns, dim.name) for dim in self.vdims]) else: values = [()]*len(keys) data = zip(keys, values) - params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) - if not self.params()['group'].default == self.group: - params['group'] = self.group - el_type = type(self.element) + params = dict(kdims=columns.kdims, vdims=columns.vdims, label=columns.label) + if not columns.params()['group'].default == columns.group: + params['group'] = columns.group + el_type = type(columns.element) return el_type(data, **dict(params, **kwargs)) - def __len__(self): - return len(self.element.data) + @staticmethod + def length(columns): + return len(columns.data) - @classmethod - def validate_data(cls, data): + @staticmethod + def validate_data(columns, data): return data class ColumnarDataFrame(ColumnarData): - def range(self, dimension): - column = self.element.data[self.element.get_dimension(dimension).name] + + @staticmethod + def range(columns, dimension): + column = columns.data[columns.get_dimension(dimension).name] return (column.min(), column.max()) - def groupby(self, dimensions, container_type=HoloMap, **kwargs): - invalid_dims = list(set(dimensions) - set(self.element.dimensions('key', True))) + + @staticmethod + def groupby(columns, dimensions, container_type=HoloMap, **kwargs): + invalid_dims = list(set(dimensions) - set(columns.dimensions('key', True))) if invalid_dims: raise Exception('Following dimensions could not be found:\n%s.' % invalid_dims) - index_dims = [self.element.get_dimension(d) for d in dimensions] - element_dims = [kdim for kdim in self.element.kdims + index_dims = [columns.get_dimension(d) for d in dimensions] + element_dims = [kdim for kdim in columns.kdims if kdim not in index_dims] map_data = [] - for k, v in self.element.data.groupby(dimensions): - map_data.append((k, self.element.clone(v, kdims=element_dims, + for k, v in columns.data.groupby(dimensions): + map_data.append((k, columns.clone(v, kdims=element_dims, **kwargs))) with item_check(False): return container_type(map_data, kdims=index_dims) - @classmethod - def reduce(cls, columns, reduce_dims, function=None): + @staticmethod + def reduce(columns, reduce_dims, function=None): """ The aggregate function accepts either a list of Dimensions and a function to apply to find the aggregate across @@ -471,39 +472,38 @@ def reduce(cls, columns, reduce_dims, function=None): return reduced - def array(self): - return self.element.data.values + @staticmethod + def array(columns): + return columns.data.values - def reindex(self, kdims=None, vdims=None): + @staticmethod + def reindex(columns, kdims=None, vdims=None): # DataFrame based tables don't need to be reindexed - return self.element.data + return columns.data - @classmethod - def _datarange(cls, data): - return data.min(), data.max() - - - @classmethod - def collapse_data(cls, data, function, kdims, **kwargs): + @staticmethod + def collapse_data(data, function, kdims, **kwargs): return pd.concat(data).groupby([d.name for d in kdims]).agg(function).reset_index() - def sort(self, by=[]): + @staticmethod + def sort(columns, by=[]): if not isinstance(by, list): by = [by] - if not by: by = range(self.element.ndims) - columns = [self.element.get_dimension(d).name for d in by] - return self.element.data.sort_values(columns) + if not by: by 
= range(columns.ndims) + cols = [columns.get_dimension(d).name for d in by] + return columns.data.sort_values(cols) - def select(self, selection_specs=None, **select): + @staticmethod + def select(columns, selection_specs=None, **select): """ Allows slice and select individual values along the DataFrameView dimensions. Supply the dimensions and values or slices as keyword arguments. """ - df = self.element.data + df = columns.data selected_kdims = [] mask = True for dim, k in select.items(): @@ -520,39 +520,42 @@ def select(self, selection_specs=None, **select): iter_slcs.append(df[dim] == ik) mask &= np.logical_or.reduce(iter_slcs) else: - if dim in self.element.kdims: selected_kdims.append(dim) + if dim in columns.kdims: selected_kdims.append(dim) mask &= df[dim] == k df = df.ix[mask] - if len(set(selected_kdims)) == self.element.ndims: - if len(df) and len(self.element.vdims) == 1: - df = df[self.element.vdims[0].name].iloc[0] + if len(set(selected_kdims)) == columns.ndims: + if len(df) and len(columns.vdims) == 1: + df = df[columns.vdims[0].name].iloc[0] return df - def values(self, dim): - data = self.element.data[dim] + @staticmethod + def values(columns, dim): + data = columns.data[dim] if util.dd and isinstance(data, util.dd.Series): data = data.compute() return np.array(data) - def aggregate(self, dimensions, function): + @staticmethod + def aggregate(columns, dimensions, function): """ Allows aggregating. """ - data = self.element.data - columns = [d.name for d in self.element.kdims if d in dimensions] - vdims = self.element.dimensions('value', True) - return data.reindex(columns=columns+vdims).groupby(columns).\ + data = columns.data + cols = [d.name for d in columns.kdims if d in dimensions] + vdims = columns.dimensions('value', True) + return data.reindex(columns=cols+vdims).groupby(cols).\ aggregate(function).reset_index() - def sample(self, samples=[]): + @classmethod + def sample(cls, columns, samples=[]): """ Sample the Element data with a list of samples. 
""" - data = self.element.data - mask = np.zeros(len(self), dtype=bool) + data = columns.data + mask = np.zeros(cls.length(columns), dtype=bool) for sample in samples: if np.isscalar(sample): sample = [sample] for i, v in enumerate(sample): @@ -560,74 +563,81 @@ def sample(self, samples=[]): return data[mask] - @classmethod - def add_dimension(cls, data, dimension, dim_pos, values): + @staticmethod + def add_dimension(columns, dimension, dim_pos, values): + data = columns.data.copy() data.insert(dim_pos, dimension.name, values) return data - def dframe(self, as_table=False): + @staticmethod + def dframe(columns, as_table=False): if as_table: from ..element import Table - params = self.element.get_param_values(onlychanged=True) - return Table(self.element.data, **params) - return self.element.data + return Table(columns) + return columns.data + class ColumnarArray(ColumnarData): - @classmethod - def validate_data(cls, data): + @staticmethod + def validate_data(columns, data): if data.ndim == 1: data = np.column_stack([np.arange(len(data)), data]) return data - @classmethod - def add_dimension(cls, data, dimension, dim_pos, values): + @staticmethod + def add_dimension(columns, dimension, dim_pos, values): + data = columns.data.copy() return np.insert(data, dim_pos, values, axis=1) - def array(self): - return self.element.data + @staticmethod + def array(columns): + return columns.data - def dframe(self, as_table=False): - return Element.dframe(self.element, as_table) - @classmethod - def _datarange(cls, data): - return np.nanmin(data), np.nanmax(data) + @staticmethod + def dframe(columns, as_table=False): + return Element.dframe(columns, as_table) - def sort(self, by=[]): - data = self.element.data - idxs = [self.element.get_dimension_index(dim) for dim in by] + @staticmethod + def sort(columns, by=[]): + data = columns.data + idxs = [columns.get_dimension_index(dim) for dim in by] return data[np.lexsort(np.flipud(data[:, idxs].T))] - def values(self, dim): - data = self.element.data - dim_idx = self.element.get_dimension_index(dim) + + @staticmethod + def values(columns, dim): + data = columns.data + dim_idx = columns.get_dimension_index(dim) if data.ndim == 1: data = np.atleast_2d(data).T return data[:, dim_idx] - def reindex(self, kdims=None, vdims=None): + @staticmethod + def reindex(columns, kdims=None, vdims=None): # DataFrame based tables don't need to be reindexed dims = kdims + vdims - data = [self.element.dimension_values(d) for d in dims] + data = [columns.dimension_values(d) for d in dims] return np.column_stack(data) - def groupby(self, dimensions, container_type=HoloMap, **kwargs): - data = self.element.data + @staticmethod + def groupby(columns, dimensions, container_type=HoloMap, **kwargs): + data = columns.data # Get dimension objects, labels, indexes and data - dimensions = [self.element.get_dimension(d) for d in dimensions] - dim_idxs = [self.element.get_dimension_index(d) for d in dimensions] - dim_data = {d: self.element.dimension_values(d) for d in dimensions} + dimensions = [columns.get_dimension(d) for d in dimensions] + dim_idxs = [columns.get_dimension_index(d) for d in dimensions] + dim_data = {d: columns.dimension_values(d) for d in dimensions} ndims = len(dimensions) - kwargs['kdims'] = [kdim for kdim in self.element.kdims + kwargs['kdims'] = [kdim for kdim in columns.kdims if kdim not in dimensions] # Find unique entries along supplied dimensions @@ -643,18 +653,19 @@ def groupby(self, dimensions, container_type=HoloMap, **kwargs): mask = np.zeros(len(data), 
dtype=bool) for d, v in zip(dimensions, group): mask = np.logical_or(mask, dim_data[d] == v) - group_element = self.element.clone(data[mask, ndims:], **kwargs) + group_element = columns.clone(data[mask, ndims:], **kwargs) grouped_data.append((tuple(group), group_element)) return container_type(grouped_data, kdims=dimensions) - def select(self, **selection): - data = self.element.data + @staticmethod + def select(columns, **selection): + data = columns.data mask = True selected_kdims = [] value = selection.pop('value', None) for d, slc in selection.items(): - idx = self.element.get_dimension_index(d) + idx = columns.get_dimension_index(d) if isinstance(slc, slice): if slc.start is not None: mask &= slc.start <= data[:, idx] @@ -663,8 +674,8 @@ def select(self, **selection): elif isinstance(slc, (set, list)): mask &= np.in1d(data[:, idx], list(slc)) else: - if d in self.element.kdims: selected_kdims.append(d) - if self.element.ndims == 1: + if d in columns.kdims: selected_kdims.append(d) + if columns.ndims == 1: data_index = np.argmin(np.abs(data[:, idx] - slc)) data = data[data_index, :] break @@ -673,14 +684,14 @@ def select(self, **selection): if mask is not True: data = data[mask, :] data = np.atleast_2d(data) - if len(data) and len(set(selected_kdims)) == self.element.ndims: - if len(data) == 1 and len(self.element.vdims) == 1: - data = data[0, self.element.ndims] + if len(data) and len(set(selected_kdims)) == columns.ndims: + if len(data) == 1 and len(columns.vdims) == 1: + data = data[0, columns.ndims] return data - @classmethod - def collapse_data(cls, data, function, kdims=None, **kwargs): + @staticmethod + def collapse_data(data, function, kdims=None, **kwargs): """ Applies a groupby operation along the supplied key dimensions then aggregates across the groups with the supplied function. @@ -705,11 +716,12 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): return np.array(rows) - def sample(self, samples=[]): + @staticmethod + def sample(columns, samples=[]): """ Sample the Element data with a list of samples. """ - data = self.element.data + data = columns.data mask = False for sample in samples: if np.isscalar(sample): sample = [sample] @@ -718,8 +730,8 @@ def sample(self, samples=[]): return data[mask] - @classmethod - def reduce(cls, columns, reduce_dims, function): + @staticmethod + def reduce(columns, reduce_dims, function): """ This implementation allows reducing dimensions by aggregating over all the remaining key dimensions using the collapse_data @@ -743,13 +755,14 @@ def reduce(cls, columns, reduce_dims, function): return reduced - def aggregate(self, dimensions, function): + @classmethod + def aggregate(cls, columns, dimensions, function): """ Allows aggregating. 
""" if not isinstance(dimensions, Iterable): dimensions = [dimensions] rows = [] - for k, group in self.groupby(dimensions).data.items(): + for k, group in cls.groupby(columns, dimensions).data.items(): reduced = group.reduce(function=function) rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced])) return np.array(rows) From 6431366d7985831b4cb8b86de116f87eec9c0013 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 31 Oct 2015 13:39:45 +0000 Subject: [PATCH 096/212] Factored out initial Columns NdElement interface --- holoviews/core/data.py | 134 +++++++++++++++++++++++------------------ tests/testcolumns.py | 3 +- 2 files changed, 75 insertions(+), 62 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 9b2a119a82..4d1036229e 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -31,14 +31,7 @@ class Columns(Element): def __init__(self, data, **kwargs): data, params = ColumnarData._process_data(data, self.params(), **kwargs) super(Columns, self).__init__(data, **params) - self.data = self._validate_data(self.data) - - - def _validate_data(self, data): - if self.interface is None: - return data - else: - return self.interface.validate_data(self, data) + self.data = self.interface.validate_data(self, self.data) def __setstate__(self, state): @@ -69,16 +62,11 @@ def closest(self, coords): def sort(self, by=[]): - if self.interface is None: - sorted_columns = self.data.sort(by) - else: - sorted_columns = self.interface.sort(self, by) + sorted_columns = self.interface.sort(self, by) return self.clone(sorted_columns) def range(self, dim, data_range=True): - if self.interface is None: - return self.data.range(dim, data_range) dim = self.get_dimension(dim) if dim.range != (None, None): return dim.range @@ -109,10 +97,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): dimensions = self.kdims[:] dimensions.insert(dim_pos, dimension) - if self.interface is None: - data = self.data.add_dimension(dimension, dim_pos, dim_val, **kwargs) - else: - data = self.interface.add_dimension(self, dimension, dim_pos, dim_val) + data = self.interface.add_dimension(self, dimension, dim_pos, dim_val) return self.clone(data, kdims=dimensions) @@ -120,10 +105,7 @@ def select(self, selection_specs=None, **selection): if selection_specs and not self.matches(selection_specs): return self - if self.interface is None: - data = self.data.select(**selection) - else: - data = self.interface.select(self, **selection) + data = self.interface.select(self, **selection) if np.isscalar(data): return data else: @@ -136,6 +118,8 @@ def interface(self): return ColumnarDataFrame elif isinstance(self.data, np.ndarray): return ColumnarArray + elif isinstance(self.data, NdElement): + return ColumnarNdElement def reindex(self, kdims=None, vdims=None): @@ -144,9 +128,6 @@ def reindex(self, kdims=None, vdims=None): Allows converting key dimensions to value dimensions and vice versa. """ - if self.interface is None: - return self.data.reindex(kdims, vdims) - if vdims is None: val_dims = self.vdims else: @@ -193,10 +174,7 @@ def sample(self, samples=[]): Allows sampling of Columns as an iterator of coordinates matching the key dimensions. 
""" - if self.interface is None: - return self.clone(self.data.sample(samples)) - else: - return self.clone(self.interface.sample(self, samples)) + return self.clone(self.interface.sample(self, samples)) def reduce(self, dimensions=[], function=None, **reduce_map): @@ -207,10 +185,7 @@ def reduce(self, dimensions=[], function=None, **reduce_map): reduce_dims, reduce_map = self._reduce_map(dimensions, function, reduce_map) reduced = self for reduce_fn, group in reduce_map: - if self.interface is None: - reduced = self.data.reduce(reduced, group, function) - else: - reduced = self.interface.reduce(reduced, group, function) + reduced = self.interface.reduce(reduced, group, function) if np.isscalar(reduced): return reduced @@ -224,19 +199,14 @@ def aggregate(self, dimensions, function): """ Groups over the supplied dimensions and aggregates. """ - if self.interface is None: - aggregated = self.data.aggregate(dimensions, function) - else: - aggregated = self.interface.aggregate(self, dimensions, function) + aggregated = self.interface.aggregate(self, dimensions, function) kdims = [self.get_dimension(d) for d in dimensions] return self.clone(aggregated, kdims=kdims) def groupby(self, dimensions, container_type=HoloMap, **kwargs): - if self.interface is None: - return self.data.groupby(dimensions, container_type, **kwargs) - else: - return self.interface.groupby(self, dimensions, container_type, **kwargs) + return self.interface.groupby(self, dimensions, container_type, **kwargs) + @classmethod def collapse_data(cls, data, function=None, kdims=None, **kwargs): @@ -249,38 +219,24 @@ def collapse_data(cls, data, function=None, kdims=None, **kwargs): def __len__(self): - if self.interface is None: - return len(self.data) - else: - return self.interface.length(self) + return self.interface.length(self) @property def shape(self): - if self.interface is None: - return (len(self), len(self.dimensions())) - else: - return self.interface.shape(self) + return self.interface.shape(self) def dimension_values(self, dim): - if self.interface is None: - return self.data.dimension_values(dim) - else: - dim = self.get_dimension(dim).name - return self.interface.values(self, dim) + dim = self.get_dimension(dim).name + return self.interface.values(self, dim) def dframe(self, as_table=False): - if self.interface is None: - return self.data.dframe(as_table) - else: - return self.interface.dframe(self, as_table) + return self.interface.dframe(self, as_table) def array(self, as_table=False): - if self.interface is None: - return super(Columns, self).array(as_table) array = self.interface.array(self) if as_table: from ..element import Table @@ -301,6 +257,11 @@ def range(columns, dimension): return (np.nanmin(column), np.nanmax(column)) + @staticmethod + def dframe(columns, as_table=False): + return Element.dframe(columns, as_table) + + @staticmethod def shape(columns): return columns.data.shape @@ -420,6 +381,59 @@ def validate_data(columns, data): return data + +class ColumnarNdElement(ColumnarData): + + @staticmethod + def validate_data(columns, data): + return data + + @staticmethod + def add_dimension(columns, dimension, dim_pos, values): + return columns.data.add_dimension(dimension, dim_pos, values) + + @staticmethod + def array(columns): + return columns.data.array() + + @staticmethod + def sort(columns, by=[]): + return columns.data.sort(by) + + @staticmethod + def values(columns, dim): + return columns.data.dimension_values(dim) + + @staticmethod + def reindex(columns, kdims=None, vdims=None): + return 
columns.data.reindex(kdims, vdims) + + @staticmethod + def groupby(columns, dimensions, container_type=HoloMap, **kwargs): + return columns.data.groupby(dimensions, container_type, **kwargs) + + @staticmethod + def select(columns, **selection): + return columns.data.select(**selection) + + @staticmethod + def collapse_data(data, function, kdims=None, **kwargs): + return data[0].collapse_data(data, function, kdims, **kwargs) + + @staticmethod + def sample(columns, samples=[]): + return columns.data.sample(samples) + + @staticmethod + def reduce(columns, reduce_dims, function): + return columns.data.reduce(columns, reduce_dims, function) + + @classmethod + def aggregate(cls, columns, dimensions, function): + return columns.data.aggregate(dimensions, function) + + + class ColumnarDataFrame(ColumnarData): diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 77b8f51d44..cc397d2320 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -148,8 +148,7 @@ def test_columns_2d_partial_reduce(self): def test_columns_heterogeneous_reduce(self): columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) - reduced = Columns(zip([k[1:] for k in [self.keys1[i] for i in [0, 2, 1]]], - [self.values1[i] for i in [0, 2, 1]]), + reduced = Columns(zip([k[1:] for k in self.keys1], self.values1), kdims=self.kdims[1:], vdims=self.vdims) self.assertEqual(columns.reduce(['Gender'], np.mean), reduced) From bef53ff185d9327e63519dfc0649732e0c80abe4 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 31 Oct 2015 13:43:04 +0000 Subject: [PATCH 097/212] Updated Bars comparison test --- tests/testcomparisonchart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testcomparisonchart.py b/tests/testcomparisonchart.py index d7c9b41a56..5eb7c7c2bd 100644 --- a/tests/testcomparisonchart.py +++ b/tests/testcomparisonchart.py @@ -55,8 +55,8 @@ def test_bars_unequal_1(self): try: self.assertEqual(self.bars1, self.bars2) except AssertionError as e: - if not 'values are different' in str(e): - raise Exception('Bars mismatched data error not raised.') + if not 'not almost equal' in str(e): + raise Exception('Bars mismatched data error not raised. %s' % e) def test_bars_unequal_keydims(self): try: From 39ef416f7aec533e0958fda6dacc9dda3f5327b1 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 2 Nov 2015 02:34:40 +0000 Subject: [PATCH 098/212] Various improvements to Columns groupby implementations --- holoviews/core/data.py | 54 ++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 4d1036229e..661709d2c5 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -21,7 +21,7 @@ from .dimension import OrderedDict, Dimension from .element import Element, NdElement -from .ndmapping import item_check +from .ndmapping import NdMapping, item_check, sorted_context from .spaces import HoloMap from . 
import util @@ -204,8 +204,17 @@ def aggregate(self, dimensions, function): return self.clone(aggregated, kdims=kdims) - def groupby(self, dimensions, container_type=HoloMap, **kwargs): - return self.interface.groupby(self, dimensions, container_type, **kwargs) + def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, **kwargs): + if not isinstance(dimensions, list): dimensions = [dimensions] + if not len(dimensions): dimensions = self.dimensions('key', True) + dimensions = [self.get_dimension(d).name for d in dimensions] + invalid_dims = list(set(dimensions) - set(self.dimensions('key', True))) + if invalid_dims: + raise Exception('Following dimensions could not be found:\n%s.' + % invalid_dims) + if group_type is None: + group_type = type(self) + return self.interface.groupby(self, dimensions, container_type, group_type, **kwargs) @classmethod @@ -409,8 +418,10 @@ def reindex(columns, kdims=None, vdims=None): return columns.data.reindex(kdims, vdims) @staticmethod - def groupby(columns, dimensions, container_type=HoloMap, **kwargs): - return columns.data.groupby(dimensions, container_type, **kwargs) + def groupby(columns, dimensions, container_type, group_type, **kwargs): + if 'kdims' not in kwargs: + kwargs['kdims'] = [d for d in columns.kdims if d not in dimensions] + return columns.data.groupby(dimensions, container_type, group_type, **kwargs) @staticmethod def select(columns, **selection): @@ -444,20 +455,16 @@ def range(columns, dimension): @staticmethod - def groupby(columns, dimensions, container_type=HoloMap, **kwargs): - invalid_dims = list(set(dimensions) - set(columns.dimensions('key', True))) - if invalid_dims: - raise Exception('Following dimensions could not be found:\n%s.' - % invalid_dims) - + def groupby(columns, dimensions, container_type, group_type, **kwargs): index_dims = [columns.get_dimension(d) for d in dimensions] element_dims = [kdim for kdim in columns.kdims if kdim not in index_dims] map_data = [] - for k, v in columns.data.groupby(dimensions): - map_data.append((k, columns.clone(v, kdims=element_dims, - **kwargs))) - with item_check(False): + with item_check(False), sorted_context(False): + for k, v in columns.data.groupby(dimensions): + map_data.append((k, columns.clone(v, new_type=group_type, + **dict({'kdims':element_dims}, + **kwargs)))) return container_type(map_data, kdims=index_dims) @@ -643,7 +650,7 @@ def reindex(columns, kdims=None, vdims=None): @staticmethod - def groupby(columns, dimensions, container_type=HoloMap, **kwargs): + def groupby(columns, dimensions, container_type, group_type, **kwargs): data = columns.data # Get dimension objects, labels, indexes and data @@ -655,21 +662,26 @@ def groupby(columns, dimensions, container_type=HoloMap, **kwargs): if kdim not in dimensions] # Find unique entries along supplied dimensions + # by creating a view that treats the selected + # groupby keys as a single object. 
indices = data[:, dim_idxs] view = indices.view(np.dtype((np.void, indices.dtype.itemsize * indices.shape[1]))) _, idx = np.unique(view, return_index=True) + idx.sort() unique_indices = indices[idx] # Iterate over the unique entries building masks # to apply the group selection grouped_data = [] for group in unique_indices: - mask = np.zeros(len(data), dtype=bool) + mask = False for d, v in zip(dimensions, group): - mask = np.logical_or(mask, dim_data[d] == v) - group_element = columns.clone(data[mask, ndims:], **kwargs) + mask |= dim_data[d] == v + group_element = columns.clone(data[mask, ndims:], + new_type=group_type, **kwargs) grouped_data.append((tuple(group), group_element)) - return container_type(grouped_data, kdims=dimensions) + with item_check(False), sorted_context(False): + return container_type(grouped_data, kdims=dimensions) @staticmethod @@ -776,7 +788,7 @@ def aggregate(cls, columns, dimensions, function): """ if not isinstance(dimensions, Iterable): dimensions = [dimensions] rows = [] - for k, group in cls.groupby(columns, dimensions).data.items(): + for k, group in cls.groupby(columns, dimensions, NdMapping, type(columns)).data.items(): reduced = group.reduce(function=function) rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced])) return np.array(rows) From af9b935a04033fff2b28437d5c624209d5b2b02e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 2 Nov 2015 02:35:36 +0000 Subject: [PATCH 099/212] Fix to Bokeh Curve plot --- holoviews/plotting/bokeh/chart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index bab190e3b5..fd94db4c06 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -99,8 +99,8 @@ class CurvePlot(ElementPlot): def get_data(self, element, ranges=None): x = element.get_dimension(0).name y = element.get_dimension(1).name - return ({'x': element.dimension_values(0), - 'y': element.dimension_values(1)}, + return ({x: element.dimension_values(0), + y: element.dimension_values(1)}, dict(x=x, y=y)) From ea000198ae3148b4f455d75641e51404a64c4fa7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 2 Nov 2015 02:36:45 +0000 Subject: [PATCH 100/212] Added support for new_type in clone --- holoviews/core/dimension.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 6fe5d6bc11..0c55d233b5 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -245,7 +245,7 @@ def __init__(self, data, id=None, **params): self.label) - def clone(self, data=None, shared_data=True, *args, **overrides): + def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): """ Returns a clone of the object with matching parameter values containing the specified args and kwargs. @@ -253,10 +253,18 @@ def clone(self, data=None, shared_data=True, *args, **overrides): If shared_data is set to True and no data explicitly supplied, the clone will share data with the original. 
""" - settings = dict(self.get_param_values(), **overrides) + params = dict(self.get_param_values()) + if new_type is None: + clone_type = self.__class__ + else: + clone_type = new_type + new_params = new_type.params() + params = {k: v for k, v in params.items() + if k in new_params} + settings = dict(params, **overrides) if data is None and shared_data: data = self.data - return self.__class__(data, *args, **settings) + return clone_type(data, *args, **settings) def relabel(self, label=None, group=None, depth=0): From a13fd2920c6d24c84c2d1abbdb3e5e4fb68a1525 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 2 Nov 2015 22:59:31 +0000 Subject: [PATCH 101/212] Automatically adding index on NdElement types --- holoviews/core/data.py | 16 ++++++++---- holoviews/core/element.py | 52 +++++++++++++++++++++++++-------------- tests/testcolumns.py | 26 +++++++++++--------- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 661709d2c5..b996bcd12d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -317,7 +317,7 @@ def _process_data(cls, data, paramobjs, **kwargs): if ((not np.isscalar(data[0]) and len(data[0]) == 2 and any(not np.isscalar(data[0][i]) for i in range(2))) or not pd): - data = OrderedDict(data) + pass else: dimensions = (kwargs.get('kdims', ) + kwargs.get('vdims', paramobjs['vdims'].default)) @@ -331,7 +331,7 @@ def _process_data(cls, data, paramobjs, **kwargs): params['kdims'] = paramobjs['kdims'].default if 'vdims' not in params: params['vdims'] = paramobjs['vdims'].default - if isinstance(data, dict): + if isinstance(data, (dict, list)): data = NdElement(data, kdims=params['kdims'], vdims=params['vdims']) return data, params @@ -397,16 +397,21 @@ class ColumnarNdElement(ColumnarData): def validate_data(columns, data): return data + @staticmethod + def shape(columns): + return (len(columns), len(columns.dimensions())) + @staticmethod def add_dimension(columns, dimension, dim_pos, values): - return columns.data.add_dimension(dimension, dim_pos, values) + return columns.data.add_dimension(dimension, dim_pos+1, values) @staticmethod def array(columns): - return columns.data.array() + return columns.data.array(dimensions=columns.dimensions()) @staticmethod def sort(columns, by=[]): + if not len(by): by = columns.dimensions('key', True) return columns.data.sort(by) @staticmethod @@ -421,7 +426,8 @@ def reindex(columns, kdims=None, vdims=None): def groupby(columns, dimensions, container_type, group_type, **kwargs): if 'kdims' not in kwargs: kwargs['kdims'] = [d for d in columns.kdims if d not in dimensions] - return columns.data.groupby(dimensions, container_type, group_type, **kwargs) + with item_check(False), sorted_context(False): + return columns.data.groupby(dimensions, container_type, group_type, **kwargs) @staticmethod def select(columns, **selection): diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 3c4c134009..f3bec88096 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -145,8 +145,11 @@ def dframe(self): return pd.DataFrame(dim_vals) - def array(self, as_table=False): - dims = self.kdims + self.vdims + def array(self, as_table=False, dimensions=[]): + if dimensions: + dims = [self.get_dimension(d) for d in dimensions] + else: + dims = [d for d in self.kdims + self.vdims if d != 'Index'] columns, types = [], [] for dim in dims: column = self.dimension_values(dim) @@ -256,7 +259,12 @@ class NdElement(NdMapping, Tabular): def __init__(self, 
data=None, **params): if isinstance(data, list) and all(np.isscalar(el) for el in data): - data = OrderedDict(list(((k,), v) for k, v in enumerate(data))) + data = (((k,), (v,)) for k, v in enumerate(data)) + kdims = params.get('kdims', self.kdims) + if not isinstance(data, NdElement) and 'Index' not in kdims: + params['kdims'] = ['Index'] + list(kdims) + data_items = data.items() if isinstance(data, dict) else data + data = [((i,)+((k,) if np.isscalar(k) else k), v) for i, (k, v) in enumerate(data_items)] super(NdElement, self).__init__(data, **params) @@ -278,6 +286,7 @@ def reindex(self, kdims=None, vdims=None, force=False): vdims = self.vdims elif kdims is None: kdims = [d for d in self.dimensions if d not in vdims] + if 'Index' not in kdims: kdims = ['Index'] + kdims key_dims = [self.get_dimension(k) for k in kdims] val_dims = [self.get_dimension(v) for v in vdims] @@ -383,9 +392,10 @@ def sample(self, samples=[]): Allows sampling of the Table with a list of samples. """ sample_data = OrderedDict() - for sample in samples: - value = self[sample] - sample_data[sample] = (value,) if len(self.vdims) == 1 else value.values()[0] + for i, sample in enumerate(samples): + sample = (sample,) if np.isscalar(sample) else sample + value = self[(slice(None),)+sample] + sample_data[(i,)+sample] = value.data.values()[0] return self.clone(sample_data) @@ -425,21 +435,25 @@ def _item_check(self, dim_vals, data): @classmethod def collapse_data(cls, data, function, kdims=None, **kwargs): - index = 0 - joined_data = data[0].clone(shared_data=False, kdims=['Index']+data[0].kdims) - for d in data: - d = d.add_dimension('Index', 0, range(index, index+len(d))) - index += len(d) - joined_data.update(d) - - grouped = joined_data.groupby([d.name for d in kdims], container_type=HoloMap) - collapsed = joined_data.clone(shared_data=False, kdims=kdims) - for k, group in grouped.items(): + offset = 0 + joined_data = data[0] + if len(data) > 1: + concatenated = [] + for d in data: + reindexed = [((i+offset,)+k[1:], v) for i, (k, v) in enumerate(d.data.items())] + concatenated += reindexed + offset += len(reindexed) + joined_data = joined_data.clone(concatenated, kdims=joined_data.kdims) + + collapsed = [] + grouped = joined_data.groupby([d.name for d in kdims], container_type=NdMapping) + for i, (k, group) in enumerate(grouped.data.items()): if isinstance(function, np.ufunc): - collapsed[k] = tuple(function.reduce(group[vdim.name]) for vdim in group.vdims) + reduced = tuple(function.reduce(group[vdim.name]) for vdim in group.vdims) else: - collapsed[k] = tuple(function(group[vdim.name], **kwargs) for vdim in group.vdims) - return collapsed + reduced = tuple(function(group[vdim.name], **kwargs) for vdim in group.vdims) + collapsed.append(((i,)+k, reduced)) + return joined_data.clone(collapsed, kdims=['Index']+kdims) def aggregate(self, dimensions, function): diff --git a/tests/testcolumns.py b/tests/testcolumns.py index cc397d2320..bdf51e53ec 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -74,31 +74,33 @@ def test_columns_sample(self): def test_columns_index_row_gender(self): table = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) + indexed = Columns(OrderedDict([(('F', 12), (10, 0.8))]), + kdims=self.kdims, vdims=self.vdims) row = table['F',:] - self.assertEquals(type(row), Columns) - self.assertEquals(row.data.data, OrderedDict([(('F', 12), (10, 0.8))])) + self.assertEquals(row, indexed) def test_columns_index_rows_gender(self): table = Columns(zip(self.keys1, 
self.values1), kdims=self.kdims, vdims=self.vdims) row = table['M',:] - self.assertEquals(type(row), Columns) - self.assertEquals(row.data.data, - OrderedDict([(('M', 10), (15, 0.8)), (('M', 16), (18, 0.6))])) + indexed = Columns(OrderedDict([(('M', 10), (15, 0.8)), + (('M', 16), (18, 0.6))]), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(row, indexed) def test_columns_index_row_age(self): table = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) - row = table[:, 12] - self.assertEquals(type(row), Columns) - self.assertEquals(row.data.data, OrderedDict([(('F', 12), (10, 0.8))])) + indexed = Columns(OrderedDict([(('F', 12), (10, 0.8))]), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(table[:, 12], indexed) def test_columns_index_item_table(self): table = Columns(zip(self.keys1, self.values1), kdims=self.kdims, vdims=self.vdims) - itemtable = table['F', 12] - self.assertEquals(type(itemtable), Columns) - self.assertEquals(itemtable.data.data, OrderedDict([(('F', 12), (10, 0.8))])) + indexed = Columns(OrderedDict([(('F', 12), (10, 0.8))]), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(table['F', 12], indexed) def test_columns_index_value1(self): @@ -127,7 +129,7 @@ def test_columns_add_dimensions_values(self): def test_columns_collapse(self): collapsed = HoloMap({i: Columns(dict(zip(self.xs, self.ys*i)), kdims=['x'], vdims=['y']) for i in range(10)}, kdims=['z']).collapse('z', np.mean) - self.compare_columns(collapsed, Columns(dict(zip(self.xs, self.ys*4.5)), + self.compare_columns(collapsed, Columns(zip(zip(self.xs), self.ys*4.5), kdims=['x'], vdims=['y'])) def test_columns_1d_reduce(self): From ac15afbff2d43b41804c7819066e060b496c5e8c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 3 Nov 2015 14:28:07 +0000 Subject: [PATCH 102/212] Handling datetime types in Columns and plotting --- holoviews/core/data.py | 10 ++++++++-- holoviews/core/util.py | 27 ++++++++++++++++----------- holoviews/plotting/bokeh/element.py | 23 ++++++++++++++++++----- holoviews/plotting/bokeh/util.py | 4 ++++ 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index b996bcd12d..80669e543d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -76,7 +76,10 @@ def range(self, dim, data_range=True): else: drange = (np.NaN, np.NaN) if data_range: - return util.max_range([drange, dim.soft_range]) + if dim.soft_range != (None, None): + return util.max_range([drange, dim.soft_range]) + else: + return drange else: return dim.soft_range @@ -263,7 +266,10 @@ class ColumnarData(param.Parameterized): @staticmethod def range(columns, dimension): column = columns.dimension_values(dimension) - return (np.nanmin(column), np.nanmax(column)) + if columns.get_dimension_type(dimension) is np.datetime64: + return column.min(), column.max() + else: + return (np.nanmin(column), np.nanmax(column)) @staticmethod diff --git a/holoviews/core/util.py b/holoviews/core/util.py index fe07b7b2ad..b12778431a 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -269,7 +269,10 @@ def find_range(values, soft_range=[]): try: values = np.array(values) values = np.squeeze(values) if len(values.shape) > 1 else values - values = np.concatenate([values, soft_range]) + if len(soft_range): + values = np.concatenate([values, soft_range]) + if values.dtype.kind == 'M': + return values.min(), values.max() return np.nanmin(values), np.nanmax(values) except: try: @@ -280,16 +283,18 @@ def find_range(values, 
soft_range=[]): def max_range(ranges): - """ - Computes the maximal lower and upper bounds from a list bounds. - """ - try: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') - arr = np.array(ranges, dtype=np.float) - return (np.nanmin(arr[:, 0]), np.nanmax(arr[:, 1])) - except: - return (np.NaN, np.NaN) + """ + Computes the maximal lower and upper bounds from a list bounds. + """ + try: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') + arr = np.array(ranges) + if arr.dtype.kind == 'M': + return arr[:, 0].min(), arr[:, 1].max() + return (np.nanmin(arr[:, 0]), np.nanmax(arr[:, 1])) + except: + return (np.NaN, np.NaN) def max_extents(extents, zrange=False): diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index d4a7a43a10..184ffed2c8 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -18,7 +18,7 @@ from ...element import RGB from ..plot import GenericElementPlot, GenericOverlayPlot from .plot import BokehPlot -from .util import mpl_to_bokeh +from .util import mpl_to_bokeh, convert_datetime # Define shared style properties for bokeh plots @@ -170,13 +170,25 @@ def _axes_props(self, plots, subplots, element, ranges): if plot.yaxis[0].axis_label == xlabel: plot_ranges['x_range'] = plot.y_range + if element.get_dimension_type(0) is np.datetime64: + x_axis_type = 'datetime' + else: + x_axis_type = 'log' if self.logx else 'auto' + if element.get_dimension_type(1) is np.datetime64: + y_axis_type = 'datetime' + else: + y_axis_type = 'log' if self.logy else 'auto' + if not 'x_range' in plot_ranges: if 'x_range' in ranges: plot_ranges['x_range'] = ranges['x_range'] else: l, b, r, t = self.get_extents(element, ranges) low, high = (b, t) if self.invert_axes else (l, r) - if low == high: + if x_axis_type == 'datetime': + low = convert_datetime(low) + high = convert_datetime(high) + elif low == high: offset = low*0.1 if low else 0.5 low -= offset high += offset @@ -192,7 +204,10 @@ def _axes_props(self, plots, subplots, element, ranges): else: l, b, r, t = self.get_extents(element, ranges) low, high = (l, r) if self.invert_axes else (b, t) - if low == high: + if y_axis_type == 'datetime': + low = convert_datetime(low) + high = convert_datetime(high) + elif low == high: offset = low*0.1 if low else 0.5 low -= offset high += offset @@ -205,8 +220,6 @@ def _axes_props(self, plots, subplots, element, ranges): end=yrange.start) else: plot_ranges['y_range'] = yrange[::-1] - x_axis_type = 'log' if self.logx else 'auto' - y_axis_type = 'log' if self.logy else 'auto' return (x_axis_type, y_axis_type), (xlabel, ylabel, zlabel), plot_ranges diff --git a/holoviews/plotting/bokeh/util.py b/holoviews/plotting/bokeh/util.py index 8f074103e6..edce38c62f 100644 --- a/holoviews/plotting/bokeh/util.py +++ b/holoviews/plotting/bokeh/util.py @@ -114,3 +114,7 @@ def layout_padding(plots): p.ygrid.grid_line_color = None expanded_plots[r].append(p) return expanded_plots + + +def convert_datetime(time): + return time.astype('datetime64[s]').astype(float)*1000 From 3e83b5c723735b266f470ff7514e9701c0468bf8 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 3 Nov 2015 14:29:48 +0000 Subject: [PATCH 103/212] Refactored and optimized Columns groupby and collapse_data methods --- holoviews/core/data.py | 62 +++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/holoviews/core/data.py 
b/holoviews/core/data.py index 80669e543d..ed94119d01 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -471,12 +471,12 @@ def groupby(columns, dimensions, container_type, group_type, **kwargs): index_dims = [columns.get_dimension(d) for d in dimensions] element_dims = [kdim for kdim in columns.kdims if kdim not in index_dims] - map_data = [] + + element_kwargs = dict(kdims=element_dims, new_type=group_type) + element_kwargs.update(kwargs) + map_data = [(k, columns.clone(v, **element_kwargs)) for k, v in + columns.data.groupby(dimensions)] with item_check(False), sorted_context(False): - for k, v in columns.data.groupby(dimensions): - map_data.append((k, columns.clone(v, new_type=group_type, - **dict({'kdims':element_dims}, - **kwargs)))) return container_type(map_data, kdims=index_dims) @@ -662,13 +662,12 @@ def reindex(columns, kdims=None, vdims=None): @staticmethod - def groupby(columns, dimensions, container_type, group_type, **kwargs): + def groupby(columns, dimensions, container_type=HoloMap, group_type=NdMapping, raw=False, **kwargs): data = columns.data # Get dimension objects, labels, indexes and data dimensions = [columns.get_dimension(d) for d in dimensions] dim_idxs = [columns.get_dimension_index(d) for d in dimensions] - dim_data = {d: columns.dimension_values(d) for d in dimensions} ndims = len(dimensions) kwargs['kdims'] = [kdim for kdim in columns.kdims if kdim not in dimensions] @@ -686,14 +685,18 @@ def groupby(columns, dimensions, container_type, group_type, **kwargs): # to apply the group selection grouped_data = [] for group in unique_indices: - mask = False - for d, v in zip(dimensions, group): - mask |= dim_data[d] == v - group_element = columns.clone(data[mask, ndims:], - new_type=group_type, **kwargs) - grouped_data.append((tuple(group), group_element)) - with item_check(False), sorted_context(False): - return container_type(grouped_data, kdims=dimensions) + mask = np.logical_or.reduce([data[:, i] == group[i] + for i in range(ndims)]) + group_data = data[mask, ndims:] + if not raw: + group_data = columns.clone(group_data, new_type=group_type, **kwargs) + grouped_data.append((tuple(group), group_data)) + + if raw: + return grouped_data + else: + with item_check(False), sorted_context(False): + return container_type(grouped_data, kdims=dimensions) @staticmethod @@ -728,28 +731,27 @@ def select(columns, **selection): return data - @staticmethod - def collapse_data(data, function, kdims=None, **kwargs): + @classmethod + def collapse_data(cls, data, function, kdims=None, **kwargs): """ Applies a groupby operation along the supplied key dimensions then aggregates across the groups with the supplied function. 
""" ndims = data[0].shape[1] nkdims = len(kdims) + data = data[0] if len(data) == 0 else np.concatenate(data) vdims = ['Value Dimension %s' % i for i in range(ndims-len(kdims))] - joined_data = Columns(np.concatenate(data), kdims=kdims, vdims=vdims) + joined_data = Columns(data, kdims=kdims, vdims=vdims) rows = [] - for k, group in joined_data.groupby(kdims).items(): + for k, group in cls.groupby(joined_data, kdims, raw=True): row = np.zeros(ndims) row[:ndims] = np.array(k) - for i, vdim in enumerate(group.vdims): - group_data = group.dimension_values(vdim) - if isinstance(function, np.ufunc): - collapsed = function.reduce(group_data) - else: - collapsed = function(group_data, axis=0, **kwargs) - row[nkdims+i] = collapsed + if isinstance(function, np.ufunc): + collapsed = function.reduce(group) + else: + collapsed = function(group, axis=0, **kwargs) + row[nkdims+i] = collapsed rows.append(row) return np.array(rows) @@ -800,7 +802,11 @@ def aggregate(cls, columns, dimensions, function): """ if not isinstance(dimensions, Iterable): dimensions = [dimensions] rows = [] - for k, group in cls.groupby(columns, dimensions, NdMapping, type(columns)).data.items(): - reduced = group.reduce(function=function) + reindexed = columns.reindex(dimensions) + for k, group in cls.groupby(reindexed, dimensions, raw=True): + if isinstance(function, np.ufunc): + reduced = function.reduce(group, axis=0) + else: + reduced = function(group, axis=0) rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced])) return np.array(rows) From 5202115fbfbaff1d7dd9e1f262b998bb072c7031 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 01:31:40 +0000 Subject: [PATCH 104/212] Fixes to range handling after datetime support changes --- holoviews/core/data.py | 5 +++-- holoviews/core/util.py | 38 ++++++++++++++++++------------------- holoviews/element/raster.py | 12 ++++++++---- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index ed94119d01..5eace86eae 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -76,8 +76,9 @@ def range(self, dim, data_range=True): else: drange = (np.NaN, np.NaN) if data_range: - if dim.soft_range != (None, None): - return util.max_range([drange, dim.soft_range]) + soft_range = [r for r in dim.soft_range if r is not None] + if soft_range: + return util.max_range([drange, soft_range]) else: return drange else: diff --git a/holoviews/core/util.py b/holoviews/core/util.py index b12778431a..0e10a6008f 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -261,25 +261,25 @@ def find_minmax(lims, olims): def find_range(values, soft_range=[]): - """ - Safely finds either the numerical min and max of - a set of values, falling back to the first and - the last value in the sorted list of values. - """ - try: - values = np.array(values) - values = np.squeeze(values) if len(values.shape) > 1 else values - if len(soft_range): - values = np.concatenate([values, soft_range]) - if values.dtype.kind == 'M': - return values.min(), values.max() - return np.nanmin(values), np.nanmax(values) - except: - try: - values = sorted(values) - return (values[0], values[-1]) - except: - return (None, None) + """ + Safely finds either the numerical min and max of + a set of values, falling back to the first and + the last value in the sorted list of values. 
+ """ + try: + values = np.array(values) + values = np.squeeze(values) if len(values.shape) > 1 else values + if len(soft_range): + values = np.concatenate([values, soft_range]) + if values.dtype.kind == 'M': + return values.min(), values.max() + return np.nanmin(values), np.nanmax(values) + except: + try: + values = sorted(values) + return (values[0], values[-1]) + except: + return (None, None) def max_range(ranges): diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index afd2cc4abc..6361d990ca 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -528,15 +528,19 @@ def range(self, dim, data_range=True): elif dim_idx in [0, 1]: l, b, r, t = self.bounds.lbrt() if dim_idx: - data_range = (b, t) + drange = (b, t) else: - data_range = (l, r) + drange = (l, r) elif dim_idx < len(self.vdims) + 2: dim_idx -= 2 data = np.atleast_3d(self.data)[:, :, dim_idx] - data_range = (np.nanmin(data), np.nanmax(data)) + drange = (np.nanmin(data), np.nanmax(data)) if data_range: - return util.max_range([data_range, dim.soft_range]) + soft_range = [r for r in dim.soft_range if r is not None] + if soft_range: + return util.max_range([drange, soft_range]) + else: + return drange else: return dim.soft_range From cd292502b3a6064117eb3a2f1f952e2fb7fac23d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 01:32:07 +0000 Subject: [PATCH 105/212] Moved .as_ndelement method to Element.mapping --- holoviews/core/data.py | 26 -------------------------- holoviews/core/element.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 5eace86eae..71407bf6f1 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -361,32 +361,6 @@ def _process_df_dims(data, paramobjs, **kwargs): return kdims, vdims - @classmethod - def as_ndelement(cls, columns, **kwargs): - """ - This method transforms any ViewableElement type into a Table - as long as it implements a dimension_values method. - """ - if self.kdims: - keys = zip(*[cls.values(columns, dim.name) - for dim in self.kdims]) - else: - keys = [()]*len(values) - - if self.vdims: - values = zip(*[cls.values(columns, dim.name) - for dim in self.vdims]) - else: - values = [()]*len(keys) - - data = zip(keys, values) - params = dict(kdims=columns.kdims, vdims=columns.vdims, label=columns.label) - if not columns.params()['group'].default == columns.group: - params['group'] = columns.group - el_type = type(columns.element) - return el_type(data, **dict(params, **kwargs)) - - @staticmethod def length(columns): return len(columns.data) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index f3bec88096..29e197abcd 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -145,6 +145,34 @@ def dframe(self): return pd.DataFrame(dim_vals) + def mapping(self, as_table=False, **kwargs): + """ + This method transforms any ViewableElement type into a Table + as long as it implements a dimension_values method. 
+ """ + if self.kdims: + keys = zip(*[self.dimension_values(dim.name) + for dim in self.kdims]) + else: + keys = [()]*len(values) + + if self.vdims: + values = zip(*[self.dimension_values(dim.name) + for dim in self.vdims]) + else: + values = [()]*len(keys) + + data = zip(keys, values) + params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) + if not self.params()['group'].default == columns.group: + params['group'] = columns.group + mapping = NdElement(data, **dict(params, **kwargs)) + if as_table: + from ..element import Table + return Table(mapping) + return mapping + + def array(self, as_table=False, dimensions=[]): if dimensions: dims = [self.get_dimension(d) for d in dimensions] From a35279eefeab3eaee7ae0a23efb75d3671218783 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 01:35:10 +0000 Subject: [PATCH 106/212] Add Columns.data_type parameter to control which backend to use --- holoviews/core/data.py | 65 +++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 71407bf6f1..8d63e1700d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -28,6 +28,11 @@ class Columns(Element): + data_type = param.ObjectSelector(default='mapping', allow_None=True, + objects=['pandas', 'mapping'], + doc=""" + Defines the data type used for storing non-numeric data.""") + def __init__(self, data, **kwargs): data, params = ColumnarData._process_data(data, self.params(), **kwargs) super(Columns, self).__init__(data, **params) @@ -299,38 +304,46 @@ def _process_data(cls, data, paramobjs, **kwargs): data = data.data elif isinstance(data, Element): dimensions = data.dimensions(label=True) - columns = OrderedDict([(dim, data.dimension_values(dim)) - for dim in dimensions]) - if pd: - data = pd.DataFrame(columns) - else: - data = OrderedDict([(row[:data.ndims], row[data.ndims:]) - for row in zip(*columns.values())]) - elif util.is_dataframe(data): + data = tuple(data.dimension_values(d) for d in data.dimensions(dim)) + + if util.is_dataframe(data): kdims, vdims = cls._process_df_dims(data, paramobjs, **params) params['kdims'] = kdims params['vdims'] = vdims - elif not isinstance(data, (np.ndarray, dict)): + elif not isinstance(data, (NdElement, np.ndarray, dict)): if isinstance(data, tuple): - data = np.column_stack(data) - array = data + try: + array = np.column_stack(data) + except: + array = None else: - data = np.array() if data is None else list(data) - array = np.array(data) - # Check if data is of non-numeric type - if array.dtype.kind in ['S', 'U', 'O'] or array.ndim > 2: - # If data is in NdElement dictionary format or pandas - # is not available convert to OrderedDict - if ((not np.isscalar(data[0]) and len(data[0]) == 2 and - any(not np.isscalar(data[0][i]) for i in range(2))) - or not pd): - pass - else: - dimensions = (kwargs.get('kdims', ) + - kwargs.get('vdims', paramobjs['vdims'].default)) + data = [] if data is None else list(data) + try: + array = np.array(data) + except: + array = None + + # If ndim > 2 data is assumed to be a mapping + if array.ndim > 2 or (isinstance(data[0], tuple) and + any(isinstance(d, tuple) for d in data[0])): + pass + elif array is None or array.dtype.kind in ['S', 'U', 'O']: + # Check if data is of non-numeric type + # Then use defined data type + data_type = kwargs.get('data_type', paramobjs['data_type'].default) + kdims = kwargs.get('kdims', paramobjs['kdims'].default) + vdims = kwargs.get('vdims', 
paramobjs['vdims'].default) + if data_type == 'pandas': columns = [d.name if isinstance(d, Dimension) else d - for d in dimensions] - data = pd.DataFrame(data, columns=columns) + for d in kdims+vdims] + if isinstance(data, tuple): + data = pd.DataFrame.from_items([(c, d) for c, d in + zip(columns, data)]) + else: + data = pd.DataFrame(data, columns=columns) + else: + ndims = len(kdims) + data = [(row[:ndims], row[ndims:]) for row in zip(data)] else: data = array params.update(kwargs) From d8156e523949c132050b7009292deed0f992331a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 01:37:13 +0000 Subject: [PATCH 107/212] Updated Columns unit tests to set and unset data_type --- tests/testcolumns.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index bdf51e53ec..a5208d5340 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -15,6 +15,8 @@ class ColumnsNdElementTest(ComparisonTestCase): """ def setUp(self): + self.data_type = Columns.data_type + Columns.data_type = 'mapping' self.xs = range(11) self.ys = np.linspace(0, 1, 11) self.zs = np.sin(self.xs) @@ -25,6 +27,9 @@ def setUp(self): self.columns = Columns(dict(zip(self.xs, self.ys)), kdims=['x'], vdims=['y']) + def tearDown(self): + Columns.data_type = self.data_type + def test_columns_sort_vdim(self): columns = Columns(OrderedDict(zip(self.xs, -self.ys)), kdims=['x'], vdims=['y']) @@ -281,6 +286,8 @@ def test_columns_array(self): class ColumnsDFrameTest(ComparisonTestCase): def setUp(self): + self.data_type = Columns.data_type + Columns.data_type = 'pandas' self.column_data = [('M',10, 15, 0.8), ('M',16, 18, 0.6), ('F',12, 10, 0.8)] self.kdims = ['Gender', 'Age'] @@ -291,6 +298,9 @@ def setUp(self): self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}), kdims=['x'], vdims=['y']) + def tearDown(self): + Columns.data_type = self.data_type + def test_columns_range(self): self.assertEqual(self.columns.range('y'), (0., 1.)) From a28d0dda6c2a529dc612c6dab37410ad46c4eeda Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 03:09:30 +0000 Subject: [PATCH 108/212] Fixed bug in Columns NdElement constructor --- holoviews/core/data.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 8d63e1700d..732ff39dab 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -322,10 +322,9 @@ def _process_data(cls, data, paramobjs, **kwargs): array = np.array(data) except: array = None - # If ndim > 2 data is assumed to be a mapping - if array.ndim > 2 or (isinstance(data[0], tuple) and - any(isinstance(d, tuple) for d in data[0])): + if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0]) + or (array and array.ndim > 2)): pass elif array is None or array.dtype.kind in ['S', 'U', 'O']: # Check if data is of non-numeric type @@ -342,8 +341,11 @@ def _process_data(cls, data, paramobjs, **kwargs): else: data = pd.DataFrame(data, columns=columns) else: + if isinstance(data, tuple): + data = zip(*data) ndims = len(kdims) - data = [(row[:ndims], row[ndims:]) for row in zip(data)] + data = [(tuple(row[:ndims]), tuple(row[ndims:])) + for row in data] else: data = array params.update(kwargs) From 56e5bbf272f5939e457ee7f4aba2543d2cd7fe20 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 20:18:19 +0000 Subject: [PATCH 109/212] Fix to Columns constructor --- holoviews/core/data.py | 10 +++++++--- holoviews/element/chart.py | 2 +- 
2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 732ff39dab..1bb08732ec 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -36,7 +36,11 @@ class Columns(Element): def __init__(self, data, **kwargs): data, params = ColumnarData._process_data(data, self.params(), **kwargs) super(Columns, self).__init__(data, **params) - self.data = self.interface.validate_data(self, self.data) + self.data = self._validate_data(self.data) + + + def _validate_data(self, data): + return self.interface.validate_data(self, data) def __setstate__(self, state): @@ -304,7 +308,7 @@ def _process_data(cls, data, paramobjs, **kwargs): data = data.data elif isinstance(data, Element): dimensions = data.dimensions(label=True) - data = tuple(data.dimension_values(d) for d in data.dimensions(dim)) + data = tuple(data.dimension_values(d) for d in data.dimensions()) if util.is_dataframe(data): kdims, vdims = cls._process_df_dims(data, paramobjs, **params) @@ -324,7 +328,7 @@ def _process_data(cls, data, paramobjs, **kwargs): array = None # If ndim > 2 data is assumed to be a mapping if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0]) - or (array and array.ndim > 2)): + or (array is not None and array.ndim > 2)): pass elif array is None or array.dtype.kind in ['S', 'U', 'O']: # Check if data is of non-numeric type diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index eea2fe9d6c..0ec6efa52e 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -77,7 +77,7 @@ class ErrorBars(Chart): def _validate_data(self, data): if self.shape[1] == 3: - data = self.interface.add_dimension(data, self.vdims[1].name, + data = self.interface.add_dimension(self, self.vdims[1].name, 3, self.dimension_values(2)) return super(ErrorBars, self)._validate_data(data) From 2b3650a3495575874939c620961ed5d9703acf59 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 20:19:05 +0000 Subject: [PATCH 110/212] Fixed Columns non-numeric range handling --- holoviews/core/data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 1bb08732ec..e17d5a2b8e 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -279,7 +279,11 @@ def range(columns, dimension): if columns.get_dimension_type(dimension) is np.datetime64: return column.min(), column.max() else: - return (np.nanmin(column), np.nanmax(column)) + try: + return (np.nanmin(column), np.nanmax(column)) + except TypeError: + column.sort() + return column[0], column[-1] @staticmethod From 50e9ab8902868b9516e8e6951734edaad58f00ff Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 20:19:56 +0000 Subject: [PATCH 111/212] Fix to Element.mapping method --- holoviews/core/element.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 29e197abcd..7697815af0 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -164,8 +164,8 @@ def mapping(self, as_table=False, **kwargs): data = zip(keys, values) params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) - if not self.params()['group'].default == columns.group: - params['group'] = columns.group + if not self.params()['group'].default == self.group: + params['group'] = self.group mapping = NdElement(data, **dict(params, **kwargs)) if as_table: from ..element import Table From 
98c24d7e47f6016ef68d06848e71d2e48e8c1a90 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 22:57:35 +0000 Subject: [PATCH 112/212] Fix to Columns.__setstate__ for backward compatibility --- holoviews/core/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index e17d5a2b8e..f3d9eb6635 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -53,6 +53,7 @@ def __setstate__(self, state): self.data = OrderedDict(self.data, kdims=self.kdims, vdims=self.vdims, group=self.group, label=self.label) + self.interface = ColumnarNdElement def closest(self, coords): From c250701cd3110843cd22ca554e81cabcb32e5a2d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 4 Nov 2015 22:58:06 +0000 Subject: [PATCH 113/212] Fixed NdElement constructor for Columns support --- holoviews/core/element.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 7697815af0..1e6682c306 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -289,7 +289,8 @@ def __init__(self, data=None, **params): if isinstance(data, list) and all(np.isscalar(el) for el in data): data = (((k,), (v,)) for k, v in enumerate(data)) kdims = params.get('kdims', self.kdims) - if not isinstance(data, NdElement) and 'Index' not in kdims: + if (data is not None and not isinstance(data, NdMapping) + and 'Index' not in kdims): params['kdims'] = ['Index'] + list(kdims) data_items = data.items() if isinstance(data, dict) else data data = [((i,)+((k,) if np.isscalar(k) else k), v) for i, (k, v) in enumerate(data_items)] From 0ec3761266fdd9e8ca7c95ddfdcfe03d15474094 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 01:13:02 +0000 Subject: [PATCH 114/212] Updated Chart/Chart3D plots for compatibility with Columns --- holoviews/plotting/mpl/chart.py | 2 +- holoviews/plotting/mpl/chart3d.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 437aa293f8..736aae98d3 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -548,7 +548,7 @@ def initialize_plot(self, ranges=None): ranges = self.compute_ranges(self.hmap, self.keys[-1], ranges) ranges = match_spec(points, ranges) - ndims = points.data.shape[1] + ndims = points.shape[1] xs = points.dimension_values(0) if len(points.data) else [] ys = points.dimension_values(1) if len(points.data) else [] cs = points.dimension_values(self.color_index) if self.color_index < ndims else None diff --git a/holoviews/plotting/mpl/chart3d.py b/holoviews/plotting/mpl/chart3d.py index 0ff31ceb3e..64d0123585 100644 --- a/holoviews/plotting/mpl/chart3d.py +++ b/holoviews/plotting/mpl/chart3d.py @@ -126,11 +126,9 @@ def initialize_plot(self, ranges=None): return self._finalize_axis(key, ranges=ranges) def update_handles(self, axis, points, key, ranges=None): - ndims = points.data.shape[1] - xs = points.data[:, 0] if len(points.data) else [] - ys = points.data[:, 1] if len(points.data) else [] - zs = points.data[:, 2] if len(points.data) else [] - cs = points.data[:, self.color_index] if self.color_index < ndims else None + ndims = points.shape[1] + xs, ys, zs = (points.dimension_values(i) for i in range(3)) + cs = points.dimension_values(self.color_index) if self.color_index < ndims else None style = self.style[self.cyclic_index] if self.size_index < ndims and self.scaling_factor > 1: @@ -225,7 +223,7 @@ 
def initialize_plot(self, ranges=None): def update_handles(self, axis, element, key, ranges=None): style_opts = self.style[self.cyclic_index] dims = element.dimensions(label=True) - vrange = ranges[dims[2]] + vrange = ranges[dims.pop(2)] x, y, z = [element.dimension_values(d) for d in dims] artist = axis.plot_trisurf(x, y, z, vmax=vrange[1], vmin=vrange[0], **style_opts) From 8f0c3f16fc0ca261c1708e480bfaea223d823d6b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 01:56:46 +0000 Subject: [PATCH 115/212] Added Columns.concat classmethod --- holoviews/core/data.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index f3d9eb6635..2e5225659c 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -241,6 +241,15 @@ def collapse_data(cls, data, function=None, kdims=None, **kwargs): return ColumnarDataFrame.collapse_data(data, function, kdims, **kwargs) + @classmethod + def concat(cls, columns_objs): + columns = columns_objs[0] + if len({col.interface for col in columns_objs}) > 1: + raise TypeError("Ensure that all Columns share the same " + "data type.") + return columns.clone(columns.interface.concat(columns_objs)) + + def __len__(self): return self.interface.length(self) @@ -410,6 +419,11 @@ def shape(columns): def add_dimension(columns, dimension, dim_pos, values): return columns.data.add_dimension(dimension, dim_pos+1, values) + @staticmethod + def concat(columns_objs): + return [(k[1:], v) for col in columns_objs + for k, v in col.data.data.items()] + @staticmethod def array(columns): return columns.data.array(dimensions=columns.dimensions()) @@ -464,6 +478,11 @@ def range(columns, dimension): column = columns.data[columns.get_dimension(dimension).name] return (column.min(), column.max()) + + @staticmethod + def concat(columns_objs): + return pd.concat([col.data for col in columns_objs]) + @staticmethod def groupby(columns, dimensions, container_type, group_type, **kwargs): @@ -631,6 +650,11 @@ def array(columns): return columns.data + @staticmethod + def concat(columns_objs): + return np.concatenate([col.data for col in columns_objs]) + + @staticmethod def dframe(columns, as_table=False): return Element.dframe(columns, as_table) From 8fe495b93fddb697b801aa197593de4f4476cd26 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 01:57:14 +0000 Subject: [PATCH 116/212] Fixed NdMapping Table conversion --- holoviews/core/ndmapping.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 6f7f007e2f..4c0887ad02 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -425,16 +425,13 @@ def info(self): def table(self, **kwargs): "Creates a table from the stored keys and data." 
- table = None + tables = [] for key, value in self.data.items(): value = value.table(**kwargs) for idx, (dim, val) in enumerate(zip(self.kdims, key)): value = value.add_dimension(dim, idx, val) - if table is None: - table = value - else: - table.update(value) - return table + tables.append(value) + return value.concat(tables) def dframe(self): From 2bed1d87af5e57c9daa98b6e9a81b33d77282432 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 01:57:54 +0000 Subject: [PATCH 117/212] Fixed Trisurface Table conversion --- holoviews/element/tabular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 57ffabaa70..08731a495b 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -250,7 +250,7 @@ def scatter3d(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Scatter3D return self._conversion(kdims, vdims, mdims, Scatter3D, **kwargs) - def scatter3d(self, kdims=None, vdims=None, mdims=None, **kwargs): + def trisurface(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Trisurface return self._conversion(kdims, vdims, mdims, Trisurface, **kwargs) From 0c5cd498b2c22992bf75c83d1668f6aa02cd3cb2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 01:58:34 +0000 Subject: [PATCH 118/212] Fixed ColumnarArray.select support for tuples --- holoviews/core/data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 2e5225659c..fb763a055a 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -730,6 +730,8 @@ def select(columns, **selection): value = selection.pop('value', None) for d, slc in selection.items(): idx = columns.get_dimension_index(d) + if isinstance(slc, tuple): + slc = slice(*slc) if isinstance(slc, slice): if slc.start is not None: mask &= slc.start <= data[:, idx] From 26438269963f9b4e0818cbd4356b82c2286ae2cd Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 02:44:30 +0000 Subject: [PATCH 119/212] Minor fixes to Sampling_Data tutorial --- doc/Tutorials/Sampling_Data.ipynb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/Tutorials/Sampling_Data.ipynb b/doc/Tutorials/Sampling_Data.ipynb index 1112c8bcfe..c95f9646d0 100644 --- a/doc/Tutorials/Sampling_Data.ipynb +++ b/doc/Tutorials/Sampling_Data.ipynb @@ -274,7 +274,7 @@ "outputs": [], "source": [ "raster = hv.Raster(np.random.rand(3, 3))\n", - "raster + hv.Points(raster.table().keys())[-1:3, -1:3] + raster.table()" + "raster + hv.Points(raster)[-1:3, -1:3] + raster.table()" ] }, { @@ -316,7 +316,7 @@ "source": [ "extents = (0, 0, 3, 3)\n", "img = hv.Image(np.random.rand(3, 3), bounds=extents)\n", - "img + hv.Points(img.table().keys(), extents=extents) + img.table()" + "img + hv.Points(img, extents=extents) + img.table()" ] }, { @@ -481,7 +481,7 @@ "outputs": [], "source": [ "sampled = img.sample(y=5)\n", - "img + img * img_coords * hv.Points(zip(sampled.table().keys(), [img.closest((5,5))[1]]*10)) + sampled" + "img + img * img_coords * hv.Points(zip(sampled['x'], [img.closest(y=5)]*10)) + sampled" ] }, { @@ -610,8 +610,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "scrolled": true + "collapsed": false }, "outputs": [], "source": [ From 2d44be3c1482be8f593480722c6163566fff70fb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 02:48:20 +0000 Subject: [PATCH 120/212] Various fixes and 
improvements to Raster type methods --- holoviews/element/raster.py | 49 ++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 6361d990ca..d74c72326a 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -1,3 +1,4 @@ +from operator import itemgetter from itertools import product import numpy as np import colorsys @@ -94,9 +95,8 @@ def sample(self, samples=[], **sample_values): samples = zip(*[c if isinstance(c, list) else [c] for didx, c in sorted([(self.get_dimension_index(k), v) for k, v in sample_values.items()])]) - table_data = OrderedDict() - for c in samples: - table_data[c] = self._zdata[self._coord2matrix(c)] + table_data = [c+(self._zdata[self._coord2matrix(c)],) + for c in samples] params['kdims'] = self.kdims return Table(table_data, **params) else: @@ -135,13 +135,15 @@ def reduce(self, dimensions=None, function=None, **reduce_map): """ reduce_map = self._reduce_map(dimensions, function, reduce_map) if len(reduce_map) == self.ndims: - return function(self.data) + if isinstance(function, np.ufunc): + return function.reduce(self.data, axis=None) + else: + return function(self.data) else: dimension, reduce_fn = list(reduce_map.items())[0] other_dimension = [d for d in self.kdims if d.name != dimension] oidx = self.get_dimension_index(other_dimension[0]) x_vals = self.dimension_values(other_dimension[0].name, unique=True) - if oidx: x_vals = np.sort(x_vals) reduced = reduce_fn(self._zdata, axis=oidx) data = zip(x_vals, reduced if not oidx else reduced[::-1]) params = dict(dict(self.get_param_values(onlychanged=True)), @@ -483,16 +485,41 @@ def _convert_element(self, data): return super(Image, self)._convert_element(data) - def closest(self, coords): + def closest(self, coords=[], **kwargs): """ - Given a single coordinate tuple (or list of coordinates) - return the coordinate (or coordinatess) needed to address the - corresponding Image exactly. + Given a single coordinate or multiple coordinates as + a tuple or list of tuples or keyword arguments matching + the dimension closest will find the closest actual x/y + coordinates. 
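+
+        For example (the img object here is purely illustrative),
+        img.closest((0.1, 0.2)) snaps a single (x, y) pair to the
+        nearest cell center, and img.closest(y=5) returns only the
+        closest actual y coordinate.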
""" + if kwargs and coords: + raise ValueError("Specify coordinate using as either a list " + "keyword arguments not both") + if kwargs: + coords = [] + getter = [] + for k, v in kwargs.items(): + idx = self.get_dimension_index(k) + if np.isscalar(v): + coords.append((0, v) if idx else (v, 0)) + else: + if isinstance(coords, tuple): + coords = [(0, c) if idx else (c, 0) for c in v] + if len(coords) not in [0, len(v)]: + raise ValueError("Length of samples must match") + elif len(coords): + coords = [(t[abs(idx-1)], c) if idx else (c, t[abs(idx-1)]) + for c, t in zip(v, coords)] + getter.append(idx) + else: + getter = [0, 1] + getter = itemgetter(*sorted(getter)) + if len(coords) == 1: + coords = coords[0] if isinstance(coords, tuple): - return self.closest_cell_center(*coords) + return getter(self.closest_cell_center(*coords)) else: - return [self.closest_cell_center(*el) for el in coords] + return [getter(self.closest_cell_center(*el)) for el in coords] def __getitem__(self, coords): From 8170abb3aacb65df47083102f7b7b446fcbdb67d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 03:19:59 +0000 Subject: [PATCH 121/212] Fixed Element.dframe method and removed Table.dframe --- holoviews/core/element.py | 11 +++++++++-- holoviews/element/tabular.py | 5 ----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 1e6682c306..1cf7a5de1a 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -138,11 +138,18 @@ def table(self): return Table(self) - def dframe(self): + def dframe(self, as_table=False): import pandas as pd column_names = self.dimensions(label=True) dim_vals = OrderedDict([(dim, self[dim]) for dim in column_names]) - return pd.DataFrame(dim_vals) + data = pd.DataFrame(dim_vals) + if as_table: + from ..element import Table + params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) + if not self.params()['group'].default == self.group: + params['group'] = self.group + return Table(data, **params) + return data def mapping(self, as_table=False, **kwargs): diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 08731a495b..bfc99a3b26 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -191,11 +191,6 @@ def to(self): """ return TableConversion(self) - def dframe(self, value_label='data'): - dframe = super(Table, self).dframe(value_label=value_label) - # Drop 'Row' column as it is redundant with dframe index - if self.indexed: del dframe['Row'] - return dframe From 03a1a49cee5707049d06f393b568ce32aca9d3e0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 03:21:06 +0000 Subject: [PATCH 122/212] Fixed Tabular.cols property --- holoviews/core/element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 1cf7a5de1a..eb7bea3d3e 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -219,7 +219,7 @@ def rows(self): @property def cols(self): - return self.ndims + max([1, len(self.vdims)]) + return len(self.dimensions()) def pprint_cell(self, row, col): From b1ef22b96e67b78633b0de2d32add25b3667d108 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 03:21:40 +0000 Subject: [PATCH 123/212] Readded DFrame conversion interface with deprecation warning --- holoviews/interface/pandas.py | 151 +++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 3 deletions(-) diff --git 
a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index 717abf1a0a..83ffcae6bd 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -20,7 +20,8 @@ from ..core import ViewableElement, NdMapping, Columns, NdOverlay,\ NdLayout, GridSpace, NdElement, HoloMap -from ..element import Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface, TableConversion +from ..element import (Chart, Table, Curve, Scatter, Bars, Points, + VectorField, HeatMap, Scatter3D, Surface) class DataFrameView(Columns): @@ -119,6 +120,18 @@ def holomap(self, kdims=[]): return self.groupby(kdims, HoloMap) +def is_type(df, baseType): + test = [issubclass(np.dtype(d).type, baseType) for d in df.dtypes] + return pd.DataFrame(data=test, index=df.columns, columns=["numeric"]) + + +def is_number(df): + try: + return is_type(df, np.number) + except: + return False + + class DFrame(DataFrameView): """ DFrame is a DataFrameView type, which additionally provides @@ -133,5 +146,137 @@ class DFrame(DataFrameView): * Optional map_dims (list of strings). """ - def to(self): - return TableConversion(self) + def _convert(self, kdims=[], vdims=[], mdims=[], reduce_fn=None, + view_type=None, dropna=False, **kwargs): + """ + Conversion method to generate HoloViews objects from a + DFrame. Accepts key, value and HoloMap dimensions. + If no HoloMap dimensions are supplied then non-numeric + dimensions are used. If a reduce_fn such as np.mean is + supplied the data is aggregated for each group along the + key_dimensions. Also supports a dropna option. + """ + + # Deprecation warning + self.warning("The DFrame conversion interface is deprecated" + "and has been superseded by a real integration" + "with pandas.") + + if not isinstance(kdims, list): kdims = [kdims] + if not isinstance(vdims, list): vdims = [vdims] + + # Process dimensions + sel_dims = kdims + vdims + mdims + el_dims = kdims + vdims + if not mdims and not reduce_fn: + numeric = is_number(self.data) + mdims = [dim for dim in self.dimensions(label=True) + if dim not in sel_dims and not numeric.ix[dim][0]] + # Find leftover dimensions to reduce + if reduce_fn: + reduce_dims = kdims + else: + reduce_dims = [] + + key_dims = [self.get_dimension(d) for d in kdims] + val_dims = [self.get_dimension(d) for d in vdims] + if mdims: + groups = self.groupby(mdims, HoloMap) + mdims = [self.get_dimension(d) for d in mdims] + else: + groups = NdMapping({0: self}) + mdims = ['Default'] + create_kwargs = dict(kdims=key_dims, vdims=val_dims, + view_type=view_type) + create_kwargs.update(kwargs) + + # Convert each element in the HoloMap + hmap = HoloMap(kdims=mdims) + for k, v in groups.items(): + if reduce_dims: + v = v.aggregate(reduce_dims, function=reduce_fn) + v_indexes = [v.data.index.names.index(d) for d in kdims + if d in v.data.index.names] + v = v.apply('reset_index', level=v_indexes) + + vdata = v.data.filter(el_dims) + vdata = vdata.dropna() if dropna else vdata + if issubclass(view_type, Chart): + data = [np.array(vdata[d]) for d in el_dims] + hmap[k] = self._create_chart(data, **create_kwargs) + else: + data = [np.array(vdata[d]) for d in el_dims] + hmap[k] = self._create_table(data, **create_kwargs) + return hmap if mdims != ['Default'] else hmap.last + + + def _create_chart(self, data, kdims=None, vdims=None, + view_type=None, **kwargs): + inherited = dict(kdims=kdims, + vdims=vdims, label=self.label) + return view_type(np.vstack(data).T, **dict(inherited, **kwargs)) + + + def _create_table(self, data, kdims=None, 
vdims=None, + view_type=None, **kwargs): + ndims = len(kdims) + key_data, value_data = data[:ndims], data[ndims:] + keys = zip(*key_data) + values = zip(*value_data) + inherited = dict(kdims=kdims, + vdims=vdims, label=self.label) + return view_type(zip(keys, values), **dict(inherited, **kwargs)) + + + def curve(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=Curve, **kwargs) + + def points(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=Points, **kwargs) + + def scatter3d(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=Scatter3D, **kwargs) + + def scatter(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=Scatter, **kwargs) + + def vectorfield(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=VectorField, **kwargs) + + def bars(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=Bars, **kwargs) + + def table(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + return self._convert(kdims, vdims, mdims, reduce_fn, + view_type=Table, **kwargs) + + def heatmap(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + tables = self.table(kdims, vdims, mdims, reduce_fn, **kwargs) + + if isinstance(tables, HoloMap): + kwargs = dict(tables.last.get_param_values(onlychanged=True), + **kwargs) + return tables.map(lambda x: HeatMap(x, **kwargs), ['Table']) + else: + kwargs = dict(tables.get_param_values(onlychanged=True), + **kwargs) + return HeatMap(tables, **kwargs) + + def surface(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): + if not isinstance(kdims, list): kdims = [kdims] + if not isinstance(vdims, list): vdims = [vdims] + heatmap = self.heatmap(kdims, vdims, mdims, reduce_fn, **kwargs) + key_dims = [self.get_dimension(d) for d in kdims] + val_dims = [self.get_dimension(d) for d in vdims] + kwargs = dict(kwargs, kdims=key_dims, vdims=val_dims, + label=self.label) + if isinstance(heatmap, HoloMap): + return heatmap.map(lambda x: Surface(x.data, **kwargs), ['HeatMap']) + else: + return Surface(heatmap.data, **kwargs) From 2e2bc62195bb453dc6f27acf4a82697d32664b16 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 03:26:36 +0000 Subject: [PATCH 124/212] Fix to Columns.__setstate__ --- holoviews/core/data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index fb763a055a..d24578cf81 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -50,10 +50,9 @@ def __setstate__(self, state): """ self.__dict__ = state if isinstance(self.data, OrderedDict): - self.data = OrderedDict(self.data, kdims=self.kdims, - vdims=self.vdims, group=self.group, - label=self.label) - self.interface = ColumnarNdElement + self.data = NdElement(self.data, kdims=self.kdims, + vdims=self.vdims, group=self.group, + label=self.label) def closest(self, coords): From 63f8607ef18f2ebd6e3097fb907488b3e508a856 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 03:54:48 +0000 Subject: [PATCH 125/212] Readded old DFrame groupby for backwards compatibility --- holoviews/interface/pandas.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git 
a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index 83ffcae6bd..e59550ec9a 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -88,6 +88,22 @@ def __init__(self, data, dimensions={}, kdims=None, clone_override=False, self.data.columns = self.dimensions('key', True) + def groupby(self, dimensions, container_type=NdMapping): + invalid_dims = [d for d in dimensions if d not in self.dimensions()] + if invalid_dims: + raise Exception('Following dimensions could not be found %s.' + % invalid_dims) + + index_dims = [self.get_dimension(d) for d in dimensions] + view_dims = [d for d in self.kdims if d not in dimensions] + mapping_data = [] + for k, v in self.data.groupby([self.get_dimension(d).name for d in dimensions]): + data = v.drop(dimensions, axis=1) + mapping_data.append((k, self.clone(data, kdims=[self.get_dimension(d) + for d in data.columns]))) + return container_type(mapping_data, kdims=index_dims) + + def apply(self, name, *args, **kwargs): """ Applies the Pandas dframe method corresponding to the supplied From 9c6c6308f72f494a815b5019b6b5e76237bc0e90 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 17:13:48 +0000 Subject: [PATCH 126/212] Various fixes for Columns interface --- holoviews/core/data.py | 18 ++++++++++++------ holoviews/core/element.py | 13 +++++++++++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index d24578cf81..1533dbf5d2 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -258,9 +258,13 @@ def shape(self): return self.interface.shape(self) - def dimension_values(self, dim): + def dimension_values(self, dim, unique=False): dim = self.get_dimension(dim).name - return self.interface.values(self, dim) + dim_vals = self.interface.values(self, dim) + if unique: + return np.unique(dim_vals) + else: + return dim_vals def dframe(self, as_table=False): @@ -327,8 +331,10 @@ def _process_data(cls, data, paramobjs, **kwargs): kdims, vdims = cls._process_df_dims(data, paramobjs, **params) params['kdims'] = kdims params['vdims'] = vdims - elif not isinstance(data, (NdElement, np.ndarray, dict)): - if isinstance(data, tuple): + elif not isinstance(data, (NdElement, dict)): + if isinstance(data, np.ndarray): + array = data + elif isinstance(data, tuple): try: array = np.column_stack(data) except: @@ -697,7 +703,7 @@ def groupby(columns, dimensions, container_type=HoloMap, group_type=NdMapping, r # Find unique entries along supplied dimensions # by creating a view that treats the selected # groupby keys as a single object. 
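            # (np.unique on the packed view returns first-occurrence
            # indices; sorting them below keeps the groups in their
            # original order of appearance.)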
- indices = data[:, dim_idxs] + indices = data[:, dim_idxs].copy() view = indices.view(np.dtype((np.void, indices.dtype.itemsize * indices.shape[1]))) _, idx = np.unique(view, return_index=True) idx.sort() @@ -707,7 +713,7 @@ def groupby(columns, dimensions, container_type=HoloMap, group_type=NdMapping, r # to apply the group selection grouped_data = [] for group in unique_indices: - mask = np.logical_or.reduce([data[:, i] == group[i] + mask = np.logical_and.reduce([data[:, i] == group[i] for i in range(ndims)]) group_data = data[mask, ndims:] if not raw: diff --git a/holoviews/core/element.py b/holoviews/core/element.py index eb7bea3d3e..8e2bbdd609 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -399,6 +399,9 @@ def __getitem__(self, args): ndmap_index = args[:self.ndims] if isinstance(args, tuple) else args subtable = NdMapping.__getitem__(self, ndmap_index) + if isinstance(subtable, NdElement) and all(np.isscalar(idx) for idx in ndmap_index[1:]): + if len(subtable) == 1: + subtable = subtable.data.values()[0] if not isinstance(subtable, NdElement): if len(self.vdims) > 1: subtable = self.__class__([(args, subtable)], label=self.label, @@ -427,11 +430,17 @@ def sample(self, samples=[]): """ Allows sampling of the Table with a list of samples. """ - sample_data = OrderedDict() + sample_data = [] + offset = 0 for i, sample in enumerate(samples): sample = (sample,) if np.isscalar(sample) else sample value = self[(slice(None),)+sample] - sample_data[(i,)+sample] = value.data.values()[0] + if isinstance(value, NdElement): + for idx, (k, v) in enumerate(value.data.items()): + sample_data.append(((i+offset+idx,)+k, v)) + offset += idx + else: + sample_data.append(((i+offset,)+sample, (value,))) return self.clone(sample_data) From ed3d5ec9a9c867e843dbf5d313cabbed4e8299fb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 17:14:33 +0000 Subject: [PATCH 127/212] Improved DFrame warning formatting --- holoviews/interface/pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index e59550ec9a..a73267f14c 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -174,8 +174,8 @@ def _convert(self, kdims=[], vdims=[], mdims=[], reduce_fn=None, """ # Deprecation warning - self.warning("The DFrame conversion interface is deprecated" - "and has been superseded by a real integration" + self.warning("The DFrame conversion interface is deprecated " + "and has been superseded by a real integration " "with pandas.") if not isinstance(kdims, list): kdims = [kdims] From 2a69a6147d8c93934218cfc0e98190d61286bfdb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 18:15:05 +0000 Subject: [PATCH 128/212] Updated HeatMap to use Columns object internally --- holoviews/element/raster.py | 54 ++++++++++++++---------------- holoviews/plotting/bokeh/raster.py | 7 ++-- holoviews/plotting/mpl/raster.py | 22 +++++------- 3 files changed, 38 insertions(+), 45 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index d74c72326a..2359651984 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -5,7 +5,8 @@ import param from ..core import util -from ..core import OrderedDict, Dimension, NdMapping, Element2D, Overlay, Element +from ..core import (OrderedDict, Dimension, NdMapping, Element2D, + Overlay, Element, Columns) from ..core.boundingregion import BoundingRegion, BoundingBox from ..core.sheetcoords 
import SheetCoordinateSystem, Slice from .chart import Curve @@ -358,33 +359,29 @@ def __init__(self, data, extents=None, **params): def _process_data(self, data, params): dimensions = {group: params.get(group, getattr(self, group)) for group in self._dim_groups[:2]} - if isinstance(data, NdMapping): + if isinstance(data, Columns): if 'kdims' not in params: dimensions['kdims'] = data.kdims if 'vdims' not in params: dimensions['vdims'] = data.vdims elif isinstance(data, (dict, OrderedDict, type(None))): - data = NdMapping(data, **dimensions) + data = Columns(data, **dimensions) elif isinstance(data, Element): data = data.table() if not data.ndims == 2: raise TypeError('HeatMap conversion requires 2 key dimensions') else: - raise TypeError('HeatMap only accepts dict or NdMapping types.') + raise TypeError('HeatMap only accepts Columns or dict types.') - keys = list(data.keys()) - dim1_keys = NdMapping([(k[0], None) for k in keys], - kdims=[self.kdims[0]]).keys() - dim2_keys = NdMapping([(k[1], None) for k in keys], - kdims=[self.kdims[1]]).keys() - grid_keys = [((i1, d1), (i2, d2)) for i1, d1 in enumerate(dim1_keys) - for i2, d2 in enumerate(dim2_keys)] - - array = np.zeros((len(dim2_keys), len(dim1_keys))) - for (i1, d1), (i2, d2) in grid_keys: - val = data.get((d1, d2), np.NaN) - array[len(dim2_keys)-i2-1, i1] = val[0] if isinstance(val, tuple) else val + if len(dimensions['vdims']) > 1: + raise ValueError("HeatMap data may only have one value dimension") + d1keys = data.dimension_values(0, True) + d2keys = data.dimension_values(1, True) + coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys] + dense_data = data.clone(coords) + data = data.concat([data, dense_data]).aggregate(data.kdims, np.nanmean).sort(data.kdims) + array = np.flipud(data.dimension_values(2).reshape(len(d1keys), len(d2keys))) return data, array, dimensions @@ -403,23 +400,24 @@ def __getitem__(self, coords): def dense_keys(self): - keys = list(self._data.keys()) - dim1_keys = NdMapping([(k[0], None) for k in keys], - kdims=[self.kdims[0]]).keys() - dim2_keys = NdMapping([(k[1], None) for k in keys], - kdims=[self.kdims[1]]).keys() - return dim1_keys, dim2_keys + d1keys = np.unique(self._data.dimension_values(0)) + d2keys = np.unique(self._data.dimension_values(1)) + return zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys]) - def dimension_values(self, dim, unique=True): + def dimension_values(self, dim, unique=False): dim = self.get_dimension(dim).name if dim in self.kdims: - idx = self.get_dimension_index(dim) - return [k[idx] for k in self._data.keys()] + if unique: + return np.unique(self._data.dimension_values(dim)) + else: + idx = self.get_dimension_index(dim) + return self.dense_keys()[idx] elif dim in self.vdims: - idx = self.vdims.index(dim) - return [v[idx] if isinstance(v, tuple) else v - for v in self._data.values()] + if unique: + return self._data.dimension_values(dim) + else: + return np.flipud(self.data).flatten() else: return super(HeatMap, self).dimension_values(dim) diff --git a/holoviews/plotting/bokeh/raster.py b/holoviews/plotting/bokeh/raster.py index 2ef2950cb8..0fa38afd05 100644 --- a/holoviews/plotting/bokeh/raster.py +++ b/holoviews/plotting/bokeh/raster.py @@ -95,7 +95,7 @@ class HeatmapPlot(ElementPlot): def _axes_props(self, plots, subplots, element, ranges): labels = self._axis_labels(element, plots) - xvals, yvals = element.dense_keys() + xvals, yvals = [element.dimension_values(i, True) for i in range(2)] plot_ranges = {'x_range': [str(x) for x in xvals], 'y_range': [str(y) for y 
in yvals]} return ('auto', 'auto'), labels, plot_ranges @@ -106,8 +106,9 @@ def get_data(self, element, ranges=None): cmap = style.get('palette', style.get('cmap', None)) cmap = get_cmap(cmap) x, y, z = element.dimensions(label=True) - zvals = np.rot90(element.data, 3).flatten() + zvals = element.dimension_values(z) colors = map_colors(zvals, ranges[z], cmap) - xvals, yvals = zip(*product(*element.dense_keys())) + xvals, yvals = [[str(v) for v in element.dimension_values(i)] + for i in range(2)] return ({x: xvals, y: yvals, z: zvals, 'color': colors}, {'x': x, 'y': y, 'fill_color': 'color', 'height': 1, 'width': 1}) diff --git a/holoviews/plotting/mpl/raster.py b/holoviews/plotting/mpl/raster.py index 537a9d7d2b..7591c34fa0 100644 --- a/holoviews/plotting/mpl/raster.py +++ b/holoviews/plotting/mpl/raster.py @@ -104,7 +104,8 @@ def initialize_plot(self, ranges=None): def _compute_ticks(self, element, ranges): if isinstance(element, HeatMap): xdim, ydim = element.kdims - dim1_keys, dim2_keys = element.dense_keys() + dim1_keys, dim2_keys = [element.dimension_values(i, True) + for i in range(2)] num_x, num_y = len(dim1_keys), len(dim2_keys) x0, y0, x1, y1 = element.extents xstep, ystep = ((x1-x0)/num_x, (y1-y0)/num_y) @@ -120,23 +121,16 @@ def _compute_ticks(self, element, ranges): def _annotate_values(self, element): axis = self.handles['axis'] val_dim = element.vdims[0] - dim1_keys, dim2_keys = element.dense_keys() - num_x, num_y = len(dim1_keys), len(dim2_keys) + d1keys, d2keys, vals = [element.dimension_values(i) for i in range(3)] + d1uniq, d2uniq = [element.dimension_values(i, True) for i in range(2)] + num_x, num_y = len(d1uniq), len(d2uniq) xstep, ystep = 1.0/num_x, 1.0/num_y xpos = np.linspace(xstep/2., 1.0-xstep/2., num_x) ypos = np.linspace(ystep/2., 1.0-ystep/2., num_y) - coords = product(dim1_keys, dim2_keys) plot_coords = product(xpos, ypos) - for plot_coord, coord in zip(plot_coords, coords): - if isinstance(element, HeatMap): - val = element._data.get(coord, np.NaN) - val = val[0] if isinstance(val, tuple) else val - else: - val = element[coord] - val = val_dim.type(val) if val_dim.type else val - val = val[0] if isinstance(val, tuple) else val - text = val_dim.pprint_value(val) - text = '' if val is np.nan else text + for plot_coord, v in zip(plot_coords, vals): + text = val_dim.pprint_value(v) + text = '' if v is np.nan else text if plot_coord not in self.handles['annotations']: annotation = axis.annotate(text, xy=plot_coord, xycoords='axes fraction', From e8a6ae98ef71e9ed360976b83b3bc7c6971909dc Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 18:15:46 +0000 Subject: [PATCH 129/212] Removed row indexing from Tables --- holoviews/element/tabular.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index bfc99a3b26..0db9294307 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -148,14 +148,6 @@ class Table(Columns, Tabular): format and is convertible to most other Element types. """ - kdims = param.List(default=[Dimension(name="Row")], doc=""" - One or more key dimensions. By default, the special 'Row' - dimension ensures that the table is always indexed by the row - number. 
- - If no key dimensions are set, only one entry can be stored - using the empty key ().""") - group = param.String(default='Table', constant=True, doc=""" The group is used to describe the Table.""") @@ -175,13 +167,6 @@ def _add_item(self, key, value, sort=True): value = value.data.values() super(Table, self)._add_item(key, value, sort) - @property - def indexed(self): - """ - Whether this is an indexed table: a table that has a single - key dimension called 'Row' corresponds to the row number. - """ - return self.ndims == 1 and self.kdims[0].name == 'Row' @property def to(self): From 7f3c859e725d40accb275c5cb65eea0aca249a87 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 18:17:09 +0000 Subject: [PATCH 130/212] Minor fixes to Pandas_Conversion Tutorial --- doc/Tutorials/Pandas_Conversion.ipynb | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/doc/Tutorials/Pandas_Conversion.ipynb b/doc/Tutorials/Pandas_Conversion.ipynb index 34acac59c3..e1e7f1033e 100644 --- a/doc/Tutorials/Pandas_Conversion.ipynb +++ b/doc/Tutorials/Pandas_Conversion.ipynb @@ -140,24 +140,6 @@ "HTML(df.reset_index().to_html())" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For now though, the a, b, and c columns is all we'll need. To confirm the dataframe was converted correctly we can call the `.info` method on Table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "example_table.info" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -267,7 +249,7 @@ }, "outputs": [], "source": [ - "macro_df = pd.read_csv('http://ioam.github.com/holoviews/Tutorials/macro.csv', sep='\\t')" + "macro_df = pd.read_csv('http://ioam.github.com/holoviews/Tutorials/macro.csv', '\\t')" ] }, { @@ -404,7 +386,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now that we've extracted the gdp_curves we can apply some operations to them. The collapse method applies some function across the data along the supplied dimensions. This lets us quickly compute the mean GDP Growth by year for example, but it also allows us to map a function with parameters to the data and visualize the resulting samples. A simple example is computing a curve for each percentile and embedding it in an NdOverlay.\n", + "Now that we've extracted the gdp_curves we can apply some operations to them. The collapse method applies some function across the data along the supplied dimensions. This let's us quickly compute a the mean GDP Growth by year for example, but it also allows us to map a function with parameters to the data and visualize the resulting samples. A simple example is computing a curve for each percentile and embedding it in an NdOverlay.\n", "\n", "Additionally we can apply a Palette to visualize the range of percentiles." ] @@ -462,9 +444,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Using the .select method we can pull out the data for just a few countries and specific years. We can also make more advanced use of the Palettes.\n", + "Using the .select method we can pull out the data for just a few countries and specific years. We can also make more advanced use the Palettes.\n", "\n", - "Palettes can be customized by selecting only a subrange of the underlying cmap to draw the colors from. 
The Palette draws samples from the colormap using the supplied sample_fn, which by default just draws linear samples but may be overriden with any function that draws samples in the supplied ranges. By slicing the Set1 colormap we draw colors only from the upper half of the palette and then reverse it." + "Palettes can customized by selecting only a subrange of the underlying cmap to draw the colors from. The Palette draws samples from the colormap using the supplied sample_fn, which by default just draws linear samples but may be overriden with any function that draws samples in the supplied ranges. By slicing the Set1 colormap we draw colors only from the upper half of the palette and then reverse it." ] }, { From f6c225537d63821a34b493ed63a6d9e38e56e55f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 18:52:21 +0000 Subject: [PATCH 131/212] Implemented cross-type Columns concatenation --- holoviews/core/data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 1533dbf5d2..923270d13d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -244,8 +244,12 @@ def collapse_data(cls, data, function=None, kdims=None, **kwargs): def concat(cls, columns_objs): columns = columns_objs[0] if len({col.interface for col in columns_objs}) > 1: - raise TypeError("Ensure that all Columns share the same " - "data type.") + if isinstance(columns.data, NdElement): + columns_objs = [co.mapping(as_table=True) for co in columns_objs] + elif isinstance(columns.data, np.ndarray): + columns_objs = [co.array(as_table=True) for co in columns_objs] + elif util.is_dataframe(data[0]): + columns_objs = [co.dframe(as_table=True) for co in columns_objs] return columns.clone(columns.interface.concat(columns_objs)) From b647cdfd0e74352a6602f424d94628ee7e478029 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 20:34:58 +0000 Subject: [PATCH 132/212] Fixes to parameter passing in Columns conversions --- holoviews/core/data.py | 12 +++++++++--- holoviews/core/util.py | 10 +++++++--- holoviews/element/tabular.py | 12 ++++++++---- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 923270d13d..a3e51e8a10 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -283,7 +283,7 @@ def array(self, as_table=False): raise ValueError("%s data contains non-numeric type, " "could not convert to array based " "Element" % type(self).__name__) - return Table(array, **util.get_param_values(self, Table)) + return Table(array, **util.get_param_values(self)) return array @@ -694,7 +694,7 @@ def reindex(columns, kdims=None, vdims=None): @staticmethod - def groupby(columns, dimensions, container_type=HoloMap, group_type=NdMapping, raw=False, **kwargs): + def groupby(columns, dimensions, container_type=HoloMap, group_type=None, raw=False, **kwargs): data = columns.data # Get dimension objects, labels, indexes and data @@ -713,6 +713,9 @@ def groupby(columns, dimensions, container_type=HoloMap, group_type=NdMapping, r idx.sort() unique_indices = indices[idx] + params = util.get_param_values(columns) + params.update(kwargs) + # Iterate over the unique entries building masks # to apply the group selection grouped_data = [] @@ -721,7 +724,10 @@ def groupby(columns, dimensions, container_type=HoloMap, group_type=NdMapping, r for i in range(ndims)]) group_data = data[mask, ndims:] if not raw: - group_data = columns.clone(group_data, 
new_type=group_type, **kwargs) + if group_type is None: + group_data = columns.clone(group_data, **params) + else: + group_data = group_type(group_data, **params) grouped_data.append((tuple(group), group_data)) if raw: diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 0e10a6008f..3bd8e44ad7 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -611,6 +611,10 @@ def is_dataframe(data): (bz is not None and isinstance(data, bz.Data))) -def get_param_values(data, new_type): - return {k: v for k, v in self.get_param_values(onlychanged=True) - if k in new_type.params()} +def get_param_values(data): + params = dict(kdims=data.kdims, vdims=data.vdims, + label=data.label) + if data.group != data.params()['group'].default: + params['group'] = data.group + return params + diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 0db9294307..035c74d793 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -197,11 +197,15 @@ def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, **kwarg vdims = self._table.vdims if mdims is None: mdims = [d for d in self._table.kdims if d not in kdims] - elif vdims and not isinstance(vdims, list): vdims = [vdims] + if vdims and not isinstance(vdims, list): vdims = [vdims] + selected = self._table.reindex(mdims+kdims, vdims) - params = dict({'kdims': [selected.get_dimension(kd) for kd in kdims], - 'vdims': [selected.get_dimension(vd) for vd in vdims]}, - **kwargs) + params = {'kdims': [selected.get_dimension(kd) for kd in kdims], + 'vdims': [selected.get_dimension(vd) for vd in vdims], + 'label': selected.label} + if selected.group != selected.params()['group'].default: + params['group'] = selected.group + params.update(kwargs) if len(kdims) == selected.ndims: return new_type(selected, **params) return selected.groupby(mdims, container_type=HoloMap, group_type=new_type, **params) From 8f246a72ee0877f39ef25360895e58fa5433e8c7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 20:36:50 +0000 Subject: [PATCH 133/212] Minor fix to Showcase notebook --- doc/Tutorials/Showcase.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/Tutorials/Showcase.ipynb b/doc/Tutorials/Showcase.ipynb index 8d64337a74..9e0ac3c719 100644 --- a/doc/Tutorials/Showcase.ipynb +++ b/doc/Tutorials/Showcase.ipynb @@ -246,7 +246,7 @@ "source": [ "sample_pos = (0,0.25)\n", "annotated = circular_wave * hv.Points([sample_pos])\n", - "sample = circular_wave.sample(samples=[sample_pos]).reindex().to.curve('Phase', 'Amplitude')\n", + "sample = circular_wave.sample(samples=[sample_pos]).to.curve('Phase', 'Amplitude', ['Frequency'])\n", "annotated + sample" ] }, From 7e71f02f8f70eda5877c05509dc0c7823e6b14c1 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 20:55:27 +0000 Subject: [PATCH 134/212] Fix for Trisurface plot --- holoviews/plotting/mpl/chart3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/plotting/mpl/chart3d.py b/holoviews/plotting/mpl/chart3d.py index 64d0123585..5d8212d906 100644 --- a/holoviews/plotting/mpl/chart3d.py +++ b/holoviews/plotting/mpl/chart3d.py @@ -223,7 +223,7 @@ def initialize_plot(self, ranges=None): def update_handles(self, axis, element, key, ranges=None): style_opts = self.style[self.cyclic_index] dims = element.dimensions(label=True) - vrange = ranges[dims.pop(2)] + vrange = ranges[dims[2]] x, y, z = [element.dimension_values(d) for d in dims] artist = axis.plot_trisurf(x, y, z, 
vmax=vrange[1], vmin=vrange[0], **style_opts) From 7c93b48eb9b30b8801e7804fa36f586f97ae7cc9 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 20:55:41 +0000 Subject: [PATCH 135/212] Fix to NdElement indexing --- holoviews/core/element.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 8e2bbdd609..568102e8cd 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -404,8 +404,8 @@ def __getitem__(self, args): subtable = subtable.data.values()[0] if not isinstance(subtable, NdElement): if len(self.vdims) > 1: - subtable = self.__class__([(args, subtable)], label=self.label, - kdims=self.kdims, vdims=self.vdims) + subtable = self.__class__([(args[1:], subtable)], label=self.label, + kdims=self.kdims[1:], vdims=self.vdims) else: return subtable[0] From ce5f90c51340df3209f0ea463f2810d904278aa7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 21:05:37 +0000 Subject: [PATCH 136/212] Fix to ColumnarArray.collapse_data --- holoviews/core/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index a3e51e8a10..8a53c52f6d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -786,12 +786,12 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): rows = [] for k, group in cls.groupby(joined_data, kdims, raw=True): row = np.zeros(ndims) - row[:ndims] = np.array(k) + row[:nkdims] = np.array(k) if isinstance(function, np.ufunc): collapsed = function.reduce(group) else: collapsed = function(group, axis=0, **kwargs) - row[nkdims+i] = collapsed + row[nkdims:] = collapsed rows.append(row) return np.array(rows) From c9b62dac38620d06693abae8f44caa8722e9dbdf Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 21:18:32 +0000 Subject: [PATCH 137/212] Python3 fix for NdElement indexing --- holoviews/core/element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 568102e8cd..210e8a6104 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -401,7 +401,7 @@ def __getitem__(self, args): if isinstance(subtable, NdElement) and all(np.isscalar(idx) for idx in ndmap_index[1:]): if len(subtable) == 1: - subtable = subtable.data.values()[0] + subtable = list(subtable.data.values())[0] if not isinstance(subtable, NdElement): if len(self.vdims) > 1: subtable = self.__class__([(args[1:], subtable)], label=self.label, From 38d656959618839907cd6185a2c7dfaddbc7e5ef Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 21:44:57 +0000 Subject: [PATCH 138/212] Fix for HeatMap --- holoviews/element/raster.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 2359651984..ebd2ff8beb 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -381,8 +381,8 @@ def _process_data(self, data, params): coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys] dense_data = data.clone(coords) data = data.concat([data, dense_data]).aggregate(data.kdims, np.nanmean).sort(data.kdims) - array = np.flipud(data.dimension_values(2).reshape(len(d1keys), len(d2keys))) - return data, array, dimensions + array = data.dimension_values(2).reshape(len(d1keys), len(d2keys)) + return data, np.fliplr(array), dimensions def clone(self, data=None, shared_data=True, *args, **overrides): @@ -417,7 +417,7 
@@ def dimension_values(self, dim, unique=False): if unique: return self._data.dimension_values(dim) else: - return np.flipud(self.data).flatten() + return np.fliplr(self.data).flatten() else: return super(HeatMap, self).dimension_values(dim) From 549137956ffef305dfeaf579211bb91e5eeeaacf Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 23:00:17 +0000 Subject: [PATCH 139/212] Ensure TableConversion sorts Curves --- holoviews/element/tabular.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 035c74d793..3bd381ace6 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -189,7 +189,7 @@ class TableConversion(object): def __init__(self, table): self._table = table - def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, **kwargs): + def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, sort=False, **kwargs): if kdims is None: kdims = self._table.kdims elif kdims and not isinstance(kdims, list): kdims = [kdims] @@ -207,8 +207,13 @@ def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, **kwarg params['group'] = selected.group params.update(kwargs) if len(kdims) == selected.ndims: - return new_type(selected, **params) - return selected.groupby(mdims, container_type=HoloMap, group_type=new_type, **params) + element = new_type(selected, **params) + return element.sort() if sort else element + group = selected.groupby(mdims, container_type=HoloMap, group_type=new_type, **params) + if sort: + return group.map(lambda x: x.sort(), [new_type]) + else: + group def bars(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Bars @@ -216,7 +221,7 @@ def bars(self, kdims=None, vdims=None, mdims=None, **kwargs): def curve(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Curve - return self._conversion(kdims, vdims, mdims, Curve, **kwargs) + return self._conversion(kdims, vdims, mdims, Curve, sort=True, **kwargs) def heatmap(self, kdims=None, vdims=None, mdims=None, **kwargs): from .raster import HeatMap From 4616d8bb9101202a1a125018d6449213a1657bd6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 5 Nov 2015 23:01:13 +0000 Subject: [PATCH 140/212] Fixes to NdElement reduce/collapse --- holoviews/core/data.py | 2 +- holoviews/core/element.py | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 8a53c52f6d..11f9ffe5e3 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -471,7 +471,7 @@ def sample(columns, samples=[]): @staticmethod def reduce(columns, reduce_dims, function): - return columns.data.reduce(columns, reduce_dims, function) + return columns.data.reduce(columns.data, reduce_dims, function) @classmethod def aggregate(cls, columns, dimensions, function): diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 210e8a6104..a19d026128 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -7,7 +7,7 @@ from .dimension import Dimension, Dimensioned, ViewableElement from .layout import Composable, Layout, AdjointLayout, NdLayout -from .ndmapping import OrderedDict, UniformNdMapping, NdMapping, item_check +from .ndmapping import OrderedDict, UniformNdMapping, NdMapping, item_check, sorted_context from .overlay import Overlayable, NdOverlay, Overlay, CompositeOverlay from .spaces import HoloMap, GridSpace from .tree import 
AttrTree @@ -452,7 +452,7 @@ def reduce(cls, columns, reduce_dims, function): method. """ kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] - if len(kdims): + if len(kdims) > 1: reindexed = columns.reindex(kdims) reduced = reindexed.collapse_data([reindexed], function, kdims) else: @@ -491,14 +491,23 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): joined_data = joined_data.clone(concatenated, kdims=joined_data.kdims) collapsed = [] - grouped = joined_data.groupby([d.name for d in kdims], container_type=NdMapping) - for i, (k, group) in enumerate(grouped.data.items()): + vdims = joined_data.dimensions('value', True) + if len(joined_data.kdims) > len(kdims): + group_dims = (kdims[1:] if len(kdims) == 2 else kdims) + with sorted_context(False): + grouped = joined_data.groupby([d.name for d in group_dims], + container_type=NdMapping).data.items() + else: + grouped = [(k[1:], {d: [v] for d, v in zip(vdims, v)}) + for k, v in joined_data.data.items()] + + for i, (k, group) in enumerate(grouped): if isinstance(function, np.ufunc): - reduced = tuple(function.reduce(group[vdim.name]) for vdim in group.vdims) + reduced = tuple(function.reduce(group[vdim]) for vdim in vdims) else: - reduced = tuple(function(group[vdim.name], **kwargs) for vdim in group.vdims) + reduced = tuple(function(group[vdim], **kwargs) for vdim in vdims) collapsed.append(((i,)+k, reduced)) - return joined_data.clone(collapsed, kdims=['Index']+kdims) + return joined_data.clone(collapsed, kdims=kdims) def aggregate(self, dimensions, function): From e7a2f06655c72a17344e94e50538a94fbf2c4301 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 00:28:05 +0000 Subject: [PATCH 141/212] Made Columns dataframe version compatible with older pandas --- holoviews/core/data.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 11f9ffe5e3..5337a8b3b4 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -3,6 +3,7 @@ data backends. 
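The compatibility shim this commit adds in the hunk below guards DataFrame sorting on the pandas version, since DataFrame.sort_values() only exists from pandas 0.17 onwards and older releases only provide the now-removed DataFrame.sort(). A minimal standalone sketch of the same check, for illustration only (the DataFrame here is made up):

    from distutils.version import LooseVersion
    import pandas as pd

    df = pd.DataFrame({'x': [3, 1, 2], 'y': [30, 10, 20]})
    if LooseVersion(pd.__version__) < '0.17.0':
        sorted_df = df.sort(columns=['x'])       # legacy API on old pandas
    else:
        sorted_df = df.sort_values(by=['x'])     # API introduced in pandas 0.17
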
""" +from distutils.version import LooseVersion from collections import defaultdict, Iterable from itertools import groupby @@ -550,10 +551,15 @@ def collapse_data(data, function, kdims, **kwargs): @staticmethod def sort(columns, by=[]): + import pandas as pd if not isinstance(by, list): by = [by] if not by: by = range(columns.ndims) cols = [columns.get_dimension(d).name for d in by] - return columns.data.sort_values(cols) + + if (not isinstance(columns.data, pd.DataFrame) or + LooseVersion(pd.__version__) < '0.17.0'): + return columns.data.sort(columns=cols) + return columns.data.sort_values(by=cols) @staticmethod From c4521e4f51a88160edfe4401d1efe936c9175218 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 00:35:56 +0000 Subject: [PATCH 142/212] Fix to Columns reduce tests, now use numpy comparison --- tests/testcolumns.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index a5208d5340..57a5637659 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -143,7 +143,8 @@ def test_columns_1d_reduce(self): def test_columns_2d_reduce(self): columns = Columns(zip(zip(self.xs, self.ys), self.zs), kdims=['x', 'y'], vdims=['z']) - self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891001) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.array(0.12828985192891)) def test_columns_2d_partial_reduce(self): columns = Columns(zip(zip(self.xs, self.ys), self.zs), @@ -267,7 +268,8 @@ def test_columns_1d_reduce(self): def test_columns_2d_reduce(self): columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) - self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891001) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.array(0.12828985192891)) def test_columns_2d_partial_reduce(self): columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) @@ -412,7 +414,8 @@ def test_columns_1d_reduce(self): def test_columns_2d_reduce(self): columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), kdims=['x', 'y'], vdims=['z']) - self.assertEqual(columns.reduce(['x', 'y'], np.mean), 0.12828985192891001) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.array(0.12828985192891)) def test_columns_2d_partial_reduce(self): columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), From 1d0827dba990df6d0083b977914cfcf68acc9f3f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 00:40:09 +0000 Subject: [PATCH 143/212] Fix to HeatMap array orientation --- holoviews/element/raster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index ebd2ff8beb..a4f8c09f85 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -382,7 +382,7 @@ def _process_data(self, data, params): dense_data = data.clone(coords) data = data.concat([data, dense_data]).aggregate(data.kdims, np.nanmean).sort(data.kdims) array = data.dimension_values(2).reshape(len(d1keys), len(d2keys)) - return data, np.fliplr(array), dimensions + return data, np.flipud(array.T), dimensions def clone(self, data=None, shared_data=True, *args, **overrides): @@ -417,7 +417,7 @@ def dimension_values(self, dim, unique=False): if unique: return self._data.dimension_values(dim) else: - return np.fliplr(self.data).flatten() + return np.flipud(self.data.T).flatten() else: return super(HeatMap, 
self).dimension_values(dim) From 0d5f0edb925904a2b690bfdb654c8a55bdca6525 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 01:33:36 +0000 Subject: [PATCH 144/212] Fixes to Columns methods dimension input handling --- holoviews/core/data.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 5337a8b3b4..371ce16883 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -72,6 +72,7 @@ def closest(self, coords): def sort(self, by=[]): + if not by: by = self.kdims sorted_columns = self.interface.sort(self, by) return self.clone(sorted_columns) @@ -142,17 +143,19 @@ def reindex(self, kdims=None, vdims=None): Allows converting key dimensions to value dimensions and vice versa. """ - if vdims is None: - val_dims = self.vdims - else: - val_dims = [self.get_dimension(v) for v in vdims] - if kdims is None: - key_dims = [d for d in self.dimensions() + key_dims = [d for d in self.kdims if d not in vdims] else: key_dims = [self.get_dimension(k) for k in kdims] + if vdims is None: + val_dims = [d for d in self.vdims + if d not in kdims] + else: + val_dims = [self.get_dimension(v) for v in vdims] + + data = self.interface.reindex(self, key_dims, val_dims) return self.clone(data, kdims=key_dims, vdims=val_dims) @@ -208,11 +211,12 @@ def reduce(self, dimensions=[], function=None, **reduce_map): return self.clone(reduced, kdims=kdims) - - def aggregate(self, dimensions, function): + def aggregate(self, dimensions=[], function=None): """ Groups over the supplied dimensions and aggregates. """ + if not isinstance(dimensions, list): dimensions = [dimensions] + if not dimensions: dimensions = self.kdims aggregated = self.interface.aggregate(self, dimensions, function) kdims = [self.get_dimension(d) for d in dimensions] return self.clone(aggregated, kdims=kdims) From a93350a21bae51f0fd44739f9663c4cb6304e7dc Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 01:34:01 +0000 Subject: [PATCH 145/212] Created Table conversion methods for seaborn elements --- holoviews/element/tabular.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 3bd381ace6..ca944738e8 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -219,10 +219,28 @@ def bars(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Bars return self._conversion(kdims, vdims, mdims, Bars, **kwargs) + def bivariate(self, kdims=None, vdims=None, mdims=None, **kwargs): + from ..interface.seaborn import Bivariate + return self._convert(kdims, vdims, mdims, Bivariate, **kwargs) + def curve(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Curve return self._conversion(kdims, vdims, mdims, Curve, sort=True, **kwargs) + def distribution(self, dim, mdims=[], **kwargs): + from ..interface.seaborn import Distribution + if mdims: + reindexed = self._table.reindex(mdims+[dim]) + return reindexed.groupby(mdims, HoloMap, Distribution, **kwargs) + else: + table = self._table + params = dict(kdims=[table.get_dimension(dim)], + label=table.label) + if table.group != table.params()['group'].default: + params['group'] = table.group + return Distribution((table.dimension_values(dim),), + **dict(params, **kwargs)) + def heatmap(self, kdims=None, vdims=None, mdims=None, **kwargs): from .raster import HeatMap return self._conversion(kdims, vdims, mdims, HeatMap, **kwargs) @@ -248,6 +266,9 @@ 
def raster(self, kdims=None, vdims=None, mdims=None, **kwargs): heatmap = self.heatmap(kdims, vdims, **kwargs) return Raster(heatmap.data, **dict(self._table.get_param_values(onlychanged=True))) + def regression(self, kdims=None, vdims=None, mdims=None, **kwargs): + return self._convert(kdims, vdims, mdims, Regression, **kwargs) + def surface(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Surface heatmap = self.heatmap(kdims, vdims, **kwargs) From 0438eeb7b0edea2237f6048bcb15e38d34e41c6c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 01:34:30 +0000 Subject: [PATCH 146/212] Cleaned up seaborn interface --- holoviews/interface/seaborn.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/holoviews/interface/seaborn.py b/holoviews/interface/seaborn.py index a3675b4d53..bf1da8b880 100644 --- a/holoviews/interface/seaborn.py +++ b/holoviews/interface/seaborn.py @@ -67,10 +67,6 @@ def reduce(self, dimensions=[], function=None, **reduce_map): raise NotImplementedError('Reduction of TimeSeries not ' 'implemented.') - @property - def ylabel(self): - return str(self.vdims[0]) - class Bivariate(Chart): @@ -103,13 +99,6 @@ class Distribution(Chart): vdims = param.List(default=[Dimension('Frequency')]) - def _validate_data(self, data): - data = np.expand_dims(data, 1) if data.ndim == 1 else data - if not data.shape[1] == 1: - raise ValueError("Distribution only support single dimensional arrays.") - return data - - def range(self, dimension): dim_idx = self.get_dimension_index(dimension) dim = self.get_dimension(dim_idx) @@ -202,20 +191,6 @@ def timeseries(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): kdims=[self.get_dimension(dim) for dim in kdims], **kwargs) - @property - def ylabel(self): - return self.x2 if self.x2 else self.y - - @property - def ylim(self): - if self._ylim: - return self._ylim - elif self.x2 or self.y: - ydata = self.data[self.x2 if self.x2 else self.y] - return min(ydata), max(ydata) - else: - return None - __all__ = ['DFrame', 'Bivariate', 'Distribution', 'TimeSeries', 'Regression'] From 9a13a58e1faf81bb1b4e0eb9240809274a8d5a31 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 04:17:13 +0000 Subject: [PATCH 147/212] Fixes to NdElement slicing, groupby and reindex --- holoviews/core/dimension.py | 4 ++- holoviews/core/element.py | 56 ++++++++++++++++++++++--------------- holoviews/core/ndmapping.py | 5 ++-- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 0c55d233b5..46b2d4db1e 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -673,7 +673,9 @@ def select(self, selection_specs=None, **kwargs): # Apply selection to self if local_kwargs and matches: - select = [slice(None) for i in range(self.ndims)] + ndims = (len(self.dimensions()) if any(d in self.vdims for d in kwargs) + else self.ndims) + select = [slice(None) for i in range(ndims)] for dim, val in local_kwargs.items(): if dim == 'value': select += [val] diff --git a/holoviews/core/element.py b/holoviews/core/element.py index a19d026128..fe2e9312ac 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -319,7 +319,7 @@ def reindex(self, kdims=None, vdims=None, force=False): if kdims is None: return super(NdElement, self).reindex(force=force) else: - vdims = self.vdims + vdims = [d for d in self.vdims if d not in kdims] elif kdims is None: kdims = [d for d in self.dimensions if d not in vdims] if 
'Index' not in kdims: kdims = ['Index'] + kdims @@ -333,10 +333,16 @@ def reindex(self, kdims=None, vdims=None, force=False): getter = operator.itemgetter(0) items = [] for k, v in self.data.items(): - _, key = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) - for i, iskey, idx in kidxs), key=getter)) - _, val = zip(*sorted(((i, v[idx] if iskey else v[idx-self.ndims]) - for i, iskey, idx in vidxs), key=getter)) + if key_dims: + _, key = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) + for i, iskey, idx in kidxs), key=getter)) + else: + key = () + if val_dims: + _, val = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) + for i, iskey, idx in vidxs), key=getter)) + else: + val = () items.append((key, val)) reindexed = self.clone(items, kdims=key_dims, vdims=val_dims) if not force and len(reindexed) != len(items): @@ -396,8 +402,18 @@ def __getitem__(self, args): """ if args in self.dimensions(): return self.dimension_values(args) - ndmap_index = args[:self.ndims] if isinstance(args, tuple) else args - subtable = NdMapping.__getitem__(self, ndmap_index) + if not isinstance(args, tuple): args = (args,) + ndmap_index = args[:self.ndims] + val_index = args[self.ndims:] + if val_index: + if len(val_index) == 1 and val_index[0] in self.vdims: + val_index = val_index[0] + else: + reindexed = self.reindex(self.kdims+list(self.vdims)) + subtable = reindexed[args] + + if not val_index or not isinstance(val_index, tuple): + subtable = NdMapping.__getitem__(self, ndmap_index) if isinstance(subtable, NdElement) and all(np.isscalar(idx) for idx in ndmap_index[1:]): if len(subtable) == 1: @@ -407,16 +423,15 @@ def __getitem__(self, args): subtable = self.__class__([(args[1:], subtable)], label=self.label, kdims=self.kdims[1:], vdims=self.vdims) else: + if np.isscalar(subtable): + return subtable return subtable[0] - # If subtable is not a slice return as reduced type - if not isinstance(args, tuple): args = (args,) - shallow = len(args) <= self.ndims - if shallow: + if val_index and not isinstance(val_index, tuple): + return self._filter_data(subtable, args[-1]) + else: return subtable - return self._filter_data(subtable, args[-1]) - def sort(self, by=[]): if not isinstance(by, list): by = [by] @@ -453,8 +468,8 @@ def reduce(cls, columns, reduce_dims, function): """ kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] if len(kdims) > 1: - reindexed = columns.reindex(kdims) - reduced = reindexed.collapse_data([reindexed], function, kdims) + reduced = columns.collapse_data([columns], function, kdims) + reindexed = reduced.reindex(kdims) else: reduced = [] for vdim in columns.vdims: @@ -492,14 +507,9 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): collapsed = [] vdims = joined_data.dimensions('value', True) - if len(joined_data.kdims) > len(kdims): - group_dims = (kdims[1:] if len(kdims) == 2 else kdims) - with sorted_context(False): - grouped = joined_data.groupby([d.name for d in group_dims], - container_type=NdMapping).data.items() - else: - grouped = [(k[1:], {d: [v] for d, v in zip(vdims, v)}) - for k, v in joined_data.data.items()] + with sorted_context(False): + grouped = joined_data.groupby([d.name for d in kdims[1:]], + container_type=NdMapping).data.items() for i, (k, group) in enumerate(grouped): if isinstance(function, np.ufunc): diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 4c0887ad02..c6f7e69959 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -287,7 +287,8 @@ def groupby(self, 
dimensions, container_type=None, group_type=None, **kwargs): for key in self.data.keys()) with item_check(False): selects = group_select(list(selects)) - groups = [(k, group_type(v.reindex(inames), **kwargs)) + groups = [(k, group_type((v.reindex(inames) if isinstance(v, NdMapping) + else [((), (v,))]), **kwargs)) for k, v in iterative_select(self, dimensions, selects)] return container_type(groups, kdims=dims) @@ -575,7 +576,7 @@ def __getitem__(self, indexslice): sliced_items = [] for k, v in items: val_slice = self._dataslice(v, data_slice) - if val_slice: + if val_slice or isinstance(val_slice, tuple): sliced_items.append((k, val_slice)) if len(sliced_items) == 0: raise KeyError('No items within specified slice.') From 8e4af8765250fbfce372c2511d28205d4444e439 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 13:50:24 +0000 Subject: [PATCH 148/212] Fixed NdElement.collapse_data --- holoviews/core/element.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index fe2e9312ac..caf327d51d 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -507,10 +507,10 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): collapsed = [] vdims = joined_data.dimensions('value', True) + group_dims = kdims[1:] if 'Index' in kdims else kdims with sorted_context(False): - grouped = joined_data.groupby([d.name for d in kdims[1:]], + grouped = joined_data.groupby([d.name for d in group_dims], container_type=NdMapping).data.items() - for i, (k, group) in enumerate(grouped): if isinstance(function, np.ufunc): reduced = tuple(function.reduce(group[vdim]) for vdim in vdims) From e9d2262044f12dcc7921b34d53400585fc86ffae Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 13:52:10 +0000 Subject: [PATCH 149/212] Reverted changes to Dimensioned.clone --- holoviews/core/dimension.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 46b2d4db1e..6b2b87cd81 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -245,7 +245,7 @@ def __init__(self, data, id=None, **params): self.label) - def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): + def clone(self, data=None, shared_data=True, *args, **overrides): """ Returns a clone of the object with matching parameter values containing the specified args and kwargs. @@ -254,17 +254,10 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides): the clone will share data with the original. 
""" params = dict(self.get_param_values()) - if new_type is None: - clone_type = self.__class__ - else: - clone_type = new_type - new_params = new_type.params() - params = {k: v for k, v in params.items() - if k in new_params} settings = dict(params, **overrides) if data is None and shared_data: data = self.data - return clone_type(data, *args, **settings) + return self.__class__(data, *args, **settings) def relabel(self, label=None, group=None, depth=0): From 2a5eb702f5a0dd06019e7ad11b35f75d8ac34077 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 14:06:58 +0000 Subject: [PATCH 150/212] Registered NdElement and Columns to be displayed as Table --- holoviews/plotting/bokeh/__init__.py | 5 ++++- holoviews/plotting/mpl/__init__.py | 2 ++ holoviews/plotting/mpl/element.py | 5 +++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/holoviews/plotting/bokeh/__init__.py b/holoviews/plotting/bokeh/__init__.py index 8350944638..398477fdbd 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -1,4 +1,5 @@ -from ...core import Store, Overlay, NdOverlay, Layout, AdjointLayout, GridSpace +from ...core import (Store, Overlay, NdOverlay, Layout, AdjointLayout, + GridSpace, NdElement, Columns) from ...element import (Curve, Points, Scatter, Image, Raster, Path, RGB, Histogram, Spread, HeatMap, Contours, Path, Box, Bounds, Ellipse, Polygons, @@ -50,6 +51,8 @@ Table: TablePlot, ItemTable: TablePlot, DFrame: TablePlot, + NdElement: TablePlot, + Columns: TablePlot, Surface: PlotSelector(lambda x: 'bokeh', [('mpl', SurfacePlot), ('bokeh', BokehMPLRawWrapper)], True), diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index e3633ff641..5db33dfb66 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -127,6 +127,8 @@ def grid_selector(grid): # Tabular plots ItemTable: TablePlot, Table: TablePlot, + NdElement: TablePlot, + Columns: TablePlot, Collator: TablePlot, # Raster plots diff --git a/holoviews/plotting/mpl/element.py b/holoviews/plotting/mpl/element.py index 167b339bb7..c4bed9ebb7 100644 --- a/holoviews/plotting/mpl/element.py +++ b/holoviews/plotting/mpl/element.py @@ -7,7 +7,8 @@ import param from ...core import util -from ...core import OrderedDict, Collator, NdOverlay, HoloMap, CompositeOverlay, Element3D +from ...core import (OrderedDict, Collator, NdOverlay, HoloMap, + CompositeOverlay, Element3D, Columns, NdElement) from ...element import Table, ItemTable, Raster from ..plot import GenericElementPlot, GenericOverlayPlot from .plot import MPLPlot @@ -102,7 +103,7 @@ class ElementPlot(GenericElementPlot, MPLPlot): # Element Plots should declare the valid style options for matplotlib call style_opts = [] - _suppressed = [Table, Collator, ItemTable] + _suppressed = [Table, NdElement, Collator, Columns, ItemTable] def __init__(self, element, **params): super(ElementPlot, self).__init__(element, **params) From 639073ef0ed1a800d5c7ac3e5328b00883fb8051 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 15:46:51 +0000 Subject: [PATCH 151/212] Fix for Image.dimension_values to support RGB types --- holoviews/element/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index a4f8c09f85..a862f551cc 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -581,7 +581,7 @@ def dimension_values(self, dim, unique=False): dim_idx = 
self.get_dimension_index(dim) if dim_idx in [0, 1]: l, b, r, t = self.bounds.lbrt() - dim2, dim1 = self.data.shape + dim2, dim1 = self.data.shape[:2] d1_half_unit = (r - l)/dim1/2. d2_half_unit = (t - b)/dim2/2. d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1) From 2e3b3919546d407bd7df4ea6f5cc45f75f53cf31 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 15:47:19 +0000 Subject: [PATCH 152/212] Fix for ItemTable plotting in bokeh --- holoviews/plotting/bokeh/tabular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/plotting/bokeh/tabular.py b/holoviews/plotting/bokeh/tabular.py index 9aadcd1c1c..d3001a20e9 100644 --- a/holoviews/plotting/bokeh/tabular.py +++ b/holoviews/plotting/bokeh/tabular.py @@ -18,7 +18,7 @@ class TablePlot(BokehPlot, GenericElementPlot): def get_data(self, element, ranges=None): dims = element.dimensions() - return ({d.name: element[d] for d in dims}, + return ({d.name: element.dimension_values(d) for d in dims}, {d.name: d.name for d in dims}) From 7469d793cbca03c7dab88f6833dc43c485a9ee10 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 15:47:44 +0000 Subject: [PATCH 153/212] Fix for RasterPlot.color_mapper issue --- holoviews/plotting/bokeh/raster.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/holoviews/plotting/bokeh/raster.py b/holoviews/plotting/bokeh/raster.py index 0fa38afd05..b96666d198 100644 --- a/holoviews/plotting/bokeh/raster.py +++ b/holoviews/plotting/bokeh/raster.py @@ -51,9 +51,10 @@ def _glyph_properties(self, plot, element, source, ranges): def _update_glyph(self, glyph, properties, mapping): allowed_properties = glyph.properties() - cmap = properties.pop('color_mapper') - glyph.color_mapper.low = cmap.low - glyph.color_mapper.high = cmap.high + cmap = properties.pop('color_mapper', None) + if cmap: + glyph.color_mapper.low = cmap.low + glyph.color_mapper.high = cmap.high merged = dict(properties, **mapping) glyph.set(**{k: v for k, v in merged.items() if k in allowed_properties}) From c0b2714913eb95c18067b729c6f5f8e26eaec602 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 15:48:13 +0000 Subject: [PATCH 154/212] Fix for ItemTable getitem and dimension_values methods --- holoviews/element/tabular.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index ca944738e8..191541cc51 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -62,7 +62,7 @@ def __getitem__(self, heading): return self if heading not in self.vdims: raise IndexError("%r not in available headings." 
% heading) - return self.data.get(heading, np.NaN) + return np.array(self.data.get(heading, np.NaN)) @classmethod @@ -74,7 +74,7 @@ def collapse_data(cls, data, function, **kwargs): def dimension_values(self, dimension): dimension = self.get_dimension(dimension).name if dimension in self.dimensions('value', label=True): - return [self.data.get(dimension, np.NaN)] + return np.array([self.data.get(dimension, np.NaN)]) else: return super(ItemTable, self).dimension_values(dimension) From ff311eee2aa4189ea309810093e5d54aacef26ff Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 15:48:51 +0000 Subject: [PATCH 155/212] Fix for HoloMap.hist method --- holoviews/core/spaces.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index 202e29eb5b..24c38e3dc5 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -277,7 +277,8 @@ def relabel(self, label=None, group=None, depth=1): def hist(self, num_bins=20, bin_range=None, adjoin=True, individually=True, **kwargs): - histmap = self.clone(shared_data=False) + histmaps = [self.clone(shared_data=False) + for d in kwargs.get('dimension', range(1))] if individually: map_range = None @@ -290,16 +291,27 @@ def hist(self, num_bins=20, bin_range=None, adjoin=True, individually=True, **kw if issubclass(self.type, (NdOverlay, Overlay)) and 'index' not in kwargs: kwargs['index'] = 0 for k, v in self.data.items(): - histmap[k] = v.hist(adjoin=False, bin_range=bin_range, - individually=individually, num_bins=num_bins, - style_prefix=style_prefix, **kwargs) - - if adjoin and issubclass(self.type, (NdOverlay, Overlay)): - layout = (self << histmap) - layout.main_layer = kwargs['index'] + hists = v.hist(adjoin=False, bin_range=bin_range, + individually=individually, num_bins=num_bins, + style_prefix=style_prefix, **kwargs) + if isinstance(hists, Layout): + for i, hist in enumerate(hists): + histmaps[i][k] = hist + else: + histmap[k] = hists + + if adjoin: + layout = self + for hist in histmaps: + layout = (layout << hist) + if issubclass(self.type, (NdOverlay, Overlay)): + layout.main_layer = kwargs['index'] return layout - - return (self << histmap) if adjoin else histmap + else: + if len(histmaps) > 1: + return Layout.from_values(histmaps) + else: + return histmaps[0] From 426ef265965b8d65dbd08f0eeffe26ccdda2fe73 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 16:13:37 +0000 Subject: [PATCH 156/212] Implemented boolean array slicing of NdMapping and Columns types --- holoviews/core/data.py | 4 ++++ holoviews/core/element.py | 2 ++ holoviews/core/ndmapping.py | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 371ce16883..b9bd762129 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -167,6 +167,10 @@ def __getitem__(self, slices): Scatter object. 
""" if slices is (): return self + if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b': + if not len(slices) == len(self): + raise IndexError("Boolean index must match length of sliced object") + return self.clone(self.data[slices]) if not isinstance(slices, tuple): slices = (slices,) value_select = None if len(slices) == 1 and slices[0] in self.dimensions(): diff --git a/holoviews/core/element.py b/holoviews/core/element.py index caf327d51d..db08135356 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -402,6 +402,8 @@ def __getitem__(self, args): """ if args in self.dimensions(): return self.dimension_values(args) + if isinstance(args, np.ndarray) and args.dtype.kind == 'b': + return NdMapping.__getitem__(self, args) if not isinstance(args, tuple): args = (args,) ndmap_index = args[:self.ndims] val_index = args[self.ndims:] diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index c6f7e69959..5a35f1c889 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -558,6 +558,11 @@ def __getitem__(self, indexslice): """ if indexslice in [Ellipsis, ()]: return self + elif isinstance(indexslice, np.ndarray) and indexslice.dtype.kind == 'b': + if not len(indexslice) == len(self): + raise IndexError("Boolean index must match length of sliced object") + selection = zip(indexslice, self.data.items()) + return self.clone([item for c, item in selection if c]) map_slice, data_slice = self._split_index(indexslice) map_slice = self._transform_indices(map_slice) From dde4d9a837df01d465f0010425890a42dad966ee Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 16:28:22 +0000 Subject: [PATCH 157/212] Fix to ColumnarDataFrame.groupby --- holoviews/core/data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index b9bd762129..4bca344715 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -508,9 +508,10 @@ def groupby(columns, dimensions, container_type, group_type, **kwargs): element_dims = [kdim for kdim in columns.kdims if kdim not in index_dims] - element_kwargs = dict(kdims=element_dims, new_type=group_type) + element_kwargs = dict(util.get_param_values(columns), + kdims=element_dims) element_kwargs.update(kwargs) - map_data = [(k, columns.clone(v, **element_kwargs)) for k, v in + map_data = [(k, group_type(v, **element_kwargs)) for k, v in columns.data.groupby(dimensions)] with item_check(False), sorted_context(False): return container_type(map_data, kdims=index_dims) From 79788d30844ae8e81e11418b8d3ccc0f7b98667f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 16:47:49 +0000 Subject: [PATCH 158/212] Fixed usage of get_param_values utility --- holoviews/core/element.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index db08135356..ca4109a4a0 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -145,10 +145,7 @@ def dframe(self, as_table=False): data = pd.DataFrame(dim_vals) if as_table: from ..element import Table - params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) - if not self.params()['group'].default == self.group: - params['group'] = self.group - return Table(data, **params) + return Table(data, **get_param_values(self)) return data @@ -199,7 +196,7 @@ def array(self, as_table=False, dimensions=[]): raise ValueError("%s data contains non-numeric type, " "could not convert to array 
based " "Element" % type(self).__name__) - return Table(array, **get_param_values(self, Table)) + return Table(array, **get_param_values(self)) else: return array @@ -564,7 +561,7 @@ def dframe(self, as_table=False): df = pandas.DataFrame((k+v for (k, v) in self.data.items()), columns=columns) if as_table: from ..element import Table - return Table(df, **get_param_values(self, Table)) + return Table(df, **get_param_values(self)) return df From 2ef52702752f7290517b0fe3c0a28995cc00698e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 22:27:06 +0000 Subject: [PATCH 159/212] Fix for Distribution Element after Columns changes --- holoviews/interface/seaborn.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/holoviews/interface/seaborn.py b/holoviews/interface/seaborn.py index bf1da8b880..43046acf54 100644 --- a/holoviews/interface/seaborn.py +++ b/holoviews/interface/seaborn.py @@ -101,16 +101,18 @@ class Distribution(Chart): def range(self, dimension): dim_idx = self.get_dimension_index(dimension) - dim = self.get_dimension(dim_idx) - if dim.range != (None, None): - return dim.range - elif dim_idx == 0: - return (np.nanmin(self.data), np.nanmax(self.data)) - elif dim_idx == 1: - return (None, None) + if dim_idx == 1: + dim = self.get_dimension(dim_idx) + if dim.range != (None, None): + return dim.range + else: + return (None, None) else: return super(Distribution, self).dimension_values(dimension) + def _validate_data(self, data): + return data + def dimension_values(self, dimension): dim_idx = self.get_dimension_index(dimension) if dim_idx == 0: From c4f6a3e4fc5020643dbef798e6e2680d011887c2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 22:27:39 +0000 Subject: [PATCH 160/212] Cleaned up Element.mapping method --- holoviews/core/element.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index ca4109a4a0..0143483b33 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -167,10 +167,7 @@ def mapping(self, as_table=False, **kwargs): values = [()]*len(keys) data = zip(keys, values) - params = dict(kdims=self.kdims, vdims=self.vdims, label=self.label) - if not self.params()['group'].default == self.group: - params['group'] = self.group - mapping = NdElement(data, **dict(params, **kwargs)) + mapping = NdElement(data, **dict(get_param_values(self), **kwargs)) if as_table: from ..element import Table return Table(mapping) From 3d451ea89766118224b65f1b5a99ced9b7591db8 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 6 Nov 2015 22:29:17 +0000 Subject: [PATCH 161/212] Added and cleaned up documentation in Columns interface --- holoviews/core/data.py | 136 ++++++++++++++++++++++++++++++----------- 1 file changed, 101 insertions(+), 35 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 4bca344715..41241d6c6a 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -28,6 +28,29 @@ class Columns(Element): + """ + Columns provides a general baseclass for column based + Element types. Through the use of utility class interfaces + data may be supplied and stored in a range of formats. + + Data is assumed to be in a columnar data format with N + observations and at least D columns, where D is the number + of dimensions. Data supplied in one of the native formats + will be retained. Alternatively the columns maybe supplied + as a tuple or the rows as a list of tuples. 
If the data is + purely numeric the data will automatically be converted to + a numpy array, otherwise it will fall back to the specified + data_type. + + Currently either an NdElement or a pandas DataFrame are + supported as storage formats for heterogeneous data. An + NdElement is a HoloViews wrapper around dictionary objects, + which maps between the key dimensions and the value dimensions. + + The Columns class also provides various methods to transform + the data in various ways and allows indexing and selecting + along all dimensions. + """ data_type = param.ObjectSelector(default='mapping', allow_None=True, objects=['pandas', 'mapping'], @@ -58,8 +81,9 @@ def __setstate__(self, state): def closest(self, coords): """ - Given single or multiple x-values, returns the list - of closest actual samples. + Given single or multiple samples along the first + key dimension will return the closest actual sample + coordinates. """ if self.ndims > 1: NotImplementedError("Closest method currently only " @@ -72,12 +96,21 @@ def closest(self, coords): def sort(self, by=[]): + """ + Sorts the data by the values along the supplied + dimensions. + """ if not by: by = self.kdims sorted_columns = self.interface.sort(self, by) return self.clone(sorted_columns) def range(self, dim, data_range=True): + """ + Computes the range of values along a supplied + dimension, taking into account the range and + soft_range defined on the Dimension object. + """ dim = self.get_dimension(dim) if dim.range != (None, None): return dim.range @@ -117,6 +150,16 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): def select(self, selection_specs=None, **selection): + """ + Allows selecting data by the slices, sets and scalar + values along a particular dimension. The indices + should be supplied as keywords mapping between + the selected dimension and value. Additionally + selection_specs (taking the form of a list of + type.group.label strings, types or functions) may + be supplied, which will ensure the selection is + only applied if the specs match the selected object. + """ if selection_specs and not self.matches(selection_specs): return self @@ -129,6 +172,10 @@ def select(self, selection_specs=None, **selection): @property def interface(self): + """ + Property that return the interface class to apply + operations on the data. + """ if util.is_dataframe(self.data): return ColumnarDataFrame elif isinstance(self.data, np.ndarray): @@ -162,9 +209,19 @@ def reindex(self, kdims=None, vdims=None): def __getitem__(self, slices): """ - Implements slicing or indexing of the data by the data x-value. - If a single element is indexed reduces the Element2D to a single - Scatter object. + Allows slicing and selecting values in the Columns object. + Supports multiple indexing modes: + + (1) Slicing and indexing along the values of each + dimension in the columns object using either + scalars, slices or sets of values. + (2) Supplying the name of a dimension as the first + argument will return the values along that + dimension as a numpy array. + (3) Slicing of all key dimensions and selecting + a single value dimension by name. + (4) A boolean array index matching the length of + the Columns object. """ if slices is (): return self if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b': @@ -193,15 +250,18 @@ def __getitem__(self, slices): def sample(self, samples=[]): """ Allows sampling of Columns as an iterator of coordinates - matching the key dimensions. 
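The four indexing modes listed in the __getitem__ docstring above translate into usage roughly as follows for a Table with kdims=['x'] and vdims=['y']; this is an illustrative sketch assuming the documented behaviour, not a tested snippet from the patch:

    import numpy as np
    import holoviews as hv

    table = hv.Table([(0, 1.0), (1, 2.0), (2, 3.0)], kdims=['x'], vdims=['y'])
    table[1]                               # (1) index/slice along the 'x' values
    table['y']                             # (2) dimension name -> values as a numpy array
    table[:, 'y']                          # (3) slice all key dimensions, select the 'y' value dimension
    table[np.array([True, False, True])]   # (4) boolean mask matching len(table)
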
+ matching the key dimensions, returning a new object + containing just the selected samples. """ return self.clone(self.interface.sample(self, samples)) def reduce(self, dimensions=[], function=None, **reduce_map): """ - Allows collapsing of Columns objects using the supplied map of - dimensions and reduce functions. + Allows reducing the values along one or more key dimension + with the supplied function. The dimensions may be supplied + as a list and a function to apply or a mapping between the + dimensions and functions to apply along each dimension. """ reduce_dims, reduce_map = self._reduce_map(dimensions, function, reduce_map) reduced = self @@ -217,7 +277,8 @@ def reduce(self, dimensions=[], function=None, **reduce_map): def aggregate(self, dimensions=[], function=None): """ - Groups over the supplied dimensions and aggregates. + Aggregates over the supplied key dimensions with the + defined function. """ if not isinstance(dimensions, list): dimensions = [dimensions] if not dimensions: dimensions = self.kdims @@ -241,6 +302,11 @@ def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, **kwar @classmethod def collapse_data(cls, data, function=None, kdims=None, **kwargs): + """ + Class method utility function to concatenate the supplied data + and apply a groupby operation along the supplied key dimensions + then aggregates across the groups with the supplied function. + """ if isinstance(data[0], NdElement): return data[0].collapse_data(data, function, kdims, **kwargs) elif isinstance(data[0], np.ndarray): @@ -251,6 +317,11 @@ def collapse_data(cls, data, function=None, kdims=None, **kwargs): @classmethod def concat(cls, columns_objs): + """ + Concatenates a list of Columns objects. If data types + don't match all types will be converted to that of + the first object before concatenation. + """ columns = columns_objs[0] if len({col.interface for col in columns_objs}) > 1: if isinstance(columns.data, NdElement): @@ -263,15 +334,24 @@ def concat(cls, columns_objs): def __len__(self): + """ + Returns the number of rows in the Columns object. + """ return self.interface.length(self) @property def shape(self): + """Returns the shape of the data.""" return self.interface.shape(self) def dimension_values(self, dim, unique=False): + """ + Returns the values along a particular + dimension. If unique values are requested + will return only unique values. + """ dim = self.get_dimension(dim).name dim_vals = self.interface.values(self, dim) if unique: @@ -281,10 +361,22 @@ def dimension_values(self, dim, unique=False): def dframe(self, as_table=False): + """ + Returns the data in the form of a DataFrame, + if as_table is requested the data will be + wrapped in a Table object. + """ return self.interface.dframe(self, as_table) def array(self, as_table=False): + """ + Returns the data in the form of an array, + if as_table is requested the data will be + wrapped in a Table object (note if the data + has heterogeneous types this will raise an + error. + """ array = self.interface.array(self) if as_table: from ..element import Table @@ -573,11 +665,6 @@ def sort(columns, by=[]): @staticmethod def select(columns, selection_specs=None, **select): - """ - Allows slice and select individual values along the DataFrameView - dimensions. Supply the dimensions and values or slices as - keyword arguments. 
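The reduce and aggregate docstrings added above in this diff describe two related but distinct operations: reduce collapses the listed key dimensions entirely, while aggregate groups by them and applies the function within each group. A rough usage sketch (illustrative only; the table contents are made up):

    import numpy as np
    import holoviews as hv

    table = hv.Table([(0, 1.0), (0, 2.0), (1, 3.0), (1, 5.0)],
                     kdims=['x'], vdims=['y'])
    table.reduce(['x'], np.mean)       # collapse 'x' entirely -> overall mean of 'y'
    table.aggregate(['x'], np.mean)    # group by 'x' -> one mean of 'y' per x value
    table.select(x=0)                  # rows where x == 0
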
- """ df = columns.data selected_kdims = [] mask = True @@ -614,9 +701,6 @@ def values(columns, dim): @staticmethod def aggregate(columns, dimensions, function): - """ - Allows aggregating. - """ data = columns.data cols = [d.name for d in columns.kdims if d in dimensions] vdims = columns.dimensions('value', True) @@ -626,9 +710,6 @@ def aggregate(columns, dimensions, function): @classmethod def sample(cls, columns, samples=[]): - """ - Sample the Element data with a list of samples. - """ data = columns.data mask = np.zeros(cls.length(columns), dtype=bool) for sample in samples: @@ -788,10 +869,6 @@ def select(columns, **selection): @classmethod def collapse_data(cls, data, function, kdims=None, **kwargs): - """ - Applies a groupby operation along the supplied key dimensions - then aggregates across the groups with the supplied function. - """ ndims = data[0].shape[1] nkdims = len(kdims) data = data[0] if len(data) == 0 else np.concatenate(data) @@ -813,9 +890,6 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): @staticmethod def sample(columns, samples=[]): - """ - Sample the Element data with a list of samples. - """ data = columns.data mask = False for sample in samples: @@ -827,11 +901,6 @@ def sample(columns, samples=[]): @staticmethod def reduce(columns, reduce_dims, function): - """ - This implementation allows reducing dimensions by aggregating - over all the remaining key dimensions using the collapse_data - method. - """ kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] if len(kdims): reindexed = columns.reindex(kdims) @@ -852,9 +921,6 @@ def reduce(columns, reduce_dims, function): @classmethod def aggregate(cls, columns, dimensions, function): - """ - Allows aggregating. - """ if not isinstance(dimensions, Iterable): dimensions = [dimensions] rows = [] reindexed = columns.reindex(dimensions) From c468eaa76e7f0842eadf0468274fd67905f3459b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 03:47:56 +0000 Subject: [PATCH 162/212] Let Columns use general array method implementation --- holoviews/core/data.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 41241d6c6a..0f74ecd748 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -369,24 +369,6 @@ def dframe(self, as_table=False): return self.interface.dframe(self, as_table) - def array(self, as_table=False): - """ - Returns the data in the form of an array, - if as_table is requested the data will be - wrapped in a Table object (note if the data - has heterogeneous types this will raise an - error. 
- """ - array = self.interface.array(self) - if as_table: - from ..element import Table - if array.dtype.kind in ['S', 'O', 'U']: - raise ValueError("%s data contains non-numeric type, " - "could not convert to array based " - "Element" % type(self).__name__) - return Table(array, **util.get_param_values(self)) - return array - class ColumnarData(param.Parameterized): @@ -534,10 +516,6 @@ def concat(columns_objs): return [(k[1:], v) for col in columns_objs for k, v in col.data.data.items()] - @staticmethod - def array(columns): - return columns.data.array(dimensions=columns.dimensions()) - @staticmethod def sort(columns, by=[]): if not len(by): by = columns.dimensions('key', True) @@ -634,11 +612,6 @@ def reduce(columns, reduce_dims, function=None): return reduced - @staticmethod - def array(columns): - return columns.data.values - - @staticmethod def reindex(columns, kdims=None, vdims=None): # DataFrame based tables don't need to be reindexed @@ -750,11 +723,6 @@ def add_dimension(columns, dimension, dim_pos, values): return np.insert(data, dim_pos, values, axis=1) - @staticmethod - def array(columns): - return columns.data - - @staticmethod def concat(columns_objs): return np.concatenate([col.data for col in columns_objs]) From a10b2fc0080028d23b5db9abba58888479cd2551 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 03:48:28 +0000 Subject: [PATCH 163/212] Fixed handling of NdElement Index dimension in Columns constructor --- holoviews/core/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 0f74ecd748..09a4a1d0d9 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -407,7 +407,7 @@ def _process_data(cls, data, paramobjs, **kwargs): params['group'] = data.group if isinstance(data, NdElement): - pass + params['kdims'] = [d for d in params['kdims'] if d != 'Index'] elif isinstance(data, Columns): data = data.data elif isinstance(data, Element): From 0df5fffd4a612596a97f39d3b1ed844221c93a92 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 03:49:52 +0000 Subject: [PATCH 164/212] Fix in mpl PointPlot for Columns based data --- holoviews/plotting/mpl/chart.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 736aae98d3..a0ff298319 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -584,14 +584,14 @@ def _compute_size(self, element, opts): def update_handles(self, axis, element, key, ranges=None): paths = self.handles['artist'] - paths.set_offsets(element.data[:, 0:2]) - ndims = element.data.shape[1] + paths.set_offsets(element.array(dimensions=[0, 1])) + ndims = element.shape[1] dims = element.dimensions(label=True) if self.size_index < ndims: opts = self.style[self.cyclic_index] paths.set_sizes(self._compute_size(element, opts)) if self.color_index < ndims: - cs = element.data[:, self.color_index] + cs = element.dimension_values(self.color_index) val_dim = dims[self.color_index] paths.set_clim(ranges[val_dim]) paths.set_array(cs) From dba7a316af9d749dbd1133f1390f5689004ea8c8 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 03:50:19 +0000 Subject: [PATCH 165/212] Fix to axis handling in GridPlot --- holoviews/plotting/bokeh/plot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index b8fb4679dd..ca508c1592 100644 --- 
a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -133,17 +133,17 @@ def _create_subplots(self, layout, ranges): # Create axes kwargs = {} if c == 0 and r != 0: - kwargs['xaxis'] = 'left-bare' + kwargs['xaxis'] = 'bottom-bare' kwargs['width'] = 175 if c != 0 and r == 0 and not layout.ndims == 1: - kwargs['yaxis'] = 'bottom-bare' + kwargs['yaxis'] = 'left-bare' kwargs['height'] = 175 if c == 0 and r == 0: kwargs['width'] = 175 kwargs['height'] = 175 if r != 0 and c != 0: - kwargs['xaxis'] = 'left-bare' - kwargs['yaxis'] = 'bottom-bare' + kwargs['xaxis'] = 'bottom-bare' + kwargs['yaxis'] = 'left-bare' if 'width' not in kwargs: kwargs['width'] = 125 From 660976e5fb7c4082baf368f10c56971589657b66 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 14:52:59 +0000 Subject: [PATCH 166/212] Added option to share bokeh datasources across plots --- holoviews/plotting/bokeh/chart.py | 11 +++++----- holoviews/plotting/bokeh/plot.py | 36 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index fd94db4c06..9861fcb711 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -50,18 +50,19 @@ def get_data(self, element, ranges=None): cmap = style.get('palette', style.get('cmap', None)) if self.color_index < len(dims) and cmap: - map_key = 'color_' + str(id(element.data)) + map_key = 'color_' + dims[self.color_index] mapping['color'] = map_key cmap = get_cmap(cmap) colors = element.dimension_values(self.color_index) crange = ranges.get(dims[self.color_index], None) - data['color'] = map_colors(colors, crange, cmap) + data[map_key] = map_colors(colors, crange, cmap) if self.size_index < len(dims): - mapping['size'] = 'size' + map_key = 'size_' + dims[self.size_index] + mapping['size'] = map_key ms = style.get('size', 1) sizes = element.dimension_values(self.size_index) - data['size'] = compute_sizes(sizes, self.size_fn, - self.scaling_factor, ms) + data[map_key] = compute_sizes(sizes, self.size_fn, + self.scaling_factor, ms) data[dims[0]] = element.dimension_values(0) data[dims[1]] = element.dimension_values(1) if 'hover' in self.tools: diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index ca508c1592..0b84be8ad9 100644 --- a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -83,6 +83,27 @@ def _fontsize(self, key, label='fontsize', common=True): for k, v in size.items()} + def sync_sources(self): + """ + Syncs data sources between Elements, which draw data + from the same object. + """ + get_sources = lambda x: (id(x.current_frame.data), x) + filter_fn = lambda x: 'source' in x.handles + data_sources = self.traverse(get_sources, [filter_fn]) + grouped_sources = groupby(sorted(data_sources), lambda x: x[0]) + for gid, group in grouped_sources: + group = list(group) + if len(group) > 1: + source_data = {} + for _, plot in group: + source_data.update(plot.handles['source'].data) + new_source = ColumnDataSource(source_data) + for _, plot in group: + renderer = plot.handles['glyph_renderer'] + renderer.update(data_source=new_source) + plot.handles['source'] = new_source + class GridPlot(BokehPlot, GenericCompositePlot): """ @@ -90,6 +111,11 @@ class GridPlot(BokehPlot, GenericCompositePlot): object. 
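The sync_sources method added above merges the column data of plots that draw from the same object into one shared Bokeh source, which is what makes linked selections (brushing) across those plots possible. A minimal standalone sketch of that merging step using plain Bokeh objects (the column names are illustrative):

from bokeh.models import ColumnDataSource

# Two glyphs drawn from the same underlying object contribute their
# columns to a single ColumnDataSource, so a selection made on one
# glyph is reflected in the other.
source_a = ColumnDataSource({'x': [0, 1, 2], 'y': [0, 1, 4]})
source_b = ColumnDataSource({'x': [0, 1, 2], 'z': [0, 2, 6]})

merged = {}
merged.update(source_a.data)
merged.update(source_b.data)
shared = ColumnDataSource(merged)

In the plots themselves this corresponds to pointing each glyph renderer's data_source at the shared object, which is what sync_sources does for every group of matching plots.
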
""" + shared_datasource = param.Boolean(default=True, doc=""" + Whether Elements drawing the data from the same object should + share their Bokeh data source allowing for linked brushing + and other linked behaviors.""") + def __init__(self, layout, ranges=None, keys=None, dimensions=None, layout_num=1, **params): if not isinstance(layout, GridSpace): @@ -184,6 +210,8 @@ def initialize_plot(self, ranges=None, plots=[]): passed_plots.append(None) self.handles['plot'] = gridplot(plots[::-1]) self.handles['plots'] = plots + if self.shared_datasource: + self.sync_sources() self.drawn = True return self.handles['plot'] @@ -211,6 +239,11 @@ class LayoutPlot(BokehPlot, GenericLayoutPlot): shared_axes = param.Boolean(default=True, doc=""" Whether axes should be shared across plots""") + shared_datasource = param.Boolean(default=True, doc=""" + Whether Elements drawing the data from the same object should + share their Bokeh data source allowing for linked brushing + and other linked behaviors.""") + tabs = param.Boolean(default=False, doc=""" Whether to display overlaid plots in separate panes""") @@ -424,6 +457,9 @@ def initialize_plot(self, ranges=None): self.handles['plot'] = layout_plot self.handles['plots'] = plots + if self.shared_datasource: + self.sync_sources() + self.drawn = True return self.handles['plot'] From 6e0f187245c536fa01db832859340d322aa01e23 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 14:55:04 +0000 Subject: [PATCH 167/212] Added GridMatrix container and gridmatrix operation These new objects provide a highly useful view into multi-variate data, in particular it can take advantage of the linked brushing feature in the bokeh backend. --- holoviews/core/spaces.py | 25 +++++++-- holoviews/operation/element.py | 81 +++++++++++++++++++++++++++- holoviews/plotting/bokeh/__init__.py | 36 +++++++++---- holoviews/plotting/mpl/__init__.py | 6 ++- 4 files changed, 132 insertions(+), 16 deletions(-) diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index 24c38e3dc5..e41857e352 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -3,6 +3,7 @@ import param +from . import traversal from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement from .layout import Layout, AdjointLayout, NdLayout from .ndmapping import UniformNdMapping, NdMapping, item_check @@ -325,9 +326,6 @@ class GridSpace(UniformNdMapping): 2D parameter spaces. """ - # NOTE: If further composite types supporting Overlaying and Layout these - # classes may be moved to core/composite.py - kdims = param.List(default=[Dimension(name="X"), Dimension(name="Y")], bounds=(1,2)) @@ -451,3 +449,24 @@ def shape(self): if self.ndims == 1: return (len(keys), 1) return len(set(k[0] for k in keys)), len(set(k[1] for k in keys)) + + + +class GridMatrix(GridSpace): + """ + GridMatrix is container type for heterogeneous Element types + laid out in a grid. Unlike a GridSpace the axes of the Grid + must not represent an actual coordinate space, but may be used + to plot various dimensions against each other. The GridMatrix + is usually constructed using the gridmatrix operation, which + will generate a GridMatrix plotting each dimension in an + Element against each other. + """ + + + def _item_check(self, dim_vals, data): + if not traversal.uniform(NdMapping([(0, self), (1, data)])): + raise ValueError("HoloMaps dimensions must be consistent in %s." 
% + type(self).__name__) + NdMapping._item_check(self, dim_vals, data) + diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index b80d03b51f..a3e7e14dc3 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -6,10 +6,12 @@ import numpy as np import param +from param import _is_number -from ..core import ElementOperation, NdOverlay, Overlay +from ..core import (ElementOperation, NdOverlay, Overlay, GridMatrix, + HoloMap, Columns) from ..core.util import find_minmax, sanitize_identifier -from ..element.chart import Histogram, Curve +from ..element.chart import Histogram, Curve, Scatter from ..element.raster import Raster, Image, RGB, QuadMesh from ..element.path import Contours, Polygons @@ -573,3 +575,78 @@ def _process(self, overlay, key=None): return Curve(np.array(data), group=self.p.group, label=self.get_overlay_label(overlay)) + + + +class gridmatrix(param.ParameterizedFunction): + """ + The gridmatrix operation takes an Element or HoloMap + of Elements as input and creates a GridMatrix object, + which plots each dimension in the Element against + each other dimension. This provides a very useful + overview of high-dimensional data and is inspired + by pandas and seaborn scatter_matrix implementations. + """ + + chart_type = param.Parameter(default=Scatter, doc=""" + The Element type used to display bivariate distributions + of the data.""") + + diagonal_type = param.Parameter(default=Histogram, doc=""" + The Element type along the diagonal, may be a Histogram or any + other plot type which can visualize a univariate distribution.""") + + overlay_dims = param.List(default=[], doc=""" + If a HoloMap is supplied this will allow overlaying one or + more of it's key dimensions.""") + + def __call__(self, data, **params): + p = param.ParamOverrides(self, params) + + if isinstance(data, HoloMap): + ranges = {d.name: data.range(d) for d in data.dimensions()} + data = data.clone({k: GridMatrix(self._process(p, v, ranges)) + for k, v in data.items()}).collate() + if p.overlay_dims: + data = data.map(lambda x: x.overlay(p.overlay_dims), (HoloMap,)) + return data + elif isinstance(data, Element): + data = self._process(p, data) + return GridMatrix(data) + + + def _process(self, p, element, ranges={}): + # Creates a unified Columns.data attribute + # to draw the data from + if isinstance(element.data, np.ndarray): + if Columns.data_type == 'mapping': + el_data = element.mapping() + else: + el_data = element.dframe() + else: + el_data = element.data + + # Get dimensions to plot against each other + dims = [d for d in element.dimensions() + if _is_number(element.range(d)[0])] + permuted_dims = [(d1, d2) for d1 in dims + for d2 in dims[::-1]] + + data = {} + for d1, d2 in permuted_dims: + key = (d1.name, d2.name) + if d1 == d2: + if p.diagonal_type is Histogram: + bin_range = ranges.get(d1.name, element.range(d1)) + el = element.hist(dimension=d1.name, + bin_range=bin_range, + adjoin=False) + else: + values = element.dimension_values(d1) + el = p.diagonal_type(values, kdims=[d1]) + else: + el = p.chart_type(el_data, kdims=[d1], + vdims=[d2]) + data[(d1.name, d2.name)] = el + return data + diff --git a/holoviews/plotting/bokeh/__init__.py b/holoviews/plotting/bokeh/__init__.py index 398477fdbd..1eff95d596 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -1,14 +1,14 @@ from ...core import (Store, Overlay, NdOverlay, Layout, AdjointLayout, - GridSpace, NdElement, Columns) + GridSpace, NdElement, 
Columns, GridMatrix) from ...element import (Curve, Points, Scatter, Image, Raster, Path, RGB, Histogram, Spread, HeatMap, Contours, Path, Box, Bounds, Ellipse, Polygons, ErrorBars, Text, HLine, VLine, Spline, - Table, ItemTable, Surface, Scatter3D) + Table, ItemTable, Surface, Scatter3D, Trisurface) from ...core.options import Options, Cycle, OptionTree from ...interface import DFrame from ..plot import PlotSelector -from ..mpl import SurfacePlot, Scatter3DPlot +from ..mpl import SurfacePlot, Scatter3DPlot, TrisurfacePlot from .annotation import TextPlot, LineAnnotationPlot, SplinePlot from .element import OverlayPlot, BokehMPLWrapper, BokehMPLRawWrapper @@ -24,20 +24,26 @@ Store.register({Overlay: OverlayPlot, NdOverlay: OverlayPlot, + GridSpace: GridPlot, + GridMatrix: GridPlot, + AdjointLayout: AdjointLayoutPlot, + Layout: LayoutPlot, + + # Charts Curve: CurvePlot, Points: PointPlot, Scatter: PointPlot, + ErrorBars: ErrorPlot, Spread: SpreadPlot, - HLine: LineAnnotationPlot, - VLine: LineAnnotationPlot, - GridSpace: GridPlot, + + # Rasters Image: RasterPlot, RGB: RGBPlot, Raster: RasterPlot, HeatMap: HeatmapPlot, Histogram: HistogramPlot, - AdjointLayout: AdjointLayoutPlot, - Layout: LayoutPlot, + + # Paths Path: PathPlot, Contours: PathPlot, Path: PathPlot, @@ -45,20 +51,30 @@ Bounds: PathPlot, Ellipse: PathPlot, Polygons: PolygonPlot, - ErrorBars: ErrorPlot, + + # Annotations + HLine: LineAnnotationPlot, + VLine: LineAnnotationPlot, Text: TextPlot, Spline: SplinePlot, + + # Tabular Table: TablePlot, ItemTable: TablePlot, DFrame: TablePlot, NdElement: TablePlot, Columns: TablePlot, + + # Wrapped mpl 3d plots Surface: PlotSelector(lambda x: 'bokeh', [('mpl', SurfacePlot), ('bokeh', BokehMPLRawWrapper)], True), Scatter3D: PlotSelector(lambda x: 'bokeh', [('mpl', Scatter3DPlot), - ('bokeh', BokehMPLRawWrapper)], True)}, + ('bokeh', BokehMPLRawWrapper)], True), + Trisurface: PlotSelector(lambda x: 'bokeh', + [('mpl', TrisurfacePlot), + ('bokeh', BokehMPLRawWrapper)], True)}, 'bokeh') diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index 5db33dfb66..02b24a77cb 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -12,7 +12,7 @@ from matplotlib import rc_params_from_file -from ...core import Layout, NdOverlay, Collator +from ...core import Layout, NdOverlay, Collator, GridMatrix from ...core.options import Cycle, Palette, Options from ...element import * # pyflakes:ignore (API import) from ..plot import PlotSelector @@ -111,6 +111,7 @@ def grid_selector(grid): # General plots GridSpace: GridPlot, + GridMatrix: GridPlot, NdLayout: LayoutPlot, Layout: LayoutPlot, AdjointLayout: AdjointLayoutPlot, @@ -183,6 +184,9 @@ def grid_selector(grid): options.RGB = Options('style', interpolation='nearest') # Composites options.Layout = Options('plot', sublabel_format='{Alpha}') +options.GridMatrix = Options('plot', fig_size=160, shared_xaxis=True, + shared_yaxis=True, xaxis=None, yaxis=None) + # Annotations options.VLine = Options('style', color=Cycle()) options.HLine = Options('style', color=Cycle()) From 2e0723c713de58f9599db72bfe914379fa0b3a4e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 17:17:37 +0000 Subject: [PATCH 168/212] Improvements to handling of dataframe Columns input --- holoviews/core/data.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 09a4a1d0d9..f0d42078b6 100644 --- 
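For reference, a minimal usage sketch of the gridmatrix operation and GridMatrix container introduced above. The import paths follow the module locations shown in the diffs and should be read as assumptions about this point in the series; the data is arbitrary:

import numpy as np
from holoviews.core.data import Columns
from holoviews.element import Scatter, Histogram
from holoviews.operation.element import gridmatrix

table = Columns(np.random.randn(100, 3), kdims=['a'], vdims=['b', 'c'])
# Plots every numeric dimension against every other, with univariate
# distributions (Histograms here) along the diagonal.
grid = gridmatrix(table, chart_type=Scatter, diagonal_type=Histogram)

Given a HoloMap rather than a single Element, the operation collates a GridMatrix per key and can additionally overlay the key dimensions listed in overlay_dims.
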
a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -408,14 +408,14 @@ def _process_data(cls, data, paramobjs, **kwargs): if isinstance(data, NdElement): params['kdims'] = [d for d in params['kdims'] if d != 'Index'] - elif isinstance(data, Columns): - data = data.data elif isinstance(data, Element): dimensions = data.dimensions(label=True) data = tuple(data.dimension_values(d) for d in data.dimensions()) - if util.is_dataframe(data): - kdims, vdims = cls._process_df_dims(data, paramobjs, **params) + if isinstance(data, Columns): + data = data.data + elif util.is_dataframe(data): + kdims, vdims = cls._process_df_dims(data, paramobjs, **kwargs) params['kdims'] = kdims params['vdims'] = vdims elif not isinstance(data, (NdElement, dict)): @@ -471,18 +471,22 @@ def _process_data(cls, data, paramobjs, **kwargs): @staticmethod def _process_df_dims(data, paramobjs, **kwargs): - if 'kdims' in kwargs or 'vdims' in kwargs: - kdims = kwargs.get('kdims', []) - vdims = kwargs.get('vdims', []) - col_labels = [c.name if isinstance(c, Dimension) else c - for c in kdims+vdims] - if not all(c in data.columns for c in col_labels): - raise ValueError("Supplied dimensions don't match columns" - "in the dataframe.") - else: - ndim = len(paramobjs['kdims'].default) + columns = data.columns + kdims = kwargs.get('kdims', []) + vdims = kwargs.get('vdims', []) + ndim = paramobjs['kdims'].bounds[1] if paramobjs['kdims'].bounds else None + if 'kdims' in kwargs and 'vdims' not in kwargs: + vdims = [c for c in data.columns if c not in kdims] + elif 'kdims' not in kwargs and 'vdims' in kwargs: + kdims = [c for c in data.columns if c not in kdims][:ndim] + elif 'kdims' not in kwargs and 'vdims' not in kwargs: kdims = list(data.columns[:ndim]) vdims = list(data.columns[ndim:]) + col_labels = [c.name if isinstance(c, Dimension) else c + for c in kdims+vdims] + if not all(c in data.columns for c in col_labels): + raise ValueError("Supplied dimensions don't match columns" + "in the dataframe.") return kdims, vdims @@ -581,8 +585,10 @@ def groupby(columns, dimensions, container_type, group_type, **kwargs): element_kwargs = dict(util.get_param_values(columns), kdims=element_dims) element_kwargs.update(kwargs) - map_data = [(k, group_type(v, **element_kwargs)) for k, v in - columns.data.groupby(dimensions)] + names = [d.name for d in columns.dimensions() + if d not in dimensions] + map_data = [(k, group_type(v, **element_kwargs)) + for k, v in columns.data.groupby(dimensions)] with item_check(False), sorted_context(False): return container_type(map_data, kdims=index_dims) From 23c7c4957a09178b3bd3947567dac2e20d81bff0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 17:18:44 +0000 Subject: [PATCH 169/212] Only share dataframe or NdElement data across plots --- holoviews/plotting/bokeh/plot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index 0b84be8ad9..673f6fc8c6 100644 --- a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -89,7 +89,9 @@ def sync_sources(self): from the same object. 
""" get_sources = lambda x: (id(x.current_frame.data), x) - filter_fn = lambda x: 'source' in x.handles + filter_fn = lambda x: (x.current_frame and + not isinstance(x.current_frame.data, np.ndarray) + and 'source' in x.handles) data_sources = self.traverse(get_sources, [filter_fn]) grouped_sources = groupby(sorted(data_sources), lambda x: x[0]) for gid, group in grouped_sources: From 5c1b202594a6e2ee61208034c440aefc472dffa8 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 18:30:37 +0000 Subject: [PATCH 170/212] Implemented Dimension not equal operator --- holoviews/core/dimension.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 6b2b87cd81..a3b8f01b2b 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -173,10 +173,13 @@ def __str__(self): def __eq__(self, other): - "Dimensions are sorted alphanumerically by name" + "Implements equals operator including sanitized comparison." dim_matches = [self.name, sanitize_identifier(self.name)] return other.name in dim_matches if isinstance(other, Dimension) else other in dim_matches + def __ne__(self, other): + "Implements not equal operator including sanitized comparison." + return not self.__eq__(other) def __lt__(self, other): "Dimensions are sorted alphanumerically by name" From 19e3e022a4bcc78482a7d2a149eb755fa7daadc0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 18:30:56 +0000 Subject: [PATCH 171/212] Added missing import --- holoviews/operation/element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index a3e7e14dc3..acbae4e472 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -9,7 +9,7 @@ from param import _is_number from ..core import (ElementOperation, NdOverlay, Overlay, GridMatrix, - HoloMap, Columns) + HoloMap, Columns, Element) from ..core.util import find_minmax, sanitize_identifier from ..element.chart import Histogram, Curve, Scatter from ..element.raster import Raster, Image, RGB, QuadMesh From d1325f698594d16e2206e6a2790ac2721e07f7fa Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 18:31:15 +0000 Subject: [PATCH 172/212] Cleaned up Columns constructor utility --- holoviews/core/data.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index f0d42078b6..35e7115802 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -400,11 +400,7 @@ def shape(columns): def _process_data(cls, data, paramobjs, **kwargs): params = {} if isinstance(data, Element): - params['kdims'] = data.kdims - params['vdims'] = data.vdims - params['label'] = data.label - if data.group != data.params()['group'].default: - params['group'] = data.group + params = util.get_param_values(data) if isinstance(data, NdElement): params['kdims'] = [d for d in params['kdims'] if d != 'Index'] From 1875e1035be223085a3818aff39bc8b1959c2cbb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 18:31:46 +0000 Subject: [PATCH 173/212] Fix for Table data source sharing --- holoviews/plotting/bokeh/plot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index 673f6fc8c6..57477a5929 100644 --- a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -103,10 +103,14 @@ def 
sync_sources(self): new_source = ColumnDataSource(source_data) for _, plot in group: renderer = plot.handles['glyph_renderer'] - renderer.update(data_source=new_source) + if 'data_source' in renderer.properties(): + renderer.update(data_source=new_source) + else: + renderer.update(source=new_source) plot.handles['source'] = new_source + class GridPlot(BokehPlot, GenericCompositePlot): """ Plot a group of elements in a grid layout based on a GridSpace element From e121d89e457ea241091a6d689f0cf76730f2c922 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 19:28:58 +0000 Subject: [PATCH 174/212] Fix for Element.mapping method --- holoviews/core/element.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 0143483b33..54b45df145 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -154,17 +154,18 @@ def mapping(self, as_table=False, **kwargs): This method transforms any ViewableElement type into a Table as long as it implements a dimension_values method. """ + length = len(self) if self.kdims: keys = zip(*[self.dimension_values(dim.name) for dim in self.kdims]) else: - keys = [()]*len(values) + keys = [()]*length if self.vdims: values = zip(*[self.dimension_values(dim.name) for dim in self.vdims]) else: - values = [()]*len(keys) + values = [()]*length data = zip(keys, values) mapping = NdElement(data, **dict(get_param_values(self), **kwargs)) From b2c3241120241378a168c58c2bbd84a5815bef6d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 7 Nov 2015 19:29:33 +0000 Subject: [PATCH 175/212] Fixes for datetime range handling --- holoviews/core/util.py | 59 ++++++++++++++++------------- holoviews/plotting/bokeh/element.py | 4 +- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 3bd8e44ad7..a905e07610 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -298,32 +298,39 @@ def max_range(ranges): def max_extents(extents, zrange=False): - """ - Computes the maximal extent in 2D and 3D space from - list of 4-tuples or 6-tuples. If zrange is enabled - all extents are converted to 6-tuples to comput - x-, y- and z-limits. - """ - - if zrange: - num = 6 - inds = [(0, 2), (1, 3)] - extents = [e if len(e) == 6 else (e[0], e[1], None, - e[2], e[3], None) - for e in extents] - else: - num = 4 - inds = [(0, 2), (1, 3)] - arr = np.array(extents, dtype=np.float, ndmin=2) - extents = [np.NaN] * num - if 0 in arr.shape: - return extents - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') - for lower, upper in inds: - extents[lower] = np.nanmin(arr[:, lower]) - extents[upper] = np.nanmax(arr[:, upper]) - return tuple(extents) + """ + Computes the maximal extent in 2D and 3D space from + list of 4-tuples or 6-tuples. If zrange is enabled + all extents are converted to 6-tuples to comput + x-, y- and z-limits. 
+ """ + if zrange: + num = 6 + inds = [(0, 2), (1, 3)] + extents = [e if len(e) == 6 else (e[0], e[1], None, + e[2], e[3], None) + for e in extents] + else: + num = 4 + inds = [(0, 2), (1, 3)] + arr = list(zip(*extents)) if extents else [] + extents = [np.NaN] * num + if len(arr) == 0: + return extents + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') + for lidx, uidx in inds: + lower = [v for v in arr[lidx] if v is not None] + upper = [v for v in arr[uidx] if v is not None] + if lower and isinstance(lower[0], np.datetime64): + extents[lidx] = np.min(lower) + elif lower: + extents[lidx] = np.nanmin(lower) + if upper and isinstance(upper[0], np.datetime64): + extents[uidx] = np.max(upper) + elif upper: + extents[uidx] = np.nanmax(upper) + return tuple(extents) def int_to_alpha(n, upper=True): diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index 184ffed2c8..1ec992e278 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -188,7 +188,7 @@ def _axes_props(self, plots, subplots, element, ranges): if x_axis_type == 'datetime': low = convert_datetime(low) high = convert_datetime(high) - elif low == high: + elif low == high and low is not None: offset = low*0.1 if low else 0.5 low -= offset high += offset @@ -207,7 +207,7 @@ def _axes_props(self, plots, subplots, element, ranges): if y_axis_type == 'datetime': low = convert_datetime(low) high = convert_datetime(high) - elif low == high: + elif low == high and low is not None: offset = low*0.1 if low else 0.5 low -= offset high += offset From ff7c1f924fb20b39284a60adfbf28200072c23b3 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 00:29:15 +0000 Subject: [PATCH 176/212] Deleted trailing whitespace and minor formatting fix --- holoviews/core/data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 35e7115802..5e1a2a0869 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -168,7 +168,7 @@ def select(self, selection_specs=None, **selection): return data else: return self.clone(data) - + @property def interface(self): @@ -342,7 +342,7 @@ def __len__(self): @property def shape(self): - """Returns the shape of the data.""" + "Returns the shape of the data." 
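The reworked max_extents above treats np.datetime64 columns separately so that date ranges survive the reduction instead of being forced through NaN-based arithmetic. A short sketch of the expected behaviour (the values are illustrative):

import numpy as np
from holoviews.core.util import max_extents

extent_a = (np.datetime64('2015-01-01'), 0.0, np.datetime64('2015-06-01'), 5.0)
extent_b = (np.datetime64('2015-03-01'), -1.0, np.datetime64('2015-09-01'), 3.0)

# Datetime columns are reduced with np.min/np.max, numeric columns with
# the NaN-aware reductions, giving
# (datetime64('2015-01-01'), -1.0, datetime64('2015-09-01'), 5.0).
combined = max_extents([extent_a, extent_b])
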
return self.interface.shape(self) @@ -571,7 +571,7 @@ def range(columns, dimension): def concat(columns_objs): return pd.concat([col.data for col in columns_objs]) - + @staticmethod def groupby(columns, dimensions, container_type, group_type, **kwargs): index_dims = [columns.get_dimension(d) for d in dimensions] @@ -741,7 +741,7 @@ def sort(columns, by=[]): idxs = [columns.get_dimension_index(dim) for dim in by] return data[np.lexsort(np.flipud(data[:, idxs].T))] - + @staticmethod def values(columns, dim): data = columns.data From dbc74ebbc68462611377ab4c4a0200d0aadfa18a Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 00:29:46 +0000 Subject: [PATCH 177/212] All methods are now declared as classmethods throughout --- holoviews/core/data.py | 164 ++++++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 5e1a2a0869..ec7fbdd350 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -373,8 +373,8 @@ def dframe(self, as_table=False): class ColumnarData(param.Parameterized): - @staticmethod - def range(columns, dimension): + @classmethod + def range(cls, columns, dimension): column = columns.dimension_values(dimension) if columns.get_dimension_type(dimension) is np.datetime64: return column.min(), column.max() @@ -386,13 +386,13 @@ def range(columns, dimension): return column[0], column[-1] - @staticmethod - def dframe(columns, as_table=False): + @classmethod + def dframe(cls, columns, as_table=False): return Element.dframe(columns, as_table) - @staticmethod - def shape(columns): + @classmethod + def shape(cls, columns): return columns.data.shape @@ -465,8 +465,8 @@ def _process_data(cls, data, paramobjs, **kwargs): return data, params - @staticmethod - def _process_df_dims(data, paramobjs, **kwargs): + @classmethod + def _process_df_dims(cls, data, paramobjs, **kwargs): columns = data.columns kdims = kwargs.get('kdims', []) vdims = kwargs.get('vdims', []) @@ -486,70 +486,70 @@ def _process_df_dims(data, paramobjs, **kwargs): return kdims, vdims - @staticmethod - def length(columns): + @classmethod + def length(cls, columns): return len(columns.data) - @staticmethod - def validate_data(columns, data): + @classmethod + def validate_data(cls, columns, data): return data class ColumnarNdElement(ColumnarData): - @staticmethod - def validate_data(columns, data): + @classmethod + def validate_data(cls, columns, data): return data - @staticmethod - def shape(columns): + @classmethod + def shape(cls, columns): return (len(columns), len(columns.dimensions())) - @staticmethod - def add_dimension(columns, dimension, dim_pos, values): + @classmethod + def add_dimension(cls, columns, dimension, dim_pos, values): return columns.data.add_dimension(dimension, dim_pos+1, values) - @staticmethod - def concat(columns_objs): + @classmethod + def concat(cls, columns_objs): return [(k[1:], v) for col in columns_objs for k, v in col.data.data.items()] - @staticmethod - def sort(columns, by=[]): + @classmethod + def sort(cls, columns, by=[]): if not len(by): by = columns.dimensions('key', True) return columns.data.sort(by) - @staticmethod - def values(columns, dim): + @classmethod + def values(cls, columns, dim): return columns.data.dimension_values(dim) - @staticmethod - def reindex(columns, kdims=None, vdims=None): + @classmethod + def reindex(cls, columns, kdims=None, vdims=None): return columns.data.reindex(kdims, vdims) - @staticmethod - def groupby(columns, dimensions, container_type, 
group_type, **kwargs): + @classmethod + def groupby(cls, columns, dimensions, container_type, group_type, **kwargs): if 'kdims' not in kwargs: kwargs['kdims'] = [d for d in columns.kdims if d not in dimensions] with item_check(False), sorted_context(False): return columns.data.groupby(dimensions, container_type, group_type, **kwargs) - @staticmethod - def select(columns, **selection): + @classmethod + def select(cls, columns, **selection): return columns.data.select(**selection) - @staticmethod - def collapse_data(data, function, kdims=None, **kwargs): + @classmethod + def collapse_data(cls, data, function, kdims=None, **kwargs): return data[0].collapse_data(data, function, kdims, **kwargs) - @staticmethod - def sample(columns, samples=[]): + @classmethod + def sample(cls, columns, samples=[]): return columns.data.sample(samples) - @staticmethod - def reduce(columns, reduce_dims, function): + @classmethod + def reduce(cls, columns, reduce_dims, function): return columns.data.reduce(columns.data, reduce_dims, function) @classmethod @@ -561,19 +561,19 @@ def aggregate(cls, columns, dimensions, function): class ColumnarDataFrame(ColumnarData): - @staticmethod - def range(columns, dimension): + @classmethod + def range(cls, columns, dimension): column = columns.data[columns.get_dimension(dimension).name] return (column.min(), column.max()) - @staticmethod - def concat(columns_objs): + @classmethod + def concat(cls, columns_objs): return pd.concat([col.data for col in columns_objs]) - @staticmethod - def groupby(columns, dimensions, container_type, group_type, **kwargs): + @classmethod + def groupby(cls, columns, dimensions, container_type, group_type, **kwargs): index_dims = [columns.get_dimension(d) for d in dimensions] element_dims = [kdim for kdim in columns.kdims if kdim not in index_dims] @@ -589,8 +589,8 @@ def groupby(columns, dimensions, container_type, group_type, **kwargs): return container_type(map_data, kdims=index_dims) - @staticmethod - def reduce(columns, reduce_dims, function=None): + @classmethod + def reduce(cls, columns, reduce_dims, function=None): """ The aggregate function accepts either a list of Dimensions and a function to apply to find the aggregate across @@ -614,19 +614,19 @@ def reduce(columns, reduce_dims, function=None): return reduced - @staticmethod - def reindex(columns, kdims=None, vdims=None): + @classmethod + def reindex(cls, columns, kdims=None, vdims=None): # DataFrame based tables don't need to be reindexed return columns.data - @staticmethod - def collapse_data(data, function, kdims, **kwargs): + @classmethod + def collapse_data(cls, data, function, kdims, **kwargs): return pd.concat(data).groupby([d.name for d in kdims]).agg(function).reset_index() - @staticmethod - def sort(columns, by=[]): + @classmethod + def sort(cls, columns, by=[]): import pandas as pd if not isinstance(by, list): by = [by] if not by: by = range(columns.ndims) @@ -638,8 +638,8 @@ def sort(columns, by=[]): return columns.data.sort_values(by=cols) - @staticmethod - def select(columns, selection_specs=None, **select): + @classmethod + def select(cls, columns, selection_specs=None, **select): df = columns.data selected_kdims = [] mask = True @@ -666,16 +666,16 @@ def select(columns, selection_specs=None, **select): return df - @staticmethod - def values(columns, dim): + @classmethod + def values(cls, columns, dim): data = columns.data[dim] if util.dd and isinstance(data, util.dd.Series): data = data.compute() return np.array(data) - @staticmethod - def aggregate(columns, dimensions, 
function): + @classmethod + def aggregate(cls, columns, dimensions, function): data = columns.data cols = [d.name for d in columns.kdims if d in dimensions] vdims = columns.dimensions('value', True) @@ -694,15 +694,15 @@ def sample(cls, columns, samples=[]): return data[mask] - @staticmethod - def add_dimension(columns, dimension, dim_pos, values): + @classmethod + def add_dimension(cls, columns, dimension, dim_pos, values): data = columns.data.copy() data.insert(dim_pos, dimension.name, values) return data - @staticmethod - def dframe(columns, as_table=False): + @classmethod + def dframe(cls, columns, as_table=False): if as_table: from ..element import Table return Table(columns) @@ -712,38 +712,38 @@ def dframe(columns, as_table=False): class ColumnarArray(ColumnarData): - @staticmethod - def validate_data(columns, data): + @classmethod + def validate_data(cls, columns, data): if data.ndim == 1: data = np.column_stack([np.arange(len(data)), data]) return data - @staticmethod - def add_dimension(columns, dimension, dim_pos, values): + @classmethod + def add_dimension(cls, columns, dimension, dim_pos, values): data = columns.data.copy() return np.insert(data, dim_pos, values, axis=1) - @staticmethod - def concat(columns_objs): + @classmethod + def concat(cls, columns_objs): return np.concatenate([col.data for col in columns_objs]) - @staticmethod - def dframe(columns, as_table=False): + @classmethod + def dframe(cls, columns, as_table=False): return Element.dframe(columns, as_table) - @staticmethod - def sort(columns, by=[]): + @classmethod + def sort(cls, columns, by=[]): data = columns.data idxs = [columns.get_dimension_index(dim) for dim in by] return data[np.lexsort(np.flipud(data[:, idxs].T))] - @staticmethod - def values(columns, dim): + @classmethod + def values(cls, columns, dim): data = columns.data dim_idx = columns.get_dimension_index(dim) if data.ndim == 1: @@ -751,16 +751,16 @@ def values(columns, dim): return data[:, dim_idx] - @staticmethod - def reindex(columns, kdims=None, vdims=None): + @classmethod + def reindex(cls, columns, kdims=None, vdims=None): # DataFrame based tables don't need to be reindexed dims = kdims + vdims data = [columns.dimension_values(d) for d in dims] return np.column_stack(data) - @staticmethod - def groupby(columns, dimensions, container_type=HoloMap, group_type=None, raw=False, **kwargs): + @classmethod + def groupby(cls, columns, dimensions, container_type=HoloMap, group_type=None, raw=False, **kwargs): data = columns.data # Get dimension objects, labels, indexes and data @@ -803,8 +803,8 @@ def groupby(columns, dimensions, container_type=HoloMap, group_type=None, raw=Fa return container_type(grouped_data, kdims=dimensions) - @staticmethod - def select(columns, **selection): + @classmethod + def select(cls, columns, **selection): data = columns.data mask = True selected_kdims = [] @@ -858,8 +858,8 @@ def collapse_data(cls, data, function, kdims=None, **kwargs): return np.array(rows) - @staticmethod - def sample(columns, samples=[]): + @classmethod + def sample(cls, columns, samples=[]): data = columns.data mask = False for sample in samples: @@ -869,8 +869,8 @@ def sample(columns, samples=[]): return data[mask] - @staticmethod - def reduce(columns, reduce_dims, function): + @classmethod + def reduce(cls, columns, reduce_dims, function): kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] if len(kdims): reindexed = columns.reindex(kdims) From 9b440dc42229ed24e53e83df6d25869651d5d850 Mon Sep 17 00:00:00 2001 From: jlstevens Date: 
Sun, 8 Nov 2015 00:30:09 +0000 Subject: [PATCH 178/212] Added docstring to the _validate_data method --- holoviews/core/data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index ec7fbdd350..3cc8d060f9 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -64,6 +64,11 @@ def __init__(self, data, **kwargs): def _validate_data(self, data): + """ + Method that is often overridden in the implementation of + specific Elements for validating and transforming the input + data format. + """ return self.interface.validate_data(self, data) From 8af5ac538e422086fceaf9da601eecd8cad57f33 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 00:30:33 +0000 Subject: [PATCH 179/212] Updated class names to newly agreed convention --- holoviews/core/data.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 3cc8d060f9..74454706c8 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -58,7 +58,7 @@ class Columns(Element): Defines the data type used for storing non-numeric data.""") def __init__(self, data, **kwargs): - data, params = ColumnarData._process_data(data, self.params(), **kwargs) + data, params = DataColumns._process_data(data, self.params(), **kwargs) super(Columns, self).__init__(data, **params) self.data = self._validate_data(self.data) @@ -182,11 +182,11 @@ def interface(self): operations on the data. """ if util.is_dataframe(self.data): - return ColumnarDataFrame + return DFColumns elif isinstance(self.data, np.ndarray): - return ColumnarArray + return ArrayColumns elif isinstance(self.data, NdElement): - return ColumnarNdElement + return NdColumns def reindex(self, kdims=None, vdims=None): @@ -315,9 +315,9 @@ def collapse_data(cls, data, function=None, kdims=None, **kwargs): if isinstance(data[0], NdElement): return data[0].collapse_data(data, function, kdims, **kwargs) elif isinstance(data[0], np.ndarray): - return ColumnarArray.collapse_data(data, function, kdims, **kwargs) + return ArrayColumns.collapse_data(data, function, kdims, **kwargs) elif util.is_dataframe(data[0]): - return ColumnarDataFrame.collapse_data(data, function, kdims, **kwargs) + return DFColumns.collapse_data(data, function, kdims, **kwargs) @classmethod @@ -376,7 +376,7 @@ def dframe(self, as_table=False): -class ColumnarData(param.Parameterized): +class DataColumns(param.Parameterized): @classmethod def range(cls, columns, dimension): @@ -502,7 +502,7 @@ def validate_data(cls, columns, data): -class ColumnarNdElement(ColumnarData): +class NdColumns(DataColumns): @classmethod def validate_data(cls, columns, data): @@ -563,7 +563,7 @@ def aggregate(cls, columns, dimensions, function): -class ColumnarDataFrame(ColumnarData): +class DFColumns(DataColumns): @classmethod @@ -715,7 +715,7 @@ def dframe(cls, columns, as_table=False): -class ColumnarArray(ColumnarData): +class ArrayColumns(DataColumns): @classmethod def validate_data(cls, columns, data): From 1faf9a409b2029dde4283956819d5e8566904488 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Nov 2015 19:03:39 +0000 Subject: [PATCH 180/212] Refactored Columns constructor and interfaces --- holoviews/core/data.py | 272 ++++++++++++++++++++++++----------------- tests/testcolumns.py | 12 +- 2 files changed, 169 insertions(+), 115 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 74454706c8..09bcca766c 100644 --- a/holoviews/core/data.py +++ 
b/holoviews/core/data.py @@ -52,14 +52,16 @@ class Columns(Element): along all dimensions. """ - data_type = param.ObjectSelector(default='mapping', allow_None=True, - objects=['pandas', 'mapping'], - doc=""" - Defines the data type used for storing non-numeric data.""") + datatype = param.List(['array', 'dictionary', 'dataframe' ], doc=""" + Defines the datatypes to be used for storage, the object will + attempt to use each data interface in turn falling back to the + each consecutive type in the list if the data type is not + understood.""") def __init__(self, data, **kwargs): - data, params = DataColumns._process_data(data, self.params(), **kwargs) + data, params, interface = DataColumns.initialize(type(self), data, kwargs) super(Columns, self).__init__(data, **params) + self.interface = interface self.data = self._validate_data(self.data) @@ -175,20 +177,6 @@ def select(self, selection_specs=None, **selection): return self.clone(data) - @property - def interface(self): - """ - Property that return the interface class to apply - operations on the data. - """ - if util.is_dataframe(self.data): - return DFColumns - elif isinstance(self.data, np.ndarray): - return ArrayColumns - elif isinstance(self.data, NdElement): - return NdColumns - - def reindex(self, kdims=None, vdims=None): """ Create a new object with a re-ordered set of dimensions. @@ -378,6 +366,60 @@ def dframe(self, as_table=False): class DataColumns(param.Parameterized): + interfaces = {} + + @classmethod + def initialize(cls, eltype, data, kwargs): + # Process params and dimensions + params = {} + kdims, vdims = None, None + if isinstance(data, Element): + params.update(util.get_param_values(data)) + params.update(kwargs) + kdims, vdims = params.get('kdims'), params.get('vdims') + + # Process Element data + if isinstance(data, NdElement): + pass + elif isinstance(data, Columns): + data = data.data + elif isinstance(data, Element): + dimensions = data.dimensions(label=True) + data = tuple(data.dimension_values(d) for d in data.dimensions()) + + # Set interface priority order + priorities = kwargs.get('datatype', eltype.datatype) + prioritized = [cls.interfaces[p] for p in priorities] + + # Prioritize interfaces which have matching types + data_type = type(data) + head = [intfc for intfc in prioritized + if data_type in intfc.types] + + # Iterate over interfaces until one that can interpret + # the input is found + selected_interface = None + for interface in head + prioritized: + try: + data, new_kdims, new_vdims = interface.reshape(eltype, data, kdims, vdims) + except: + pass + else: + selected_interface = interface + break + + if selected_interface is None: + raise ValueError("None of the available data backends could " + "process the data, ensure it is in a supported " + "format") + + # Combine input params with inferred + # parameters and dimensions + params['kdims'] = new_kdims + params['vdims'] = new_vdims + return data, params, selected_interface + + @classmethod def range(cls, columns, dimension): column = columns.dimension_values(dimension) @@ -401,96 +443,6 @@ def shape(cls, columns): return columns.data.shape - @classmethod - def _process_data(cls, data, paramobjs, **kwargs): - params = {} - if isinstance(data, Element): - params = util.get_param_values(data) - - if isinstance(data, NdElement): - params['kdims'] = [d for d in params['kdims'] if d != 'Index'] - elif isinstance(data, Element): - dimensions = data.dimensions(label=True) - data = tuple(data.dimension_values(d) for d in data.dimensions()) - - if 
isinstance(data, Columns): - data = data.data - elif util.is_dataframe(data): - kdims, vdims = cls._process_df_dims(data, paramobjs, **kwargs) - params['kdims'] = kdims - params['vdims'] = vdims - elif not isinstance(data, (NdElement, dict)): - if isinstance(data, np.ndarray): - array = data - elif isinstance(data, tuple): - try: - array = np.column_stack(data) - except: - array = None - else: - data = [] if data is None else list(data) - try: - array = np.array(data) - except: - array = None - # If ndim > 2 data is assumed to be a mapping - if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0]) - or (array is not None and array.ndim > 2)): - pass - elif array is None or array.dtype.kind in ['S', 'U', 'O']: - # Check if data is of non-numeric type - # Then use defined data type - data_type = kwargs.get('data_type', paramobjs['data_type'].default) - kdims = kwargs.get('kdims', paramobjs['kdims'].default) - vdims = kwargs.get('vdims', paramobjs['vdims'].default) - if data_type == 'pandas': - columns = [d.name if isinstance(d, Dimension) else d - for d in kdims+vdims] - if isinstance(data, tuple): - data = pd.DataFrame.from_items([(c, d) for c, d in - zip(columns, data)]) - else: - data = pd.DataFrame(data, columns=columns) - else: - if isinstance(data, tuple): - data = zip(*data) - ndims = len(kdims) - data = [(tuple(row[:ndims]), tuple(row[ndims:])) - for row in data] - else: - data = array - params.update(kwargs) - if 'kdims' not in params: - params['kdims'] = paramobjs['kdims'].default - if 'vdims' not in params: - params['vdims'] = paramobjs['vdims'].default - if isinstance(data, (dict, list)): - data = NdElement(data, kdims=params['kdims'], - vdims=params['vdims']) - return data, params - - - @classmethod - def _process_df_dims(cls, data, paramobjs, **kwargs): - columns = data.columns - kdims = kwargs.get('kdims', []) - vdims = kwargs.get('vdims', []) - ndim = paramobjs['kdims'].bounds[1] if paramobjs['kdims'].bounds else None - if 'kdims' in kwargs and 'vdims' not in kwargs: - vdims = [c for c in data.columns if c not in kdims] - elif 'kdims' not in kwargs and 'vdims' in kwargs: - kdims = [c for c in data.columns if c not in kdims][:ndim] - elif 'kdims' not in kwargs and 'vdims' not in kwargs: - kdims = list(data.columns[:ndim]) - vdims = list(data.columns[ndim:]) - col_labels = [c.name if isinstance(c, Dimension) else c - for c in kdims+vdims] - if not all(c in data.columns for c in col_labels): - raise ValueError("Supplied dimensions don't match columns" - "in the dataframe.") - return kdims, vdims - - @classmethod def length(cls, columns): return len(columns.data) @@ -504,6 +456,34 @@ def validate_data(cls, columns, data): class NdColumns(DataColumns): + types = (NdElement,) + + @classmethod + def reshape(cls, eltype, data, kdims, vdims): + if isinstance(data, NdElement): + kdims = [d for d in kdims if d != 'Index'] + else: + element_params = eltype.params() + kdims = kdims if kdims else element_params['kdims'].default + vdims = vdims if vdims else element_params['vdims'].default + + if not isinstance(data, (NdElement, dict)): + # If ndim > 2 data is assumed to be a mapping + if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): + pass + else: + if isinstance(data, tuple): + data = zip(*data) + ndims = len(kdims) + data = [(tuple(row[:ndims]), tuple(row[ndims:])) + for row in data] + if isinstance(data, (dict, list)): + data = NdElement(data, kdims=kdims, vdims=vdims) + elif not isinstance(data, NdElement): + raise 
ValueError("NdColumns interface couldn't convert data.""") + return data, kdims, vdims + + @classmethod def validate_data(cls, columns, data): return data @@ -565,6 +545,46 @@ def aggregate(cls, columns, dimensions, function): class DFColumns(DataColumns): + types = (pd.DataFrame if pd else None,) + + @classmethod + def reshape(cls, eltype, data, kdims, vdims): + element_params = eltype.params() + kdim_param = element_params['kdims'] + vdim_param = element_params['vdims'] + if util.is_dataframe(data): + columns = data.columns + ndim = kdim_param.bounds[1] if kdim_param.bounds else None + if kdims and not vdims: + vdims = [c for c in data.columns if c not in kdims] + elif vdims and not kdims: + kdims = [c for c in data.columns if c not in kdims][:ndim] + elif not kdims and not vdims: + kdims = list(data.columns[:ndim]) + vdims = list(data.columns[ndim:]) + else: + # Check if data is of non-numeric type + # Then use defined data type + kdims = kdims if kdims else kdim_param.default + vdims = vdims if vdims else vdim_param.default + columns = [d.name if isinstance(d, Dimension) else d + for d in kdims+vdims] + if isinstance(data, dict) and all(d in data for d in columns): + data = pd.DataFrame(data, columns=columns) + if isinstance(data, tuple): + data = pd.DataFrame.from_items([(c, d) for c, d in + zip(columns, data)]) + else: + data = pd.DataFrame(data, columns=columns) + return data, kdims, vdims + + + @classmethod + def _validate(cls, columns): + if not all(c in data.columns for c in columns.dimensions(label=True)): + raise ValueError("Supplied dimensions don't match columns " + "in the dataframe.") + @classmethod def range(cls, columns, dimension): @@ -717,6 +737,32 @@ def dframe(cls, columns, as_table=False): class ArrayColumns(DataColumns): + types = (np.ndarray,) + + @classmethod + def reshape(cls, eltype, data, kdims, vdims): + if isinstance(data, tuple): + try: + data = np.column_stack(data) + except: + data = None + elif not isinstance(data, np.ndarray): + data = np.array([], ndmin=2).T if data is None else list(data) + try: + data = np.array(data) + except: + data = None + + if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']: + raise ValueError("ArrayColumns interface could not handle input type.") + + if kdims is None: + kdims = eltype.kdims + if vdims is None: + vdims = eltype.vdims + return data, kdims, vdims + + @classmethod def validate_data(cls, columns, data): if data.ndim == 1: @@ -906,3 +952,11 @@ def aggregate(cls, columns, dimensions, function): reduced = function(group, axis=0) rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced])) return np.array(rows) + + +# Register available interfaces +DataColumns.interfaces.update([('array', ArrayColumns), + ('dictionary', NdColumns)]) +if pd: + DataColumns.interfaces['dataframe'] = DFColumns + diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 57a5637659..a1aa2b7163 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -15,8 +15,8 @@ class ColumnsNdElementTest(ComparisonTestCase): """ def setUp(self): - self.data_type = Columns.data_type - Columns.data_type = 'mapping' + self.datatype = Columns.datatype + Columns.datatype = ['dictionary', 'array'] self.xs = range(11) self.ys = np.linspace(0, 1, 11) self.zs = np.sin(self.xs) @@ -28,7 +28,7 @@ def setUp(self): kdims=['x'], vdims=['y']) def tearDown(self): - Columns.data_type = self.data_type + Columns.datatype = self.datatype def test_columns_sort_vdim(self): columns = Columns(OrderedDict(zip(self.xs, -self.ys)), 
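The refactored constructor above walks the datatype priority list and stores the data with the first interface whose reshape accepts the input, which is what the adjusted tests pin down via Columns.datatype. A small sketch of how that priority order plays out (dimension names and values are illustrative):

import numpy as np
from holoviews.core.data import Columns

# Purely numeric input is accepted by ArrayColumns and stored as a
# plain numpy array.
numeric = Columns(np.column_stack([np.arange(5), np.arange(5) * 2.0]),
                  kdims=['x'], vdims=['y'],
                  datatype=['array', 'dictionary'])

# Heterogeneous rows are rejected by ArrayColumns (string/object dtype)
# and fall through to the dictionary-based NdColumns interface.
mixed = Columns([('M', 10, 0.8), ('F', 12, 0.6)],
                kdims=['Gender', 'Age'], vdims=['Weight'],
                datatype=['array', 'dictionary'])
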
@@ -288,8 +288,8 @@ def test_columns_array(self): class ColumnsDFrameTest(ComparisonTestCase): def setUp(self): - self.data_type = Columns.data_type - Columns.data_type = 'pandas' + self.datatype = Columns.datatype + Columns.datatype = ['dataframe'] self.column_data = [('M',10, 15, 0.8), ('M',16, 18, 0.6), ('F',12, 10, 0.8)] self.kdims = ['Gender', 'Age'] @@ -301,7 +301,7 @@ def setUp(self): kdims=['x'], vdims=['y']) def tearDown(self): - Columns.data_type = self.data_type + Columns.datatype = self.datatype def test_columns_range(self): self.assertEqual(self.columns.range('y'), (0., 1.)) From 1add43777e0d3a960f64975c5b58e7c18f413ec6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Nov 2015 19:11:43 +0000 Subject: [PATCH 181/212] Minor cleanup of DFColumns.reshape --- holoviews/core/data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 09bcca766c..313d3d7b10 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -569,8 +569,6 @@ def reshape(cls, eltype, data, kdims, vdims): vdims = vdims if vdims else vdim_param.default columns = [d.name if isinstance(d, Dimension) else d for d in kdims+vdims] - if isinstance(data, dict) and all(d in data for d in columns): - data = pd.DataFrame(data, columns=columns) if isinstance(data, tuple): data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) From b163f390561161db90282536de7d965d8528cef6 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 20:11:34 +0000 Subject: [PATCH 182/212] Minor formatting changes --- holoviews/core/data.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 313d3d7b10..cecef874d8 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -184,18 +184,15 @@ def reindex(self, kdims=None, vdims=None): and vice versa. """ if kdims is None: - key_dims = [d for d in self.kdims - if d not in vdims] + key_dims = [d for d in self.kdims if d not in vdims] else: key_dims = [self.get_dimension(k) for k in kdims] if vdims is None: - val_dims = [d for d in self.vdims - if d not in kdims] + val_dims = [d for d in self.vdims if d not in kdims] else: val_dims = [self.get_dimension(v) for v in vdims] - data = self.interface.reindex(self, key_dims, val_dims) return self.clone(data, kdims=key_dims, vdims=val_dims) From 46d3eebdedfafe7d5823efbf3a17dc22d6bbf7f3 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 21:04:56 +0000 Subject: [PATCH 183/212] Updated class and parameter docstrings of Columns In addition, all docstrings of Columns have been wrapped to a width of 72 characters. --- holoviews/core/data.py | 146 ++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 81 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index cecef874d8..c1b07bbe28 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -29,34 +29,25 @@ class Columns(Element): """ - Columns provides a general baseclass for column based - Element types. Through the use of utility class interfaces - data may be supplied and stored in a range of formats. - - Data is assumed to be in a columnar data format with N - observations and at least D columns, where D is the number - of dimensions. Data supplied in one of the native formats - will be retained. Alternatively the columns maybe supplied - as a tuple or the rows as a list of tuples. 
If the data is - purely numeric the data will automatically be converted to - a numpy array, otherwise it will fall back to the specified - data_type. - - Currently either an NdElement or a pandas DataFrame are - supported as storage formats for heterogeneous data. An - NdElement is a HoloViews wrapper around dictionary objects, - which maps between the key dimensions and the value dimensions. - - The Columns class also provides various methods to transform - the data in various ways and allows indexing and selecting - along all dimensions. + Columns provides a general baseclass for column based Element types + that supports a range of data formats. + + Currently numpy arrays are supported for data with a uniform + type. For storage of columns with heterogenous types, either a + dictionary format or a pandas DataFrame may be used for storage. + + The Columns class supports various methods offering a consistent way + of working with the stored data regardless of the storage format + used. These operations include indexing, selection and various ways + of aggregating or collapsing the data with a supplied function. """ - datatype = param.List(['array', 'dictionary', 'dataframe' ], doc=""" - Defines the datatypes to be used for storage, the object will - attempt to use each data interface in turn falling back to the - each consecutive type in the list if the data type is not - understood.""") + datatype = param.List(['array', 'dictionary', 'dataframe' ], + doc=""" A priority list of the data types to be used for storage + on the .data attribute. If the input supplied to the element + constructor cannot be put into the requested format, the next + format listed will be used until a suitable format is found (or + the data fails to be understood).""") def __init__(self, data, **kwargs): data, params, interface = DataColumns.initialize(type(self), data, kwargs) @@ -68,16 +59,15 @@ def __init__(self, data, **kwargs): def _validate_data(self, data): """ Method that is often overridden in the implementation of - specific Elements for validating and transforming the input - data format. - """ + specific Elements for validating and transforming the input data + format.""" return self.interface.validate_data(self, data) def __setstate__(self, state): """ - Restores OrderedDict based Columns objects, converting - them to the up-to-date NdElement format. + Restores OrderedDict based Columns objects, converting them to + the up-to-date NdElement format. """ self.__dict__ = state if isinstance(self.data, OrderedDict): @@ -88,9 +78,8 @@ def __setstate__(self, state): def closest(self, coords): """ - Given single or multiple samples along the first - key dimension will return the closest actual sample - coordinates. + Given single or multiple samples along the first key dimension + will return the closest actual sample coordinates. """ if self.ndims > 1: NotImplementedError("Closest method currently only " @@ -104,8 +93,7 @@ def closest(self, coords): def sort(self, by=[]): """ - Sorts the data by the values along the supplied - dimensions. + Sorts the data by the values along the supplied dimensions. """ if not by: by = self.kdims sorted_columns = self.interface.sort(self, by) @@ -114,9 +102,9 @@ def sort(self, by=[]): def range(self, dim, data_range=True): """ - Computes the range of values along a supplied - dimension, taking into account the range and - soft_range defined on the Dimension object. 
+ Computes the range of values along a supplied dimension, taking + into account the range and soft_range defined on the Dimension + object. """ dim = self.get_dimension(dim) if dim.range != (None, None): @@ -138,10 +126,10 @@ def range(self, dim, data_range=True): def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): """ - Create a new object with an additional key dimensions. - Requires the dimension name or object, the desired position - in the key dimensions and a key value scalar or sequence of - the same length as the existing keys. + Create a new object with an additional key dimensions. Requires + the dimension name or object, the desired position in the key + dimensions and a key value scalar or sequence of the same length + as the existing keys. """ if isinstance(dimension, str): dimension = Dimension(dimension) @@ -158,14 +146,13 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): def select(self, selection_specs=None, **selection): """ - Allows selecting data by the slices, sets and scalar - values along a particular dimension. The indices - should be supplied as keywords mapping between - the selected dimension and value. Additionally - selection_specs (taking the form of a list of - type.group.label strings, types or functions) may - be supplied, which will ensure the selection is - only applied if the specs match the selected object. + Allows selecting data by the slices, sets and scalar values + along a particular dimension. The indices should be supplied as + keywords mapping between the selected dimension and + value. Additionally selection_specs (taking the form of a list + of type.group.label strings, types or functions) may be + supplied, which will ensure the selection is only applied if the + specs match the selected object. """ if selection_specs and not self.matches(selection_specs): return self @@ -179,9 +166,8 @@ def select(self, selection_specs=None, **selection): def reindex(self, kdims=None, vdims=None): """ - Create a new object with a re-ordered set of dimensions. - Allows converting key dimensions to value dimensions - and vice versa. + Create a new object with a re-ordered set of dimensions. Allows + converting key dimensions to value dimensions and vice versa. """ if kdims is None: key_dims = [d for d in self.kdims if d not in vdims] @@ -202,16 +188,16 @@ def __getitem__(self, slices): Allows slicing and selecting values in the Columns object. Supports multiple indexing modes: - (1) Slicing and indexing along the values of each - dimension in the columns object using either - scalars, slices or sets of values. - (2) Supplying the name of a dimension as the first - argument will return the values along that - dimension as a numpy array. - (3) Slicing of all key dimensions and selecting - a single value dimension by name. - (4) A boolean array index matching the length of - the Columns object. + (1) Slicing and indexing along the values of each dimension + in the columns object using either scalars, slices or + sets of values. + (2) Supplying the name of a dimension as the first argument + will return the values along that dimension as a numpy + array. + (3) Slicing of all key dimensions and selecting a single + value dimension by name. + (4) A boolean array index matching the length of the Columns + object. 
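The four indexing modes enumerated above might look as follows in practice (an illustrative sketch, not part of the patch; the column names are assumed):

    import numpy as np
    import holoviews as hv

    table = hv.Table(np.random.rand(10, 2), kdims=['x'], vdims=['y'])
    window = table[0.2:0.8]            # (1) slice along the values of the key dimension
    xvals  = table['x']                # (2) dimension name returns the values as an array
    ysub   = table[0.2:0.8, 'y']       # (3) slice the key dimensions, select one value dimension
    masked = table[table['y'] > 0.5]   # (4) boolean array index of matching length
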
""" if slices is (): return self if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b': @@ -240,18 +226,18 @@ def __getitem__(self, slices): def sample(self, samples=[]): """ Allows sampling of Columns as an iterator of coordinates - matching the key dimensions, returning a new object - containing just the selected samples. + matching the key dimensions, returning a new object containing + just the selected samples. """ return self.clone(self.interface.sample(self, samples)) def reduce(self, dimensions=[], function=None, **reduce_map): """ - Allows reducing the values along one or more key dimension - with the supplied function. The dimensions may be supplied - as a list and a function to apply or a mapping between the - dimensions and functions to apply along each dimension. + Allows reducing the values along one or more key dimension with + the supplied function. The dimensions may be supplied as a list + and a function to apply or a mapping between the dimensions and + functions to apply along each dimension. """ reduce_dims, reduce_map = self._reduce_map(dimensions, function, reduce_map) reduced = self @@ -267,8 +253,8 @@ def reduce(self, dimensions=[], function=None, **reduce_map): def aggregate(self, dimensions=[], function=None): """ - Aggregates over the supplied key dimensions with the - defined function. + Aggregates over the supplied key dimensions with the defined + function. """ if not isinstance(dimensions, list): dimensions = [dimensions] if not dimensions: dimensions = self.kdims @@ -308,9 +294,9 @@ def collapse_data(cls, data, function=None, kdims=None, **kwargs): @classmethod def concat(cls, columns_objs): """ - Concatenates a list of Columns objects. If data types - don't match all types will be converted to that of - the first object before concatenation. + Concatenates a list of Columns objects. If data types don't + match all types will be converted to that of the first object + before concatenation. """ columns = columns_objs[0] if len({col.interface for col in columns_objs}) > 1: @@ -338,9 +324,8 @@ def shape(self): def dimension_values(self, dim, unique=False): """ - Returns the values along a particular - dimension. If unique values are requested - will return only unique values. + Returns the values along a particular dimension. If unique + values are requested will return only unique values. """ dim = self.get_dimension(dim).name dim_vals = self.interface.values(self, dim) @@ -352,9 +337,8 @@ def dimension_values(self, dim, unique=False): def dframe(self, as_table=False): """ - Returns the data in the form of a DataFrame, - if as_table is requested the data will be - wrapped in a Table object. + Returns the data in the form of a DataFrame, if as_table is + requested the data will be wrapped in a Table object. 
""" return self.interface.dframe(self, as_table) From 523bd77323117c89bcc0ea548ea19e01ff6f24c8 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 21:08:20 +0000 Subject: [PATCH 184/212] Minor change to Columns constructor --- holoviews/core/data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index c1b07bbe28..cdc780fa12 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -50,9 +50,8 @@ class Columns(Element): the data fails to be understood).""") def __init__(self, data, **kwargs): - data, params, interface = DataColumns.initialize(type(self), data, kwargs) + data, params, self.interface = DataColumns.initialize(type(self), data, kwargs) super(Columns, self).__init__(data, **params) - self.interface = interface self.data = self._validate_data(self.data) From 75226d2542f440b71f14212a72462a664079ab17 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 22:03:17 +0000 Subject: [PATCH 185/212] Simplified DataColumns.initialize and made signature more explicit --- holoviews/core/data.py | 47 ++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index cdc780fa12..6ecaba4ff3 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -50,8 +50,12 @@ class Columns(Element): the data fails to be understood).""") def __init__(self, data, **kwargs): - data, params, self.interface = DataColumns.initialize(type(self), data, kwargs) - super(Columns, self).__init__(data, **params) + initialized = DataColumns.initialize(type(self), data, + kwargs.get('kdims'), + kwargs.get('vdims'), + datatype=kwargs.get('datatype')) + (data, kdims, vdims, self.interface) = initialized + super(Columns, self).__init__(data, **dict(kwargs, kdims=kdims, vdims=vdims)) self.data = self._validate_data(self.data) @@ -349,14 +353,12 @@ class DataColumns(param.Parameterized): interfaces = {} @classmethod - def initialize(cls, eltype, data, kwargs): + def initialize(cls, eltype, data, kdims, vdims, datatype=None): # Process params and dimensions - params = {} - kdims, vdims = None, None if isinstance(data, Element): - params.update(util.get_param_values(data)) - params.update(kwargs) - kdims, vdims = params.get('kdims'), params.get('vdims') + pvals = util.get_param_values(data) + kdims = pvals.get('kdims', kdims) + vdims = pvals.get('vdims', vdims) # Process Element data if isinstance(data, NdElement): @@ -368,36 +370,27 @@ def initialize(cls, eltype, data, kwargs): data = tuple(data.dimension_values(d) for d in data.dimensions()) # Set interface priority order - priorities = kwargs.get('datatype', eltype.datatype) - prioritized = [cls.interfaces[p] for p in priorities] + if datatype is None: + datatype = eltype.datatype + prioritized = [cls.interfaces[p] for p in datatype] # Prioritize interfaces which have matching types data_type = type(data) - head = [intfc for intfc in prioritized - if data_type in intfc.types] + head = [intfc for intfc in prioritized if data_type in intfc.types] # Iterate over interfaces until one that can interpret # the input is found - selected_interface = None for interface in head + prioritized: try: - data, new_kdims, new_vdims = interface.reshape(eltype, data, kdims, vdims) + (data, kdims, vdims) = interface.reshape(eltype, data, kdims, vdims) + break except: pass - else: - selected_interface = interface - break - - if selected_interface is None: - raise ValueError("None of the available 
data backends could " - "process the data, ensure it is in a supported " - "format") + else: + raise ValueError("None of the available storage backends " + "were able to support the supplied data format.") - # Combine input params with inferred - # parameters and dimensions - params['kdims'] = new_kdims - params['vdims'] = new_vdims - return data, params, selected_interface + return data, kdims, vdims, interface @classmethod From 8e5342d82b717ce374080459abe82ce4cd66fac5 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Nov 2015 22:13:00 +0000 Subject: [PATCH 186/212] Removed Columns.collapse_data using the interface directly instead --- holoviews/core/data.py | 18 +----------------- holoviews/core/spaces.py | 7 ++++++- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 6ecaba4ff3..fa0c080b84 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -58,7 +58,6 @@ def __init__(self, data, **kwargs): super(Columns, self).__init__(data, **dict(kwargs, kdims=kdims, vdims=vdims)) self.data = self._validate_data(self.data) - def _validate_data(self, data): """ Method that is often overridden in the implementation of @@ -279,21 +278,6 @@ def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, **kwar return self.interface.groupby(self, dimensions, container_type, group_type, **kwargs) - @classmethod - def collapse_data(cls, data, function=None, kdims=None, **kwargs): - """ - Class method utility function to concatenate the supplied data - and apply a groupby operation along the supplied key dimensions - then aggregates across the groups with the supplied function. - """ - if isinstance(data[0], NdElement): - return data[0].collapse_data(data, function, kdims, **kwargs) - elif isinstance(data[0], np.ndarray): - return ArrayColumns.collapse_data(data, function, kdims, **kwargs) - elif util.is_dataframe(data[0]): - return DFColumns.collapse_data(data, function, kdims, **kwargs) - - @classmethod def concat(cls, columns_objs): """ @@ -896,7 +880,7 @@ def reduce(cls, columns, reduce_dims, function): kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] if len(kdims): reindexed = columns.reindex(kdims) - reduced = reindexed.collapse_data([reindexed.data], function, kdims) + reduced = cls.collapse_data([reindexed.data], function, kdims) else: if isinstance(function, np.ufunc): reduced = function.reduce(columns.data, axis=0) diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index e41857e352..1ea2df40b4 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -207,7 +207,12 @@ def collapse(self, dimensions=None, function=None, **kwargs): if isinstance(function, MapOperation): collapsed[key] = function(group, **kwargs) else: - data = group.type.collapse_data([el.data for el in group], function, group.last.kdims, **kwargs) + group_data = [el.data for el in group] + args = (group_data, function, group.last.kdims) + if hasattr(group.last, 'interface'): + data = group.last.interface.collapse_data(*args, **kwargs) + else: + data = group.type.collapse_data(*args, **kwargs) collapsed[key] = group.last.clone(data) return collapsed if self.ndims > 1 else collapsed.last From a952a163cf543a413203d3d78cffbc97acb0d24f Mon Sep 17 00:00:00 2001 From: jlstevens Date: Sun, 8 Nov 2015 22:21:03 +0000 Subject: [PATCH 187/212] Avoiding duplication when matching interface prioritized --- holoviews/core/data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff 
--git a/holoviews/core/data.py b/holoviews/core/data.py index fa0c080b84..39282a427c 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -358,13 +358,13 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): datatype = eltype.datatype prioritized = [cls.interfaces[p] for p in datatype] - # Prioritize interfaces which have matching types - data_type = type(data) - head = [intfc for intfc in prioritized if data_type in intfc.types] + head = [intfc for intfc in prioritized if type(data) in intfc.types] + if head: + # Prioritize interfaces which have matching types + prioritized = head + [el for el in prioritized if el != head[0]] - # Iterate over interfaces until one that can interpret - # the input is found - for interface in head + prioritized: + # Iterate over interfaces until one can interpret the input + for interface in prioritized: try: (data, kdims, vdims) = interface.reshape(eltype, data, kdims, vdims) break From 8f9276c301e42db532577058ff1d17269f401ac3 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Nov 2015 22:20:18 +0000 Subject: [PATCH 188/212] Replaced np.isscalar with simpler instance check in Dimensioned.select --- holoviews/core/dimension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index a3b8f01b2b..bb1dc2d701 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -686,7 +686,7 @@ def select(self, selection_specs=None, **kwargs): else: selection = self - if np.isscalar(selection): + if not isinstance(selection, Dimensioned): return selection elif type(selection) is not type(self) and isinstance(selection, Dimensioned): # Apply the selection on the selected object of a different type From 6b7c9862ed729fbbc6b8765b9aa67785883944e1 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Nov 2015 23:49:19 +0000 Subject: [PATCH 189/212] Refactored Columns._validate data into interface --- holoviews/core/data.py | 40 +++++++++++-------------------- holoviews/element/chart.py | 5 ++-- holoviews/interface/seaborn.py | 9 +++---- holoviews/plotting/mpl/seaborn.py | 2 +- 4 files changed, 22 insertions(+), 34 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 39282a427c..a31311fbc5 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -56,14 +56,7 @@ def __init__(self, data, **kwargs): datatype=kwargs.get('datatype')) (data, kdims, vdims, self.interface) = initialized super(Columns, self).__init__(data, **dict(kwargs, kdims=kdims, vdims=vdims)) - self.data = self._validate_data(self.data) - - def _validate_data(self, data): - """ - Method that is often overridden in the implementation of - specific Elements for validating and transforming the input data - format.""" - return self.interface.validate_data(self, data) + self.interface.validate(self) def __setstate__(self, state): @@ -322,12 +315,13 @@ def dimension_values(self, dim, unique=False): return dim_vals - def dframe(self, as_table=False): + def dframe(self, dimensions=None): """ - Returns the data in the form of a DataFrame, if as_table is - requested the data will be wrapped in a Table object. + Returns the data in the form of a DataFrame. 
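The reworked dframe signature can be exercised roughly as follows (a hypothetical sketch, assuming numeric, array-backed data):

    import numpy as np
    import holoviews as hv

    scatter = hv.Scatter(np.random.rand(10, 2))
    full_df = scatter.dframe()       # DataFrame with columns 'x' and 'y'
    x_only  = scatter.dframe(['x'])  # restricted to the requested dimension
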
""" - return self.interface.dframe(self, as_table) + if dimensions: + dimensions = [self.get_dimension(d).name for d in dimensions] + return self.interface.dframe(self, dimensions) @@ -399,15 +393,15 @@ def dframe(cls, columns, as_table=False): def shape(cls, columns): return columns.data.shape - @classmethod def length(cls, columns): return len(columns.data) - @classmethod - def validate_data(cls, columns, data): - return data + def validate(cls, columns): + pass + + @@ -441,10 +435,6 @@ def reshape(cls, eltype, data, kdims, vdims): return data, kdims, vdims - @classmethod - def validate_data(cls, columns, data): - return data - @classmethod def shape(cls, columns): return (len(columns), len(columns.dimensions())) @@ -535,8 +525,8 @@ def reshape(cls, eltype, data, kdims, vdims): @classmethod - def _validate(cls, columns): - if not all(c in data.columns for c in columns.dimensions(label=True)): + def validate(cls, columns): + if not all(c in columns.data.columns for c in columns.dimensions(label=True)): raise ValueError("Supplied dimensions don't match columns " "in the dataframe.") @@ -710,6 +700,8 @@ def reshape(cls, eltype, data, kdims, vdims): if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']: raise ValueError("ArrayColumns interface could not handle input type.") + elif data.ndim == 1: + data = np.column_stack([np.arange(len(data)), data]) if kdims is None: kdims = eltype.kdims @@ -719,10 +711,6 @@ def reshape(cls, eltype, data, kdims, vdims): @classmethod - def validate_data(cls, columns, data): - if data.ndim == 1: - data = np.column_stack([np.arange(len(data)), data]) - return data @classmethod diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 0ec6efa52e..cf9bd0c739 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -75,12 +75,11 @@ class ErrorBars(Chart): bounds=(2,2), constant=True) - def _validate_data(self, data): + def __init__(self, data, **params): + super(ErrorBars, self).__init__(data, **params) if self.shape[1] == 3: data = self.interface.add_dimension(self, self.vdims[1].name, 3, self.dimension_values(2)) - return super(ErrorBars, self)._validate_data(data) - def range(self, dim, data_range=True): drange = super(ErrorBars, self).range(dim, data_range) diff --git a/holoviews/interface/seaborn.py b/holoviews/interface/seaborn.py index 43046acf54..02bef64cdc 100644 --- a/holoviews/interface/seaborn.py +++ b/holoviews/interface/seaborn.py @@ -99,6 +99,10 @@ class Distribution(Chart): vdims = param.List(default=[Dimension('Frequency')]) + def __init__(self, data, **params): + super(Distribution, self).__init__(data, **params) + self.data = self.interface.reindex(self, [0], []) + def range(self, dimension): dim_idx = self.get_dimension_index(dimension) if dim_idx == 1: @@ -110,13 +114,10 @@ def range(self, dimension): else: return super(Distribution, self).dimension_values(dimension) - def _validate_data(self, data): - return data - def dimension_values(self, dimension): dim_idx = self.get_dimension_index(dimension) if dim_idx == 0: - return self.data + return self.interface.values(self, 0) elif dim_idx == 1: return [] else: diff --git a/holoviews/plotting/mpl/seaborn.py b/holoviews/plotting/mpl/seaborn.py index 538bf5632e..8759c420a6 100644 --- a/holoviews/plotting/mpl/seaborn.py +++ b/holoviews/plotting/mpl/seaborn.py @@ -177,7 +177,7 @@ def initialize_plot(self, ranges=None): def _update_plot(self, axis, view): label = view.label if self.overlaid == 1 else '' - sns.distplot(view.data, ax=axis, 
label=label, **self.style) + sns.distplot(view.dimension_values(0), ax=axis, label=label, **self.style) From 74133eca6a7fdbc589e606d5fc5cabdf4e34d110 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 8 Nov 2015 23:50:28 +0000 Subject: [PATCH 190/212] Removed as_table argument and made Table data type conversion easy --- holoviews/core/data.py | 34 ++++++++++---------- holoviews/core/element.py | 67 ++++++++++++++------------------------- 2 files changed, 41 insertions(+), 60 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index a31311fbc5..aa7a2b0ddb 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -281,11 +281,11 @@ def concat(cls, columns_objs): columns = columns_objs[0] if len({col.interface for col in columns_objs}) > 1: if isinstance(columns.data, NdElement): - columns_objs = [co.mapping(as_table=True) for co in columns_objs] + columns_objs = [co.table('dictionary') for co in columns_objs] elif isinstance(columns.data, np.ndarray): - columns_objs = [co.array(as_table=True) for co in columns_objs] + columns_objs = [co.table('array') for co in columns_objs] elif util.is_dataframe(data[0]): - columns_objs = [co.dframe(as_table=True) for co in columns_objs] + columns_objs = [co.table('dataframe') for co in columns_objs] return columns.clone(columns.interface.concat(columns_objs)) @@ -383,11 +383,13 @@ def range(cls, columns, dimension): column.sort() return column[0], column[-1] - @classmethod - def dframe(cls, columns, as_table=False): - return Element.dframe(columns, as_table) + def array(cls, columns, dimensions): + return Element.dframe(columns, dimensions) + @classmethod + def dframe(cls, columns, dimensions): + return Element.dframe(columns, dimensions) @classmethod def shape(cls, columns): @@ -672,11 +674,11 @@ def add_dimension(cls, columns, dimension, dim_pos, values): @classmethod - def dframe(cls, columns, as_table=False): - if as_table: - from ..element import Table - return Table(columns) - return columns.data + def dframe(cls, columns, dimensions): + if dimensions: + return columns.reindex(columns=dimensions) + else: + return columns.data @@ -711,6 +713,11 @@ def reshape(cls, eltype, data, kdims, vdims): @classmethod + def array(cls, columns, dimensions): + if dimensions: + return Element.dframe(columns, dimensions) + else: + return columns.data @classmethod @@ -724,11 +731,6 @@ def concat(cls, columns_objs): return np.concatenate([col.data for col in columns_objs]) - @classmethod - def dframe(cls, columns, as_table=False): - return Element.dframe(columns, as_table) - - @classmethod def sort(cls, columns, by=[]): data = columns.data diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 54b45df145..8aae4c1c16 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -133,49 +133,48 @@ def _reduce_map(self, dimensions, function, reduce_map): return dims, grouped - def table(self): + def table(self, datatype=None): + """ + Converts the data Element to a Table, optionally may + specify a supported data type. The default data types + are 'numpy' (for homogeneous data), 'dataframe', and + 'dictionary'. 
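For example, the conversion described here might be used as follows (illustrative only):

    import numpy as np
    import holoviews as hv

    curve = hv.Curve(np.random.rand(10, 2))
    tbl_df   = curve.table('dataframe')   # Table backed by a pandas DataFrame
    tbl_dict = curve.table('dictionary')  # Table backed by the dictionary format
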
+ """ + if datatype and not isinstance(datatype, list): + datatype = [datatype] from ..element import Table - return Table(self) + return Table(self, **(dict(datatype=datatype) if datatype else {})) - def dframe(self, as_table=False): + def dframe(self, dimensions=None): import pandas as pd - column_names = self.dimensions(label=True) + column_names = dimensions if dimensions else self.dimensions(label=True) dim_vals = OrderedDict([(dim, self[dim]) for dim in column_names]) - data = pd.DataFrame(dim_vals) - if as_table: - from ..element import Table - return Table(data, **get_param_values(self)) - return data + return pd.DataFrame(dim_vals) - def mapping(self, as_table=False, **kwargs): - """ - This method transforms any ViewableElement type into a Table - as long as it implements a dimension_values method. - """ + def mapping(self, kdims=None, vdims=None, **kwargs): length = len(self) - if self.kdims: + if not kdims: kdims = self.kdims + if kdims: keys = zip(*[self.dimension_values(dim.name) for dim in self.kdims]) else: keys = [()]*length - if self.vdims: + if not vdims: vdims = self.vdims + if vdims: values = zip(*[self.dimension_values(dim.name) - for dim in self.vdims]) + for dim in vdims]) else: values = [()]*length data = zip(keys, values) - mapping = NdElement(data, **dict(get_param_values(self), **kwargs)) - if as_table: - from ..element import Table - return Table(mapping) - return mapping + overrides = dict(kdims=kdims, vdims=vdims, **kwargs) + return NdElement(data, **dict(get_param_values(self), overrides)) - def array(self, as_table=False, dimensions=[]): + def array(self, dimensions=[]): if dimensions: dims = [self.get_dimension(d) for d in dimensions] else: @@ -187,16 +186,7 @@ def array(self, as_table=False, dimensions=[]): types.append(column.dtype.kind) if len(set(types)) > 1: columns = [c.astype('object') for c in columns] - array = np.column_stack(columns) - if as_table: - from ..element import Table - if array.dtype.kind in ['S', 'O', 'U']: - raise ValueError("%s data contains non-numeric type, " - "could not convert to array based " - "Element" % type(self).__name__) - return Table(array, **get_param_values(self)) - else: - return array + return np.column_stack(columns) @@ -550,17 +540,6 @@ def values(self): return [v[0] for v in values] return list(values) - def dframe(self, as_table=False): - try: - import pandas - except ImportError: - raise Exception("Cannot build a DataFrame without the pandas library.") - columns = [d.name for d in self.dimensions()] - df = pandas.DataFrame((k+v for (k, v) in self.data.items()), columns=columns) - if as_table: - from ..element import Table - return Table(df, **get_param_values(self)) - return df class Element3D(Element2D): From db437d38e3255c039548608f495497325a1bb842 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 01:22:51 +0000 Subject: [PATCH 191/212] Minor fixes to Columns constructor when passing Elements --- holoviews/core/data.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index aa7a2b0ddb..597aef3241 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -335,8 +335,8 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): # Process params and dimensions if isinstance(data, Element): pvals = util.get_param_values(data) - kdims = pvals.get('kdims', kdims) - vdims = pvals.get('vdims', vdims) + kdims = pvals.get('kdims') if kdims is None else kdims + vdims = pvals.get('vdims') if vdims is 
None else vdims # Process Element data if isinstance(data, NdElement): @@ -344,8 +344,7 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): elif isinstance(data, Columns): data = data.data elif isinstance(data, Element): - dimensions = data.dimensions(label=True) - data = tuple(data.dimension_values(d) for d in data.dimensions()) + data = {d: data.dimension_values(d) for d in kdims+vdims} # Set interface priority order if datatype is None: @@ -420,6 +419,10 @@ def reshape(cls, eltype, data, kdims, vdims): kdims = kdims if kdims else element_params['kdims'].default vdims = vdims if vdims else element_params['vdims'].default + if isinstance(data, dict) and all(d in data for d in kdims+vdims): + data = tuple(data.get(d.name if isinstance(d, Dimension) else d) + for d in dimensions) + if not isinstance(data, (NdElement, dict)): # If ndim > 2 data is assumed to be a mapping if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): @@ -688,7 +691,13 @@ class ArrayColumns(DataColumns): @classmethod def reshape(cls, eltype, data, kdims, vdims): - if isinstance(data, tuple): + if isinstance(data, dict): + dimensions = kdims + vdims + if all(d in data for d in dimensions): + columns = [data.get(d.name if isinstance(d, Dimension) else d) + for d in dimensions] + data = np.column_stack(columns) + elif isinstance(data, tuple): try: data = np.column_stack(data) except: From 6f4358a7760118074fd025d6f9efe6ab84e6f315 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 02:58:07 +0000 Subject: [PATCH 192/212] Fix for ErrorBars constructor --- holoviews/element/chart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index cf9bd0c739..38f4ce3662 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -78,8 +78,8 @@ class ErrorBars(Chart): def __init__(self, data, **params): super(ErrorBars, self).__init__(data, **params) if self.shape[1] == 3: - data = self.interface.add_dimension(self, self.vdims[1].name, - 3, self.dimension_values(2)) + self.data = self.interface.add_dimension(self, self.vdims[1].name, + 3, self.dimension_values(2)) def range(self, dim, data_range=True): drange = super(ErrorBars, self).range(dim, data_range) From 48b84c0dbb9c2f2d2ce68d79f99cda2d6764301f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 02:58:23 +0000 Subject: [PATCH 193/212] Minor fix to gridmatrix operation --- holoviews/operation/element.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index acbae4e472..c10a0a783d 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -619,12 +619,11 @@ def _process(self, p, element, ranges={}): # Creates a unified Columns.data attribute # to draw the data from if isinstance(element.data, np.ndarray): - if Columns.data_type == 'mapping': - el_data = element.mapping() + if 'dataframe' in Columns.datatype: + el_data = element.table('dataframe') else: - el_data = element.dframe() - else: - el_data = element.data + el_data = element.table('dictionary') + el_data = element.data # Get dimensions to plot against each other dims = [d for d in element.dimensions() From 5e46ada5a6bd698cc76e8fe2de6c8764808b69c5 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 02:58:37 +0000 Subject: [PATCH 194/212] Minor fix for DFrame Element --- holoviews/interface/pandas.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index a73267f14c..3439f75e13 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -20,6 +20,7 @@ from ..core import ViewableElement, NdMapping, Columns, NdOverlay,\ NdLayout, GridSpace, NdElement, HoloMap +from ..core.data import DFColumns from ..element import (Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface) @@ -85,6 +86,7 @@ def __init__(self, data, dimensions={}, kdims=None, clone_override=False, dims[list(data.columns).index(name)] = dim ViewableElement.__init__(self, data, kdims=dims, **params) + self.interface = DFColumns self.data.columns = self.dimensions('key', True) From 6859f80be9f1c387c8458ee9299d794c7d8790fd Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 03:00:41 +0000 Subject: [PATCH 195/212] Minor fix to Element.mapping --- holoviews/core/element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index 8aae4c1c16..d68a495e77 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -171,7 +171,7 @@ def mapping(self, kdims=None, vdims=None, **kwargs): data = zip(keys, values) overrides = dict(kdims=kdims, vdims=vdims, **kwargs) - return NdElement(data, **dict(get_param_values(self), overrides)) + return NdElement(data, **dict(get_param_values(self), **overrides)) def array(self, dimensions=[]): From 8b670567abc1b894c5ae903c5147bde2bc108f9e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 13:10:21 +0000 Subject: [PATCH 196/212] Fix to HoloMap.hist method --- holoviews/core/spaces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index 1ea2df40b4..e2ef429f4f 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -304,7 +304,7 @@ def hist(self, num_bins=20, bin_range=None, adjoin=True, individually=True, **kw for i, hist in enumerate(hists): histmaps[i][k] = hist else: - histmap[k] = hists + histmaps[0][k] = hists if adjoin: layout = self From d515cf0843044390d4e699c101359a11aa6317e7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 13:35:09 +0000 Subject: [PATCH 197/212] Reimplemented extents setting on Chart types --- holoviews/element/chart.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 38f4ce3662..08386550ff 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -29,7 +29,17 @@ class Chart(Columns, Element2D): The value dimensions of the Chart, usually corresponding to a number of dependent variables.""") - _null_value = np.array([[], []]).T # For when data is None + def __getitem__(self, index): + sliced = super(Chart, self).__getitem__(index) + if not isinstance(index, tuple): index = (index,) + ndims = len(self.extents)/2 + lower_bounds, upper_bounds = [None]*ndims, [None]*ndims + for i, slc in enumerate(index[:ndims]): + if isinstance(slc, slice): + lower_bounds[i] = lbound if slc.start is None else slc.start + upper_bounds[i] = ubound if slc.stop is None else slc.stop + sliced.extents = tuple(lower_bounds+upper_bounds) + return sliced class Scatter(Chart): From 7880598fae8ca6a670a6b47be6984d06fac1919a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 13:39:09 +0000 Subject: [PATCH 198/212] Minor fix for Chart slicing extent setting --- 
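For context, the behaviour introduced in the previous commit and touched up here can be sketched as follows (hypothetical example, not part of the patch):

    import numpy as np
    import holoviews as hv

    curve = hv.Curve((np.arange(10), np.random.rand(10)))
    sliced = curve[2:8]
    # The slice bounds are recorded as extents, e.g. (2, None, 8, None),
    # so the plotted x-range reflects the requested window.
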
holoviews/element/chart.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 08386550ff..f18badb80b 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -36,6 +36,8 @@ def __getitem__(self, index): lower_bounds, upper_bounds = [None]*ndims, [None]*ndims for i, slc in enumerate(index[:ndims]): if isinstance(slc, slice): + lbound = self.extents[i] + ubound = self.extents[ndims:][i] lower_bounds[i] = lbound if slc.start is None else slc.start upper_bounds[i] = ubound if slc.stop is None else slc.stop sliced.extents = tuple(lower_bounds+upper_bounds) From d65da6720616642042c2e6ebecbdac01e0a3374d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 13:43:53 +0000 Subject: [PATCH 199/212] Fixed issue with scalar indexing in Charts --- holoviews/element/chart.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index f18badb80b..7d100390e6 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -31,6 +31,9 @@ class Chart(Columns, Element2D): def __getitem__(self, index): sliced = super(Chart, self).__getitem__(index) + if not isinstance(sliced, Chart): + return sliced + if not isinstance(index, tuple): index = (index,) ndims = len(self.extents)/2 lower_bounds, upper_bounds = [None]*ndims, [None]*ndims From b989f20478c3e11d2f3b90b089d9a793822d143d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 14:05:07 +0000 Subject: [PATCH 200/212] Fixed missing interface for compatibility with old pickles --- holoviews/core/data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 597aef3241..bfc3a44161 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -69,6 +69,11 @@ def __setstate__(self, state): self.data = NdElement(self.data, kdims=self.kdims, vdims=self.vdims, group=self.group, label=self.label) + self.interface = NdColumns + elif isinstance(self.data, np.ndarray): + self.interface = ArrayColumns + elif util.is_dataframe(self.data): + self.interface = DFColumns def closest(self, coords): From 9bfc2dd1dfb7af09e4240da2a766a21c65fe623c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 14:26:54 +0000 Subject: [PATCH 201/212] Minor fixes to Columns constructor --- holoviews/core/data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index bfc3a44161..c28759b395 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -349,7 +349,7 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): elif isinstance(data, Columns): data = data.data elif isinstance(data, Element): - data = {d: data.dimension_values(d) for d in kdims+vdims} + data = tuple(data.dimension_values(d) for d in kdims+vdims) # Set interface priority order if datatype is None: @@ -526,6 +526,10 @@ def reshape(cls, eltype, data, kdims, vdims): vdims = vdims if vdims else vdim_param.default columns = [d.name if isinstance(d, Dimension) else d for d in kdims+vdims] + + if isinstance(data, dict): + data = OrderedDict([(d.name if isinstance(d, Dimension) else d, v) + for d, v in data.items()]) if isinstance(data, tuple): data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) From 601f026bbf31e6a54d2c1402524416e835ac2a1c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 15:57:36 +0000 Subject: [PATCH 202/212] Fixed Python3 issues in 
Columns interface --- holoviews/core/data.py | 3 +++ holoviews/core/element.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index c28759b395..7034bd5d35 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -3,6 +3,7 @@ data backends. """ +import sys from distutils.version import LooseVersion from collections import defaultdict, Iterable from itertools import groupby @@ -429,6 +430,8 @@ def reshape(cls, eltype, data, kdims, vdims): for d in dimensions) if not isinstance(data, (NdElement, dict)): + if sys.version_info.major >= 3: + data = list(data) # If ndim > 2 data is assumed to be a mapping if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): pass diff --git a/holoviews/core/element.py b/holoviews/core/element.py index d68a495e77..ee50fe701c 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -516,7 +516,7 @@ def aggregate(self, dimensions, function): for k, group in grouped.data.items(): reduced = group.reduce(group, group.kdims, function) if not np.isscalar(reduced): - reduced = reduced.values()[0] + reduced = list(reduced.values())[0] else: reduced = (reduced,) rows.append((k, reduced)) From 3265a8e73e58683c18c58fa8eb5d58c021560674 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 16:08:37 +0000 Subject: [PATCH 203/212] Fixed HeatMap dimension_values bug --- holoviews/element/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index a862f551cc..43b2154d8a 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -417,7 +417,7 @@ def dimension_values(self, dim, unique=False): if unique: return self._data.dimension_values(dim) else: - return np.flipud(self.data.T).flatten() + return np.rot90(self.data, 3).flatten() else: return super(HeatMap, self).dimension_values(dim) From 6a526bec1c3b17a6436cef7326e8e9caec686548 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 16:26:33 +0000 Subject: [PATCH 204/212] Fixed Python3 issue in Chart.__getitem__ --- holoviews/element/chart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index 7d100390e6..666c62a46f 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -35,7 +35,7 @@ def __getitem__(self, index): return sliced if not isinstance(index, tuple): index = (index,) - ndims = len(self.extents)/2 + ndims = len(self.extents)//2 lower_bounds, upper_bounds = [None]*ndims, [None]*ndims for i, slc in enumerate(index[:ndims]): if isinstance(slc, slice): From 2ebf686b081d78a8a802bedd912e3e26eb7ce9ec Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 16:44:38 +0000 Subject: [PATCH 205/212] Made Columns Python3 iterator handling more robust --- holoviews/core/data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 7034bd5d35..61ca3b755d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -351,6 +351,9 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): data = data.data elif isinstance(data, Element): data = tuple(data.dimension_values(d) for d in kdims+vdims) + elif (not (util.is_dataframe(data) or isinstance(data, (tuple, dict, list))) + and sys.version_info.major >= 3): + data = list(data) # Set interface priority order if datatype is None: @@ -410,8 +413,6 @@ 
def validate(cls, columns): - - class NdColumns(DataColumns): types = (NdElement,) @@ -430,8 +431,6 @@ def reshape(cls, eltype, data, kdims, vdims): for d in dimensions) if not isinstance(data, (NdElement, dict)): - if sys.version_info.major >= 3: - data = list(data) # If ndim > 2 data is assumed to be a mapping if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): pass From b9ddd324b74819dd4bb6952dcea7d4b27b148198 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 17:00:42 +0000 Subject: [PATCH 206/212] Minor Python3 fix for Raster.closest --- holoviews/element/raster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 43b2154d8a..1ccd5d6046 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -512,6 +512,7 @@ def closest(self, coords=[], **kwargs): else: getter = [0, 1] getter = itemgetter(*sorted(getter)) + coords = list(coords) if len(coords) == 1: coords = coords[0] if isinstance(coords, tuple): From ca76526a17bcb8292e5816484df8c64f2873139a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 17:03:34 +0000 Subject: [PATCH 207/212] Minor fix for Sampling_Data notebook --- doc/Tutorials/Sampling_Data.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/Tutorials/Sampling_Data.ipynb b/doc/Tutorials/Sampling_Data.ipynb index c95f9646d0..eaf8b312a9 100644 --- a/doc/Tutorials/Sampling_Data.ipynb +++ b/doc/Tutorials/Sampling_Data.ipynb @@ -462,7 +462,7 @@ "extents = (0, 0, 10, 10)\n", "img = hv.Image(np.random.rand(10, 10), bounds=extents)\n", "img_coords = hv.Points(img.table(), extents=extents)\n", - "img + img * img_coords * hv.Points([img.closest((5,5))])(style=dict(color='r')) + img.sample([(5, 5)])" + "img + img * img_coords * hv.Points([img.closest([(5,5)])])(style=dict(color='r')) + img.sample([(5, 5)])" ] }, { From 0bb304fd6b7374c8461c9336287150b39b7a09cc Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 9 Nov 2015 17:14:00 +0000 Subject: [PATCH 208/212] Updated reference_data submodule reference --- doc/reference_data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/reference_data b/doc/reference_data index 88cf88384f..3dccdd4852 160000 --- a/doc/reference_data +++ b/doc/reference_data @@ -1 +1 @@ -Subproject commit 88cf88384fc7594b853d7513f9d441dd5b19d9dc +Subproject commit 3dccdd4852475d76de3a162b534174ae195197c2 From 30efd80d758d3c6b21727282fbcf80a0b05e1b2c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Nov 2015 17:52:23 +0000 Subject: [PATCH 209/212] Minor fix to HeatMap.dense_keys --- holoviews/element/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 1ccd5d6046..8dc35cff26 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -402,7 +402,7 @@ def __getitem__(self, coords): def dense_keys(self): d1keys = np.unique(self._data.dimension_values(0)) d2keys = np.unique(self._data.dimension_values(1)) - return zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys]) + return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys])) def dimension_values(self, dim, unique=False): From 245b6e24d3c61a27278d44047f405b89a59ffae9 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Mon, 9 Nov 2015 18:54:44 +0000 Subject: [PATCH 210/212] Minor formatting fixes --- holoviews/core/data.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/holoviews/core/data.py b/holoviews/core/data.py index 61ca3b755d..805d0c9ed8 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1,6 +1,6 @@ """ -The data module provides utility classes to interface with various -data backends. +The data module provides utility classes to interface with various data +backends. """ import sys @@ -578,10 +578,9 @@ def groupby(cls, columns, dimensions, container_type, group_type, **kwargs): @classmethod def reduce(cls, columns, reduce_dims, function=None): """ - The aggregate function accepts either a list of Dimensions - and a function to apply to find the aggregate across - those Dimensions or a list of dimension/function pairs - to apply one by one. + The aggregate function accepts either a list of Dimensions and a + function to apply to find the aggregate across those Dimensions + or a list of dimension/function pairs to apply one by one. """ kdims = [kdim.name for kdim in columns.kdims if kdim not in reduce_dims] vdims = columns.dimensions('value', True) @@ -776,7 +775,8 @@ def reindex(cls, columns, kdims=None, vdims=None): @classmethod - def groupby(cls, columns, dimensions, container_type=HoloMap, group_type=None, raw=False, **kwargs): + def groupby(cls, columns, dimensions, container_type=HoloMap, + group_type=None, raw=False, **kwargs): data = columns.data # Get dimension objects, labels, indexes and data From 21a210e933526ff8fcf643c1a54ec5da9dff4940 Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 9 Nov 2015 19:31:48 +0000 Subject: [PATCH 211/212] Updated reference_data submodule reference --- doc/reference_data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/reference_data b/doc/reference_data index 3dccdd4852..1655bd8757 160000 --- a/doc/reference_data +++ b/doc/reference_data @@ -1 +1 @@ -Subproject commit 3dccdd4852475d76de3a162b534174ae195197c2 +Subproject commit 1655bd87570cc2ca69f3da18f707b4cdb6dd0061 From 3fc7036c31b02076e3fd069b0fe84ff90e4c198f Mon Sep 17 00:00:00 2001 From: philippjfr Date: Mon, 9 Nov 2015 19:50:23 +0000 Subject: [PATCH 212/212] Updated reference_data submodule reference --- doc/reference_data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/reference_data b/doc/reference_data index 1655bd8757..7e08604134 160000 --- a/doc/reference_data +++ b/doc/reference_data @@ -1 +1 @@ -Subproject commit 1655bd87570cc2ca69f3da18f707b4cdb6dd0061 +Subproject commit 7e08604134959809a88223513ba7e461b875e949
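As a closing illustration of the Columns API this series converges on, grouping and reducing might be combined roughly as follows (a hypothetical sketch, not part of any patch above):

    import numpy as np
    import holoviews as hv

    table = hv.Table([('M', 10, 15), ('M', 16, 18), ('F', 12, 10)],
                     kdims=['Gender', 'Age'], vdims=['Weight'])
    by_gender = table.groupby(['Gender'])                # HoloMap of Tables keyed by Gender
    collapsed = table.reduce(['Age'], function=np.mean)  # collapse the Age dimension with the mean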