From 6236444d575e11bff8987602f9e02a7157b0ebf7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 4 Mar 2016 14:38:09 +0000 Subject: [PATCH 01/36] Added validation method to DictColumns interface --- holoviews/core/data.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 34f8c06a36..a10b030bdd 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1158,6 +1158,17 @@ def reshape(cls, eltype, data, kdims, vdims): return data, kdims, vdims + @classmethod + def validate(cls, columns): + dimensions = columns.dimensions(label=True) + not_found = [d for d in dimensions if d not in columns.data] + if not_found: + raise ValueError('Following dimensions not found in data: %s' % not_found) + lengths = [len(columns.data[dim]) for dim in dimensions] + if len({l for l in lengths if l > 1}) > 1: + raise ValueError('Length of columns do not match') + + @classmethod def unpack_scalar(cls, columns, data): """ From f00c306422b1c83d75870e7719fd4efd05caf7a0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 4 Mar 2016 14:39:22 +0000 Subject: [PATCH 02/36] Fix for scalar columns in DictColumns --- holoviews/core/data.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index a10b030bdd..f9765477c8 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -6,7 +6,7 @@ import sys from distutils.version import LooseVersion from collections import OrderedDict -from itertools import compress +from itertools import compress, cycle try: import itertools.izip as zip @@ -1253,12 +1253,14 @@ def groupby(cls, columns, dimensions, container_type, group_type, **kwargs): group_kwargs.update(kwargs) # Find all the keys along supplied dimensions - keys = [tuple(columns.data[d.name][i] for d in dimensions) - for i in range(len(columns))] + key_data = [] + for d in dimensions: + data = columns.data[d.name] + 
key_data.append(cycle([data[0]]) if len(data) == 1 else data) # Iterate over the unique entries applying selection masks grouped_data = [] - for unique_key in util.unique_iterator(keys): + for unique_key in util.unique_iterator(zip(key_data)): mask = cls.select_mask(columns, dict(zip(dimensions, unique_key))) group_data = OrderedDict(((d.name, columns[d.name][mask]) for d in kdims+vdims)) group_data = group_type(group_data, **group_kwargs) From 9589707e1adbf48d6f772ac62c56cb40774bd168 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 7 Mar 2016 21:38:48 +0000 Subject: [PATCH 03/36] Added initial dense Columns interface --- holoviews/core/data.py | 258 +++++++++++++++++++++++++++++++++++++++-- holoviews/core/util.py | 13 +++ 2 files changed, 263 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index f9765477c8..1b3cb3b207 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -3,9 +3,9 @@ backends. """ -import sys +import sys, warnings from distutils.version import LooseVersion -from collections import OrderedDict +from collections import OrderedDict, defaultdict from itertools import compress, cycle try: @@ -27,7 +27,6 @@ from .ndmapping import NdMapping, item_check, sorted_context from .spaces import HoloMap from . 
import util -from .util import wrap_tuple, basestring, unique_array class Columns(Element): @@ -52,6 +51,9 @@ class Columns(Element): format listed will be used until a suitable format is found (or the data fails to be understood).""") + dense = param.Boolean(default=False, doc="""Whether the supplied data + is in a dense format.""") + # In the 1D case the interfaces should not automatically add x-values # to supplied data _1d = False @@ -344,7 +346,7 @@ def dimension_values(self, dim, unique=False): dim = self.get_dimension(dim, strict=True).name dim_vals = self.interface.values(self, dim) if unique: - return unique_array(dim_vals) + return util.unique_array(dim_vals) else: return dim_vals @@ -735,7 +737,7 @@ def reshape(cls, eltype, data, kdims, vdims): (isinstance(data, NdElement) and all(c in data.dimensions() for c in columns))): data = OrderedDict(((d, data[d]) for d in columns)) elif isinstance(data, dict) and not all(d in data for d in columns): - column_data = zip(*((wrap_tuple(k)+wrap_tuple(v)) + column_data = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v)) for k, v in data.items())) data = OrderedDict(((c, col) for c, col in zip(columns, column_data))) elif isinstance(data, np.ndarray): @@ -914,7 +916,7 @@ def reshape(cls, eltype, data, kdims, vdims): columns = [data[d] for d in dimensions] data = np.column_stack(columns) elif isinstance(data, dict) and not all(d in data for d in dimensions): - columns = zip(*((wrap_tuple(k)+wrap_tuple(v)) + columns = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v)) for k, v in data.items())) data = np.column_stack(columns) elif isinstance(data, tuple): @@ -1143,7 +1145,7 @@ def reshape(cls, eltype, data, kdims, vdims): elif not isinstance(data, dict): data = {k: v for k, v in zip(dimensions, zip(*data))} elif isinstance(data, dict) and not all(d in data for d in dimensions): - dict_data = zip(*((wrap_tuple(k)+wrap_tuple(v)) + dict_data = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v)) for k, v in data.items())) data = {k: 
np.array(v) for k, v in zip(dimensions, dict_data)} @@ -1199,7 +1201,7 @@ def array(cls, columns, dimensions): def add_dimension(cls, columns, dimension, dim_pos, values, vdim): dim = dimension.name if isinstance(dimension, Dimension) else dimension data = list(columns.data.items()) - if isinstance(values, basestring) or not hasattr(values, '__iter__'): + if isinstance(values, util.basestring) or not hasattr(values, '__iter__'): values = np.array([values]*len(columns)) data.insert(dim_pos, (dim, values)) return OrderedDict(data) @@ -1321,9 +1323,249 @@ def aggregate(cls, columns, kdims, function, **kwargs): +class NdArrayColumns(DictColumns): + """ + Interface for simple dictionary-based columns format. The dictionary + keys correspond to the column (i.e dimension) names and the values + are collections representing the values in that column. + """ + + types = (dict, OrderedDict, cyODict) + + datatype = 'ndarray' + + dense = True + + @classmethod + def reshape(cls, eltype, data, kdims, vdims): + if kdims is None: + kdims = eltype.kdims + if vdims is None: + vdims = eltype.vdims + for kdim in kdims: + name = kdim.name if isinstance(kdim, Dimension) else kdim + if not isinstance(data[name], np.ndarray): + data[name] = np.array(data[name]) + if vdims and 'array' not in data: + raise Exception + if len(vdims) == 1 and data['array'].ndim > len(kdims): + data['array'] = data['array'][..., 0] + + return data, kdims, vdims + + + @classmethod + def validate(cls, columns): + if not columns.vdims: + return + shape = columns.data['array'].shape + expected = [] + for kd in columns.kdims: + expected.append(len(columns.data[kd.name])) + if len(columns.vdims) > 1: + expected.append(len(columns.vdims)) + expected = tuple(expected) + if shape != expected: + raise ValueError('Key dimension values and value array ' + 'shapes do not match. 
Expected shape %s, ' + 'actual shape: %s' % (expected, shape)) + + + @classmethod + def dimension_type(cls, columns, dim): + if dim in columns.kdims: + arr = columns.data[dim.name] + elif dim in columns.vdims: + arr = columns.data['array'] + else: + return None + return arr.dtype.type + + + @classmethod + def shape(cls, columns): + return cls.length(columns), len(columns.dimensions()), + + + @classmethod + def length(cls, columns): + return np.product([len(columns.data[d.name]) for d in columns.kdims]) + + + @classmethod + def values(cls, columns, dim, dense=False, flat=True): + if dim in columns.kdims: + if dense: + return columns.data[dim] + prod = util.cartesian_product([columns.data[d.name] for d in columns.kdims]) + idx = columns.get_dimension_index(dim) + return prod[:, idx] + else: + values = columns.data.get('array') + if len(columns.vdims) > 1: + idx = columns.vdims.index(dim) + values = values[..., idx] + return values.flatten() if flat else values + + + @classmethod + def groupby(cls, columns, dim_names, container_type, group_type, **kwargs): + # Get dimensions information + dimensions = [columns.get_dimension(d) for d in dim_names] + kdims = [kdim for kdim in columns.kdims if kdim not in dimensions] + + # Update the kwargs appropriately for Element group types + group_kwargs = {} + group_type = dict if group_type == 'raw' else group_type + if issubclass(group_type, Element): + group_kwargs.update(util.get_param_values(columns)) + group_kwargs['kdims'] = kdims + group_kwargs.update(kwargs) + + # Find all the keys along supplied dimensions + keys = [columns.data[d.name] for d in dimensions] + + # Iterate over the unique entries applying selection masks + grouped_data = [] + for unique_key in util.cartesian_product(keys): + group_data = cls.select(columns, **dict(zip(dim_names, unique_key))) + group_data['array'] = np.squeeze(group_data['array']) + group_data = group_type(group_data, **group_kwargs) + grouped_data.append((tuple(unique_key), group_data)) + 
+ if issubclass(container_type, NdMapping): + with item_check(False), sorted_context(False): + return container_type(grouped_data, kdims=dimensions) + else: + return container_type(grouped_data) + + + @classmethod + def coord_mask(cls, columns, arr, ind): + if isinstance(ind, tuple): + ind = slice(*ind) + if isinstance(ind, np.ndarray): + mask = ind + elif isinstance(ind, slice): + mask = True + if ind.start is not None: + mask &= ind.start <= arr + if ind.stop is not None: + mask &= arr < ind.stop + elif isinstance(ind, (set, list)): + iter_slcs = [] + for ik in k: + iter_slcs.append(arr == ik) + mask = np.logical_or.reduce(iter_slcs) + elif ind is None: + mask = None + else: + index_mask = arr == ind + if columns.ndims == 1 and np.sum(index_mask) == 0: + data_index = np.argmin(np.abs(arr - ind)) + mask = np.zeros(len(columns), dtype=np.bool) + mask[data_index] = True + else: + mask = index_mask + return mask + + + @classmethod + def select(cls, columns, selection_mask=None, **selection): + dimensions = columns.dimensions('key', label=True) + selection = [(d, selection.get(d)) for d in dimensions] + data = {} + value_select = [] + for dim, ind in selection: + arr = cls.values(columns, dim, True) + mask = cls.coord_mask(columns, arr, ind) + if mask is None: + mask = np.ones(arr.shape, dtype=bool) + else: + arr = arr[mask] + value_select.append(mask) + data[dim] = arr + int_inds = [np.argwhere(v) for v in value_select] + index = np.ix_(*[np.atleast_1d(np.squeeze(ind)) if ind.ndim > 1 else np.atleast_1d(ind) + for ind in int_inds]) + sliced = columns.data['array'][index] + data['array'] = sliced + return data + + + @classmethod + def sample(cls, columns, samples=[]): + """ + Samples the gridded data into columns of samples. 
+ """ + ndims = columns.ndims + deep = len(columns.vdims) > 1 + dimensions = columns.dimensions(label=True) + array = columns.data['array'] + data = defaultdict(list) + for sample in samples: + if np.isscalar(sample): sample = [sample] + if len(sample) != ndims: + sample = [sample[i] if i < len(sample) else None + for i in range(ndims)] + sampled, int_inds = [], [] + for d, ind in zip(dimensions, sample): + cdata = columns.data[d] + mask = cls.coord_mask(columns, cdata, ind) + inds = np.arange(len(cdata)) if mask is None else np.argwhere(mask) + int_inds.append(inds) + sampled.append(cdata[mask]) + for d, arr in zip(dimensions, np.meshgrid(*sampled)): + data[d].append(arr) + for i, vdim in enumerate(columns.vdims): + val_ind = tuple(int_inds) + ((i,) if deep else ()) + flat_index = np.ravel_multi_index(val_ind, array.shape) + data[vdim.name].append(array.flat[flat_index]) + concatenated = {d: np.concatenate(arrays).flatten() for d, arrays in data.items()} + return concatenated + + + @classmethod + def aggregate(cls, columns, kdims, function, **kwargs): + kdims = [kd.name if isinstance(kd, Dimension) else kd for kd in kdims] + data = {kdim: columns.data[kdim] for kdim in kdims} + axes = tuple(columns.get_dimension_index(kdim) for kdim in columns.kdims + if kdim not in kdims) + data['array'] = function(columns.data['array'], axis=axes, **kwargs) + return data + + + @classmethod + def reindex(cls, columns, kdims, vdims): + if len(kdims) != columns.ndims: + raise ValueError('Cannot drop dimension varying dimension.') + if (any(kd for kd in kdims if kd not in columns.kdims) or + any(vd for vd in vdims if vd not in columns.vdims)): + return columns.clone(columns.columns()).reindex(kdims, vdims) + data = dict(columns.data) + if len(vdims) != len(columns.vdims): + data['array'] = data['array'][..., [columns.vdims.index(d) for d in vdims]] + if kdims != columns.kdims: + axes = [columns.kdims.index(d) for d in kdims]+[data['array'].ndim-1] + data['array'] = 
np.transpose(data['array'], axes) + return data + + + @classmethod + def add_dimension(cls, columns, dimension, dim_pos, values, vdim): + raise NotImplementedError + + + @classmethod + def sort(cls, columns, by=[]): + return columns.data + + + # Register available interfaces DataColumns.register(DictColumns) DataColumns.register(ArrayColumns) DataColumns.register(NdColumns) +DataColumns.register(NdArrayColumns) if pd: DataColumns.register(DFColumns) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 1ab8caa71a..48b24cebdb 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -861,3 +861,16 @@ def groupby_python(self_or_cls, ndmapping, dimensions, container_type, return container_type(groups, kdims=dimensions) +def cartesian_product(arrays): + """ + Computes the cartesian product of a list of arrays. + """ + broadcastable = np.ix_(*arrays) + broadcasted = np.broadcast_arrays(*broadcastable) + rows, cols = reduce(np.multiply, broadcasted[0].shape), len(broadcasted) + out = np.empty(rows * cols, dtype=broadcasted[0].dtype) + start, end = 0, rows + for a in broadcasted: + out[start:end] = a.reshape(-1) + start, end = end, end + rows + return out.reshape(cols, rows).T From 06455357afc6de46029d146e1d5d1c5faf0c7594 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 11:32:48 +0000 Subject: [PATCH 04/36] Consistently added expanded keyword to dimension_values method --- holoviews/core/data.py | 39 +++++++++++++++++------------ holoviews/core/dimension.py | 2 +- holoviews/core/element.py | 7 +++--- holoviews/core/layout.py | 11 +++++---- holoviews/core/ndmapping.py | 6 ++--- holoviews/core/overlay.py | 6 ++--- holoviews/element/raster.py | 42 +++++++++++++++++--------------- holoviews/element/tabular.py | 2 +- holoviews/operation/element.py | 4 +-- holoviews/plotting/mpl/raster.py | 4 +-- 10 files changed, 67 insertions(+), 56 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 1b3cb3b207..b18d508f75 
100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -338,17 +338,13 @@ def shape(self): return self.interface.shape(self) - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): """ Returns the values along a particular dimension. If unique values are requested will return only unique values. """ dim = self.get_dimension(dim, strict=True).name - dim_vals = self.interface.values(self, dim) - if unique: - return util.unique_array(dim_vals) - else: - return dim_vals + return self.interface.values(self, dim, expanded, flat) def get_dimension_type(self, dim): @@ -656,8 +652,11 @@ def sort(cls, columns, by=[]): return columns.data.sort(by) @classmethod - def values(cls, columns, dim): - return columns.data.dimension_values(dim) + def values(cls, columns, dim, expanded, flat): + values = columns.data.dimension_values(dim) + if not expanded: + return util.unique_array(values) + return values @classmethod def reindex(cls, columns, kdims=None, vdims=None): @@ -855,10 +854,12 @@ def select(cls, columns, selection_mask=None, **selection): @classmethod - def values(cls, columns, dim): + def values(cls, columns, dim, expanded, flat): data = columns.data[dim] if util.dd and isinstance(data, util.dd.Series): data = data.compute() + if not expanded: + return util.unique_array(data) return np.array(data) @@ -981,12 +982,15 @@ def sort(cls, columns, by=[]): @classmethod - def values(cls, columns, dim): + def values(cls, columns, dim, expanded, flat): data = columns.data dim_idx = columns.get_dimension_index(dim) if data.ndim == 1: data = np.atleast_2d(data).T - return data[:, dim_idx] + values = data[:, dim_idx] + if not expanded: + return util.unique_array(values) + return values @classmethod @@ -1228,8 +1232,11 @@ def sort(cls, columns, by=[]): return OrderedDict([(d, v[sorting]) for d, v in columns.data.items()]) @classmethod - def values(cls, columns, dim): - return 
np.array(columns.data.get(columns.get_dimension(dim).name)) + def values(cls, columns, dim, expanded, flat): + values = np.array(columns.data.get(columns.get_dimension(dim).name)) + if not expanded: + return util.unique_array(values) + return values @classmethod @@ -1393,9 +1400,9 @@ def length(cls, columns): @classmethod - def values(cls, columns, dim, dense=False, flat=True): + def values(cls, columns, dim, expanded=True, flat=True): if dim in columns.kdims: - if dense: + if not expanded: return columns.data[dim] prod = util.cartesian_product([columns.data[d.name] for d in columns.kdims]) idx = columns.get_dimension_index(dim) @@ -1477,7 +1484,7 @@ def select(cls, columns, selection_mask=None, **selection): data = {} value_select = [] for dim, ind in selection: - arr = cls.values(columns, dim, True) + arr = cls.values(columns, dim, False) mask = cls.coord_mask(columns, arr, ind) if mask is None: mask = np.ones(arr.shape, dtype=bool) diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index ed40c4620b..9ad621cc7a 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -754,7 +754,7 @@ def select(self, selection_specs=None, **kwargs): return selection - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): """ Returns the values along the specified dimension. This method must be implemented for all Dimensioned type. 
diff --git a/holoviews/core/element.py b/holoviews/core/element.py index bcb61740a3..bcc1309759 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -482,15 +482,16 @@ def aggregate(self, dimensions, function, **kwargs): return self.clone(rows, kdims=grouped.kdims) - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): dim = self.get_dimension(dim, strict=True) value_dims = self.dimensions('value', label=True) if dim.name in value_dims: index = value_dims.index(dim.name) vals = np.array([v[index] for v in self.data.values()]) - return unique_array(vals) if unique else vals + return vals if not expanded else unique_array(vals) else: - return NdMapping.dimension_values(self, dim.name, unique) + return NdMapping.dimension_values(self, dim.name, + expanded, flat) def values(self): diff --git a/holoviews/core/layout.py b/holoviews/core/layout.py index ddd7b3d763..d24dccba06 100644 --- a/holoviews/core/layout.py +++ b/holoviews/core/layout.py @@ -126,9 +126,9 @@ def get(self, key, default=None): return self.data[key] if key in self.data else default - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): dimension = self.get_dimension(dimension, strict=True).name - return self.main.dimension_values(dimension, unique) + return self.main.dimension_values(dimension, expanded, flat) def __getitem__(self, key): @@ -433,7 +433,7 @@ def clone(self, *args, **overrides): return clone - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): "Returns the values along the specified dimension." 
dimension = self.get_dimension(dimension, strict=True).name all_dims = self.traverse(lambda x: [d.name for d in x.dimensions()]) @@ -441,9 +441,10 @@ def dimension_values(self, dimension, unique=False): values = [el.dimension_values(dimension) for el in self if dimension in el.dimensions(label=True)] vals = np.concatenate(values) - return unique_array(vals) if unique else vals + return vals if expanded else unique_array(vals) else: - return super(Layout, self).dimension_values(dimension, unique) + return super(Layout, self).dimension_values(dimension, + expanded, flat) def cols(self, ncols): diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 41b45049b0..856cc2fe25 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -343,7 +343,7 @@ def drop_dimension(self, dimensions): kdims=dims) - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): "Returns the values along the specified dimension." 
dimension = self.get_dimension(dimension, strict=True).name if dimension in self.kdims: @@ -352,9 +352,9 @@ def dimension_values(self, dimension, unique=False): values = [el.dimension_values(dimension) for el in self if dimension in el.dimensions()] vals = np.concatenate(values) - return util.unique_array(vals) if unique else vals + return vals if expanded else util.unique_array(vals) else: - return super(MultiDimensionalMapping, self).dimension_values(dimension, unique) + return super(MultiDimensionalMapping, self).dimension_values(dimension, expanded, flat) def reindex(self, kdims=[], force=False): diff --git a/holoviews/core/overlay.py b/holoviews/core/overlay.py index f145a58756..69e0fbf21c 100644 --- a/holoviews/core/overlay.py +++ b/holoviews/core/overlay.py @@ -65,7 +65,7 @@ def hist(self, index=0, adjoin=True, dimension=None, **kwargs): return layout - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): values = [] found = False for el in self: @@ -73,12 +73,12 @@ def dimension_values(self, dimension, unique=False): values.append(el.dimension_values(dimension)) found = True if not found: - return super(CompositeOverlay, self).dimension_values(dimension, unique) + return super(CompositeOverlay, self).dimension_values(dimension, expanded, flat) values = [v for v in values if v is not None and len(v)] if not values: return np.array() vals = np.concatenate(values) - return unique_array(vals) if unique else vals + return vals if expanded else unique_array(vals) class Overlay(Layout, CompositeOverlay): diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index df21213a26..ee970576f8 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -129,7 +129,7 @@ def sample(self, samples=[], **sample_values): sample[sample_ind] = self._coord2matrix(coord_fn(sample_coord))[abs(sample_ind-1)] # Sample data - x_vals = self.dimension_values(other_dimension[0].name, 
unique=True) + x_vals = self.dimension_values(other_dimension[0].name, False) ydata = self._zdata[sample[::-1]] if hasattr(self, 'bounds') and sample_ind == 0: ydata = ydata[::-1] data = list(zip(x_vals, ydata)) @@ -154,7 +154,7 @@ def reduce(self, dimensions=None, function=None, **reduce_map): dimension = dims[0] other_dimension = [d for d in self.kdims if d.name != dimension] oidx = self.get_dimension_index(other_dimension[0]) - x_vals = self.dimension_values(other_dimension[0].name, unique=True) + x_vals = self.dimension_values(other_dimension[0].name, False) reduced = function(self._zdata, axis=oidx) data = zip(x_vals, reduced if not oidx else reduced[::-1]) params = dict(dict(self.get_param_values(onlychanged=True)), @@ -164,14 +164,14 @@ def reduce(self, dimensions=None, function=None, **reduce_map): return Table(data, **params) - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): """ The set of samples available along a particular dimension. 
""" dim_idx = self.get_dimension_index(dim) - if unique and dim_idx == 0: + if not expanded and dim_idx == 0: return np.array(range(self.data.shape[1])) - elif unique and dim_idx == 1: + elif not expanded and dim_idx == 1: return np.array(range(self.data.shape[0])) elif dim_idx in [0, 1]: D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]] @@ -338,20 +338,20 @@ def range(self, dimension): super(QuadMesh, self).range(dimension) - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): idx = self.get_dimension_index(dimension) data = self.data[idx] if idx in [0, 1]: if not self._grid: return data.flatten() - odim = 1 if unique else self.data[2].shape[idx] + odim = self.data[2].shape[idx] if expanded else 1 vals = np.tile(np.convolve(data, np.ones((2,))/2, mode='valid'), odim) if idx: return np.sort(vals) else: return vals elif idx == 2: - return data.flatten() + return data.flatten() if flat else data else: return super(QuadMesh, self).dimension_values(idx) @@ -388,8 +388,8 @@ def __init__(self, data, extents=None, **params): def _compute_raster(self): - d1keys = self.dimension_values(0, True) - d2keys = self.dimension_values(1, True) + d1keys = self.dimension_values(0, False) + d2keys = self.dimension_values(1, False) coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys] dtype = 'dataframe' if pd else 'dictionary' dense_data = Columns(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype]) @@ -438,8 +438,8 @@ def __setstate__(self, state): super(HeatMap, self).__setstate__(state) def dense_keys(self): - d1keys = self.dimension_values(0, True) - d2keys = self.dimension_values(1, True) + d1keys = self.dimension_values(0, False) + d2keys = self.dimension_values(1, False) return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys])) @@ -603,7 +603,7 @@ def _coord2matrix(self, coord): return self.sheet2matrixidx(*coord) - def dimension_values(self, dim, unique=False): + def 
dimension_values(self, dim, expanded=True, flat=True): """ The set of samples available along a particular dimension. """ @@ -615,13 +615,14 @@ def dimension_values(self, dim, unique=False): d2_half_unit = (t - b)/dim2/2. d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1) d2lin = np.linspace(b+d2_half_unit, t-d2_half_unit, dim2) - if unique: - return d2lin if dim_idx else d1lin - else: + if expanded: Y, X = np.meshgrid(d2lin, d1lin) return Y.flatten() if dim_idx else X.flatten() + else: + return d2lin if dim_idx else d1lin elif dim_idx == 2: - return np.flipud(self.data).T.flatten() + data = np.flipud(self.data).T + return data.flatten() if flat else data else: super(Image, self).dimension_values(dim) @@ -703,14 +704,15 @@ def load_image(cls, filename, height=1, array=False, bounds=None, bare=False, ** return rgb - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): """ The set of samples available along a particular dimension. 
""" dim_idx = self.get_dimension_index(dim) if self.ndims <= dim_idx < len(self.dimensions()): - return np.flipud(self.data[:,:,dim_idx-self.ndims]).T.flatten() - return super(RGB, self).dimension_values(dim, unique=True) + data = np.flipud(self.data[:,:,dim_idx-self.ndims]).T + return data.flatten() if flat else data + return super(RGB, self).dimension_values(dim, expanded, flat) def __init__(self, data, **params): diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index a7948fc9be..cd7d0b0731 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -71,7 +71,7 @@ def collapse_data(cls, data, function, **kwargs): return OrderedDict(zip(data[0].keys(), function(groups, axis=-1, **kwargs))) - def dimension_values(self, dimension): + def dimension_values(self, dimension, expanded=True, flat=True): dimension = self.get_dimension(dimension, strict=True).name if dimension in self.dimensions('value', label=True): return np.array([self.data.get(dimension, np.NaN)]) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 94a08db4fe..85c6818d28 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -428,8 +428,8 @@ def _process(self, element, key=None): data = [element.data] elif isinstance(element, QuadMesh): - data = (element.dimension_values(0, True), - element.dimension_values(1, True), + data = (element.dimension_values(0, False), + element.dimension_values(1, False), element.data[2]) contour_set = contour_fn(*data, extent=extent, levels=self.p.levels) diff --git a/holoviews/plotting/mpl/raster.py b/holoviews/plotting/mpl/raster.py index aaa235139a..f90e820c19 100644 --- a/holoviews/plotting/mpl/raster.py +++ b/holoviews/plotting/mpl/raster.py @@ -111,7 +111,7 @@ def _annotate_plot(self, ax, annotations): def _annotate_values(self, element): val_dim = element.vdims[0] vals = np.rot90(element.raster, 3).flatten() - d1uniq, d2uniq = [np.unique(element.dimension_values(i)) 
for i in range(2)] + d1uniq, d2uniq = [element.dimension_values(i, False) for i in range(2)] num_x, num_y = len(d1uniq), len(d2uniq) xstep, ystep = 1.0/num_x, 1.0/num_y xpos = np.linspace(xstep/2., 1.0-xstep/2., num_x) @@ -127,7 +127,7 @@ def _annotate_values(self, element): def _compute_ticks(self, element, ranges): xdim, ydim = element.kdims - dim1_keys, dim2_keys = [element.dimension_values(i, True) + dim1_keys, dim2_keys = [element.dimension_values(i, False) for i in range(2)] num_x, num_y = len(dim1_keys), len(dim2_keys) x0, y0, x1, y1 = element.extents From ead988f6ae8fa861f49f79b8c808c85ad9bb4987 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 11:56:32 +0000 Subject: [PATCH 05/36] Renamed NdArrayColumns to GridColumns --- holoviews/core/data.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index b18d508f75..393bcbc727 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1330,7 +1330,7 @@ def aggregate(cls, columns, kdims, function, **kwargs): -class NdArrayColumns(DictColumns): +class GridColumns(DictColumns): """ Interface for simple dictionary-based columns format. 
The dictionary keys correspond to the column (i.e dimension) names and the values @@ -1339,9 +1339,7 @@ class NdArrayColumns(DictColumns): types = (dict, OrderedDict, cyODict) - datatype = 'ndarray' - - dense = True + datatype = 'grid' @classmethod def reshape(cls, eltype, data, kdims, vdims): @@ -1573,6 +1571,6 @@ def sort(cls, columns, by=[]): DataColumns.register(DictColumns) DataColumns.register(ArrayColumns) DataColumns.register(NdColumns) -DataColumns.register(NdArrayColumns) +DataColumns.register(GridColumns) if pd: DataColumns.register(DFColumns) From f26d95f23f7246cc199774ad4f93e6ad2991951d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 11:56:58 +0000 Subject: [PATCH 06/36] Cleanup and minor fixes in core.data module --- holoviews/core/data.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 393bcbc727..f0e4cb4b2d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -3,7 +3,7 @@ backends. """ -import sys, warnings +import sys from distutils.version import LooseVersion from collections import OrderedDict, defaultdict from itertools import compress, cycle @@ -254,7 +254,7 @@ def sample(self, samples=[]): matching the key dimensions, returning a new object containing just the selected samples. 
""" - return self.clone(self.interface.sample(self, samples)) + return self.clone(self.interface.sample(self, samples), dense=False) def reduce(self, dimensions=[], function=None, spreadfn=None, **reduce_map): @@ -715,7 +715,6 @@ def reshape(cls, eltype, data, kdims, vdims): kdim_param = element_params['kdims'] vdim_param = element_params['vdims'] if util.is_dataframe(data): - columns = data.columns ndim = len(kdim_param.default) if kdim_param.default else None if kdims and vdims is None: vdims = [c for c in data.columns if c not in kdims] @@ -761,7 +760,7 @@ def range(cls, columns, dimension): column = columns.data[columns.get_dimension(dimension).name] if column.dtype.kind == 'O': if (not isinstance(columns.data, pd.DataFrame) or - LooseVersion(pd.__version__) < '0.17.0'): + LooseVersion(pd.__version__) < '0.17.0'): column = column.sort(inplace=False) else: column = column.sort_values() @@ -1459,7 +1458,7 @@ def coord_mask(cls, columns, arr, ind): mask &= arr < ind.stop elif isinstance(ind, (set, list)): iter_slcs = [] - for ik in k: + for ik in ind: iter_slcs.append(arr == ik) mask = np.logical_or.reduce(iter_slcs) elif ind is None: From 03c1e22a4e227bc93ae5e671ad682103d02d42c6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 19:48:52 +0000 Subject: [PATCH 07/36] Added validation to detect dense formats in existing interfaces --- holoviews/core/data.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index f0e4cb4b2d..3681cebf20 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -51,9 +51,6 @@ class Columns(Element): format listed will be used until a suitable format is found (or the data fails to be understood).""") - dense = param.Boolean(default=False, doc="""Whether the supplied data - is in a dense format.""") - # In the 1D case the interfaces should not automatically add x-values # to supplied data _1d = False @@ -254,7 +251,7 @@ def 
sample(self, samples=[]): matching the key dimensions, returning a new object containing just the selected samples. """ - return self.clone(self.interface.sample(self, samples), dense=False) + return self.clone(self.interface.sample(self, samples)) def reduce(self, dimensions=[], function=None, spreadfn=None, **reduce_map): @@ -466,6 +463,11 @@ def validate(cls, columns): "not found: %s" % repr(not_found)) + @classmethod + def check_dense(cls, arrays): + return any(array.shape not in [arrays[0].shape, (1,)] for array in arrays[1:]) + + @classmethod def select_mask(cls, columns, selection): """ @@ -748,6 +750,9 @@ def reshape(cls, eltype, data, kdims, vdims): data = tuple(data[:, i] for i in range(data.shape[1])) if isinstance(data, tuple): + data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] + if cls.check_dense(data): + raise ValueError('Dimension values must all be the same shape.') data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) else: @@ -1156,6 +1161,8 @@ def reshape(cls, eltype, data, kdims, vdims): raise ValueError("DictColumns interface couldn't convert data.""") elif isinstance(data, dict): unpacked = [(d, np.array(data[d])) for d in data] + if cls.check_dense([d[1] for d in unpacked]): + raise ValueError('Dimension values must all be the same shape.') if isinstance(data, odict_types): data.update(unpacked) else: From 81bb78f26078e089ecc0218be11ec6456c1f9f16 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 19:51:56 +0000 Subject: [PATCH 08/36] Added default arguments to interface values method --- holoviews/core/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 3681cebf20..e61365e83d 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -858,7 +858,7 @@ def select(cls, columns, selection_mask=None, **selection): @classmethod - def values(cls, columns, dim, expanded, flat): + def values(cls, 
columns, dim, expanded=True, flat=True): data = columns.data[dim] if util.dd and isinstance(data, util.dd.Series): data = data.compute() @@ -986,7 +986,7 @@ def sort(cls, columns, by=[]): @classmethod - def values(cls, columns, dim, expanded, flat): + def values(cls, columns, dim, expanded=True, flat=True): data = columns.data dim_idx = columns.get_dimension_index(dim) if data.ndim == 1: @@ -1238,7 +1238,7 @@ def sort(cls, columns, by=[]): return OrderedDict([(d, v[sorting]) for d, v in columns.data.items()]) @classmethod - def values(cls, columns, dim, expanded, flat): + def values(cls, columns, dim, expanded=True, flat=True): values = np.array(columns.data.get(columns.get_dimension(dim).name)) if not expanded: return util.unique_array(values) From c5f4f613235318ab6b99d14c29d09276400c8af1 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 19:52:20 +0000 Subject: [PATCH 09/36] Updated Column interface unit test --- tests/testcolumns.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index ab7cdd1ccc..46c8dbbfb7 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -176,8 +176,8 @@ def test_columns_dataframe_init_ht(self): # Test literal formats - def test_columns_uniq_dimvals_ht(self): - self.assertEqual(self.table.dimension_values('Gender', unique=True), + def test_columns_expanded_dimvals_ht(self): + self.assertEqual(self.table.dimension_values('Gender', expanded=False), np.array(['M', 'F'])) def test_columns_implicit_indexing_init(self): From 3b5cadae6738173a5b1d472df111fa8a95c6427a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 19:52:43 +0000 Subject: [PATCH 10/36] Reverted change to DictColumns interface --- holoviews/core/data.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index e61365e83d..37e2eb2efa 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ 
-1268,14 +1268,12 @@ def groupby(cls, columns, dimensions, container_type, group_type, **kwargs): group_kwargs.update(kwargs) # Find all the keys along supplied dimensions - key_data = [] - for d in dimensions: - data = columns.data[d.name] - key_data.append(cycle([data[0]]) if len(data) == 1 else data) + keys = [tuple(columns.data[d.name][i] for d in dimensions) + for i in range(len(columns))] # Iterate over the unique entries applying selection masks grouped_data = [] - for unique_key in util.unique_iterator(zip(key_data)): + for unique_key in util.unique_iterator(keys): mask = cls.select_mask(columns, dict(zip(dimensions, unique_key))) group_data = OrderedDict(((d.name, columns[d.name][mask]) for d in kdims+vdims)) group_data = group_type(group_data, **group_kwargs) From 631e91cf6fe47deb3d6605be08918c06292c769a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 8 Mar 2016 23:14:03 +0000 Subject: [PATCH 11/36] Small fixes for NdElement interface --- holoviews/core/data.py | 4 ++-- holoviews/core/element.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 37e2eb2efa..63069eea45 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -654,8 +654,8 @@ def sort(cls, columns, by=[]): return columns.data.sort(by) @classmethod - def values(cls, columns, dim, expanded, flat): - values = columns.data.dimension_values(dim) + def values(cls, columns, dim, expanded=True, flat=True): + values = columns.data.dimension_values(dim, expanded, flat) if not expanded: return util.unique_array(values) return values diff --git a/holoviews/core/element.py b/holoviews/core/element.py index bcc1309759..bdac467c7d 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -488,7 +488,7 @@ def dimension_values(self, dim, expanded=True, flat=True): if dim.name in value_dims: index = value_dims.index(dim.name) vals = np.array([v[index] for v in self.data.values()]) - return vals if not 
expanded else unique_array(vals) + return vals if expanded else unique_array(vals) else: return NdMapping.dimension_values(self, dim.name, expanded, flat) From 49e729aaf107c9e97f1a19e6a0af71d5fbd4a80f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 9 Mar 2016 21:52:14 +0000 Subject: [PATCH 12/36] Changed GridColumns format to expand vdims --- holoviews/core/data.py | 78 +++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 63069eea45..62d3a375b3 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -6,7 +6,7 @@ import sys from distutils.version import LooseVersion from collections import OrderedDict, defaultdict -from itertools import compress, cycle +from itertools import compress try: import itertools.izip as zip @@ -1351,14 +1351,19 @@ def reshape(cls, eltype, data, kdims, vdims): kdims = eltype.kdims if vdims is None: vdims = eltype.vdims - for kdim in kdims: - name = kdim.name if isinstance(kdim, Dimension) else kdim + + if 'vdims' in data: + vdim_array = data.pop('vdims') + for i, vdim in enumerate(vdims): + name = vdim.name if isinstance(vdim, Dimension) else vdim + data[name] = vdim_array[..., i] + + for dim in kdims+vdims: + name = dim.name if isinstance(dim, Dimension) else dim + if name not in data: + raise ValueError("Values for dimension %s not found" % dim) if not isinstance(data[name], np.ndarray): data[name] = np.array(data[name]) - if vdims and 'array' not in data: - raise Exception - if len(vdims) == 1 and data['array'].ndim > len(kdims): - data['array'] = data['array'][..., 0] return data, kdims, vdims @@ -1367,17 +1372,14 @@ def reshape(cls, eltype, data, kdims, vdims): def validate(cls, columns): if not columns.vdims: return - shape = columns.data['array'].shape - expected = [] - for kd in columns.kdims: - expected.append(len(columns.data[kd.name])) - if len(columns.vdims) > 1: - expected.append(len(columns.vdims)) - 
expected = tuple(expected) - if shape != expected: - raise ValueError('Key dimension values and value array ' - 'shapes do not match. Expected shape %s, ' - 'actual shape: %s' % (expected, shape)) + expected = [len(columns.data[kd.name]) + for kd in columns.kdims] + for vdim in columns.vdims: + shape = columns.data[vdim.name].shape + if shape != tuple(expected): + raise ValueError('Key dimension values and value array %s' + 'shape do not match. Expected shape %s, ' + 'actual shape: %s' % (expected, vdim, shape)) @classmethod @@ -1385,7 +1387,7 @@ def dimension_type(cls, columns, dim): if dim in columns.kdims: arr = columns.data[dim.name] elif dim in columns.vdims: - arr = columns.data['array'] + arr = columns.data[dim.name] else: return None return arr.dtype.type @@ -1410,10 +1412,8 @@ def values(cls, columns, dim, expanded=True, flat=True): idx = columns.get_dimension_index(dim) return prod[:, idx] else: - values = columns.data.get('array') - if len(columns.vdims) > 1: - idx = columns.vdims.index(dim) - values = values[..., idx] + dim = columns.get_dimension(dim) + values = columns.data.get(dim.name) return values.flatten() if flat else values @@ -1438,7 +1438,8 @@ def groupby(cls, columns, dim_names, container_type, group_type, **kwargs): grouped_data = [] for unique_key in util.cartesian_product(keys): group_data = cls.select(columns, **dict(zip(dim_names, unique_key))) - group_data['array'] = np.squeeze(group_data['array']) + for vdim in columns.vdims: + group_data[vdim.name] = np.squeeze(group_data[vdim.name]) group_data = group_type(group_data, **group_kwargs) grouped_data.append((tuple(unique_key), group_data)) @@ -1497,8 +1498,8 @@ def select(cls, columns, selection_mask=None, **selection): int_inds = [np.argwhere(v) for v in value_select] index = np.ix_(*[np.atleast_1d(np.squeeze(ind)) if ind.ndim > 1 else np.atleast_1d(ind) for ind in int_inds]) - sliced = columns.data['array'][index] - data['array'] = sliced + for vdim in columns.vdims: + data[vdim.name] 
= columns.data[vdim.name][index] return data @@ -1508,9 +1509,8 @@ def sample(cls, columns, samples=[]): Samples the gridded data into columns of samples. """ ndims = columns.ndims - deep = len(columns.vdims) > 1 dimensions = columns.dimensions(label=True) - array = columns.data['array'] + arrays = [columns.data[vdim.name] for vdim in columns.vdims] data = defaultdict(list) for sample in samples: if np.isscalar(sample): sample = [sample] @@ -1526,9 +1526,8 @@ def sample(cls, columns, samples=[]): sampled.append(cdata[mask]) for d, arr in zip(dimensions, np.meshgrid(*sampled)): data[d].append(arr) - for i, vdim in enumerate(columns.vdims): - val_ind = tuple(int_inds) + ((i,) if deep else ()) - flat_index = np.ravel_multi_index(val_ind, array.shape) + for vdim, array in zip(columns.vdims, arrays): + flat_index = np.ravel_multi_index(tuple(int_inds), array.shape) data[vdim.name].append(array.flat[flat_index]) concatenated = {d: np.concatenate(arrays).flatten() for d, arrays in data.items()} return concatenated @@ -1540,7 +1539,9 @@ def aggregate(cls, columns, kdims, function, **kwargs): data = {kdim: columns.data[kdim] for kdim in kdims} axes = tuple(columns.get_dimension_index(kdim) for kdim in columns.kdims if kdim not in kdims) - data['array'] = function(columns.data['array'], axis=axes, **kwargs) + for vdim in columns.vdims: + data[vdim.name] = function(columns.data[vdim.name], + axis=axes, **kwargs) return data @@ -1552,11 +1553,16 @@ def reindex(cls, columns, kdims, vdims): any(vd for vd in vdims if vd not in columns.vdims)): return columns.clone(columns.columns()).reindex(kdims, vdims) data = dict(columns.data) - if len(vdims) != len(columns.vdims): - data['array'] = data['array'][..., [columns.vdims.index(d) for d in vdims]] + + dropped_vdims = ([vdim for vdim in columns.vdims + if vdim not in vdims] if vdims else []) + for vdim in dropped_vdims: + del data[vdim.name] + if kdims != columns.kdims: - axes = [columns.kdims.index(d) for d in 
kdims]+[data['array'].ndim-1] - data['array'] = np.transpose(data['array'], axes) + axes = [columns.kdims.index(d) for d in kdims] + for vdim in vdims: + data[vdim.name] = np.transpose(data[vdim.name], axes) return data From 7d5dc262b154330f5fdf6702ee05e9718bbe9446 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 9 Mar 2016 22:57:51 +0000 Subject: [PATCH 13/36] Improved GridColumns validation --- holoviews/core/data.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 62d3a375b3..566ec00b61 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1352,6 +1352,18 @@ def reshape(cls, eltype, data, kdims, vdims): if vdims is None: vdims = eltype.vdims + if not vdims: + raise ValueError('GridColumns interface requires at least ' + 'one value dimension.') + + dimensions = [d.name if isinstance(d, Dimension) else + d for d in kdims + vdims] + if isinstance(data, tuple): + data = {d: v for d, v in zip(dimensions, data)} + elif not isinstance(data, dict): + raise ValueError('GridColumns must be instantiated as a ' + 'dictionary or tuple') + if 'vdims' in data: vdim_array = data.pop('vdims') for i, vdim in enumerate(vdims): @@ -1365,21 +1377,21 @@ def reshape(cls, eltype, data, kdims, vdims): if not isinstance(data[name], np.ndarray): data[name] = np.array(data[name]) + kdim_names = [d.name if isinstance(d, Dimension) else d for d in kdims] + vdim_names = [d.name if isinstance(d, Dimension) else d for d in vdims] + expected = [len(data[kd]) for kd in kdim_names] + for vdim in vdim_names: + shape = data[vdim].shape + if shape != tuple(expected): + raise ValueError('Key dimension values and value array %s' + 'shape do not match. 
Expected shape %s, ' + 'actual shape: %s' % (expected, vdim, shape)) return data, kdims, vdims @classmethod def validate(cls, columns): - if not columns.vdims: - return - expected = [len(columns.data[kd.name]) - for kd in columns.kdims] - for vdim in columns.vdims: - shape = columns.data[vdim.name].shape - if shape != tuple(expected): - raise ValueError('Key dimension values and value array %s' - 'shape do not match. Expected shape %s, ' - 'actual shape: %s' % (expected, vdim, shape)) + DataColumns.validate(columns) @classmethod @@ -1483,6 +1495,10 @@ def coord_mask(cls, columns, arr, ind): @classmethod def select(cls, columns, selection_mask=None, **selection): dimensions = columns.dimensions('key', label=True) + val_dims = [vdim for vdim in columns.vdims if vdim in selection] + if val_dims: + raise IndexError('Cannot slice value dimensions on dense ' + 'data, convert to sparse format first') selection = [(d, selection.get(d)) for d in dimensions] data = {} value_select = [] From 232b3728407875d203990ad05b98665404262220 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 10 Mar 2016 14:59:56 +0000 Subject: [PATCH 14/36] Fixed scalar return values from GridColumn slicing --- holoviews/core/data.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 566ec00b61..03dfc0feaa 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1499,6 +1499,8 @@ def select(cls, columns, selection_mask=None, **selection): if val_dims: raise IndexError('Cannot slice value dimensions on dense ' 'data, convert to sparse format first') + + indexed = cls.indexed(columns, selection) selection = [(d, selection.get(d)) for d in dimensions] data = {} value_select = [] @@ -1516,6 +1518,10 @@ def select(cls, columns, selection_mask=None, **selection): for ind in int_inds]) for vdim in columns.vdims: data[vdim.name] = columns.data[vdim.name][index] + + if indexed and len(data[columns.vdims[0].name]) == 1: + return 
data[columns.vdims[0].name][0] + return data From d6e34405b35ab5ec8d179b92eacc50c96f096f43 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 10 Mar 2016 15:00:22 +0000 Subject: [PATCH 15/36] Ensured GridColumns aggregate returns at least 1D array --- holoviews/core/data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 03dfc0feaa..948bb2ef02 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1562,8 +1562,9 @@ def aggregate(cls, columns, kdims, function, **kwargs): axes = tuple(columns.get_dimension_index(kdim) for kdim in columns.kdims if kdim not in kdims) for vdim in columns.vdims: - data[vdim.name] = function(columns.data[vdim.name], - axis=axes, **kwargs) + data[vdim.name] = np.atleast_1d(function(columns.data[vdim.name], + axis=axes, **kwargs)) + return data From e30bc537f816d9cfd06a4eb6c6d6abc6728f870d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 10 Mar 2016 15:01:34 +0000 Subject: [PATCH 16/36] Implemented GridColumns add_dimension and sort methods --- holoviews/core/data.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 948bb2ef02..52d6aa2e8f 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1594,9 +1594,21 @@ def add_dimension(cls, columns, dimension, dim_pos, values, vdim): raise NotImplementedError + @classmethod + def add_dimension(cls, columns, dimension, dim_pos, values, vdim): + if not vdim: + raise Exception("Cannot add key dimension to a dense representation.") + dim = dimension.name if isinstance(dimension, Dimension) else dimension + return dict(columns.data, **{dim: values}) + + @classmethod def sort(cls, columns, by=[]): - return columns.data + if not by or by in [columns.kdims, columns.dimensions()]: + return columns.data + else: + raise Exception('Dense representation cannot be sorted, either instantiate ' + 'in the 
desired order or use a sparse format.') From 46c5e8aca82d2fea264d99b014c9f049442a50c1 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 10 Mar 2016 15:01:56 +0000 Subject: [PATCH 17/36] Added unit tests for GridColumns interface --- tests/testcolumns.py | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/testcolumns.py b/tests/testcolumns.py index 46c8dbbfb7..f876e89146 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -399,3 +399,78 @@ def test_columns_double_zip_init(self): kdims=self.kdims, vdims=self.vdims) self.assertTrue(isinstance(columns.data, NdElement)) + +class GridColumnsTest(HomogeneousColumnTypes, ComparisonTestCase): + """ + Test of the NdColumns interface (mostly for backwards compatibility) + """ + + def setUp(self): + self.restore_datatype = Columns.datatype + Columns.datatype = ['grid'] + self.data_instance_type = dict + self.init_data() + + def init_data(self): + self.xs = range(11) + self.xs_2 = [el**2 for el in self.xs] + + self.y_ints = [i*2 for i in range(11)] + self.columns_hm = Columns((self.xs, self.y_ints), + kdims=['x'], vdims=['y']) + + def test_columns_array_init_hm(self): + "Tests support for arrays (homogeneous)" + exception = "None of the available storage backends "\ + "were able to support the supplied data format." + with self.assertRaisesRegexp(Exception, exception): + Columns(np.column_stack([self.xs, self.xs_2]), + kdims=['x'], vdims=['x2']) + + def test_columns_dataframe_init_hm(self): + "Tests support for homogeneous DataFrames" + if pd is None: + raise SkipTest("Pandas not available") + exception = "None of the available storage backends "\ + "were able to support the supplied data format." 
+ with self.assertRaisesRegexp(Exception, exception): + Columns(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}), + kdims=['x'], vdims=['x2']) + + def test_columns_ndelement_init_hm(self): + "Tests support for homogeneous NdElement (backwards compatibility)" + exception = "None of the available storage backends "\ + "were able to support the supplied data format." + with self.assertRaisesRegexp(Exception, exception): + Columns(NdElement(zip(self.xs, self.xs_2), + kdims=['x'], vdims=['x2'])) + + def test_columns_2D_aggregate_partial_hm(self): + array = np.random.rand(11, 11) + columns = Columns({'x':self.xs, 'y':self.y_ints, 'z': array}, + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), + Columns({'x':self.xs, 'z': np.mean(array, axis=1)}, + kdims=['x'], vdims=['z'])) + + def test_columns_2D_reduce_hm(self): + array = np.random.rand(11, 11) + columns = Columns({'x':self.xs, 'y':self.y_ints, 'z': array}, + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.mean(array)) + + def test_columns_add_dimensions_value_hm(self): + with self.assertRaisesRegexp(Exception, 'Cannot add key dimension to a dense representation.'): + self.columns_hm.add_dimension('z', 1, 0) + + def test_columns_add_dimensions_values_hm(self): + table = self.columns_hm.add_dimension('z', 1, range(1,12), vdim=True) + self.assertEqual(table.vdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_sort_vdim_hm(self): + exception = 'Dense representation cannot be sorted, either instantiate '\ + 'in the desired order or use a sparse format.' 
+ with self.assertRaisesRegexp(Exception, exception): + self.columns_hm.sort('y') \ No newline at end of file From 034e659510c4972dca002e49d6cc8f1037ecbb1f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 10 Mar 2016 15:17:06 +0000 Subject: [PATCH 18/36] Added missing import in core.util --- holoviews/core/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 48b24cebdb..67bb4e25a3 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -4,6 +4,7 @@ import string, fnmatch import unicodedata from collections import defaultdict +from functools import reduce import numpy as np import param From 84a3667eb543d8cc0cf5fc179d42bc09e04fc453 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:30:59 +0000 Subject: [PATCH 19/36] Renamed Columns interface reshape method to init --- holoviews/core/data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 52d6aa2e8f..674e69baaa 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -442,7 +442,7 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): # Iterate over interfaces until one can interpret the input for interface in prioritized: try: - (data, kdims, vdims) = interface.reshape(eltype, data, kdims, vdims) + (data, kdims, vdims) = interface.init(eltype, data, kdims, vdims) break except: pass @@ -583,7 +583,7 @@ class NdColumns(DataColumns): datatype = 'ndelement' @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): if isinstance(data, NdElement): kdims = [d for d in kdims if d != 'Index'] else: @@ -712,7 +712,7 @@ def dimension_type(cls, columns, dim): return columns.data.dtypes[idx].type @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): element_params = eltype.params() kdim_param = element_params['kdims'] 
vdim_param = element_params['vdims'] @@ -908,7 +908,7 @@ def dimension_type(cls, columns, dim): return columns.data.dtype.type @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): if kdims is None: kdims = eltype.kdims if vdims is None: @@ -1127,7 +1127,7 @@ def dimension_type(cls, columns, dim): return columns.data[name].dtype.type @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): odict_types = (OrderedDict, cyODict) if kdims is None: kdims = eltype.kdims @@ -1346,7 +1346,7 @@ class GridColumns(DictColumns): datatype = 'grid' @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): if kdims is None: kdims = eltype.kdims if vdims is None: From 5780d1d3516aea0624812f061433c27004d97e8f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:31:15 +0000 Subject: [PATCH 20/36] Removed stray GridColumns.add_dimension method --- holoviews/core/data.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 674e69baaa..1b38d52fc6 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1589,11 +1589,6 @@ def reindex(cls, columns, kdims, vdims): return data - @classmethod - def add_dimension(cls, columns, dimension, dim_pos, values, vdim): - raise NotImplementedError - - @classmethod def add_dimension(cls, columns, dimension, dim_pos, values, vdim): if not vdim: From 4d0e5475375438e40148d4a4476dc00b22925476 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:36:48 +0000 Subject: [PATCH 21/36] Renamed check_dense to expanded_format and improved validation --- holoviews/core/data.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 1b38d52fc6..0886b25514 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -464,8 
+464,8 @@ def validate(cls, columns): @classmethod - def check_dense(cls, arrays): - return any(array.shape not in [arrays[0].shape, (1,)] for array in arrays[1:]) + def expanded_format(cls, arrays): + return not any(array.shape not in [arrays[0].shape, (1,)] for array in arrays[1:]) @classmethod @@ -612,6 +612,9 @@ def init(cls, eltype, data, kdims, vdims): if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): pass else: + data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] + if not self.expanded_format(data): + raise ValueError('NdColumns expects data to be of uniform shape') if isinstance(data, tuple): data = zip(*data) ndims = len(kdims) @@ -751,8 +754,8 @@ def init(cls, eltype, data, kdims, vdims): if isinstance(data, tuple): data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] - if cls.check_dense(data): - raise ValueError('Dimension values must all be the same shape.') + if cls.expanded_format(data): + raise ValueError('DFColumns expects data to be of uniform shape.') data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) else: @@ -925,10 +928,10 @@ def init(cls, eltype, data, kdims, vdims): for k, v in data.items())) data = np.column_stack(columns) elif isinstance(data, tuple): - try: + if cls.expanded_format(data): data = np.column_stack(data) - except: - data = None + else: + raise ValueError('ArrayColumns expects data to be of uniform shape.') elif not isinstance(data, np.ndarray): data = np.array([], ndmin=2).T if data is None else list(data) try: @@ -1161,8 +1164,8 @@ def init(cls, eltype, data, kdims, vdims): raise ValueError("DictColumns interface couldn't convert data.""") elif isinstance(data, dict): unpacked = [(d, np.array(data[d])) for d in data] - if cls.check_dense([d[1] for d in unpacked]): - raise ValueError('Dimension values must all be the same shape.') + if not cls.expanded_format([d[1] for d in unpacked]): + raise ValueError('DictColumns expects data 
to be of uniform shape.') if isinstance(data, odict_types): data.update(unpacked) else: From a38409b60265bfea49b92b1628d84ce18a8ee9e2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:39:08 +0000 Subject: [PATCH 22/36] Renamed GridColumns coord_mask to key_select_mask --- holoviews/core/data.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 0886b25514..9b0d405e91 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1466,7 +1466,7 @@ def groupby(cls, columns, dim_names, container_type, group_type, **kwargs): @classmethod - def coord_mask(cls, columns, arr, ind): + def key_select_mask(cls, columns, values, ind): if isinstance(ind, tuple): ind = slice(*ind) if isinstance(ind, np.ndarray): @@ -1474,20 +1474,20 @@ def coord_mask(cls, columns, arr, ind): elif isinstance(ind, slice): mask = True if ind.start is not None: - mask &= ind.start <= arr + mask &= ind.start <= values if ind.stop is not None: - mask &= arr < ind.stop + mask &= values < ind.stop elif isinstance(ind, (set, list)): iter_slcs = [] for ik in ind: - iter_slcs.append(arr == ik) + iter_slcs.append(values == ik) mask = np.logical_or.reduce(iter_slcs) elif ind is None: mask = None else: - index_mask = arr == ind + index_mask = values == ind if columns.ndims == 1 and np.sum(index_mask) == 0: - data_index = np.argmin(np.abs(arr - ind)) + data_index = np.argmin(np.abs(values - ind)) mask = np.zeros(len(columns), dtype=np.bool) mask[data_index] = True else: @@ -1508,14 +1508,14 @@ def select(cls, columns, selection_mask=None, **selection): data = {} value_select = [] for dim, ind in selection: - arr = cls.values(columns, dim, False) - mask = cls.coord_mask(columns, arr, ind) + values = cls.values(columns, dim, False) + mask = cls.key_select_mask(columns, values, ind) if mask is None: - mask = np.ones(arr.shape, dtype=bool) + mask = np.ones(values.shape, dtype=bool) else: - 
arr = arr[mask] + values = values[mask] value_select.append(mask) - data[dim] = arr + data[dim] = values int_inds = [np.argwhere(v) for v in value_select] index = np.ix_(*[np.atleast_1d(np.squeeze(ind)) if ind.ndim > 1 else np.atleast_1d(ind) for ind in int_inds]) @@ -1545,7 +1545,7 @@ def sample(cls, columns, samples=[]): sampled, int_inds = [], [] for d, ind in zip(dimensions, sample): cdata = columns.data[d] - mask = cls.coord_mask(columns, cdata, ind) + mask = cls.key_select_mask(columns, cdata, ind) inds = np.arange(len(cdata)) if mask is None else np.argwhere(mask) int_inds.append(inds) sampled.append(cdata[mask]) From 487e667f3df565b8332d903cbe63f50a6d206e3a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:39:41 +0000 Subject: [PATCH 23/36] Added comment for Image dimension_values method --- holoviews/element/raster.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index ee970576f8..5821b57d6c 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -621,6 +621,8 @@ def dimension_values(self, dim, expanded=True, flat=True): else: return d2lin if dim_idx else d1lin elif dim_idx == 2: + # Raster arrays are stored with different orientation + # than expanded column format, reorient before expanding data = np.flipud(self.data).T return data.flatten() if flat else data else: From 5468271e6acd72131efa6c0c10efbb2c4e7c8fa2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:40:05 +0000 Subject: [PATCH 24/36] Enforced samples have uniform length on GridColumns --- holoviews/core/data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 9b0d405e91..e1e7afdcef 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1537,6 +1537,11 @@ def sample(cls, columns, samples=[]): dimensions = columns.dimensions(label=True) arrays = [columns.data[vdim.name] for vdim in 
columns.vdims] data = defaultdict(list) + + first_sample = util.wrap_tuple(samples[0]) + if any(len(util.wrap_tuple(s)) != len(first_sample) for s in samples): + raise IndexError('Sample coordinates must all be of the same length.') + for sample in samples: if np.isscalar(sample): sample = [sample] if len(sample) != ndims: From 626dd076c09d8fefe23b51f484527d88e2670c29 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 13:51:11 +0000 Subject: [PATCH 25/36] Allowed returning non-flat key dimensions from gridded Elements --- holoviews/core/data.py | 4 +++- holoviews/element/raster.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index e1e7afdcef..2114db0354 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1425,7 +1425,9 @@ def values(cls, columns, dim, expanded=True, flat=True): return columns.data[dim] prod = util.cartesian_product([columns.data[d.name] for d in columns.kdims]) idx = columns.get_dimension_index(dim) - return prod[:, idx] + values = prod[:, idx] + shape = tuple(len(columns.data[d]) for d in columns.dimensions('key', True)) + return values if flat else values.reshape(shape) else: dim = columns.get_dimension(dim) values = columns.data.get(dim.name) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index 5821b57d6c..a18c72a6a6 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -174,8 +174,8 @@ def dimension_values(self, dim, expanded=True, flat=True): elif not expanded and dim_idx == 1: return np.array(range(self.data.shape[0])) elif dim_idx in [0, 1]: - D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]] - return D1.flatten() if dim_idx == 0 else D2.flatten() + values = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]][dim_idx] + return values.flatten() if flat else values elif dim_idx == 2: return toarray(self.data.T).flatten() else: @@ -616,8 +616,8 @@ def 
dimension_values(self, dim, expanded=True, flat=True): d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1) d2lin = np.linspace(b+d2_half_unit, t-d2_half_unit, dim2) if expanded: - Y, X = np.meshgrid(d2lin, d1lin) - return Y.flatten() if dim_idx else X.flatten() + values = np.meshgrid(d2lin, d1lin)[dim_idx] + return values.flatten() if flat else values else: return d2lin if dim_idx else d1lin elif dim_idx == 2: From b664343aeee167337af70d4c0f53ec0bff306040 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 14:24:22 +0000 Subject: [PATCH 26/36] Allowed dropping constant dimensions via GridColumns.reindex --- holoviews/core/data.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 2114db0354..536719b988 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1580,22 +1580,27 @@ def aggregate(cls, columns, kdims, function, **kwargs): @classmethod def reindex(cls, columns, kdims, vdims): - if len(kdims) != columns.ndims: - raise ValueError('Cannot drop dimension varying dimension.') + dropped_kdims = [kd for kd in columns.kdims if kd not in kdims] + if dropped_kdims and any(len(columns.data[kd.name]) > 1 for kd in dropped_kdims): + raise ValueError('Compressed format does not allow dropping key dimensions ' + 'which are not constant.') if (any(kd for kd in kdims if kd not in columns.kdims) or any(vd for vd in vdims if vd not in columns.vdims)): return columns.clone(columns.columns()).reindex(kdims, vdims) - data = dict(columns.data) - dropped_vdims = ([vdim for vdim in columns.vdims if vdim not in vdims] if vdims else []) - for vdim in dropped_vdims: - del data[vdim.name] + data = {k: values for k, values in columns.data.items() + if k not in dropped_kdims+dropped_vdims} if kdims != columns.kdims: - axes = [columns.kdims.index(d) for d in kdims] + dropped_axes = tuple(columns.kdims.index(d) for d in dropped_kdims) + old_kdims = [d for d in 
columns.kdims if not d in dropped_kdims] + axes = tuple(old_kdims.index(d) for d in kdims) for vdim in vdims: - data[vdim.name] = np.transpose(data[vdim.name], axes) + vdata = data[vdim.name] + if dropped_axes: + vdata = vdata.squeeze(axis=dropped_axes) + data[vdim.name] = np.transpose(vdata, axes) return data From fb8a2f206ad21261de039404ca190ea64997f237 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 14:24:53 +0000 Subject: [PATCH 27/36] Improved error message on GridColumns.sort --- holoviews/core/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 536719b988..236264a1a6 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1617,8 +1617,8 @@ def sort(cls, columns, by=[]): if not by or by in [columns.kdims, columns.dimensions()]: return columns.data else: - raise Exception('Dense representation cannot be sorted, either instantiate ' - 'in the desired order or use a sparse format.') + raise Exception('Compressed format cannot be sorted, either instantiate ' + 'in the desired order or use the expanded format.') From 0af0d1e25b2ec466647f1a8356533b16c886377a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 14:25:42 +0000 Subject: [PATCH 28/36] Disabled support for expanding vdims in GridColumns --- holoviews/core/data.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 236264a1a6..23a6459209 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1367,12 +1367,6 @@ def init(cls, eltype, data, kdims, vdims): raise ValueError('GridColumns must be instantiated as a ' 'dictionary or tuple') - if 'vdims' in data: - vdim_array = data.pop('vdims') - for i, vdim in enumerate(vdims): - name = vdim.name if isinstance(vdim, Dimension) else vdim - data[name] = vdim_array[..., i] - for dim in kdims+vdims: name = dim.name if isinstance(dim, Dimension) else dim if name not 
in data: From 4ff50a47620eaca0a906eaefbb0ba32d40d79119 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 14:44:04 +0000 Subject: [PATCH 29/36] Small fixes for NdColumns and DFColumns constructors --- holoviews/core/data.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 23a6459209..6591a5d5df 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -609,13 +609,14 @@ def init(cls, eltype, data, kdims, vdims): if not isinstance(data, (NdElement, dict)): # If ndim > 2 data is assumed to be a mapping + if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): pass else: - data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] - if not self.expanded_format(data): - raise ValueError('NdColumns expects data to be of uniform shape') if isinstance(data, tuple): + data = tuple(np.array(d) if not isinstance(d, np.ndarray) else d for d in data) + if not cls.expanded_format(data): + raise ValueError('NdColumns expects data to be of uniform shape') data = zip(*data) ndims = len(kdims) data = [(tuple(row[:ndims]), tuple(row[ndims:])) @@ -750,11 +751,11 @@ def init(cls, eltype, data, kdims, vdims): else: data = (range(len(data)), data) else: - data = tuple(data[:, i] for i in range(data.shape[1])) + data = tuple(data[:, i] for i in range(data.shape[1])) if isinstance(data, tuple): data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] - if cls.expanded_format(data): + if not cls.expanded_format(data): raise ValueError('DFColumns expects data to be of uniform shape.') data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) From 675dd4c4dcb9e9676a2276763faff1f34e361f7b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 14:44:39 +0000 Subject: [PATCH 30/36] Updated GridColumns unit test --- tests/testcolumns.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff 
--git a/tests/testcolumns.py b/tests/testcolumns.py index f876e89146..e7b82a2ed0 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -470,7 +470,8 @@ def test_columns_add_dimensions_values_hm(self): self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) def test_columns_sort_vdim_hm(self): - exception = 'Dense representation cannot be sorted, either instantiate '\ - 'in the desired order or use a sparse format.' + exception = ('Compressed format cannot be sorted, either instantiate ' + 'in the desired order or use the expanded format.') with self.assertRaisesRegexp(Exception, exception): - self.columns_hm.sort('y') \ No newline at end of file + self.columns_hm.sort('y') + From b853d12deaed8bc2161e31745bad6852740cc280 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 14:46:32 +0000 Subject: [PATCH 31/36] Updated GridColumns value slicing exception --- holoviews/core/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 6591a5d5df..031a970b29 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1497,8 +1497,8 @@ def select(cls, columns, selection_mask=None, **selection): dimensions = columns.dimensions('key', label=True) val_dims = [vdim for vdim in columns.vdims if vdim in selection] if val_dims: - raise IndexError('Cannot slice value dimensions on dense ' - 'data, convert to sparse format first') + raise IndexError('Cannot slice value dimensions in compressed format, ' + 'convert to expanded format before slicing.') indexed = cls.indexed(columns, selection) selection = [(d, selection.get(d)) for d in dimensions] From 0ffcbf2cdfac2ee31a956ce46ad9149691b23384 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 15:34:28 +0000 Subject: [PATCH 32/36] Fixed ArrayColumns init bug --- holoviews/core/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 
031a970b29..783687bba1 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -929,6 +929,7 @@ def init(cls, eltype, data, kdims, vdims): for k, v in data.items())) data = np.column_stack(columns) elif isinstance(data, tuple): + data = [d if isinstance(d, np.ndarray) else np.array(d) for d in data] if cls.expanded_format(data): data = np.column_stack(data) else: From c509a07a4efdd231871e3d8b4764cc314e73af00 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 14 Mar 2016 15:34:54 +0000 Subject: [PATCH 33/36] Fixed inverted Image.dimension_values --- holoviews/element/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index a18c72a6a6..decbdb53f0 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -616,7 +616,7 @@ def dimension_values(self, dim, expanded=True, flat=True): d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1) d2lin = np.linspace(b+d2_half_unit, t-d2_half_unit, dim2) if expanded: - values = np.meshgrid(d2lin, d1lin)[dim_idx] + values = np.meshgrid(d2lin, d1lin)[abs(dim_idx-1)] return values.flatten() if flat else values else: return d2lin if dim_idx else d1lin From 2a640c65e4063883e81af21b98119fd530f38a72 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Mon, 14 Mar 2016 16:54:25 +0000 Subject: [PATCH 34/36] Renamed expanded_format method to expanded --- holoviews/core/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 783687bba1..29042905c7 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -464,7 +464,7 @@ def validate(cls, columns): @classmethod - def expanded_format(cls, arrays): + def expanded(cls, arrays): return not any(array.shape not in [arrays[0].shape, (1,)] for array in arrays[1:]) @@ -615,7 +615,7 @@ def init(cls, eltype, data, kdims, vdims): else: if isinstance(data, tuple): data = tuple(np.array(d) if not 
isinstance(d, np.ndarray) else d for d in data) - if not cls.expanded_format(data): + if not cls.expanded(data): raise ValueError('NdColumns expects data to be of uniform shape') data = zip(*data) ndims = len(kdims) @@ -755,7 +755,7 @@ def init(cls, eltype, data, kdims, vdims): if isinstance(data, tuple): data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] - if not cls.expanded_format(data): + if not cls.expanded(data): raise ValueError('DFColumns expects data to be of uniform shape.') data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) @@ -930,7 +930,7 @@ def init(cls, eltype, data, kdims, vdims): data = np.column_stack(columns) elif isinstance(data, tuple): data = [d if isinstance(d, np.ndarray) else np.array(d) for d in data] - if cls.expanded_format(data): + if cls.expanded(data): data = np.column_stack(data) else: raise ValueError('ArrayColumns expects data to be of uniform shape.') @@ -1166,7 +1166,7 @@ def init(cls, eltype, data, kdims, vdims): raise ValueError("DictColumns interface couldn't convert data.""") elif isinstance(data, dict): unpacked = [(d, np.array(data[d])) for d in data] - if not cls.expanded_format([d[1] for d in unpacked]): + if not cls.expanded([d[1] for d in unpacked]): raise ValueError('DictColumns expects data to be of uniform shape.') if isinstance(data, odict_types): data.update(unpacked) From c109675645e218b7fa31e66975dcd8c5b6ede251 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Mon, 14 Mar 2016 17:43:54 +0000 Subject: [PATCH 35/36] Updated the class docstring for GridColumns --- holoviews/core/data.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 29042905c7..aa45670cbb 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -1341,9 +1341,18 @@ def aggregate(cls, columns, kdims, function, **kwargs): class GridColumns(DictColumns): """ - Interface for simple dictionary-based columns format. 
The dictionary - keys correspond to the column (i.e dimension) names and the values - are collections representing the values in that column. + Interface for simple dictionary-based columns format using a + compressed representation that uses the cartesian product between + key dimensions. As with DictColumns, the dictionary keys correspond + to the column (i.e dimension) names and the values are NumPy arrays + representing the values in that column. + + To use this compressed format, the key dimensions must be orthogonal + to one another with each key dimension specifying an axis of the + multidimensional space occupied by the value dimension data. For + instance, given temperature recordings sampled regularly across + the earth's surface, a list of N unique latitudes and M unique + longitudes can specify the position of NxM temperature samples. """ types = (dict, OrderedDict, cyODict) From 1b8d27af75f31ddb38a914b8a75683818342d935 Mon Sep 17 00:00:00 2001 From: jlstevens Date: Mon, 14 Mar 2016 17:48:12 +0000 Subject: [PATCH 36/36] Added 'grid' interface to default datatype list --- holoviews/core/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data.py b/holoviews/core/data.py index aa45670cbb..4a89f74789 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -44,7 +44,7 @@ class Columns(Element): of aggregating or collapsing the data with a supplied function. """ - datatype = param.List(['array', 'dataframe', 'dictionary', 'ndelement'], + datatype = param.List(['array', 'dataframe', 'dictionary', 'grid', 'ndelement'], doc=""" A priority list of the data types to be used for storage on the .data attribute. If the input supplied to the element constructor cannot be put into the requested format, the next