diff --git a/holoviews/core/data.py b/holoviews/core/data.py index 34f8c06a36..4a89f74789 100644 --- a/holoviews/core/data.py +++ b/holoviews/core/data.py @@ -5,7 +5,7 @@ import sys from distutils.version import LooseVersion -from collections import OrderedDict +from collections import OrderedDict, defaultdict from itertools import compress try: @@ -27,7 +27,6 @@ from .ndmapping import NdMapping, item_check, sorted_context from .spaces import HoloMap from . import util -from .util import wrap_tuple, basestring, unique_array class Columns(Element): @@ -45,7 +44,7 @@ class Columns(Element): of aggregating or collapsing the data with a supplied function. """ - datatype = param.List(['array', 'dataframe', 'dictionary', 'ndelement'], + datatype = param.List(['array', 'dataframe', 'dictionary', 'grid', 'ndelement'], doc=""" A priority list of the data types to be used for storage on the .data attribute. If the input supplied to the element constructor cannot be put into the requested format, the next @@ -336,17 +335,13 @@ def shape(self): return self.interface.shape(self) - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): """ Returns the values along a particular dimension. If unique values are requested will return only unique values. 
""" dim = self.get_dimension(dim, strict=True).name - dim_vals = self.interface.values(self, dim) - if unique: - return unique_array(dim_vals) - else: - return dim_vals + return self.interface.values(self, dim, expanded, flat) def get_dimension_type(self, dim): @@ -447,7 +442,7 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): # Iterate over interfaces until one can interpret the input for interface in prioritized: try: - (data, kdims, vdims) = interface.reshape(eltype, data, kdims, vdims) + (data, kdims, vdims) = interface.init(eltype, data, kdims, vdims) break except: pass @@ -468,6 +463,11 @@ def validate(cls, columns): "not found: %s" % repr(not_found)) + @classmethod + def expanded(cls, arrays): + return not any(array.shape not in [arrays[0].shape, (1,)] for array in arrays[1:]) + + @classmethod def select_mask(cls, columns, selection): """ @@ -583,7 +583,7 @@ class NdColumns(DataColumns): datatype = 'ndelement' @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): if isinstance(data, NdElement): kdims = [d for d in kdims if d != 'Index'] else: @@ -609,10 +609,14 @@ def reshape(cls, eltype, data, kdims, vdims): if not isinstance(data, (NdElement, dict)): # If ndim > 2 data is assumed to be a mapping + if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): pass else: if isinstance(data, tuple): + data = tuple(np.array(d) if not isinstance(d, np.ndarray) else d for d in data) + if not cls.expanded(data): + raise ValueError('NdColumns expects data to be of uniform shape') data = zip(*data) ndims = len(kdims) data = [(tuple(row[:ndims]), tuple(row[ndims:])) @@ -654,8 +658,11 @@ def sort(cls, columns, by=[]): return columns.data.sort(by) @classmethod - def values(cls, columns, dim): - return columns.data.dimension_values(dim) + def values(cls, columns, dim, expanded=True, flat=True): + values = columns.data.dimension_values(dim, expanded, flat) + if not expanded: + return 
util.unique_array(values) + return values @classmethod def reindex(cls, columns, kdims=None, vdims=None): @@ -709,12 +716,11 @@ def dimension_type(cls, columns, dim): return columns.data.dtypes[idx].type @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): element_params = eltype.params() kdim_param = element_params['kdims'] vdim_param = element_params['vdims'] if util.is_dataframe(data): - columns = data.columns ndim = len(kdim_param.default) if kdim_param.default else None if kdims and vdims is None: vdims = [c for c in data.columns if c not in kdims] @@ -735,7 +741,7 @@ def reshape(cls, eltype, data, kdims, vdims): (isinstance(data, NdElement) and all(c in data.dimensions() for c in columns))): data = OrderedDict(((d, data[d]) for d in columns)) elif isinstance(data, dict) and not all(d in data for d in columns): - column_data = zip(*((wrap_tuple(k)+wrap_tuple(v)) + column_data = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v)) for k, v in data.items())) data = OrderedDict(((c, col) for c, col in zip(columns, column_data))) elif isinstance(data, np.ndarray): @@ -745,9 +751,12 @@ def reshape(cls, eltype, data, kdims, vdims): else: data = (range(len(data)), data) else: - data = tuple(data[:, i] for i in range(data.shape[1])) + data = tuple(data[:, i] for i in range(data.shape[1])) if isinstance(data, tuple): + data = [np.array(d) if not isinstance(d, np.ndarray) else d for d in data] + if not cls.expanded(data): + raise ValueError('DFColumns expects data to be of uniform shape.') data = pd.DataFrame.from_items([(c, d) for c, d in zip(columns, data)]) else: @@ -760,7 +769,7 @@ def range(cls, columns, dimension): column = columns.data[columns.get_dimension(dimension).name] if column.dtype.kind == 'O': if (not isinstance(columns.data, pd.DataFrame) or - LooseVersion(pd.__version__) < '0.17.0'): + LooseVersion(pd.__version__) < '0.17.0'): column = column.sort(inplace=False) else: column = column.sort_values() @@ 
-853,10 +862,12 @@ def select(cls, columns, selection_mask=None, **selection): @classmethod - def values(cls, columns, dim): + def values(cls, columns, dim, expanded=True, flat=True): data = columns.data[dim] if util.dd and isinstance(data, util.dd.Series): data = data.compute() + if not expanded: + return util.unique_array(data) return np.array(data) @@ -901,7 +912,7 @@ def dimension_type(cls, columns, dim): return columns.data.dtype.type @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): if kdims is None: kdims = eltype.kdims if vdims is None: @@ -914,14 +925,15 @@ def reshape(cls, eltype, data, kdims, vdims): columns = [data[d] for d in dimensions] data = np.column_stack(columns) elif isinstance(data, dict) and not all(d in data for d in dimensions): - columns = zip(*((wrap_tuple(k)+wrap_tuple(v)) + columns = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v)) for k, v in data.items())) data = np.column_stack(columns) elif isinstance(data, tuple): - try: + data = [d if isinstance(d, np.ndarray) else np.array(d) for d in data] + if cls.expanded(data): data = np.column_stack(data) - except: - data = None + else: + raise ValueError('ArrayColumns expects data to be of uniform shape.') elif not isinstance(data, np.ndarray): data = np.array([], ndmin=2).T if data is None else list(data) try: @@ -979,12 +991,15 @@ def sort(cls, columns, by=[]): @classmethod - def values(cls, columns, dim): + def values(cls, columns, dim, expanded=True, flat=True): data = columns.data dim_idx = columns.get_dimension_index(dim) if data.ndim == 1: data = np.atleast_2d(data).T - return data[:, dim_idx] + values = data[:, dim_idx] + if not expanded: + return util.unique_array(values) + return values @classmethod @@ -1117,7 +1132,7 @@ def dimension_type(cls, columns, dim): return columns.data[name].dtype.type @classmethod - def reshape(cls, eltype, data, kdims, vdims): + def init(cls, eltype, data, kdims, vdims): odict_types = (OrderedDict, 
cyODict) if kdims is None: kdims = eltype.kdims @@ -1143,7 +1158,7 @@ def reshape(cls, eltype, data, kdims, vdims): elif not isinstance(data, dict): data = {k: v for k, v in zip(dimensions, zip(*data))} elif isinstance(data, dict) and not all(d in data for d in dimensions): - dict_data = zip(*((wrap_tuple(k)+wrap_tuple(v)) + dict_data = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v)) for k, v in data.items())) data = {k: np.array(v) for k, v in zip(dimensions, dict_data)} @@ -1151,6 +1166,8 @@ def reshape(cls, eltype, data, kdims, vdims): raise ValueError("DictColumns interface couldn't convert data.""") elif isinstance(data, dict): unpacked = [(d, np.array(data[d])) for d in data] + if not cls.expanded([d[1] for d in unpacked]): + raise ValueError('DictColumns expects data to be of uniform shape.') if isinstance(data, odict_types): data.update(unpacked) else: @@ -1158,6 +1175,17 @@ def reshape(cls, eltype, data, kdims, vdims): return data, kdims, vdims + @classmethod + def validate(cls, columns): + dimensions = columns.dimensions(label=True) + not_found = [d for d in dimensions if d not in columns.data] + if not_found: + raise ValueError('Following dimensions not found in data: %s' % not_found) + lengths = [len(columns.data[dim]) for dim in dimensions] + if len({l for l in lengths if l > 1}) > 1: + raise ValueError('Length of columns do not match') + + @classmethod def unpack_scalar(cls, columns, data): """ @@ -1188,7 +1216,7 @@ def array(cls, columns, dimensions): def add_dimension(cls, columns, dimension, dim_pos, values, vdim): dim = dimension.name if isinstance(dimension, Dimension) else dimension data = list(columns.data.items()) - if isinstance(values, basestring) or not hasattr(values, '__iter__'): + if isinstance(values, util.basestring) or not hasattr(values, '__iter__'): values = np.array([values]*len(columns)) data.insert(dim_pos, (dim, values)) return OrderedDict(data) @@ -1215,8 +1243,11 @@ def sort(cls, columns, by=[]): return OrderedDict([(d, 
class GridColumns(DictColumns):
    """
    Interface for simple dictionary-based columns format using a
    compressed representation that uses the cartesian product between
    key dimensions. As with DictColumns, the dictionary keys correspond
    to the column (i.e. dimension) names and the values are NumPy arrays
    representing the values in that column.

    To use this compressed format, the key dimensions must be orthogonal
    to one another with each key dimension specifying an axis of the
    multidimensional space occupied by the value dimension data. For
    instance, given temperature recordings sampled regularly across
    the earth's surface, a list of N unique latitudes and M unique
    longitudes can specify the position of NxM temperature samples.
    """

    types = (dict, OrderedDict, cyODict)

    datatype = 'grid'

    @classmethod
    def init(cls, eltype, data, kdims, vdims):
        """
        Validates and normalizes the supplied data into the gridded
        dictionary format, returning (data, kdims, vdims).

        A tuple is zipped against the key + value dimension names; any
        other non-dict input is rejected. Every column is converted to
        a NumPy array (stored back onto the supplied dict) and each
        value array must have a shape equal to the tuple of key
        dimension lengths. Raises ValueError when there are no value
        dimensions, a dimension is missing from the data, or a value
        array has the wrong shape.
        """
        if kdims is None:
            kdims = eltype.kdims
        if vdims is None:
            vdims = eltype.vdims

        if not vdims:
            raise ValueError('GridColumns interface requires at least '
                             'one value dimension.')

        dimensions = [d.name if isinstance(d, Dimension) else
                      d for d in kdims + vdims]
        if isinstance(data, tuple):
            data = {d: v for d, v in zip(dimensions, data)}
        elif not isinstance(data, dict):
            raise ValueError('GridColumns must be instantiated as a '
                             'dictionary or tuple')

        for dim in kdims+vdims:
            name = dim.name if isinstance(dim, Dimension) else dim
            if name not in data:
                raise ValueError("Values for dimension %s not found" % dim)
            if not isinstance(data[name], np.ndarray):
                data[name] = np.array(data[name])

        kdim_names = [d.name if isinstance(d, Dimension) else d for d in kdims]
        vdim_names = [d.name if isinstance(d, Dimension) else d for d in vdims]
        expected = tuple(len(data[kd]) for kd in kdim_names)
        for vdim in vdim_names:
            shape = data[vdim].shape
            if shape != expected:
                # Arguments follow the order of the placeholders:
                # dimension name, expected shape, actual shape.
                raise ValueError('Key dimension values and value array %s '
                                 'shape do not match. Expected shape %s, '
                                 'actual shape: %s' % (vdim, expected, shape))
        return data, kdims, vdims


    @classmethod
    def validate(cls, columns):
        """
        Validates the columns using DataColumns.validate directly,
        bypassing the validation inherited from DictColumns.
        """
        # NOTE(review): presumably the DictColumns column-length check
        # is skipped because value arrays are N-dimensional here.
        DataColumns.validate(columns)


    @classmethod
    def dimension_type(cls, columns, dim):
        """
        Returns the NumPy scalar type of the array stored for the
        dimension, or None if it is neither a key nor value dimension.
        Accepts either a Dimension object or a dimension name.
        """
        name = dim.name if isinstance(dim, Dimension) else dim
        known = [d.name for d in columns.kdims] + [d.name for d in columns.vdims]
        if name not in known:
            return None
        return columns.data[name].dtype.type


    @classmethod
    def shape(cls, columns):
        """
        Returns (number of expanded rows, number of dimensions).
        """
        return cls.length(columns), len(columns.dimensions())


    @classmethod
    def length(cls, columns):
        """
        Returns the number of samples in the expanded format, i.e. the
        product of the lengths of all key dimension arrays.
        """
        # np.prod replaces the np.product alias (removed in NumPy 2.0);
        # int() keeps the result integral when there are no key dimensions.
        return int(np.prod([len(columns.data[d.name]) for d in columns.kdims]))


    @classmethod
    def values(cls, columns, dim, expanded=True, flat=True):
        """
        Returns the values along a dimension.

        For key dimensions, expanded=False returns the stored 1D
        coordinate array, otherwise the coordinates are expanded over
        the full grid. For value dimensions the stored array is
        returned. When flat is True expanded and value arrays are
        flattened, otherwise they keep the shape of the grid.
        """
        if dim in columns.kdims:
            name = dim.name if isinstance(dim, Dimension) else dim
            if not expanded:
                return columns.data[name]
            coords = [columns.data[d.name] for d in columns.kdims]
            idx = columns.get_dimension_index(dim)
            # Expand only the requested axis so that its dtype is kept;
            # stacking all coordinates into a single array would cast
            # them to a common dtype.
            values = np.meshgrid(*coords, indexing='ij')[idx]
            return values.flatten() if flat else values
        else:
            dim = columns.get_dimension(dim)
            values = columns.data.get(dim.name)
            return values.flatten() if flat else values


    @classmethod
    def groupby(cls, columns, dim_names, container_type, group_type, **kwargs):
        """
        Groups the data by the supplied key dimensions, selecting every
        combination of their coordinates and wrapping each selection in
        group_type ('raw' maps to dict). The (key, group) pairs are
        returned inside container_type.
        """
        # Get dimensions information
        dimensions = [columns.get_dimension(d) for d in dim_names]
        kdims = [kdim for kdim in columns.kdims if kdim not in dimensions]

        # Update the kwargs appropriately for Element group types
        group_kwargs = {}
        group_type = dict if group_type == 'raw' else group_type
        if issubclass(group_type, Element):
            group_kwargs.update(util.get_param_values(columns))
            group_kwargs['kdims'] = kdims
        group_kwargs.update(kwargs)

        # Find all the keys along supplied dimensions
        keys = [columns.data[d.name] for d in dimensions]

        # Iterate over the unique entries applying selection masks
        grouped_data = []
        for unique_key in util.cartesian_product(keys):
            group_data = cls.select(columns, **dict(zip(dim_names, unique_key)))
            for vdim in columns.vdims:
                # Drop the length-one axes left behind by the selection
                group_data[vdim.name] = np.squeeze(group_data[vdim.name])
            group_data = group_type(group_data, **group_kwargs)
            grouped_data.append((tuple(unique_key), group_data))

        if issubclass(container_type, NdMapping):
            with item_check(False), sorted_context(False):
                return container_type(grouped_data, kdims=dimensions)
        else:
            return container_type(grouped_data)


    @classmethod
    def key_select_mask(cls, columns, values, ind):
        """
        Converts a selection along one key dimension into a mask over
        the coordinate array `values`.

        Tuples are treated as slices; slices select start <= v < stop;
        sets and lists select any matching value; arrays are used as
        the mask directly; None returns None (no selection). Any other
        value selects exact matches, falling back to the nearest
        coordinate when the columns have a single key dimension and
        nothing matches exactly.
        """
        if isinstance(ind, tuple):
            ind = slice(*ind)
        if isinstance(ind, np.ndarray):
            mask = ind
        elif isinstance(ind, slice):
            # Start from an all-True array (not the scalar True) so an
            # open-ended slice still yields a mask matching `values`.
            mask = np.ones(values.shape, dtype=bool)
            if ind.start is not None:
                mask &= ind.start <= values
            if ind.stop is not None:
                mask &= values < ind.stop
        elif isinstance(ind, (set, list)):
            # Accumulate into an array so an empty set/list selects
            # nothing instead of producing a scalar False.
            mask = np.zeros(values.shape, dtype=bool)
            for ik in ind:
                mask |= values == ik
        elif ind is None:
            mask = None
        else:
            index_mask = values == ind
            if columns.ndims == 1 and np.sum(index_mask) == 0:
                data_index = np.argmin(np.abs(values - ind))
                # Builtin bool replaces the removed np.bool alias
                mask = np.zeros(values.shape, dtype=bool)
                mask[data_index] = True
            else:
                mask = index_mask
        return mask


    @classmethod
    def select(cls, columns, selection_mask=None, **selection):
        """
        Applies a selection along the key dimensions, returning a new
        data dictionary with the selected coordinates and the matching
        sub-array of every value dimension. Raises IndexError if a
        value dimension is selected on. If cls.indexed reports the
        selection as an index and the first value array has length
        one, its first entry is returned instead of a dictionary.
        """
        dimensions = columns.dimensions('key', label=True)
        val_dims = [vdim for vdim in columns.vdims if vdim in selection]
        if val_dims:
            raise IndexError('Cannot slice value dimensions in compressed format, '
                             'convert to expanded format before slicing.')

        indexed = cls.indexed(columns, selection)
        selection = [(d, selection.get(d)) for d in dimensions]
        data = {}
        value_select = []
        for dim, ind in selection:
            values = cls.values(columns, dim, False)
            mask = cls.key_select_mask(columns, values, ind)
            if mask is None:
                mask = np.ones(values.shape, dtype=bool)
            else:
                values = values[mask]
            value_select.append(mask)
            data[dim] = values
        # One integer index array per key dimension, combined into an
        # open mesh that indexes the N-dimensional value arrays.
        index = np.ix_(*[np.flatnonzero(mask) for mask in value_select])
        for vdim in columns.vdims:
            data[vdim.name] = columns.data[vdim.name][index]

        if indexed and len(data[columns.vdims[0].name]) == 1:
            return data[columns.vdims[0].name][0]

        return data


    @classmethod
    def sample(cls, columns, samples=[]):
        """
        Samples the gridded data into columns of samples.

        Each sample is a coordinate (or tuple of coordinates) along the
        key dimensions; missing trailing coordinates are treated as
        unselected. Raises IndexError if no samples are supplied or if
        the samples differ in length.
        """
        if len(samples) == 0:
            raise IndexError('At least one sample coordinate is required.')

        ndims = columns.ndims
        dimensions = columns.dimensions(label=True)
        arrays = [columns.data[vdim.name] for vdim in columns.vdims]
        data = defaultdict(list)

        first_sample = util.wrap_tuple(samples[0])
        if any(len(util.wrap_tuple(s)) != len(first_sample) for s in samples):
            raise IndexError('Sample coordinates must all be of the same length.')

        for sample in samples:
            if np.isscalar(sample): sample = [sample]
            if len(sample) != ndims:
                # Pad with None so every key dimension has an entry
                sample = [sample[i] if i < len(sample) else None
                          for i in range(ndims)]
            sampled, int_inds = [], []
            for d, ind in zip(dimensions, sample):
                cdata = columns.data[d]
                mask = cls.key_select_mask(columns, cdata, ind)
                inds = np.arange(len(cdata)) if mask is None else np.argwhere(mask)
                int_inds.append(inds)
                sampled.append(cdata[mask])
            for d, arr in zip(dimensions, np.meshgrid(*sampled)):
                data[d].append(arr)
            for vdim, array in zip(columns.vdims, arrays):
                flat_index = np.ravel_multi_index(tuple(int_inds), array.shape)
                data[vdim.name].append(array.flat[flat_index])
        concatenated = {d: np.concatenate(chunks).flatten()
                        for d, chunks in data.items()}
        return concatenated


    @classmethod
    def aggregate(cls, columns, kdims, function, **kwargs):
        """
        Aggregates the value arrays with the supplied function over
        the axes of all key dimensions not listed in kdims, returning
        a data dictionary holding the retained key dimensions and the
        reduced value arrays.
        """
        kdims = [kd.name if isinstance(kd, Dimension) else kd for kd in kdims]
        data = {kdim: columns.data[kdim] for kdim in kdims}
        axes = tuple(columns.get_dimension_index(kdim) for kdim in columns.kdims
                     if kdim not in kdims)
        for vdim in columns.vdims:
            data[vdim.name] = np.atleast_1d(function(columns.data[vdim.name],
                                                     axis=axes, **kwargs))

        return data


    @classmethod
    def reindex(cls, columns, kdims, vdims):
        """
        Reorders or drops dimensions, returning the new data
        dictionary. Key dimensions may only be dropped if they hold a
        single coordinate (ValueError otherwise). If a requested
        dimension is not an existing key/value dimension respectively,
        the columns are converted via columns.columns() and reindexed
        there instead. A falsy vdims keeps all value dimensions.
        """
        dropped_kdims = [kd for kd in columns.kdims if kd not in kdims]
        if any(len(columns.data[kd.name]) > 1 for kd in dropped_kdims):
            raise ValueError('Compressed format does not allow dropping key dimensions '
                             'which are not constant.')
        if (any(kd not in columns.kdims for kd in kdims) or
            any(vd not in columns.vdims for vd in (vdims or []))):
            return columns.clone(columns.columns()).reindex(kdims, vdims)
        dropped_vdims = ([vdim for vdim in columns.vdims
                          if vdim not in vdims] if vdims else [])
        kept_vdims = [vdim for vdim in columns.vdims
                      if vdim not in dropped_vdims]
        dropped = dropped_kdims + dropped_vdims
        data = {k: values for k, values in columns.data.items()
                if k not in dropped}

        if kdims != columns.kdims:
            dropped_axes = tuple(columns.kdims.index(d) for d in dropped_kdims)
            old_kdims = [d for d in columns.kdims if d not in dropped_kdims]
            axes = tuple(old_kdims.index(d) for d in kdims)
            # Iterate over the retained value dimensions of the columns
            # so the axes are also reordered when vdims was left empty.
            for vdim in kept_vdims:
                vdata = data[vdim.name]
                if dropped_axes:
                    vdata = vdata.squeeze(axis=dropped_axes)
                data[vdim.name] = np.transpose(vdata, axes)
        return data


    @classmethod
    def add_dimension(cls, columns, dimension, dim_pos, values, vdim):
        """
        Returns a copy of the data with an additional value dimension.
        Key dimensions cannot be added to the gridded format. Scalar
        values (including strings) are expanded to fill the grid, as
        DictColumns.add_dimension does for its columns; dim_pos is
        unused as the data is stored in a dictionary.
        """
        if not vdim:
            raise Exception("Cannot add key dimension to a dense representation.")
        dim = dimension.name if isinstance(dimension, Dimension) else dimension
        if isinstance(values, util.basestring) or not hasattr(values, '__iter__'):
            grid_shape = tuple(len(columns.data[kd.name]) for kd in columns.kdims)
            scalar = values
            values = np.empty(grid_shape, dtype=np.array(scalar).dtype)
            values[...] = scalar
        return dict(columns.data, **{dim: values})


    @classmethod
    def sort(cls, columns, by=[]):
        """
        Returns the data unchanged when no sort order is requested or
        when sorting by the key dimensions or by all dimensions in
        their existing order; any other order raises an Exception.
        """
        if not by or by in [columns.kdims, columns.dimensions()]:
            return columns.data
        else:
            raise Exception('Compressed format cannot be sorted, either instantiate '
                            'in the desired order or use the expanded format.')
a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -754,7 +754,7 @@ def select(self, selection_specs=None, **kwargs): return selection - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): """ Returns the values along the specified dimension. This method must be implemented for all Dimensioned type. diff --git a/holoviews/core/element.py b/holoviews/core/element.py index bcb61740a3..bdac467c7d 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -482,15 +482,16 @@ def aggregate(self, dimensions, function, **kwargs): return self.clone(rows, kdims=grouped.kdims) - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): dim = self.get_dimension(dim, strict=True) value_dims = self.dimensions('value', label=True) if dim.name in value_dims: index = value_dims.index(dim.name) vals = np.array([v[index] for v in self.data.values()]) - return unique_array(vals) if unique else vals + return vals if expanded else unique_array(vals) else: - return NdMapping.dimension_values(self, dim.name, unique) + return NdMapping.dimension_values(self, dim.name, + expanded, flat) def values(self): diff --git a/holoviews/core/layout.py b/holoviews/core/layout.py index ddd7b3d763..d24dccba06 100644 --- a/holoviews/core/layout.py +++ b/holoviews/core/layout.py @@ -126,9 +126,9 @@ def get(self, key, default=None): return self.data[key] if key in self.data else default - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): dimension = self.get_dimension(dimension, strict=True).name - return self.main.dimension_values(dimension, unique) + return self.main.dimension_values(dimension, expanded, flat) def __getitem__(self, key): @@ -433,7 +433,7 @@ def clone(self, *args, **overrides): return clone - def dimension_values(self, dimension, unique=False): + def dimension_values(self, 
dimension, expanded=True, flat=True): "Returns the values along the specified dimension." dimension = self.get_dimension(dimension, strict=True).name all_dims = self.traverse(lambda x: [d.name for d in x.dimensions()]) @@ -441,9 +441,10 @@ def dimension_values(self, dimension, unique=False): values = [el.dimension_values(dimension) for el in self if dimension in el.dimensions(label=True)] vals = np.concatenate(values) - return unique_array(vals) if unique else vals + return vals if expanded else unique_array(vals) else: - return super(Layout, self).dimension_values(dimension, unique) + return super(Layout, self).dimension_values(dimension, + expanded, flat) def cols(self, ncols): diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 41b45049b0..856cc2fe25 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -343,7 +343,7 @@ def drop_dimension(self, dimensions): kdims=dims) - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): "Returns the values along the specified dimension." 
dimension = self.get_dimension(dimension, strict=True).name if dimension in self.kdims: @@ -352,9 +352,9 @@ def dimension_values(self, dimension, unique=False): values = [el.dimension_values(dimension) for el in self if dimension in el.dimensions()] vals = np.concatenate(values) - return util.unique_array(vals) if unique else vals + return vals if expanded else util.unique_array(vals) else: - return super(MultiDimensionalMapping, self).dimension_values(dimension, unique) + return super(MultiDimensionalMapping, self).dimension_values(dimension, expanded, flat) def reindex(self, kdims=[], force=False): diff --git a/holoviews/core/overlay.py b/holoviews/core/overlay.py index f145a58756..69e0fbf21c 100644 --- a/holoviews/core/overlay.py +++ b/holoviews/core/overlay.py @@ -65,7 +65,7 @@ def hist(self, index=0, adjoin=True, dimension=None, **kwargs): return layout - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): values = [] found = False for el in self: @@ -73,12 +73,12 @@ def dimension_values(self, dimension, unique=False): values.append(el.dimension_values(dimension)) found = True if not found: - return super(CompositeOverlay, self).dimension_values(dimension, unique) + return super(CompositeOverlay, self).dimension_values(dimension, expanded, flat) values = [v for v in values if v is not None and len(v)] if not values: return np.array() vals = np.concatenate(values) - return unique_array(vals) if unique else vals + return vals if expanded else unique_array(vals) class Overlay(Layout, CompositeOverlay): diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 1ab8caa71a..67bb4e25a3 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -4,6 +4,7 @@ import string, fnmatch import unicodedata from collections import defaultdict +from functools import reduce import numpy as np import param @@ -861,3 +862,16 @@ def groupby_python(self_or_cls, ndmapping, dimensions, 
def cartesian_product(arrays):
    """
    Computes the cartesian product of a list of arrays.

    Returns a 2D array with one row per combination and one column
    per input array; rows are ordered with the last array varying
    fastest. Numeric inputs (and inputs that all share one dtype
    kind) are promoted to a common dtype so that, for example,
    combining integer and float arrays does not truncate the floats.
    Inputs of mixed kinds (e.g. strings and numbers) are returned as
    an object array so each value keeps its type. An empty list
    yields an array of shape (1, 0), the single empty combination.
    """
    arrays = [np.asarray(a) for a in arrays]
    if not arrays:
        return np.empty((1, 0))

    dtypes = [a.dtype for a in arrays]
    kinds = set(dt.kind for dt in dtypes)
    dtype = np.dtype(object)
    if len(kinds) == 1 or kinds <= set('biufc'):
        try:
            dtype = np.result_type(*dtypes)
        except TypeError:
            # No common dtype exists, keep the object fallback
            pass

    # meshgrid is used rather than np.ix_ because np.ix_ interprets
    # boolean arrays as masks instead of as coordinate values.
    grids = np.meshgrid(*arrays, indexing='ij')
    out = np.empty((grids[0].size, len(grids)), dtype=dtype)
    for col, grid in enumerate(grids):
        out[:, col] = grid.ravel()
    return out
""" dim_idx = self.get_dimension_index(dim) - if unique and dim_idx == 0: + if not expanded and dim_idx == 0: return np.array(range(self.data.shape[1])) - elif unique and dim_idx == 1: + elif not expanded and dim_idx == 1: return np.array(range(self.data.shape[0])) elif dim_idx in [0, 1]: - D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]] - return D1.flatten() if dim_idx == 0 else D2.flatten() + values = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]][dim_idx] + return values.flatten() if flat else values elif dim_idx == 2: return toarray(self.data.T).flatten() else: @@ -338,20 +338,20 @@ def range(self, dimension): super(QuadMesh, self).range(dimension) - def dimension_values(self, dimension, unique=False): + def dimension_values(self, dimension, expanded=True, flat=True): idx = self.get_dimension_index(dimension) data = self.data[idx] if idx in [0, 1]: if not self._grid: return data.flatten() - odim = 1 if unique else self.data[2].shape[idx] + odim = self.data[2].shape[idx] if expanded else 1 vals = np.tile(np.convolve(data, np.ones((2,))/2, mode='valid'), odim) if idx: return np.sort(vals) else: return vals elif idx == 2: - return data.flatten() + return data.flatten() if flat else data else: return super(QuadMesh, self).dimension_values(idx) @@ -388,8 +388,8 @@ def __init__(self, data, extents=None, **params): def _compute_raster(self): - d1keys = self.dimension_values(0, True) - d2keys = self.dimension_values(1, True) + d1keys = self.dimension_values(0, False) + d2keys = self.dimension_values(1, False) coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys] dtype = 'dataframe' if pd else 'dictionary' dense_data = Columns(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype]) @@ -438,8 +438,8 @@ def __setstate__(self, state): super(HeatMap, self).__setstate__(state) def dense_keys(self): - d1keys = self.dimension_values(0, True) - d2keys = self.dimension_values(1, True) + d1keys = self.dimension_values(0, False) + d2keys = 
self.dimension_values(1, False) return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys])) @@ -603,7 +603,7 @@ def _coord2matrix(self, coord): return self.sheet2matrixidx(*coord) - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): """ The set of samples available along a particular dimension. """ @@ -615,13 +615,16 @@ def dimension_values(self, dim, unique=False): d2_half_unit = (t - b)/dim2/2. d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1) d2lin = np.linspace(b+d2_half_unit, t-d2_half_unit, dim2) - if unique: - return d2lin if dim_idx else d1lin + if expanded: + values = np.meshgrid(d2lin, d1lin)[abs(dim_idx-1)] + return values.flatten() if flat else values else: - Y, X = np.meshgrid(d2lin, d1lin) - return Y.flatten() if dim_idx else X.flatten() + return d2lin if dim_idx else d1lin elif dim_idx == 2: - return np.flipud(self.data).T.flatten() + # Raster arrays are stored with different orientation + # than expanded column format, reorient before expanding + data = np.flipud(self.data).T + return data.flatten() if flat else data else: super(Image, self).dimension_values(dim) @@ -703,14 +706,15 @@ def load_image(cls, filename, height=1, array=False, bounds=None, bare=False, ** return rgb - def dimension_values(self, dim, unique=False): + def dimension_values(self, dim, expanded=True, flat=True): """ The set of samples available along a particular dimension. 
""" dim_idx = self.get_dimension_index(dim) if self.ndims <= dim_idx < len(self.dimensions()): - return np.flipud(self.data[:,:,dim_idx-self.ndims]).T.flatten() - return super(RGB, self).dimension_values(dim, unique=True) + data = np.flipud(self.data[:,:,dim_idx-self.ndims]).T + return data.flatten() if flat else data + return super(RGB, self).dimension_values(dim, expanded, flat) def __init__(self, data, **params): diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index a7948fc9be..cd7d0b0731 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -71,7 +71,7 @@ def collapse_data(cls, data, function, **kwargs): return OrderedDict(zip(data[0].keys(), function(groups, axis=-1, **kwargs))) - def dimension_values(self, dimension): + def dimension_values(self, dimension, expanded=True, flat=True): dimension = self.get_dimension(dimension, strict=True).name if dimension in self.dimensions('value', label=True): return np.array([self.data.get(dimension, np.NaN)]) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 94a08db4fe..85c6818d28 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -428,8 +428,8 @@ def _process(self, element, key=None): data = [element.data] elif isinstance(element, QuadMesh): - data = (element.dimension_values(0, True), - element.dimension_values(1, True), + data = (element.dimension_values(0, False), + element.dimension_values(1, False), element.data[2]) contour_set = contour_fn(*data, extent=extent, levels=self.p.levels) diff --git a/holoviews/plotting/mpl/raster.py b/holoviews/plotting/mpl/raster.py index aaa235139a..f90e820c19 100644 --- a/holoviews/plotting/mpl/raster.py +++ b/holoviews/plotting/mpl/raster.py @@ -111,7 +111,7 @@ def _annotate_plot(self, ax, annotations): def _annotate_values(self, element): val_dim = element.vdims[0] vals = np.rot90(element.raster, 3).flatten() - d1uniq, d2uniq = [np.unique(element.dimension_values(i)) 
for i in range(2)] + d1uniq, d2uniq = [element.dimension_values(i, False) for i in range(2)] num_x, num_y = len(d1uniq), len(d2uniq) xstep, ystep = 1.0/num_x, 1.0/num_y xpos = np.linspace(xstep/2., 1.0-xstep/2., num_x) @@ -127,7 +127,7 @@ def _annotate_values(self, element): def _compute_ticks(self, element, ranges): xdim, ydim = element.kdims - dim1_keys, dim2_keys = [element.dimension_values(i, True) + dim1_keys, dim2_keys = [element.dimension_values(i, False) for i in range(2)] num_x, num_y = len(dim1_keys), len(dim2_keys) x0, y0, x1, y1 = element.extents diff --git a/tests/testcolumns.py b/tests/testcolumns.py index ab7cdd1ccc..e7b82a2ed0 100644 --- a/tests/testcolumns.py +++ b/tests/testcolumns.py @@ -176,8 +176,8 @@ def test_columns_dataframe_init_ht(self): # Test literal formats - def test_columns_uniq_dimvals_ht(self): - self.assertEqual(self.table.dimension_values('Gender', unique=True), + def test_columns_expanded_dimvals_ht(self): + self.assertEqual(self.table.dimension_values('Gender', expanded=False), np.array(['M', 'F'])) def test_columns_implicit_indexing_init(self): @@ -399,3 +399,79 @@ def test_columns_double_zip_init(self): kdims=self.kdims, vdims=self.vdims) self.assertTrue(isinstance(columns.data, NdElement)) + +class GridColumnsTest(HomogeneousColumnTypes, ComparisonTestCase): + """ + Test of the grid Columns interface (the 'grid' datatype) + """ + + def setUp(self): + self.restore_datatype = Columns.datatype + Columns.datatype = ['grid'] + self.data_instance_type = dict + self.init_data() + + def init_data(self): + self.xs = range(11) + self.xs_2 = [el**2 for el in self.xs] + + self.y_ints = [i*2 for i in range(11)] + self.columns_hm = Columns((self.xs, self.y_ints), + kdims=['x'], vdims=['y']) + + def test_columns_array_init_hm(self): + "Tests support for arrays (homogeneous)" + exception = "None of the available storage backends "\ + "were able to support the supplied data format." 
+ with self.assertRaisesRegexp(Exception, exception): + Columns(np.column_stack([self.xs, self.xs_2]), + kdims=['x'], vdims=['x2']) + + def test_columns_dataframe_init_hm(self): + "Tests support for homogeneous DataFrames" + if pd is None: + raise SkipTest("Pandas not available") + exception = "None of the available storage backends "\ + "were able to support the supplied data format." + with self.assertRaisesRegexp(Exception, exception): + Columns(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}), + kdims=['x'], vdims=['x2']) + + def test_columns_ndelement_init_hm(self): + "Tests support for homogeneous NdElement (backwards compatibility)" + exception = "None of the available storage backends "\ + "were able to support the supplied data format." + with self.assertRaisesRegexp(Exception, exception): + Columns(NdElement(zip(self.xs, self.xs_2), + kdims=['x'], vdims=['x2'])) + + def test_columns_2D_aggregate_partial_hm(self): + array = np.random.rand(11, 11) + columns = Columns({'x':self.xs, 'y':self.y_ints, 'z': array}, + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), + Columns({'x':self.xs, 'z': np.mean(array, axis=1)}, + kdims=['x'], vdims=['z'])) + + def test_columns_2D_reduce_hm(self): + array = np.random.rand(11, 11) + columns = Columns({'x':self.xs, 'y':self.y_ints, 'z': array}, + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.mean(array)) + + def test_columns_add_dimensions_value_hm(self): + with self.assertRaisesRegexp(Exception, 'Cannot add key dimension to a dense representation.'): + self.columns_hm.add_dimension('z', 1, 0) + + def test_columns_add_dimensions_values_hm(self): + table = self.columns_hm.add_dimension('z', 1, range(1,12), vdim=True) + self.assertEqual(table.vdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_sort_vdim_hm(self): + exception = ('Compressed format cannot be sorted, either 
instantiate ' + 'in the desired order or use the expanded format.') + with self.assertRaisesRegexp(Exception, exception): + self.columns_hm.sort('y') +