diff --git a/doc/Tutorials/Pandas_Conversion.ipynb b/doc/Tutorials/Pandas_Conversion.ipynb index 34acac59c3..e1e7f1033e 100644 --- a/doc/Tutorials/Pandas_Conversion.ipynb +++ b/doc/Tutorials/Pandas_Conversion.ipynb @@ -140,24 +140,6 @@ "HTML(df.reset_index().to_html())" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For now though, the a, b, and c columns is all we'll need. To confirm the dataframe was converted correctly we can call the `.info` method on Table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "example_table.info" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -267,7 +249,7 @@ }, "outputs": [], "source": [ - "macro_df = pd.read_csv('http://ioam.github.com/holoviews/Tutorials/macro.csv', sep='\\t')" + "macro_df = pd.read_csv('http://ioam.github.com/holoviews/Tutorials/macro.csv', '\\t')" ] }, { @@ -404,7 +386,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now that we've extracted the gdp_curves we can apply some operations to them. The collapse method applies some function across the data along the supplied dimensions. This lets us quickly compute the mean GDP Growth by year for example, but it also allows us to map a function with parameters to the data and visualize the resulting samples. A simple example is computing a curve for each percentile and embedding it in an NdOverlay.\n", + "Now that we've extracted the gdp_curves we can apply some operations to them. The collapse method applies some function across the data along the supplied dimensions. This lets us quickly compute the mean GDP Growth by year for example, but it also allows us to map a function with parameters to the data and visualize the resulting samples. A simple example is computing a curve for each percentile and embedding it in an NdOverlay.\n", "\n", "Additionally we can apply a Palette to visualize the range of percentiles."
] @@ -462,9 +444,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Using the .select method we can pull out the data for just a few countries and specific years. We can also make more advanced use of the Palettes.\n", + "Using the .select method we can pull out the data for just a few countries and specific years. We can also make more advanced use of the Palettes.\n", "\n", - "Palettes can be customized by selecting only a subrange of the underlying cmap to draw the colors from. The Palette draws samples from the colormap using the supplied sample_fn, which by default just draws linear samples but may be overriden with any function that draws samples in the supplied ranges. By slicing the Set1 colormap we draw colors only from the upper half of the palette and then reverse it." + "Palettes can be customized by selecting only a subrange of the underlying cmap to draw the colors from. The Palette draws samples from the colormap using the supplied sample_fn, which by default just draws linear samples but may be overridden with any function that draws samples in the supplied ranges. By slicing the Set1 colormap we draw colors only from the upper half of the palette and then reverse it."
] }, { diff --git a/doc/Tutorials/Sampling_Data.ipynb b/doc/Tutorials/Sampling_Data.ipynb index 1112c8bcfe..eaf8b312a9 100644 --- a/doc/Tutorials/Sampling_Data.ipynb +++ b/doc/Tutorials/Sampling_Data.ipynb @@ -274,7 +274,7 @@ "outputs": [], "source": [ "raster = hv.Raster(np.random.rand(3, 3))\n", - "raster + hv.Points(raster.table().keys())[-1:3, -1:3] + raster.table()" + "raster + hv.Points(raster)[-1:3, -1:3] + raster.table()" ] }, { @@ -316,7 +316,7 @@ "source": [ "extents = (0, 0, 3, 3)\n", "img = hv.Image(np.random.rand(3, 3), bounds=extents)\n", - "img + hv.Points(img.table().keys(), extents=extents) + img.table()" + "img + hv.Points(img, extents=extents) + img.table()" ] }, { @@ -462,7 +462,7 @@ "extents = (0, 0, 10, 10)\n", "img = hv.Image(np.random.rand(10, 10), bounds=extents)\n", "img_coords = hv.Points(img.table(), extents=extents)\n", - "img + img * img_coords * hv.Points([img.closest((5,5))])(style=dict(color='r')) + img.sample([(5, 5)])" + "img + img * img_coords * hv.Points([img.closest([(5,5)])])(style=dict(color='r')) + img.sample([(5, 5)])" ] }, { @@ -481,7 +481,7 @@ "outputs": [], "source": [ "sampled = img.sample(y=5)\n", - "img + img * img_coords * hv.Points(zip(sampled.table().keys(), [img.closest((5,5))[1]]*10)) + sampled" + "img + img * img_coords * hv.Points(zip(sampled['x'], [img.closest(y=5)]*10)) + sampled" ] }, { @@ -610,8 +610,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "scrolled": true + "collapsed": false }, "outputs": [], "source": [ diff --git a/doc/Tutorials/Showcase.ipynb b/doc/Tutorials/Showcase.ipynb index 8d64337a74..9e0ac3c719 100644 --- a/doc/Tutorials/Showcase.ipynb +++ b/doc/Tutorials/Showcase.ipynb @@ -246,7 +246,7 @@ "source": [ "sample_pos = (0,0.25)\n", "annotated = circular_wave * hv.Points([sample_pos])\n", - "sample = circular_wave.sample(samples=[sample_pos]).reindex().to.curve('Phase', 'Amplitude')\n", + "sample = 
circular_wave.sample(samples=[sample_pos]).to.curve('Phase', 'Amplitude', ['Frequency'])\n", "annotated + sample" ] }, diff --git a/doc/reference_data b/doc/reference_data index 88cf88384f..7e08604134 160000 --- a/doc/reference_data +++ b/doc/reference_data @@ -1 +1 @@ -Subproject commit 88cf88384fc7594b853d7513f9d441dd5b19d9dc +Subproject commit 7e08604134959809a88223513ba7e461b875e949 diff --git a/holoviews/core/__init__.py b/holoviews/core/__init__.py index 69bb230619..484b50478b 100644 --- a/holoviews/core/__init__.py +++ b/holoviews/core/__init__.py @@ -1,4 +1,5 @@ from .boundingregion import * # pyflakes:ignore (API import) +from .data import * # pyflakes:ignore (API import) from .dimension import * # pyflakes:ignore (API import) from .element import * # pyflakes:ignore (API import) from .layout import * # pyflakes:ignore (API import) diff --git a/holoviews/core/data.py b/holoviews/core/data.py new file mode 100644 index 0000000000..805d0c9ed8 --- /dev/null +++ b/holoviews/core/data.py @@ -0,0 +1,927 @@ +""" +The data module provides utility classes to interface with various data +backends. +""" + +import sys +from distutils.version import LooseVersion +from collections import defaultdict, Iterable +from itertools import groupby + +try: + import itertools.izip as zip +except ImportError: + pass + +import numpy as np +try: + import pandas as pd +except ImportError: + pd = None + +import param + +from .dimension import OrderedDict, Dimension +from .element import Element, NdElement +from .ndmapping import NdMapping, item_check, sorted_context +from .spaces import HoloMap +from . import util + + +class Columns(Element): + """ + Columns provides a general baseclass for column based Element types + that supports a range of data formats. + + Currently numpy arrays are supported for data with a uniform + type. For storage of columns with heterogenous types, either a + dictionary format or a pandas DataFrame may be used for storage. 
def closest(self, coords):
    """
    Given single or multiple samples along the first key dimension
    will return the closest actual sample coordinates.

    Parameters:
        coords: A scalar coordinate or a list of scalar coordinates
            to snap to the values stored along the first dimension.

    Returns:
        The closest stored value when a scalar or a one-element list
        is supplied, otherwise a list with one closest value per
        requested coordinate. An empty list yields an empty list.

    Raises:
        NotImplementedError: If the object has more than one key
            dimension.
    """
    if self.ndims > 1:
        # The exception used to be constructed but never raised, so
        # multi-dimensional objects silently fell through to the 1D logic.
        raise NotImplementedError("Closest method currently only "
                                  "implemented for 1D Elements")

    if not isinstance(coords, list): coords = [coords]
    if not coords:
        # Nothing was requested; avoids indexing idxs[0] below.
        return []
    xs = self.dimension_values(0)
    idxs = [np.argmin(np.abs(xs-coord)) for coord in coords]
    return [xs[idx] for idx in idxs] if len(coords) > 1 else xs[idxs[0]]
def reindex(self, kdims=None, vdims=None):
    """
    Create a new object with a re-ordered set of dimensions. Allows
    converting key dimensions to value dimensions and vice versa.

    Parameters:
        kdims: Optional list of dimensions to use as key dimensions.
            When omitted, the existing key dimensions are kept, minus
            any that were requested as value dimensions.
        vdims: Optional list of dimensions to use as value dimensions.
            When omitted, the existing value dimensions are kept,
            minus any that were requested as key dimensions.

    Returns:
        A clone of this object holding the reindexed data with the
        resolved key and value dimensions.
    """
    # Treat an omitted argument as "nothing requested" so that the
    # membership tests below never run against None.
    requested_kdims = [] if kdims is None else kdims
    requested_vdims = [] if vdims is None else vdims

    if kdims is None:
        key_dims = [d for d in self.kdims if d not in requested_vdims]
    else:
        key_dims = [self.get_dimension(k) for k in kdims]

    if vdims is None:
        val_dims = [d for d in self.vdims if d not in requested_kdims]
    else:
        val_dims = [self.get_dimension(v) for v in vdims]

    data = self.interface.reindex(self, key_dims, val_dims)
    return self.clone(data, kdims=key_dims, vdims=val_dims)
+ """ + if slices is (): return self + if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b': + if not len(slices) == len(self): + raise IndexError("Boolean index must match length of sliced object") + return self.clone(self.data[slices]) + if not isinstance(slices, tuple): slices = (slices,) + value_select = None + if len(slices) == 1 and slices[0] in self.dimensions(): + return self.dimension_values(slices[0]) + elif len(slices) == self.ndims+1 and slices[self.ndims] in self.dimensions(): + selection = dict(zip(self.dimensions('key', label=True), slices)) + value_select = slices[self.ndims] + else: + selection = dict(zip(self.dimensions(label=True), slices)) + data = self.select(**selection) + if value_select: + values = data.dimension_values(value_select) + if len(values) > 1: + return values + else: + return values[0] + return data + + + def sample(self, samples=[]): + """ + Allows sampling of Columns as an iterator of coordinates + matching the key dimensions, returning a new object containing + just the selected samples. + """ + return self.clone(self.interface.sample(self, samples)) + + + def reduce(self, dimensions=[], function=None, **reduce_map): + """ + Allows reducing the values along one or more key dimension with + the supplied function. The dimensions may be supplied as a list + and a function to apply or a mapping between the dimensions and + functions to apply along each dimension. + """ + reduce_dims, reduce_map = self._reduce_map(dimensions, function, reduce_map) + reduced = self + for reduce_fn, group in reduce_map: + reduced = self.interface.reduce(reduced, group, function) + + if np.isscalar(reduced): + return reduced + else: + kdims = [kdim for kdim in self.kdims if kdim not in reduce_dims] + return self.clone(reduced, kdims=kdims) + + + def aggregate(self, dimensions=[], function=None): + """ + Aggregates over the supplied key dimensions with the defined + function. 
+ """ + if not isinstance(dimensions, list): dimensions = [dimensions] + if not dimensions: dimensions = self.kdims + aggregated = self.interface.aggregate(self, dimensions, function) + kdims = [self.get_dimension(d) for d in dimensions] + return self.clone(aggregated, kdims=kdims) + + + def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, **kwargs): + if not isinstance(dimensions, list): dimensions = [dimensions] + if not len(dimensions): dimensions = self.dimensions('key', True) + dimensions = [self.get_dimension(d).name for d in dimensions] + invalid_dims = list(set(dimensions) - set(self.dimensions('key', True))) + if invalid_dims: + raise Exception('Following dimensions could not be found:\n%s.' + % invalid_dims) + if group_type is None: + group_type = type(self) + return self.interface.groupby(self, dimensions, container_type, group_type, **kwargs) + + + @classmethod + def concat(cls, columns_objs): + """ + Concatenates a list of Columns objects. If data types don't + match all types will be converted to that of the first object + before concatenation. + """ + columns = columns_objs[0] + if len({col.interface for col in columns_objs}) > 1: + if isinstance(columns.data, NdElement): + columns_objs = [co.table('dictionary') for co in columns_objs] + elif isinstance(columns.data, np.ndarray): + columns_objs = [co.table('array') for co in columns_objs] + elif util.is_dataframe(data[0]): + columns_objs = [co.table('dataframe') for co in columns_objs] + return columns.clone(columns.interface.concat(columns_objs)) + + + def __len__(self): + """ + Returns the number of rows in the Columns object. + """ + return self.interface.length(self) + + + @property + def shape(self): + "Returns the shape of the data." + return self.interface.shape(self) + + + def dimension_values(self, dim, unique=False): + """ + Returns the values along a particular dimension. If unique + values are requested will return only unique values. 
+ """ + dim = self.get_dimension(dim).name + dim_vals = self.interface.values(self, dim) + if unique: + return np.unique(dim_vals) + else: + return dim_vals + + + def dframe(self, dimensions=None): + """ + Returns the data in the form of a DataFrame. + """ + if dimensions: + dimensions = [self.get_dimension(d).name for d in dimensions] + return self.interface.dframe(self, dimensions) + + + + +class DataColumns(param.Parameterized): + + interfaces = {} + + @classmethod + def initialize(cls, eltype, data, kdims, vdims, datatype=None): + # Process params and dimensions + if isinstance(data, Element): + pvals = util.get_param_values(data) + kdims = pvals.get('kdims') if kdims is None else kdims + vdims = pvals.get('vdims') if vdims is None else vdims + + # Process Element data + if isinstance(data, NdElement): + pass + elif isinstance(data, Columns): + data = data.data + elif isinstance(data, Element): + data = tuple(data.dimension_values(d) for d in kdims+vdims) + elif (not (util.is_dataframe(data) or isinstance(data, (tuple, dict, list))) + and sys.version_info.major >= 3): + data = list(data) + + # Set interface priority order + if datatype is None: + datatype = eltype.datatype + prioritized = [cls.interfaces[p] for p in datatype] + + head = [intfc for intfc in prioritized if type(data) in intfc.types] + if head: + # Prioritize interfaces which have matching types + prioritized = head + [el for el in prioritized if el != head[0]] + + # Iterate over interfaces until one can interpret the input + for interface in prioritized: + try: + (data, kdims, vdims) = interface.reshape(eltype, data, kdims, vdims) + break + except: + pass + else: + raise ValueError("None of the available storage backends " + "were able to support the supplied data format.") + + return data, kdims, vdims, interface + + + @classmethod + def range(cls, columns, dimension): + column = columns.dimension_values(dimension) + if columns.get_dimension_type(dimension) is np.datetime64: + return 
column.min(), column.max() + else: + try: + return (np.nanmin(column), np.nanmax(column)) + except TypeError: + column.sort() + return column[0], column[-1] + + @classmethod + def array(cls, columns, dimensions): + return Element.dframe(columns, dimensions) + + @classmethod + def dframe(cls, columns, dimensions): + return Element.dframe(columns, dimensions) + + @classmethod + def shape(cls, columns): + return columns.data.shape + + @classmethod + def length(cls, columns): + return len(columns.data) + + @classmethod + def validate(cls, columns): + pass + + + +class NdColumns(DataColumns): + + types = (NdElement,) + + @classmethod + def reshape(cls, eltype, data, kdims, vdims): + if isinstance(data, NdElement): + kdims = [d for d in kdims if d != 'Index'] + else: + element_params = eltype.params() + kdims = kdims if kdims else element_params['kdims'].default + vdims = vdims if vdims else element_params['vdims'].default + + if isinstance(data, dict) and all(d in data for d in kdims+vdims): + data = tuple(data.get(d.name if isinstance(d, Dimension) else d) + for d in dimensions) + + if not isinstance(data, (NdElement, dict)): + # If ndim > 2 data is assumed to be a mapping + if (isinstance(data[0], tuple) and any(isinstance(d, tuple) for d in data[0])): + pass + else: + if isinstance(data, tuple): + data = zip(*data) + ndims = len(kdims) + data = [(tuple(row[:ndims]), tuple(row[ndims:])) + for row in data] + if isinstance(data, (dict, list)): + data = NdElement(data, kdims=kdims, vdims=vdims) + elif not isinstance(data, NdElement): + raise ValueError("NdColumns interface couldn't convert data.""") + return data, kdims, vdims + + + @classmethod + def shape(cls, columns): + return (len(columns), len(columns.dimensions())) + + @classmethod + def add_dimension(cls, columns, dimension, dim_pos, values): + return columns.data.add_dimension(dimension, dim_pos+1, values) + + @classmethod + def concat(cls, columns_objs): + return [(k[1:], v) for col in columns_objs + for k, v 
@classmethod
def reshape(cls, eltype, data, kdims, vdims):
    """
    Converts the supplied data into a pandas DataFrame and resolves
    the key and value dimensions that describe its columns.

    Parameters:
        eltype: Element type whose `params()` supply the `kdims` and
            `vdims` parameter objects (their `default`, and for kdims
            the `bounds`, are read).
        data: A DataFrame, dict, tuple of columns or any other input
            accepted by the `pd.DataFrame` constructor.
        kdims: Explicit key dimensions, or a falsy value to infer them.
        vdims: Explicit value dimensions, or a falsy value to infer them.

    Returns:
        A (data, kdims, vdims) tuple.
    """
    element_params = eltype.params()
    kdim_param = element_params['kdims']
    vdim_param = element_params['vdims']
    if util.is_dataframe(data):
        # Upper bound on the number of key dimensions, if one is declared
        ndim = kdim_param.bounds[1] if kdim_param.bounds else None
        if kdims and not vdims:
            vdims = [c for c in data.columns if c not in kdims]
        elif vdims and not kdims:
            # Key dimensions are the columns not claimed as values; this
            # used to test against the (empty or None) kdims instead.
            kdims = [c for c in data.columns if c not in vdims][:ndim]
        elif not kdims and not vdims:
            kdims = list(data.columns[:ndim])
            vdims = list(data.columns[ndim:])
    else:
        # NOTE(review): the conversion below is assumed to apply only to
        # non-DataFrame input - confirm against the original indentation.
        kdims = kdims if kdims else kdim_param.default
        vdims = vdims if vdims else vdim_param.default
        columns = [d.name if isinstance(d, Dimension) else d
                   for d in kdims+vdims]

        if isinstance(data, dict):
            data = OrderedDict([(d.name if isinstance(d, Dimension) else d, v)
                                for d, v in data.items()])
        if isinstance(data, tuple):
            # An ordered mapping keeps the column order without relying on
            # DataFrame.from_items, which modern pandas no longer provides.
            data = pd.DataFrame(OrderedDict(zip(columns, data)))
        else:
            data = pd.DataFrame(data, columns=columns)
    return data, kdims, vdims
+ """ + kdims = [kdim.name for kdim in columns.kdims if kdim not in reduce_dims] + vdims = columns.dimensions('value', True) + if kdims: + reduced = columns.data.reindex(columns=kdims+vdims).\ + groupby(kdims).aggregate(function).reset_index() + else: + if isinstance(function, np.ufunc): + reduced = function.reduce(columns.data, axis=0) + else: + reduced = function(columns.data, axis=0)[vdims] + if len(reduced) == 1: + reduced = reduced[0] + else: + reduced = pd.DataFrame([reduced], columns=vdims) + return reduced + + + @classmethod + def reindex(cls, columns, kdims=None, vdims=None): + # DataFrame based tables don't need to be reindexed + return columns.data + + + @classmethod + def collapse_data(cls, data, function, kdims, **kwargs): + return pd.concat(data).groupby([d.name for d in kdims]).agg(function).reset_index() + + + @classmethod + def sort(cls, columns, by=[]): + import pandas as pd + if not isinstance(by, list): by = [by] + if not by: by = range(columns.ndims) + cols = [columns.get_dimension(d).name for d in by] + + if (not isinstance(columns.data, pd.DataFrame) or + LooseVersion(pd.__version__) < '0.17.0'): + return columns.data.sort(columns=cols) + return columns.data.sort_values(by=cols) + + + @classmethod + def select(cls, columns, selection_specs=None, **select): + df = columns.data + selected_kdims = [] + mask = True + for dim, k in select.items(): + if isinstance(k, tuple): + k = slice(*k) + if isinstance(k, slice): + if k.start is not None: + mask &= k.start <= df[dim] + if k.stop is not None: + mask &= df[dim] < k.stop + elif isinstance(k, (set, list)): + iter_slcs = [] + for ik in k: + iter_slcs.append(df[dim] == ik) + mask &= np.logical_or.reduce(iter_slcs) + else: + if dim in columns.kdims: selected_kdims.append(dim) + mask &= df[dim] == k + df = df.ix[mask] + if len(set(selected_kdims)) == columns.ndims: + if len(df) and len(columns.vdims) == 1: + df = df[columns.vdims[0].name].iloc[0] + return df + + + @classmethod + def values(cls, 
@classmethod
def dframe(cls, columns, dimensions):
    """
    Returns the DataFrame backing `columns`, optionally restricted
    to the requested dimensions.

    Parameters:
        columns: Object whose `data` attribute is the DataFrame.
        dimensions: List of column names to keep, in order, or a
            falsy value to return the stored DataFrame unchanged.

    Returns:
        The stored DataFrame, or a reindexed DataFrame containing
        only the requested columns.
    """
    if dimensions:
        # Reindex the DataFrame itself; the element's own reindex takes
        # kdims/vdims and does not accept a `columns` keyword.
        return columns.data.reindex(columns=dimensions)
    else:
        return columns.data
@classmethod
def groupby(cls, columns, dimensions, container_type=HoloMap,
            group_type=None, raw=False, **kwargs):
    """
    Groups the rows of the array held by `columns` by the unique
    value combinations found along the supplied dimensions.

    Parameters:
        columns: Object exposing `data` (a 2D array), `kdims`,
            `get_dimension`, `get_dimension_index` and `clone`.
        dimensions: Dimensions to group by.
        container_type: Type used to wrap the (key, group) pairs when
            `raw` is False.
        group_type: Type used to wrap each group's data; when None the
            group is created with `columns.clone`.
        raw: If True, return the list of (key, array) pairs without
            wrapping the arrays or the list.
        **kwargs: Extra parameters for each group. The `kdims` entry
            is always replaced by the key dimensions not grouped over.

    Returns:
        A list of (key tuple, group array) pairs when `raw` is True,
        otherwise a `container_type` keyed by the grouping dimensions.
    """
    data = columns.data

    # Get dimension objects, labels, indexes and data
    dimensions = [columns.get_dimension(d) for d in dimensions]
    dim_idxs = [columns.get_dimension_index(d) for d in dimensions]
    kwargs['kdims'] = [kdim for kdim in columns.kdims
                       if kdim not in dimensions]
    # Columns left in each group once the grouping columns are removed.
    # Computed from the actual column positions so that grouping by
    # non-leading dimensions selects the right data.
    remaining_idxs = [i for i in range(data.shape[1]) if i not in dim_idxs]

    # Find unique entries along supplied dimensions
    # by creating a view that treats the selected
    # groupby keys as a single object.
    indices = data[:, dim_idxs].copy()
    view = indices.view(np.dtype((np.void, indices.dtype.itemsize * indices.shape[1])))
    _, idx = np.unique(view, return_index=True)
    # Restore order of first appearance
    idx.sort()
    unique_indices = indices[idx]

    params = util.get_param_values(columns)
    params.update(kwargs)

    # Iterate over the unique entries building masks
    # to apply the group selection
    grouped_data = []
    for group in unique_indices:
        mask = np.logical_and.reduce([data[:, col] == group[i]
                                      for i, col in enumerate(dim_idxs)])
        group_data = data[mask][:, remaining_idxs]
        if not raw:
            if group_type is None:
                group_data = columns.clone(group_data, **params)
            else:
                group_data = group_type(group_data, **params)
        grouped_data.append((tuple(group), group_data))

    if raw:
        return grouped_data
    else:
        with item_check(False), sorted_context(False):
            return container_type(grouped_data, kdims=dimensions)
@classmethod
def sample(cls, columns, samples=[]):
    """
    Returns the rows of the array held by `columns` that match any
    of the supplied sample coordinates.

    Parameters:
        columns: Object whose `data` attribute is a 2D array.
        samples: Iterable of samples. Each sample is a scalar (matched
            against the first column) or a sequence whose i-th entry
            is matched against the i-th column.

    Returns:
        A 2D array with the rows matching at least one sample. With no
        samples, an empty array with the original column count is
        returned.
    """
    data = columns.data
    # An array-valued mask keeps the result 2D even with no samples
    mask = np.zeros(len(data), dtype=bool)
    for sample in samples:
        if np.isscalar(sample): sample = [sample]
        if not len(sample):
            continue
        # Every coordinate of a sample has to match on the same row
        sample_mask = np.ones(len(data), dtype=bool)
        for i, v in enumerate(sample):
            sample_mask &= data[:, i]==v
        mask |= sample_mask
    return data[mask]
a/holoviews/core/dimension.py b/holoviews/core/dimension.py index 458ddf63a9..bb1dc2d701 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -11,6 +11,7 @@ except: from collections import OrderedDict +import numpy as np import param from ..core.util import basestring, sanitize_identifier, max_range, find_range @@ -172,9 +173,13 @@ def __str__(self): def __eq__(self, other): - "Dimensions are sorted alphanumerically by name" - return self.name == other.name if isinstance(other, Dimension) else self.name == other + "Implements equals operator including sanitized comparison." + dim_matches = [self.name, sanitize_identifier(self.name)] + return other.name in dim_matches if isinstance(other, Dimension) else other in dim_matches + def __ne__(self, other): + "Implements not equal operator including sanitized comparison." + return not self.__eq__(other) def __lt__(self, other): "Dimensions are sorted alphanumerically by name" @@ -251,7 +256,8 @@ def clone(self, data=None, shared_data=True, *args, **overrides): If shared_data is set to True and no data explicitly supplied, the clone will share data with the original. """ - settings = dict(self.get_param_values(), **overrides) + params = dict(self.get_param_values()) + settings = dict(params, **overrides) if data is None and shared_data: data = self.data return self.__class__(data, *args, **settings) @@ -516,8 +522,6 @@ def __init__(self, data, **params): self.ndims = len(self.kdims) cdims = [(d.name, val) for d, val in self.cdims.items()] self._cached_constants = OrderedDict(cdims) - self._cached_index_names = [d.name for d in self.kdims] - self._cached_value_names = [d.name for d in self.vdims] self._settings = None @@ -533,7 +537,7 @@ def _valid_dimensions(self, dimensions): valid_dimensions = [] for dim in dimensions: if isinstance(dim, Dimension): dim = dim.name - if dim not in self._cached_index_names: + if dim not in self.kdims: raise Exception("Supplied dimensions %s not found." 
% dim) valid_dimensions.append(dim) return valid_dimensions @@ -599,7 +603,7 @@ def get_dimension_index(self, dim): return IndexError('Dimension index out of bounds') try: sanitized = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} + for kd in self.dimensions('key', True)} return [d.name for d in self.dimensions()].index(sanitized.get(dim, dim)) except ValueError: raise Exception("Dimension %s not found in %s." % @@ -651,13 +655,10 @@ def select(self, selection_specs=None, **kwargs): """ # Apply all indexes applying on this object - val_dim = ['value'] if self.vdims else [] - sanitized = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} - local_dims = (self._cached_index_names - + list(sanitized.keys()) + val_dim) + vdims = self.vdims+['value'] if self.vdims else [] + kdims = self.kdims local_kwargs = {k: v for k, v in kwargs.items() - if k in local_dims} + if k in kdims+vdims} # Check selection_spec applies if selection_specs is not None: @@ -666,14 +667,16 @@ def select(self, selection_specs=None, **kwargs): else: matches = True + # Apply selection to self if local_kwargs and matches: - select = [slice(None) for i in range(self.ndims)] + ndims = (len(self.dimensions()) if any(d in self.vdims for d in kwargs) + else self.ndims) + select = [slice(None) for i in range(ndims)] for dim, val in local_kwargs.items(): if dim == 'value': select += [val] else: if isinstance(val, tuple): val = slice(*val) - dim = sanitized.get(dim, dim) select[self.get_dimension_index(dim)] = val if self._deep_indexable: selection = self.get(tuple(select), @@ -683,13 +686,15 @@ def select(self, selection_specs=None, **kwargs): else: selection = self - if type(selection) is not type(self): + if not isinstance(selection, Dimensioned): + return selection + elif type(selection) is not type(self) and isinstance(selection, Dimensioned): # Apply the selection on the selected object of a different type val_dim = ['value'] if selection.vdims else [] key_dims = 
selection.dimensions('key', label=True) + val_dim if any(kw in key_dims for kw in kwargs): selection = selection.select(selection_specs, **kwargs) - elif selection._deep_indexable: + elif isinstance(selection, Dimensioned) and selection._deep_indexable: # Apply the deep selection on each item in local selection items = [] for k, v in selection.items(): @@ -713,7 +718,7 @@ def dimension_values(self, dimension): """ val = self._cached_constants.get(dimension, None) if val: - return val + return np.array([val]) else: raise Exception("Dimension %s not found in %s." % (dimension, self.__class__.__name__)) diff --git a/holoviews/core/element.py b/holoviews/core/element.py index a91ebe019a..ee50fe701c 100644 --- a/holoviews/core/element.py +++ b/holoviews/core/element.py @@ -7,11 +7,12 @@ from .dimension import Dimension, Dimensioned, ViewableElement from .layout import Composable, Layout, AdjointLayout, NdLayout -from .ndmapping import OrderedDict, UniformNdMapping, NdMapping, item_check +from .ndmapping import OrderedDict, UniformNdMapping, NdMapping, item_check, sorted_context from .overlay import Overlayable, NdOverlay, Overlay, CompositeOverlay from .spaces import HoloMap, GridSpace from .tree import AttrTree -from .util import sanitize_identifier +from .util import sanitize_identifier, is_dataframe, dimension_sort, get_param_values + class Element(ViewableElement, Composable, Overlayable): """ @@ -55,32 +56,6 @@ def hist(self, dimension=None, num_bins=20, bin_range=None, # Subclassable methods # #======================# - def __init__(self, data, **params): - convert = isinstance(data, Element) - if convert: - params = dict(data.get_param_values(onlychanged=True), - **params) - element = data - data = [] - super(Element, self).__init__(data, **params) - if convert: - self.data = self._convert_element(element) - - - def _convert_element(self, element): - type_str = self.__class__.__name__ - type_name = type_str.lower() - table = element.table() - conversion = 
getattr(table.to, type_name) - if conversion is None: - return element - try: - converted = conversion(self._cached_index_names, - self._cached_value_names) - except: - raise - return converted.data - def __getitem__(self, key): if key is (): @@ -91,7 +66,7 @@ def __getitem__(self, key): @classmethod - def collapse_data(cls, data, function=None, **kwargs): + def collapse_data(cls, data, function=None, kdims=None, **kwargs): """ Class method to collapse a list of data matching the data format of the Element type. By implementing this @@ -99,8 +74,9 @@ def collapse_data(cls, data, function=None, **kwargs): same type. The kwargs are passed to the collapse function. The collapse function must support the numpy style axis selection. Valid function include: - np.mean, np.sum, np.product, np.std, - scipy.stats.kurtosis etc. + np.mean, np.sum, np.product, np.std, scipy.stats.kurtosis etc. + Some data backends also require the key dimensions + to aggregate over. """ raise NotImplementedError("Collapsing not implemented for %s." 
% cls.__name__) @@ -141,49 +117,80 @@ def _reduce_map(self, dimensions, function, reduce_map): if dimensions and reduce_map: raise Exception("Pass reduced dimensions either as an argument" "or as part of the kwargs not both.") + sanitized_dict = {sanitize_identifier(kd): kd + for kd in self.dimensions('key', True)} + if reduce_map: + reduce_map = reduce_map.items() if dimensions: - reduce_map = {d: function for d in dimensions} + reduce_map = [(d, function) for d in dimensions] elif not reduce_map: - reduce_map = {d: function for d in self._cached_index_names} - reduce_map = {(d if isinstance(d, Dimension) else d): fn - for d, fn in reduce_map.items()} - sanitized = {sanitize_identifier(kd): kd - for kd in self._cached_index_names} - return {sanitized.get(d, d): fn for d, fn in reduce_map.items()} + reduce_map = [(d, function) for d in self.kdims] + reduced = [(d.name if isinstance(d, Dimension) else d, fn) + for d, fn in reduce_map] + sanitized = [(sanitized_dict.get(d, d), fn) for d, fn in reduced] + grouped = [(fn, [dim for dim, _ in grp]) for fn, grp in groupby(sanitized, lambda x: x[1])] + dims = [d for grp in grouped for d in grp[1]] + return dims, grouped - def table(self, **kwargs): + def table(self, datatype=None): """ - This method transforms any ViewableElement type into a Table - as long as it implements a dimension_values method. + Converts the data Element to a Table, optionally may + specify a supported data type. The default data types + are 'numpy' (for homogeneous data), 'dataframe', and + 'dictionary'. 
""" + if datatype and not isinstance(datatype, list): + datatype = [datatype] from ..element import Table - keys = zip(*[self.dimension_values(dim.name) - for dim in self.kdims]) - values = zip(*[self.dimension_values(dim.name) - for dim in self.vdims]) - kwargs = {'label': self.label - for k, v in self.get_param_values(onlychanged=True) - if k in ['group', 'label']} - params = dict(kdims=self.kdims, - vdims=self.vdims, - label=self.label) - if not self.params()['group'].default == self.group: - params['group'] = self.group - if not keys: keys = [()]*len(values) - if not values: [()]*len(keys) - return Table(zip(keys, values), **dict(params, **kwargs)) - - - def dframe(self): - import pandas - column_names = self.dimensions(label=True) - dim_vals = np.vstack([self.dimension_values(dim) for dim in column_names]).T - return pandas.DataFrame(dim_vals, columns=column_names) - - - -class Tabular(NdMapping): + return Table(self, **(dict(datatype=datatype) if datatype else {})) + + + def dframe(self, dimensions=None): + import pandas as pd + column_names = dimensions if dimensions else self.dimensions(label=True) + dim_vals = OrderedDict([(dim, self[dim]) for dim in column_names]) + return pd.DataFrame(dim_vals) + + + def mapping(self, kdims=None, vdims=None, **kwargs): + length = len(self) + if not kdims: kdims = self.kdims + if kdims: + keys = zip(*[self.dimension_values(dim.name) + for dim in self.kdims]) + else: + keys = [()]*length + + if not vdims: vdims = self.vdims + if vdims: + values = zip(*[self.dimension_values(dim.name) + for dim in vdims]) + else: + values = [()]*length + + data = zip(keys, values) + overrides = dict(kdims=kdims, vdims=vdims, **kwargs) + return NdElement(data, **dict(get_param_values(self), **overrides)) + + + def array(self, dimensions=[]): + if dimensions: + dims = [self.get_dimension(d) for d in dimensions] + else: + dims = [d for d in self.kdims + self.vdims if d != 'Index'] + columns, types = [], [] + for dim in dims: + column = 
self.dimension_values(dim) + columns.append(column) + types.append(column.dtype.kind) + if len(set(types)) > 1: + columns = [c.astype('object') for c in columns] + return np.column_stack(columns) + + + +class Tabular(Element): """ Baseclass to give an NdMapping objects an API to generate a table representation. @@ -197,7 +204,7 @@ def rows(self): @property def cols(self): - return self.ndims + max([1, len(self.vdims)]) + return len(self.dimensions()) def pprint_cell(self, row, col): @@ -218,16 +225,8 @@ def pprint_cell(self, row, col): return str(self.kdims[col]) else: dim = self.get_dimension(col) - if col >= ndims: - row_values = self.values()[row-1] - if self.vdims: - val = row_values[col - ndims] - else: - val = row_values - else: - row_data = list(self.data.keys())[row-1] - val = row_data[col] - return dim.pprint_value(val) + values = self[dim.name] + return dim.pprint_value(values[row-1]) def cell_type(self, row, col): @@ -247,7 +246,7 @@ class Element2D(Element): defining the (left, bottom, right and top) edges.""") -class NdElement(Element, Tabular): +class NdElement(NdMapping, Tabular): """ An NdElement is an Element that stores the contained data as an NdMapping. 
In addition to the usual multi-dimensional keys @@ -276,21 +275,23 @@ class NdElement(Element, Tabular): will then be promoted to Dimension objects.""") _deep_indexable = False + _sorted = False def __init__(self, data=None, **params): - if isinstance(data, Element): - data = data.table() - elif isinstance(data, list) and all(np.isscalar(el) for el in data): - data = OrderedDict(list(((k,), v) for k, v in enumerate(data))) + if isinstance(data, list) and all(np.isscalar(el) for el in data): + data = (((k,), (v,)) for k, v in enumerate(data)) + kdims = params.get('kdims', self.kdims) + if (data is not None and not isinstance(data, NdMapping) + and 'Index' not in kdims): + params['kdims'] = ['Index'] + list(kdims) + data_items = data.items() if isinstance(data, dict) else data + data = [((i,)+((k,) if np.isscalar(k) else k), v) for i, (k, v) in enumerate(data_items)] super(NdElement, self).__init__(data, **params) - def _convert_element(self, element): - if isinstance(element, NdElement): - return element.data - if isinstance(element, Element): - return element.table().data - else: return element + @property + def shape(self): + return (len(self), len(self.dimensions())) def reindex(self, kdims=None, vdims=None, force=False): @@ -303,23 +304,30 @@ def reindex(self, kdims=None, vdims=None, force=False): if kdims is None: return super(NdElement, self).reindex(force=force) else: - vdims = self._cached_value_names + vdims = [d for d in self.vdims if d not in kdims] elif kdims is None: - kdims = [d for d in (self._cached_index_names + self._cached_value_names) - if d not in vdims] + kdims = [d for d in self.dimensions if d not in vdims] + if 'Index' not in kdims: kdims = ['Index'] + kdims key_dims = [self.get_dimension(k) for k in kdims] val_dims = [self.get_dimension(v) for v in vdims] - kidxs = [(i, k in self._cached_index_names, self.get_dimension_index(k)) + + kidxs = [(i, k in self.kdims, self.get_dimension_index(k)) for i, k in enumerate(kdims)] - vidxs = [(i, v in 
self._cached_index_names, self.get_dimension_index(v)) + vidxs = [(i, v in self.kdims, self.get_dimension_index(v)) for i, v in enumerate(vdims)] getter = operator.itemgetter(0) items = [] for k, v in self.data.items(): - _, key = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) - for i, iskey, idx in kidxs), key=getter)) - _, val = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) - for i, iskey, idx in vidxs), key=getter)) + if key_dims: + _, key = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) + for i, iskey, idx in kidxs), key=getter)) + else: + key = () + if val_dims: + _, val = zip(*sorted(((i, k[idx] if iskey else v[idx-self.ndims]) + for i, iskey, idx in vidxs), key=getter)) + else: + val = () items.append((key, val)) reindexed = self.clone(items, kdims=key_dims, vdims=val_dims) if not force and len(reindexed) != len(items): @@ -369,19 +377,7 @@ def _filter_data(self, subtable, vdims): vdims = [self.vdims[i] for i in indices] items = [(k, tuple(v[i] for i in indices)) for (k,v) in subtable.items()] - if len(items) == 1: - data = items[0][1] - if len(vdims) == 1: - return data[0] - else: - from ..element.tabular import ItemTable - kwargs = {'label': self.label - for k, v in self.get_param_values(onlychanged=True) - if k in ['group', 'label']} - data = list(zip(vdims, data)) - return ItemTable(data, **kwargs) - else: - return subtable.clone(items, vdims=vdims) + return subtable.clone(items, vdims=vdims) def __getitem__(self, args): @@ -389,66 +385,91 @@ def __getitem__(self, args): In addition to usual NdMapping indexing, NdElements can be indexed by column name (or a slice over column names) """ - ndmap_index = args[:self.ndims] if isinstance(args, tuple) else args - subtable = NdMapping.__getitem__(self, ndmap_index) - - if len(self.vdims) > 1 and not isinstance(subtable, NdElement): - subtable = self.__class__([((), subtable)], label=self.label, - kdims=[], vdims=self.vdims) - - # If subtable is not a slice return as reduced type + 
if args in self.dimensions(): + return self.dimension_values(args) + if isinstance(args, np.ndarray) and args.dtype.kind == 'b': + return NdMapping.__getitem__(self, args) if not isinstance(args, tuple): args = (args,) - shallow = len(args) <= self.ndims - slcs = any(isinstance(a, (slice, set)) for a in args[:self.ndims]) - if shallow and not (slcs or len(args) == 0): - args = list(args) + [self.dimensions('value', True)] - elif shallow: + ndmap_index = args[:self.ndims] + val_index = args[self.ndims:] + if val_index: + if len(val_index) == 1 and val_index[0] in self.vdims: + val_index = val_index[0] + else: + reindexed = self.reindex(self.kdims+list(self.vdims)) + subtable = reindexed[args] + + if not val_index or not isinstance(val_index, tuple): + subtable = NdMapping.__getitem__(self, ndmap_index) + + if isinstance(subtable, NdElement) and all(np.isscalar(idx) for idx in ndmap_index[1:]): + if len(subtable) == 1: + subtable = list(subtable.data.values())[0] + if not isinstance(subtable, NdElement): + if len(self.vdims) > 1: + subtable = self.__class__([(args[1:], subtable)], label=self.label, + kdims=self.kdims[1:], vdims=self.vdims) + else: + if np.isscalar(subtable): + return subtable + return subtable[0] + + if val_index and not isinstance(val_index, tuple): + return self._filter_data(subtable, args[-1]) + else: return subtable - return self._filter_data(subtable, args[-1]) + + def sort(self, by=[]): + if not isinstance(by, list): by = [by] + if not by: by = range(self.ndims) + indexes = [self.get_dimension_index(d) for d in by] + return self.clone(dimension_sort(self.data, self.kdims, self.vdims, + False, indexes, self._cached_index_values)) def sample(self, samples=[]): """ Allows sampling of the Table with a list of samples. 
""" - sample_data = OrderedDict() - for sample in samples: - sample_data[sample] = self[sample] - return self.__class__(sample_data, **dict(self.get_param_values(onlychanged=True))) - - - def reduce(self, dimensions=None, function=None, **reduce_map): - """ - Allows collapsing the Table down by dimension by passing - the dimension name and reduce_fn as kwargs. Reduces - dimensionality of Table until only an ItemTable is left. - """ - reduce_map = self._reduce_map(dimensions, function, reduce_map) - - dim_labels = self._cached_index_names - reduced_table = self - for reduce_fn, group in groupby(reduce_map.items(), lambda x: x[1]): - dims = [dim for dim, _ in group] - split_dims = [self.get_dimension(d) for d in dim_labels if d not in dims] - if len(split_dims) and reduced_table.ndims > 1: - split_map = reduced_table.groupby([d.name for d in split_dims], container_type=HoloMap, - group_type=self.__class__) - reduced_table = self.clone(shared_data=False, kdims=split_dims) - for k, table in split_map.items(): - reduced = [] - for vdim in self.vdims: - valtable = table.select(value=vdim.name) if len(self.vdims) > 1 else table - reduced.append(reduce_fn(valtable.data.values())) - reduced_table[k] = reduced + sample_data = [] + offset = 0 + for i, sample in enumerate(samples): + sample = (sample,) if np.isscalar(sample) else sample + value = self[(slice(None),)+sample] + if isinstance(value, NdElement): + for idx, (k, v) in enumerate(value.data.items()): + sample_data.append(((i+offset+idx,)+k, v)) + offset += idx + else: + sample_data.append(((i+offset,)+sample, (value,))) + return self.clone(sample_data) + + + @classmethod + def reduce(cls, columns, reduce_dims, function): + """ + This implementation allows reducing dimensions by aggregating + over all the remaining key dimensions using the collapse_data + method. 
+ """ + kdims = [kdim for kdim in columns.kdims if kdim not in reduce_dims] + if len(kdims) > 1: + reduced = columns.collapse_data([columns], function, kdims) + reindexed = reduced.reindex(kdims) + else: + reduced = [] + for vdim in columns.vdims: + data = columns[vdim.name] + if isinstance(function, np.ufunc): + reduced.append(function.reduce(data)) + else: + reduced.append(function(data)) + if len(reduced) == 1: + reduced = reduced[0] else: - reduced = tuple(reduce_fn(self.dimension_values(vdim.name)) - for vdim in self.vdims) - reduced_dims = [d for d in self.kdims if d.name not in reduce_map] - params = dict(group=self.group) if self.group != type(self).__name__ else {} - reduced_table = self.__class__([((), reduced)], label=self.label, kdims=reduced_dims, - vdims=self.vdims, **params) - return reduced_table + reduced = OrderedDict([((), tuple(reduced))]) + return reduced def _item_check(self, dim_vals, data): @@ -460,32 +481,64 @@ def _item_check(self, dim_vals, data): @classmethod - def collapse_data(cls, data, function, **kwargs): - groups = zip(*[(np.array(values) for values in odict.values()) for odict in data]) - return OrderedDict((key, np.squeeze(function(np.dstack(group), axis=-1, **kwargs), 0) - if group[0].shape[0] > 1 else - function(np.concatenate(group), **kwargs)) - for key, group in zip(data[0].keys(), groups)) + def collapse_data(cls, data, function, kdims=None, **kwargs): + offset = 0 + joined_data = data[0] + if len(data) > 1: + concatenated = [] + for d in data: + reindexed = [((i+offset,)+k[1:], v) for i, (k, v) in enumerate(d.data.items())] + concatenated += reindexed + offset += len(reindexed) + joined_data = joined_data.clone(concatenated, kdims=joined_data.kdims) + + collapsed = [] + vdims = joined_data.dimensions('value', True) + group_dims = kdims[1:] if 'Index' in kdims else kdims + with sorted_context(False): + grouped = joined_data.groupby([d.name for d in group_dims], + container_type=NdMapping).data.items() + for i, (k, group) in 
enumerate(grouped): + if isinstance(function, np.ufunc): + reduced = tuple(function.reduce(group[vdim]) for vdim in vdims) + else: + reduced = tuple(function(group[vdim], **kwargs) for vdim in vdims) + collapsed.append(((i,)+k, reduced)) + return joined_data.clone(collapsed, kdims=kdims) + + + def aggregate(self, dimensions, function): + """ + Allows aggregating. + """ + rows = [] + grouped = self.groupby(dimensions) + for k, group in grouped.data.items(): + reduced = group.reduce(group, group.kdims, function) + if not np.isscalar(reduced): + reduced = list(reduced.values())[0] + else: + reduced = (reduced,) + rows.append((k, reduced)) + return self.clone(rows, kdims=grouped.kdims) def dimension_values(self, dim): - dim = self.get_dimension(dim).name - if dim in self._cached_value_names: - index = self._cached_value_names.index(dim) - return [v[index] for v in self.data.values()] + dim = self.get_dimension(dim) + value_dims = self.dimensions('value', label=True) + if dim.name in value_dims: + index = value_dims.index(dim.name) + return np.array([v[index] for v in self.data.values()]) else: - return NdMapping.dimension_values(self, dim) + return NdMapping.dimension_values(self, dim.name) - def dframe(self, value_label='data'): - try: - import pandas - except ImportError: - raise Exception("Cannot build a DataFrame without the pandas library.") - labels = [d.name for d in self.dimensions()] - return pandas.DataFrame( - [dict(zip(labels, np.concatenate([np.array(k),v]))) - for (k, v) in self.data.items()]) + def values(self): + " Returns the values of all the elements." 
+ values = self.data.values() + if len(self.vdims) == 1: + return [v[0] for v in values] + return list(values) @@ -570,11 +623,10 @@ def __call__(self): raise ValueError("Collator values must be Dimensioned objects " "before collation.") - dim_keys = zip(self._cached_index_names, key) + dim_keys = zip(self.kdims, key) varying_keys = [(d, k) for d, k in dim_keys if not self.drop_constant or (d not in constant_dims and d not in self.drop)] - constant_keys = [(d if isinstance(d, Dimension) else Dimension(d), k) - for d, k in dim_keys if d in constant_dims + constant_keys = [(d, k) for d, k in dim_keys if d in constant_dims and d not in self.drop and self.drop_constant] if varying_keys or constant_keys: data = self._add_dimensions(data, varying_keys, @@ -591,7 +643,7 @@ def __call__(self): def _add_item(self, key, value, sort=True): - Tabular._add_item(self, key, value, sort) + NdMapping._add_item(self, key, value, sort) @property @@ -601,7 +653,7 @@ def static_dimensions(self): """ dimensions = [] for dim in self.kdims: - if len(set(self.dimension_values(dim.name))) == 1: + if len(set(self[dim.name])) == 1: dimensions.append(dim) return dimensions diff --git a/holoviews/core/layout.py b/holoviews/core/layout.py index eab761e329..6de3eef87a 100644 --- a/holoviews/core/layout.py +++ b/holoviews/core/layout.py @@ -127,7 +127,7 @@ def get(self, key, default=None): def dimension_values(self, dimension): dimension = self.get_dimension(dimension).name - if dimension in self._cached_index_names: + if dimension in self.kdims: return self.layout_order[:len(self.data)] else: return self.main.dimension_values(dimension) diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index bc779c12c8..5a35f1c889 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -2,7 +2,7 @@ Supplies MultiDimensionalMapping and NdMapping which are multi-dimensional map types. 
The former class only allows indexing whereas the latter also enables slicing over multiple dimension ranges. -s""" +""" from collections import Sequence from itertools import cycle @@ -114,8 +114,10 @@ def __init__(self, initial_items=None, **params): if isinstance(initial_items, tuple): self._add_item(initial_items[0], initial_items[1]) elif not self._check_items and self._instantiated: - if isinstance(initial_items, (dict, MultiDimensionalMapping)): + if isinstance(initial_items, dict): initial_items = initial_items.items() + elif isinstance(initial_items, MultiDimensionalMapping): + initial_items = initial_items.data.items() self.data = OrderedDict((k if isinstance(k, tuple) else (k,), v) for k, v in initial_items) self._resort() @@ -159,19 +161,19 @@ def _add_item(self, dim_vals, data, sort=True): # Check and validate for categorical dimensions if self._cached_categorical: - valid_vals = zip(self._cached_index_names, dim_vals) + valid_vals = zip(self.kdims, dim_vals) else: valid_vals = [] for dim, val in valid_vals: - if self._cached_index_values[dim] == 'initial': self._cached_index_values[dim] = [] - vals = self._cached_index_values[dim] + vals = self._cached_index_values[dim.name] + if vals == 'initial': self._cached_index_values[dim.name] = [] if not self._instantiated and self.get_dimension(dim).values == 'initial': if val not in vals: - self._cached_index_values[dim].append(val) + self._cached_index_values[dim.name].append(val) elif vals and val not in vals: - raise KeyError('%s Dimension value %s not in' - ' specified Dimension values.' % (dim, repr(val))) + raise KeyError('%s dimension value %s not in' + ' specified dimension values.' % (dim, repr(val))) # Updates nested data structures rather than simply overriding them. 
if ((dim_vals in self.data) @@ -247,8 +249,9 @@ def _dataslice(self, data, indices): def _resort(self): if self._sorted: - resorted = dimension_sort(self.data, self.kdims, + resorted = dimension_sort(self.data, self.kdims, self.vdims, self._cached_categorical, + range(self.ndims), self._cached_index_values) self.data = OrderedDict(resorted) @@ -273,6 +276,7 @@ def groupby(self, dimensions, container_type=None, group_type=None, **kwargs): self.warning('Cannot split Map with only one dimension.') return self + dimensions = [self.get_dimension(d).name for d in dimensions] container_type = container_type if container_type else type(self) group_type = group_type if group_type else type(self) dims, inds = zip(*((self.get_dimension(dim), self.get_dimension_index(dim)) @@ -283,7 +287,8 @@ def groupby(self, dimensions, container_type=None, group_type=None, **kwargs): for key in self.data.keys()) with item_check(False): selects = group_select(list(selects)) - groups = [(k, group_type(v.reindex(inames), **kwargs)) + groups = [(k, group_type((v.reindex(inames) if isinstance(v, NdMapping) + else [((), (v,))]), **kwargs)) for k, v in iterative_select(self, dimensions, selects)] return container_type(groups, kdims=dims) @@ -298,17 +303,18 @@ def add_dimension(self, dimension, dim_pos, dim_val, **kwargs): if isinstance(dimension, str): dimension = Dimension(dimension) - if dimension.name in self._cached_index_names: + if dimension in self.kdims: raise Exception('{dim} dimension already defined'.format(dim=dimension.name)) dimensions = self.kdims[:] dimensions.insert(dim_pos, dimension) - if isinstance(dim_val, list) and not len(dim_val) == len(self): - raise ValueError("Added dimension values must be same length" - "as existing keys.") - else: + if np.isscalar(dim_val): dim_val = cycle([dim_val]) + else: + if not len(dim_val) == len(self): + raise ValueError("Added dimension values must be same length" + "as existing keys.") items = OrderedDict() for dval, (key, val) in 
zip(dim_val, self.data.items()): @@ -324,9 +330,8 @@ def drop_dimension(self, dimensions): Returns a new mapping with the named dimension(s) removed. """ dimensions = [dimensions] if np.isscalar(dimensions) else dimensions - dim_labels = [d for d in self._cached_index_names if d not in dimensions] - dim_inds = [self.get_dimension_index(d) for d in dim_labels] - dims = [self.get_dimension(d) for d in dim_labels] + dims = [d for d in self.kdims if d not in dimensions] + dim_inds = [self.get_dimension_index(d) for d in dims] key_getter = itemgetter(*dim_inds) return self.clone([(key_getter(k), v) for k, v in self.data.items()], kdims=dims) @@ -335,8 +340,8 @@ def drop_dimension(self, dimensions): def dimension_values(self, dimension): "Returns the values along the specified dimension." dimension = self.get_dimension(dimension).name - if dimension in self._cached_index_names: - return [k[self.get_dimension_index(dimension)] for k in self.data.keys()] + if dimension in self.kdims: + return np.array([k[self.get_dimension_index(dimension)] for k in self.data.keys()]) if dimension in self.dimensions(label=True): values = [el.dimension_values(dimension) for el in self if dimension in el.dimensions()] @@ -355,15 +360,16 @@ def reindex(self, kdims=[], force=False): created object as the new labels must be sufficient to address each value uniquely. 
""" + old_kdims = [d.name for d in self.kdims] if not len(kdims): - kdims = [d for d in self._cached_index_names + kdims = [d for d in old_kdims if not len(set(self.dimension_values(d))) == 1] indices = [self.get_dimension_index(el) for el in kdims] keys = [tuple(k[i] for i in indices) for k in self.data.keys()] reindexed_items = OrderedDict( (k, v) for (k, v) in zip(keys, self.data.values())) - reduced_dims = set(self._cached_index_names).difference(kdims) + reduced_dims = set([d.name for d in self.kdims]).difference(kdims) dimensions = [self.get_dimension(d) for d in kdims if d not in reduced_dims] @@ -420,16 +426,13 @@ def info(self): def table(self, **kwargs): "Creates a table from the stored keys and data." - table = None + tables = [] for key, value in self.data.items(): value = value.table(**kwargs) for idx, (dim, val) in enumerate(zip(self.kdims, key)): value = value.add_dimension(dim, idx, val) - if table is None: - table = value - else: - table.update(value) - return table + tables.append(value) + return value.concat(tables) def dframe(self): @@ -438,7 +441,7 @@ def dframe(self): import pandas except ImportError: raise Exception("Cannot build a DataFrame without the pandas library.") - labels = self._cached_index_names + [self.group] + labels = self.dimensions('key', True) + [self.group] return pandas.DataFrame( [dict(zip(labels, k + (v,))) for (k, v) in self.data.items()]) @@ -451,8 +454,7 @@ def update(self, other): unchanged after the update. 
""" if isinstance(other, NdMapping): - dims = [d for d in other._cached_index_names - if d not in self._cached_index_names] + dims = [d for d in other.kdims if d not in self.kdims] if len(dims) == other.ndims: raise KeyError("Cannot update with NdMapping that has" " a different set of key dimensions.") @@ -556,6 +558,11 @@ def __getitem__(self, indexslice): """ if indexslice in [Ellipsis, ()]: return self + elif isinstance(indexslice, np.ndarray) and indexslice.dtype.kind == 'b': + if not len(indexslice) == len(self): + raise IndexError("Boolean index must match length of sliced object") + selection = zip(indexslice, self.data.items()) + return self.clone([item for c, item in selection if c]) map_slice, data_slice = self._split_index(indexslice) map_slice = self._transform_indices(map_slice) @@ -569,12 +576,17 @@ def __getitem__(self, indexslice): for cidx, (condition, dim) in enumerate(zip(conditions, self.kdims)): values = self._cached_index_values.get(dim.name, None) items = [(k, v) for k, v in items - if condition(values.index(k[cidx]) if values else k[cidx])] - items = [(k, self._dataslice(v, data_slice)) for k, v in items] - if len(items) == 0: + if condition(values.index(k[cidx]) + if values else k[cidx])] + sliced_items = [] + for k, v in items: + val_slice = self._dataslice(v, data_slice) + if val_slice or isinstance(val_slice, tuple): + sliced_items.append((k, val_slice)) + if len(sliced_items) == 0: raise KeyError('No items within specified slice.') with item_check(False): - return self.clone(items) + return self.clone(sliced_items) def _expand_slice(self, indices): @@ -805,7 +817,7 @@ def dframe(self): dframes = [] for key, view in self.data.items(): view_frame = view.dframe() - key_dims = reversed(list(zip(key, self._cached_index_names))) + key_dims = reversed(list(zip(key, self.dimensions('key', label)))) for val, dim in key_dims: dimn = 1 while dim in view_frame: diff --git a/holoviews/core/overlay.py b/holoviews/core/overlay.py index 
d3c4131dc0..09d7c54000 100644 --- a/holoviews/core/overlay.py +++ b/holoviews/core/overlay.py @@ -75,7 +75,7 @@ def dimension_values(self, dimension): if not found: return super(CompositeOverlay, self).dimension_values(dimension) values = [v for v in values if v is not None and len(v)] - return np.concatenate(values) if len(values) else [] + return np.concatenate(values) if len(values) else np.array() @@ -155,7 +155,7 @@ def collapse(self, function): "and cannot be collapsed.") else: return elements[0].clone(types[0].collapse_data([el.data for el in elements], - function)) + function, self.kdims)) @property def group(self): diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index 7f2d6a46f6..e2ef429f4f 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -3,6 +3,7 @@ import param +from . import traversal from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement from .layout import Layout, AdjointLayout, NdLayout from .ndmapping import UniformNdMapping, NdMapping, item_check @@ -31,8 +32,7 @@ def overlay(self, dimensions=None, **kwargs): with item_check(False): return NdOverlay(self, **kwargs) else: - dims = [d for d in self._cached_index_names - if d not in dimensions] + dims = [d for d in self.kdims if d not in dimensions] return self.groupby(dims, group_type=NdOverlay, **kwargs) @@ -92,7 +92,7 @@ def _dimension_keys(self): Helper for __mul__ that returns the list of keys together with the dimension labels. """ - return [tuple(zip(self._cached_index_names, [k] if self.ndims == 1 else k)) + return [tuple(zip([d.name for d in self.kdims], [k] if self.ndims == 1 else k)) for k in self.keys()] @@ -108,8 +108,8 @@ def __mul__(self, other): with completely different dimensions aren't overlaid. 
""" if isinstance(other, self.__class__): - self_set = set(self._cached_index_names) - other_set = set(other._cached_index_names) + self_set = {d.name for d in self.kdims} + other_set = {d.name for d in other.kdims} # Determine which is the subset, to generate list of keys and # dimension labels for the new view @@ -131,10 +131,10 @@ def __mul__(self, other): # Generate keys for both subset and superset and sort them by the dimension index. self_key = tuple(k for p, k in sorted( [(self.get_dimension_index(dim), v) for dim, v in dim_keys - if dim in self._cached_index_names])) + if dim in self.kdims])) other_key = tuple(k for p, k in sorted( [(other.get_dimension_index(dim), v) for dim, v in dim_keys - if dim in other._cached_index_names])) + if dim in other.kdims])) new_key = self_key if other_in_self else other_key # Append SheetOverlay of combined items if (self_key in self) and (other_key in other): @@ -195,9 +195,9 @@ def collapse(self, dimensions=None, function=None, **kwargs): """ from .operation import MapOperation if not dimensions: - dimensions = self._cached_index_names + dimensions = self.kdims if self.ndims > 1 and len(dimensions) != self.ndims: - groups = self.groupby([dim for dim in self._cached_index_names + groups = self.groupby([dim for dim in self.kdims if dim not in dimensions]) else: [self.get_dimension(dim) for dim in dimensions] @@ -207,7 +207,12 @@ def collapse(self, dimensions=None, function=None, **kwargs): if isinstance(function, MapOperation): collapsed[key] = function(group, **kwargs) else: - data = group.type.collapse_data([el.data for el in group], function, **kwargs) + group_data = [el.data for el in group] + args = (group_data, function, group.last.kdims) + if hasattr(group.last, 'interface'): + data = group.last.interface.collapse_data(*args, **kwargs) + else: + data = group.type.collapse_data(*args, **kwargs) collapsed[key] = group.last.clone(data) return collapsed if self.ndims > 1 else collapsed.last @@ -278,7 +283,8 @@ def 
relabel(self, label=None, group=None, depth=1): def hist(self, num_bins=20, bin_range=None, adjoin=True, individually=True, **kwargs): - histmap = self.clone(shared_data=False) + histmaps = [self.clone(shared_data=False) + for d in kwargs.get('dimension', range(1))] if individually: map_range = None @@ -291,16 +297,27 @@ def hist(self, num_bins=20, bin_range=None, adjoin=True, individually=True, **kw if issubclass(self.type, (NdOverlay, Overlay)) and 'index' not in kwargs: kwargs['index'] = 0 for k, v in self.data.items(): - histmap[k] = v.hist(adjoin=False, bin_range=bin_range, - individually=individually, num_bins=num_bins, - style_prefix=style_prefix, **kwargs) - - if adjoin and issubclass(self.type, (NdOverlay, Overlay)): - layout = (self << histmap) - layout.main_layer = kwargs['index'] + hists = v.hist(adjoin=False, bin_range=bin_range, + individually=individually, num_bins=num_bins, + style_prefix=style_prefix, **kwargs) + if isinstance(hists, Layout): + for i, hist in enumerate(hists): + histmaps[i][k] = hist + else: + histmaps[0][k] = hists + + if adjoin: + layout = self + for hist in histmaps: + layout = (layout << hist) + if issubclass(self.type, (NdOverlay, Overlay)): + layout.main_layer = kwargs['index'] return layout - - return (self << histmap) if adjoin else histmap + else: + if len(histmaps) > 1: + return Layout.from_values(histmaps) + else: + return histmaps[0] @@ -314,9 +331,6 @@ class GridSpace(UniformNdMapping): 2D parameter spaces. 
""" - # NOTE: If further composite types supporting Overlaying and Layout these - # classes may be moved to core/composite.py - kdims = param.List(default=[Dimension(name="X"), Dimension(name="Y")], bounds=(1,2)) @@ -361,7 +375,7 @@ def _transform_indices(self, key): ndims = self.ndims if all(not isinstance(el, slice) for el in key): dim_inds = [] - for dim in self._cached_index_names: + for dim in self.kdims: dim_type = self.get_dimension_type(dim) if isinstance(dim_type, type) and issubclass(dim_type, Number): dim_inds.append(self.get_dimension_index(dim)) @@ -440,3 +454,24 @@ def shape(self): if self.ndims == 1: return (len(keys), 1) return len(set(k[0] for k in keys)), len(set(k[1] for k in keys)) + + + +class GridMatrix(GridSpace): + """ + GridMatrix is container type for heterogeneous Element types + laid out in a grid. Unlike a GridSpace the axes of the Grid + must not represent an actual coordinate space, but may be used + to plot various dimensions against each other. The GridMatrix + is usually constructed using the gridmatrix operation, which + will generate a GridMatrix plotting each dimension in an + Element against each other. + """ + + + def _item_check(self, dim_vals, data): + if not traversal.uniform(NdMapping([(0, self), (1, data)])): + raise ValueError("HoloMaps dimensions must be consistent in %s." 
% + type(self).__name__) + NdMapping._item_check(self, dim_vals, data) + diff --git a/holoviews/core/util.py b/holoviews/core/util.py index 87ee6da278..a905e07610 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -8,6 +8,21 @@ import numpy as np import param +try: + import pandas as pd +except ImportError: + pd = None + +try: + import dask.dataframe as dd +except ImportError: + dd = None + +try: + from blaze import bz +except ImportError: + bz = None + # Python3 compatibility basestring = str if sys.version_info.major == 3 else basestring @@ -246,64 +261,76 @@ def find_minmax(lims, olims): def find_range(values, soft_range=[]): - """ - Safely finds either the numerical min and max of - a set of values, falling back to the first and - the last value in the sorted list of values. - """ - try: - values = np.array(values) - values = np.squeeze(values) if len(values.shape) > 1 else values - values = np.concatenate([values, soft_range]) - return np.nanmin(values), np.nanmax(values) - except: - try: - values = sorted(values) - return (values[0], values[-1]) - except: - return (None, None) + """ + Safely finds either the numerical min and max of + a set of values, falling back to the first and + the last value in the sorted list of values. + """ + try: + values = np.array(values) + values = np.squeeze(values) if len(values.shape) > 1 else values + if len(soft_range): + values = np.concatenate([values, soft_range]) + if values.dtype.kind == 'M': + return values.min(), values.max() + return np.nanmin(values), np.nanmax(values) + except: + try: + values = sorted(values) + return (values[0], values[-1]) + except: + return (None, None) def max_range(ranges): - """ - Computes the maximal lower and upper bounds from a list bounds. 
- """ - try: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') - arr = np.array(ranges, dtype=np.float) - return (np.nanmin(arr[:, 0]), np.nanmax(arr[:, 1])) - except: - return (np.NaN, np.NaN) + """ + Computes the maximal lower and upper bounds from a list bounds. + """ + try: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') + arr = np.array(ranges) + if arr.dtype.kind == 'M': + return arr[:, 0].min(), arr[:, 1].max() + return (np.nanmin(arr[:, 0]), np.nanmax(arr[:, 1])) + except: + return (np.NaN, np.NaN) def max_extents(extents, zrange=False): - """ - Computes the maximal extent in 2D and 3D space from - list of 4-tuples or 6-tuples. If zrange is enabled - all extents are converted to 6-tuples to comput - x-, y- and z-limits. - """ - - if zrange: - num = 6 - inds = [(0, 2), (1, 3)] - extents = [e if len(e) == 6 else (e[0], e[1], None, - e[2], e[3], None) - for e in extents] - else: - num = 4 - inds = [(0, 2), (1, 3)] - arr = np.array(extents, dtype=np.float, ndmin=2) - extents = [np.NaN] * num - if 0 in arr.shape: - return extents - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') - for lower, upper in inds: - extents[lower] = np.nanmin(arr[:, lower]) - extents[upper] = np.nanmax(arr[:, upper]) - return tuple(extents) + """ + Computes the maximal extent in 2D and 3D space from + list of 4-tuples or 6-tuples. If zrange is enabled + all extents are converted to 6-tuples to comput + x-, y- and z-limits. 
+ """ + if zrange: + num = 6 + inds = [(0, 2), (1, 3)] + extents = [e if len(e) == 6 else (e[0], e[1], None, + e[2], e[3], None) + for e in extents] + else: + num = 4 + inds = [(0, 2), (1, 3)] + arr = list(zip(*extents)) if extents else [] + extents = [np.NaN] * num + if len(arr) == 0: + return extents + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered') + for lidx, uidx in inds: + lower = [v for v in arr[lidx] if v is not None] + upper = [v for v in arr[uidx] if v is not None] + if lower and isinstance(lower[0], np.datetime64): + extents[lidx] = np.min(lower) + elif lower: + extents[lidx] = np.nanmin(lower) + if upper and isinstance(upper[0], np.datetime64): + extents[uidx] = np.max(upper) + elif upper: + extents[uidx] = np.nanmax(upper) + return tuple(extents) def int_to_alpha(n, upper=True): @@ -390,16 +417,26 @@ def python2sort(x,key=None): return itertools.chain.from_iterable(sorted(group, key=key) for group in groups) -def dimension_sort(odict, dimensions, categorical, cached_values): +def dimension_sort(odict, kdims, vdims, categorical, key_index, cached_values): """ Sorts data by key using usual Python tuple sorting semantics or sorts in categorical order for any categorical Dimensions. 
""" sortkws = {} - if categorical: - sortkws['key'] = lambda x: tuple(cached_values[d.name].index(x[0][i]) - if d.values else x[0][i] - for i, d in enumerate(dimensions)) + ndims = len(kdims) + dimensions = kdims+vdims + indexes = [(dimensions[i], int(i not in range(ndims)), + i if i in range(ndims) else i-ndims) + for i in key_index] + + if len(set(key_index)) != len(key_index): + raise ValueError("Cannot sort on duplicated dimensions") + elif categorical: + sortkws['key'] = lambda x: tuple(cached_values[dim.name].index(x[t][d]) + if dim.values else x[t][d] + for i, (dim, t, d) in enumerate(indexes)) + elif key_index != list(range(len(kdims+vdims))): + sortkws['key'] = lambda x: tuple(x[t][d] for _, t, d in indexes) if sys.version_info.major == 3: return python2sort(odict.items(), **sortkws) else: @@ -570,3 +607,21 @@ def find_file(folder, filename): for filename in fnmatch.filter(filenames, filename): matches.append(os.path.join(root, filename)) return matches[-1] + + +def is_dataframe(data): + """ + Checks whether the supplied data is DatFrame type. 
+ """ + return((pd is not None and isinstance(data, pd.DataFrame)) or + (dd is not None and isinstance(data, dd.DataFrame)) or + (bz is not None and isinstance(data, bz.Data))) + + +def get_param_values(data): + params = dict(kdims=data.kdims, vdims=data.vdims, + label=data.label) + if data.group != data.params()['group'].default: + params['group'] = data.group + return params + diff --git a/holoviews/element/annotation.py b/holoviews/element/annotation.py index adaa388dbe..d925535af1 100644 --- a/holoviews/element/annotation.py +++ b/holoviews/element/annotation.py @@ -32,6 +32,7 @@ def __init__(self, data, **params): def __getitem__(self, key): + if key in self.dimensions(): return self.dimension_values(key) if not isinstance(key, tuple) or len(key) == 1: key = (key, slice(None)) elif len(key) == 0: return self.clone() diff --git a/holoviews/element/chart.py b/holoviews/element/chart.py index ae64e2b708..666c62a46f 100644 --- a/holoviews/element/chart.py +++ b/holoviews/element/chart.py @@ -3,11 +3,11 @@ import param from ..core import util -from ..core import OrderedDict, Dimension, UniformNdMapping, Element, Element2D, NdElement, HoloMap +from ..core import OrderedDict, Dimension, UniformNdMapping, Element, Columns, Element2D, NdElement, HoloMap from .tabular import ItemTable, Table from .util import compute_edges -class Chart(Element2D): +class Chart(Columns, Element2D): """ The data held within Chart is a numpy array of shape (N, D), where N is the number of samples and D the number of dimensions. 
@@ -29,193 +29,22 @@ class Chart(Element2D): The value dimensions of the Chart, usually corresponding to a number of dependent variables.""") - _null_value = np.array([[], []]).T # For when data is None + def __getitem__(self, index): + sliced = super(Chart, self).__getitem__(index) + if not isinstance(sliced, Chart): + return sliced - def __init__(self, data, **kwargs): - data, params = self._process_data(data) - params.update(kwargs) - super(Chart, self).__init__(data, **params) - self.data = self._validate_data(self.data) - - - def _convert_element(self, element): - if isinstance(element, Chart): - return element.data - elif isinstance(element, NdElement): - return np.vstack([np.concatenate([key, vals]) - for key, vals in element.data.items()]).astype(np.float) - else: - return super(Chart, self)._convert_element(element) - - - def _process_data(self, data): - params = {} - if isinstance(data, UniformNdMapping) or (isinstance(data, list) and data - and isinstance(data[0], Element2D)): - params = dict([v for v in data][0].get_param_values(onlychanged=True)) - data = np.concatenate([v.data for v in data]) - elif isinstance(data, Element): - pass - elif isinstance(data, tuple): - data = np.column_stack(data) - elif not isinstance(data, np.ndarray): - data = self._null_value if (data is None) else list(data) - if len(data): - data = np.array(data) - - return data, params - - - def _validate_data(self, data): - if data.ndim == 1: - data = np.array(list(zip(range(len(data)), data))) - if not data.shape[1] == len(self.dimensions()): - raise ValueError("Data has to match number of key and value dimensions") - return data - - - def closest(self, coords): - """ - Given single or multiple x-values, returns the list - of closest actual samples. 
- """ - if not isinstance(coords, list): coords = [coords] - xs = self.data[:, 0] - idxs = [np.argmin(np.abs(xs-coord)) for coord in coords] - return [xs[idx] for idx in idxs] - - - def __getitem__(self, slices): - """ - Implements slicing or indexing of the data by the data x-value. - If a single element is indexed reduces the Element2D to a single - Scatter object. - """ - if slices is (): - return self - if not isinstance(slices, tuple): slices = (slices,) - if len(slices) > self.ndims: - raise Exception("Slice must match number of key dimensions.") - - data = self.data - lower_bounds, upper_bounds = [], [] - for idx, slc in enumerate(slices): + if not isinstance(index, tuple): index = (index,) + ndims = len(self.extents)//2 + lower_bounds, upper_bounds = [None]*ndims, [None]*ndims + for i, slc in enumerate(index[:ndims]): if isinstance(slc, slice): - start = -float("inf") if slc.start is None else slc.start - stop = float("inf") if slc.stop is None else slc.stop - - clip_start = start <= data[:, idx] - clip_stop = data[:, idx] < stop - data = data[np.logical_and(clip_start, clip_stop), :] - lbound = self.extents[idx] - ubound = self.extents[self.ndims:][idx] - lower_bounds.append(lbound if slc.start is None else slc.start) - upper_bounds.append(ubound if slc.stop is None else slc.stop) - else: - if self.ndims == 1: - data_index = np.argmin(np.abs(data[:, idx] - slc)) - data = data[data_index, :] - else: - raise KeyError("Only 1D Chart types may be indexed.") - if not any(isinstance(slc, slice) for slc in slices): - if data.ndim == 1: - data = data[self.ndims:] - dims = data.shape[0] - else: - data = data[:, self.ndims:] - dims = data.shape[1] - return data[0] if dims == 1 else data - if self.ndims == 1: - lower_bounds.append(None) - upper_bounds.append(None) - - return self.clone(data, extents=tuple(lower_bounds + upper_bounds)) - - - @classmethod - def collapse_data(cls, data, function, **kwargs): - new_data = [arr[:, 1:] for arr in data] - if 
isinstance(function, np.ufunc): - collapsed = function.reduce(new_data) - else: - collapsed = function(np.dstack(new_data), axis=-1, **kwargs) - return np.hstack([data[0][:, 0, np.newaxis], collapsed]) - - - def sample(self, samples=[]): - """ - Allows sampling of Chart Elements using the default - syntax of providing a map of dimensions and sample pairs. - """ - sample_data = OrderedDict() - for sample in samples: - data = self[sample] - data = data if np.isscalar(data) else tuple(data) - sample_data[sample] = data - params = dict(self.get_param_values(onlychanged=True)) - params.pop('extents', None) - return Table(sample_data, **dict(params, kdims=self.kdims, - vdims=self.vdims)) - - - def reduce(self, dimensions=[], function=None, **reduce_map): - """ - Allows collapsing of Chart objects using the supplied map of - dimensions and reduce functions. - """ - reduce_map = self._reduce_map(dimensions, function, reduce_map) - - if len(reduce_map) > 1: - raise ValueError("Chart Elements may only be reduced to a point.") - dim, reduce_fn = list(reduce_map.items())[0] - if dim in self._cached_index_names: - reduced_data = OrderedDict(zip(self.vdims, reduce_fn(self.data[:, self.ndims:], axis=0))) - else: - raise Exception("Dimension %s not found in %s" % (dim, type(self).__name__)) - params = dict(self.get_param_values(onlychanged=True), vdims=self.vdims, - kdims=[]) - params.pop('extents', None) - return ItemTable(reduced_data, **params) - - - def __len__(self): - return len(self.data) - - - def dimension_values(self, dim): - index = self.get_dimension_index(dim) - if index < len(self.dimensions()): - return self.data[:, index] - else: - return super(Chart, self).dimension_values(dim) - - - def range(self, dim, data_range=True): - dim_idx = dim if isinstance(dim, int) else self.get_dimension_index(dim) - dim = self.get_dimension(dim_idx) - if dim.range != (None, None): - return dim.range - elif dim_idx < len(self.dimensions()): - if self.data.ndim == 1: - data = 
np.atleast_2d(self.data).T - else: - data = self.data - if len(data): - data = data[:, dim_idx] - data_range = np.nanmin(data), np.nanmax(data) - else: - data_range = (np.NaN, np.NaN) - if data_range: - return util.max_range([data_range, dim.soft_range]) - else: - return dim.soft_range - - - def dframe(self): - import pandas as pd - columns = [d.name for d in self.dimensions()] - return pd.DataFrame(self.data, columns=columns) - + lbound = self.extents[i] + ubound = self.extents[ndims:][i] + lower_bounds[i] = lbound if slc.start is None else slc.start + upper_bounds[i] = ubound if slc.stop is None else slc.stop + sliced.extents = tuple(lower_bounds+upper_bounds) + return sliced class Scatter(Chart): @@ -226,12 +55,6 @@ class Scatter(Chart): group = param.String(default='Scatter', constant=True) - @classmethod - def collapse_data(cls, data, function=None, **kwargs): - if function: - raise Exception("Scatter elements are inhomogenous and " - "cannot be collapsed with a function.") - return np.concatenate(data) class Curve(Chart): @@ -242,21 +65,6 @@ class Curve(Chart): group = param.String(default='Curve', constant=True) - def progressive(self): - """ - Create map indexed by Curve x-axis with progressively expanding number - of curve samples. 
- """ - vmap = HoloMap(None, kdims=self.kdims, - title=self.title+' {dims}') - for idx in range(len(self.data)): - x = self.data[0] - if x in vmap: - vmap[x].data.append(self.data[0:idx]) - else: - vmap[x] = self.clone(self.data[0:idx]) - return vmap - class ErrorBars(Chart): @@ -281,19 +89,22 @@ class ErrorBars(Chart): vdims = param.List(default=[Dimension('lerror'), Dimension('uerror')], bounds=(2,2), constant=True) - def _validate_data(self, data): - if data.shape[1] == 3: - return np.column_stack([data, data[:, 2]]) - else: - return data + def __init__(self, data, **params): + super(ErrorBars, self).__init__(data, **params) + if self.shape[1] == 3: + self.data = self.interface.add_dimension(self, self.vdims[1].name, + 3, self.dimension_values(2)) def range(self, dim, data_range=True): drange = super(ErrorBars, self).range(dim, data_range) didx = self.get_dimension_index(dim) if didx == 1 and data_range: - lower = np.nanmin(self.data[:, 1] - self.data[:, 2]) - upper = np.nanmax(self.data[:, 1] + self.data[:, 3]) + mean = self.dimension_values(1) + neg_error = self.dimension_values(2) + pos_error = self.dimension_values(3) + lower = np.nanmin(mean-neg_error) + upper = np.nanmax(mean+pos_error) return util.max_range([(lower, upper), drange]) else: return drange @@ -315,7 +126,7 @@ class Spread(ErrorBars): -class Bars(NdElement): +class Bars(Columns): """ Bars is an Element type, representing a number of stacked and grouped bars, depending the dimensionality of the key and value @@ -359,6 +170,7 @@ def __getitem__(self, key): """ Implements slicing or indexing of the Histogram """ + if key in self.dimensions(): return self.dimension_values(key) if key is (): return self # May no longer be necessary if isinstance(key, tuple) and len(key) > self.ndims: raise Exception("Slice must match number of key dimensions.") @@ -431,9 +243,9 @@ def extents(self, extents): def dimension_values(self, dim): dim = self.get_dimension(dim).name - if dim in self._cached_value_names: + 
if dim in self.vdims: return self.values - elif dim in self._cached_index_names: + elif dim in self.kdims: return np.convolve(self.edges, np.ones((2,))/2, mode='valid') else: return super(Histogram, self).dimension_values(dim) @@ -479,22 +291,14 @@ class Points(Chart): vdims = param.List(default=[]) - _min_dims = 2 # Minimum number of columns - def __len__(self): - return self.data.shape[0] - def __iter__(self): i = 0 while i < len(self): yield tuple(self.data[i, ...]) i += 1 - @classmethod - def collapse_data(cls, data, function, **kwargs): - return Scatter.collapse_data(data, function, **kwargs) - class VectorField(Points): @@ -535,8 +339,6 @@ class VectorField(Points): _min_dims = 3 # Minimum number of columns def __init__(self, data, **params): - if not isinstance(data, np.ndarray): - data = np.array([ - [el for el in (col.flat if isinstance(col,np.ndarray) else col)] - for col in data]).T + if isinstance(data, list) and all(isinstance(d, np.ndarray) for d in data): + data = np.column_stack([d.flat if d.ndim > 1 else d for d in data]) super(VectorField, self).__init__(data, **params) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index b25e92b978..b977aa6c07 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -20,7 +20,7 @@ import numpy as np from unittest.util import safe_repr from unittest import TestCase -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_equal, assert_array_almost_equal from . 
import * # pyflakes:ignore (All Elements need to support comparison) from ..core import Element, Empty, AdjointLayout, Overlay, Dimension, HoloMap, \ @@ -145,6 +145,7 @@ def register(cls): cls.equality_type_funcs[HeatMap] = cls.compare_heatmap # Charts + cls.equality_type_funcs[Columns] = cls.compare_columns cls.equality_type_funcs[Curve] = cls.compare_curve cls.equality_type_funcs[ErrorBars] = cls.compare_errorbars cls.equality_type_funcs[Spread] = cls.compare_spread @@ -210,9 +211,12 @@ def compare_floats(cls, arr1, arr2, msg='Floats'): @classmethod def compare_arrays(cls, arr1, arr2, msg='Arrays'): try: - assert_array_almost_equal(arr1, arr2) - except AssertionError as e: - raise cls.failureException(msg + str(e)[11:]) + assert_array_equal(arr1, arr2) + except: + try: + assert_array_almost_equal(arr1, arr2) + except AssertionError as e: + raise cls.failureException(msg + str(e)[11:]) @classmethod def bounds_check(cls, el1, el2, msg=None): @@ -430,106 +434,109 @@ def compare_bounds(cls, el1, el2, msg='Bounds'): #========# # Charts # #========# - + @classmethod - def compare_curve(cls, el1, el2, msg=None): + def compare_columns(cls, el1, el2, msg='Columns'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Curve data') + if len(el1) != len(el2): + raise AssertionError("%s not of matching length." % msg) + dimension_data = [(d, el1[d], el2[d]) for d in el1.dimensions()] + for dim, d1, d2 in dimension_data: + if d1.dtype != d2.dtype: + cls.failureException("%s %s columns have different type." % (msg, dim) + + " First has type %s, and second has type %s." + % (d1, d2)) + if d1.dtype.kind in 'SUOV': + if np.all(d1 != d2): + cls.failureException("Columns along dimension %s not equal." 
% dim) + else: + cls.compare_arrays(d1, d2, msg) - @classmethod - def compare_errorbars(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'ErrorBars data') @classmethod - def compare_spread(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Spread data') + def compare_curve(cls, el1, el2, msg='Curve'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_scatter(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Scatter data') + def compare_errorbars(cls, el1, el2, msg='ErrorBars'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_scatter3d(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Scatter3D data') + def compare_spread(cls, el1, el2, msg='Spread'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_trisurface(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Trisurface data') + def compare_scatter(cls, el1, el2, msg='Scatter'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_histogram(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.edges, el2.edges, "Histogram edges") - cls.compare_arrays(el1.values, el2.values, "Histogram values") + def compare_scatter3d(cls, el1, el2, msg='Scatter3D'): + cls.compare_columns(el1, el2, msg) + @classmethod + def compare_trisurface(cls, el1, el2, msg='Trisurface'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_points(cls, el1, el2, msg=None): + def compare_histogram(cls, el1, el2, msg='Histogram'): cls.compare_dimensioned(el1, el2) - if len(el1) != len(el2): - raise cls.failureException("Points objects have different numbers of points.") - - cls.compare_arrays(el1.data, el2.data, 'Points data') + cls.compare_arrays(el1.edges, el2.edges, ' '.join([msg, 
'edges'])) + cls.compare_arrays(el1.values, el2.values, ' '.join([msg, 'values'])) @classmethod - def compare_vectorfield(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - if len(el1) != len(el2): - raise cls.failureException("VectorField objects have different numbers of vectors.") + def compare_points(cls, el1, el2, msg='Points'): + cls.compare_columns(el1, el2, msg) - cls.compare_arrays(el1.data, el2.data, 'VectorField data') @classmethod - def compare_bars(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_ndmappings(el1, el2, msg) + def compare_vectorfield(cls, el1, el2, msg='VectorField'): + cls.compare_columns(el1, el2, msg) + + @classmethod + def compare_bars(cls, el1, el2, msg='Bars'): + cls.compare_columns(el1, el2, msg) #=========# # Rasters # #=========# @classmethod - def compare_raster(cls, el1, el2, msg=None): + def compare_raster(cls, el1, el2, msg='Raster'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Raster data') + cls.compare_arrays(el1.data, el2.data, msg) @classmethod - def compare_quadmesh(cls, el1, el2, msg=None): + def compare_quadmesh(cls, el1, el2, msg='QuadMesh'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data[0], el2.data[0], 'QuadMesh x-data') - cls.compare_arrays(el1.data[1], el2.data[1], 'QuadMesh y-data') - cls.compare_arrays(el1.data[2], el2.data[2], 'QuadMesh z-data') + cls.compare_arrays(el1.data[0], el2.data[0], ' '.join([msg, 'x-data'])) + cls.compare_arrays(el1.data[1], el2.data[1], ' '.join([msg, 'y-data'])) + cls.compare_arrays(el1.data[2], el2.data[2], ' '.join([msg, 'z-data'])) @classmethod - def compare_heatmap(cls, el1, el2, msg=None): + def compare_heatmap(cls, el1, el2, msg='HeatMap'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'HeatMap data') + cls.compare_arrays(el1.data, el2.data, msg) @classmethod - def compare_image(cls, el1, el2, msg='Image data'): + def compare_image(cls, el1, el2, 
msg='Image'): cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, msg=msg) + cls.compare_arrays(el1.data, el2.data, msg) cls.bounds_check(el1,el2) @classmethod - def compare_rgb(cls, el1, el2, msg='RGB data'): + def compare_rgb(cls, el1, el2, msg='RGB'): cls.compare_dimensioned(el1, el2) cls.compare_arrays(el1.data, el2.data, msg=msg) cls.bounds_check(el1,el2) @classmethod - def compare_hsv(cls, el1, el2, msg='HSV data'): + def compare_hsv(cls, el1, el2, msg='HSV'): cls.compare_dimensioned(el1, el2) cls.compare_arrays(el1.data, el2.data, msg=msg) cls.bounds_check(el1,el2) @classmethod - def compare_surface(cls, el1, el2, msg='Surface data'): + def compare_surface(cls, el1, el2, msg='Surface'): cls.compare_dimensioned(el1, el2) cls.compare_arrays(el1.data, el2.data, msg=msg) @@ -552,27 +559,21 @@ def compare_itemtables(cls, el1, el2, msg=None): @classmethod - def compare_tables(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - if el1.rows != el2.rows: - raise cls.failureException("Tables have different numbers of rows.") - - if el1.cols != el2.cols: - raise cls.failureException("Tables have different numbers of columns.") - - cls.compare_ndmappings(el1, el2, msg) - + def compare_tables(cls, el1, el2, msg='Table'): + cls.compare_columns(el1, el2, msg) #========# # Pandas # #========# @classmethod - def compare_dframe(cls, el1, el2, msg=None): + def compare_dframe(cls, el1, el2, msg='DFrame'): cls.compare_dimensioned(el1, el2) from pandas.util.testing import assert_frame_equal try: - assert_frame_equal(el1.data, el2.data) + df1 = el1.data.reset_index(drop=True) + df2 = el2.data.reset_index(drop=True) + assert_frame_equal(df1, df2) except AssertionError as e: raise cls.failureException(msg+': '+str(e)) @@ -581,24 +582,20 @@ def compare_dframe(cls, el1, el2, msg=None): #=========# @classmethod - def compare_distribution(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 
'Distribution data') + def compare_distribution(cls, el1, el2, msg='Distribution'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_timeseries(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'TimeSeries data') + def compare_timeseries(cls, el1, el2, msg='TimeSeries'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_bivariate(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Bivariate data') + def compare_bivariate(cls, el1, el2, msg='Bivariate'): + cls.compare_columns(el1, el2, msg) @classmethod - def compare_regression(cls, el1, el2, msg=None): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, 'Regression data') + def compare_regression(cls, el1, el2, msg='Regression'): + cls.compare_columns(el1, el2, msg) #=======# # Grids # diff --git a/holoviews/element/path.py b/holoviews/element/path.py index a76c7b3748..2142681f07 100644 --- a/holoviews/element/path.py +++ b/holoviews/element/path.py @@ -52,6 +52,7 @@ def __init__(self, data, **params): def __getitem__(self, key): + if key in self.dimensions(): return self.dimension_values(key) if not isinstance(key, tuple) or len(key) == 1: key = (key, slice(None)) elif len(key) == 0: return self.clone() @@ -65,7 +66,7 @@ def __getitem__(self, key): @classmethod - def collapse_data(cls, data_list, function=None, **kwargs): + def collapse_data(cls, data_list, function=None, kdims=None, **kwargs): if function is None: return [path for paths in data_list for path in paths] else: diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py index d331fb4da8..8dc35cff26 100644 --- a/holoviews/element/raster.py +++ b/holoviews/element/raster.py @@ -1,25 +1,26 @@ +from operator import itemgetter from itertools import product import numpy as np import colorsys import param from ..core import util -from ..core import OrderedDict, Dimension, NdMapping, 
Element2D, Overlay, Element +from ..core import (OrderedDict, Dimension, NdMapping, Element2D, + Overlay, Element, Columns) from ..core.boundingregion import BoundingRegion, BoundingBox from ..core.sheetcoords import SheetCoordinateSystem, Slice from .chart import Curve from .tabular import Table -from .util import compute_edges - +from .util import compute_edges, toarray class Raster(Element2D): """ - Raster is a basic 2D element type for presenting numpy arrays as - two dimensional raster images. + Raster is a basic 2D element type for presenting either numpy or + dask arrays as two dimensional raster images. - Arrays with a shape of (N,M) are valid inputs for Raster wheras - subclasses of Raster (e.g. RGB) may also accept 3D arrays - containing channel information. + Arrays with a shape of (N,M) are valid inputs for Raster wheras + subclasses of Raster (e.g. RGB) may also accept 3D arrays + containing channel information. Raster does not support slicing like the Image or RGB subclasses and the extents are in matrix coordinates if not explicitly @@ -49,13 +50,14 @@ def _zdata(self): def __getitem__(self, slices): + if slices in self.dimensions(): return self.dimension_values(slices) if not isinstance(slices, tuple): slices = (slices, slice(None)) slc_types = [isinstance(sl, slice) for sl in slices] data = self.data.__getitem__(slices[::-1]) if all(slc_types): return self.clone(data, extents=None) elif not any(slc_types): - return data + return toarray(data, index_value=True) else: return self.clone(np.expand_dims(data, axis=slc_types.index(True)), extents=None) @@ -66,7 +68,7 @@ def _coord2matrix(self, coord): @classmethod - def collapse_data(cls, data_list, function, **kwargs): + def collapse_data(cls, data_list, function, kdims=None, **kwargs): if isinstance(function, np.ufunc): return function.reduce(data_list) else: @@ -94,9 +96,8 @@ def sample(self, samples=[], **sample_values): samples = zip(*[c if isinstance(c, list) else [c] for didx, c in 
sorted([(self.get_dimension_index(k), v) for k, v in sample_values.items()])]) - table_data = OrderedDict() - for c in samples: - table_data[c] = self._zdata[self._coord2matrix(c)] + table_data = [c+(self._zdata[self._coord2matrix(c)],) + for c in samples] params['kdims'] = self.kdims return Table(table_data, **params) else: @@ -135,16 +136,15 @@ def reduce(self, dimensions=None, function=None, **reduce_map): """ reduce_map = self._reduce_map(dimensions, function, reduce_map) if len(reduce_map) == self.ndims: - reduced_view = self - for dim, reduce_fn in reduce_map.items(): - reduced_view = reduced_view.reduce(**{dim: reduce_fn}) - return reduced_view + if isinstance(function, np.ufunc): + return function.reduce(self.data, axis=None) + else: + return function(self.data) else: dimension, reduce_fn = list(reduce_map.items())[0] other_dimension = [d for d in self.kdims if d.name != dimension] oidx = self.get_dimension_index(other_dimension[0]) x_vals = self.dimension_values(other_dimension[0].name, unique=True) - if oidx: x_vals = np.sort(x_vals) reduced = reduce_fn(self._zdata, axis=oidx) data = zip(x_vals, reduced if not oidx else reduced[::-1]) params = dict(dict(self.get_param_values(onlychanged=True)), @@ -159,15 +159,15 @@ def dimension_values(self, dim, unique=False): The set of samples available along a particular dimension. 
""" dim_idx = self.get_dimension_index(dim) - if dim_idx in [0, 1]: - shape = self.data.shape[abs(dim_idx)] - dim_max = self.data.shape[abs(dim_idx-1)] - coords = list(range(0, dim_max)) - if not unique: - coords = coords * shape - return coords if dim_idx else sorted(coords) + if unique and dim_idx == 0: + return np.array(range(self.data.shape[1])) + elif unique and dim_idx == 1: + return np.array(range(self.data.shape[0])) + elif dim_idx in [0, 1]: + D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]] + return D1.flatten() if dim_idx == 0 else D2.flatten() elif dim_idx == 2: - return self.data.T.flatten() + return toarray(self.data.T).flatten() else: return super(Raster, self).dimension_values(dim) @@ -254,6 +254,7 @@ def _validate_data(self, data): def __getitem__(self, slices): + if slices in self.dimensions(): return self.dimension_values(key) if not self._grid: raise IndexError("Indexing of non-grid based QuadMesh" "currently not supported") @@ -286,7 +287,7 @@ def __getitem__(self, slices): @classmethod - def collapse_data(cls, data_list, function, **kwargs): + def collapse_data(cls, data_list, function, kdims=None, **kwargs): """ Allows collapsing the data of a number of QuadMesh Elements with a function. 
@@ -358,34 +359,30 @@ def __init__(self, data, extents=None, **params): def _process_data(self, data, params): dimensions = {group: params.get(group, getattr(self, group)) for group in self._dim_groups[:2]} - if isinstance(data, NdMapping): + if isinstance(data, Columns): if 'kdims' not in params: dimensions['kdims'] = data.kdims if 'vdims' not in params: dimensions['vdims'] = data.vdims elif isinstance(data, (dict, OrderedDict, type(None))): - data = NdMapping(data, **dimensions) + data = Columns(data, **dimensions) elif isinstance(data, Element): data = data.table() if not data.ndims == 2: raise TypeError('HeatMap conversion requires 2 key dimensions') else: - raise TypeError('HeatMap only accepts dict or NdMapping types.') + raise TypeError('HeatMap only accepts Columns or dict types.') - keys = list(data.keys()) - dim1_keys = NdMapping([(k[0], None) for k in keys], - kdims=[self.kdims[0]]).keys() - dim2_keys = NdMapping([(k[1], None) for k in keys], - kdims=[self.kdims[1]]).keys() - grid_keys = [((i1, d1), (i2, d2)) for i1, d1 in enumerate(dim1_keys) - for i2, d2 in enumerate(dim2_keys)] + if len(dimensions['vdims']) > 1: + raise ValueError("HeatMap data may only have one value dimension") - array = np.zeros((len(dim2_keys), len(dim1_keys))) - for (i1, d1), (i2, d2) in grid_keys: - val = data.get((d1, d2), np.NaN) - array[len(dim2_keys)-i2-1, i1] = val[0] if isinstance(val, tuple) else val - - return data, array, dimensions + d1keys = data.dimension_values(0, True) + d2keys = data.dimension_values(1, True) + coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys] + dense_data = data.clone(coords) + data = data.concat([data, dense_data]).aggregate(data.kdims, np.nanmean).sort(data.kdims) + array = data.dimension_values(2).reshape(len(d1keys), len(d2keys)) + return data, np.flipud(array.T), dimensions def clone(self, data=None, shared_data=True, *args, **overrides): @@ -398,27 +395,29 @@ def __getitem__(self, coords): """ Slice the underlying NdMapping. 
""" - return self.clone(self._data.select(**dict(zip(self._data._cached_index_names, coords)))) + if coords in self.dimensions(): return self.dimension_values(coords) + return self.clone(self._data.select(**dict(zip(self._data.kdims, coords)))) def dense_keys(self): - keys = list(self._data.keys()) - dim1_keys = NdMapping([(k[0], None) for k in keys], - kdims=[self.kdims[0]]).keys() - dim2_keys = NdMapping([(k[1], None) for k in keys], - kdims=[self.kdims[1]]).keys() - return dim1_keys, dim2_keys + d1keys = np.unique(self._data.dimension_values(0)) + d2keys = np.unique(self._data.dimension_values(1)) + return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys])) - def dimension_values(self, dim, unique=True): + def dimension_values(self, dim, unique=False): dim = self.get_dimension(dim).name - if dim in self._cached_index_names: - idx = self.get_dimension_index(dim) - return [k[idx] for k in self._data.keys()] - elif dim in self._cached_value_names: - idx = self._cached_value_names.index(dim) - return [v[idx] if isinstance(v, tuple) else v - for v in self._data.values()] + if dim in self.kdims: + if unique: + return np.unique(self._data.dimension_values(dim)) + else: + idx = self.get_dimension_index(dim) + return self.dense_keys()[idx] + elif dim in self.vdims: + if unique: + return self._data.dimension_values(dim) + else: + return np.rot90(self.data, 3).flatten() else: return super(HeatMap, self).dimension_values(dim) @@ -484,22 +483,49 @@ def _convert_element(self, data): return super(Image, self)._convert_element(data) - def closest(self, coords): + def closest(self, coords=[], **kwargs): """ - Given a single coordinate tuple (or list of coordinates) - return the coordinate (or coordinatess) needed to address the - corresponding Image exactly. + Given a single coordinate or multiple coordinates as + a tuple or list of tuples or keyword arguments matching + the dimension closest will find the closest actual x/y + coordinates. 
""" + if kwargs and coords: + raise ValueError("Specify coordinate using as either a list " + "keyword arguments not both") + if kwargs: + coords = [] + getter = [] + for k, v in kwargs.items(): + idx = self.get_dimension_index(k) + if np.isscalar(v): + coords.append((0, v) if idx else (v, 0)) + else: + if isinstance(coords, tuple): + coords = [(0, c) if idx else (c, 0) for c in v] + if len(coords) not in [0, len(v)]: + raise ValueError("Length of samples must match") + elif len(coords): + coords = [(t[abs(idx-1)], c) if idx else (c, t[abs(idx-1)]) + for c, t in zip(v, coords)] + getter.append(idx) + else: + getter = [0, 1] + getter = itemgetter(*sorted(getter)) + coords = list(coords) + if len(coords) == 1: + coords = coords[0] if isinstance(coords, tuple): - return self.closest_cell_center(*coords) + return getter(self.closest_cell_center(*coords)) else: - return [self.closest_cell_center(*el) for el in coords] + return [getter(self.closest_cell_center(*el)) for el in coords] def __getitem__(self, coords): """ Slice the underlying numpy array in sheet coordinates. 
""" + if coords in self.dimensions(): return self.dimension_values(coords) if coords is () or coords == slice(None, None): return self @@ -528,15 +554,19 @@ def range(self, dim, data_range=True): elif dim_idx in [0, 1]: l, b, r, t = self.bounds.lbrt() if dim_idx: - data_range = (b, t) + drange = (b, t) else: - data_range = (l, r) + drange = (l, r) elif dim_idx < len(self.vdims) + 2: dim_idx -= 2 data = np.atleast_3d(self.data)[:, :, dim_idx] - data_range = (np.nanmin(data), np.nanmax(data)) + drange = (np.nanmin(data), np.nanmax(data)) if data_range: - return util.max_range([data_range, dim.soft_range]) + soft_range = [r for r in dim.soft_range if r is not None] + if soft_range: + return util.max_range([drange, soft_range]) + else: + return drange else: return dim.soft_range @@ -552,7 +582,7 @@ def dimension_values(self, dim, unique=False): dim_idx = self.get_dimension_index(dim) if dim_idx in [0, 1]: l, b, r, t = self.bounds.lbrt() - dim2, dim1 = self.data.shape + dim2, dim1 = self.data.shape[:2] d1_half_unit = (r - l)/dim1/2. d2_half_unit = (t - b)/dim2/2. d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1) @@ -690,6 +720,7 @@ def __getitem__(self, coords): """ Slice the underlying numpy array in sheet coordinates. 
""" + if coords in self.dimensions(): return self.dimension_values(coords) if not isinstance(coords, slice) and len(coords) > self.ndims: value = coords[self.ndims:] if len(value) > 1: diff --git a/holoviews/element/tabular.py b/holoviews/element/tabular.py index 2d8e533013..191541cc51 100644 --- a/holoviews/element/tabular.py +++ b/holoviews/element/tabular.py @@ -2,7 +2,8 @@ import param -from ..core import OrderedDict, Dimension, Element, NdElement, HoloMap +from ..core import (OrderedDict, Dimension, Element, Columns, + Tabular, NdElement, HoloMap) class ItemTable(Element): @@ -59,9 +60,9 @@ def __getitem__(self, heading): """ if heading is (): return self - if heading not in self._cached_value_names: + if heading not in self.vdims: raise IndexError("%r not in available headings." % heading) - return self.data.get(heading, np.NaN) + return np.array(self.data.get(heading, np.NaN)) @classmethod @@ -73,7 +74,7 @@ def collapse_data(cls, data, function, **kwargs): def dimension_values(self, dimension): dimension = self.get_dimension(dimension).name if dimension in self.dimensions('value', label=True): - return [self.data.get(dimension, np.NaN)] + return np.array([self.data.get(dimension, np.NaN)]) else: return super(ItemTable, self).dimension_values(dimension) @@ -104,8 +105,8 @@ def pprint_cell(self, row, col): return str(self.dimensions('value')[row]) else: dim = self.get_dimension(row) - heading = self._cached_value_names[row] - return dim.pprint_value(self.data.get(heading, np.NaN)) + heading = self.vdims[row] + return dim.pprint_value(self.data.get(heading.name, np.NaN)) def hist(self, *args, **kwargs): @@ -136,25 +137,17 @@ def table(self): vdims=self.vdims) def values(self): - return tuple(self.data.get(k, np.NaN) - for k in self._cached_value_names) + return tuple(self.data.get(d.name, np.NaN) + for d in self.vdims) -class Table(NdElement): +class Table(Columns, Tabular): """ Table is an NdElement type, which gets displayed in a tabular format and is 
convertible to most other Element types. """ - kdims = param.List(default=[Dimension(name="Row")], doc=""" - One or more key dimensions. By default, the special 'Row' - dimension ensures that the table is always indexed by the row - number. - - If no key dimensions are set, only one entry can be stored - using the empty key ().""") - group = param.String(default='Table', constant=True, doc=""" The group is used to describe the Table.""") @@ -174,13 +167,6 @@ def _add_item(self, key, value, sort=True): value = value.data.values() super(Table, self)._add_item(key, value, sort) - @property - def indexed(self): - """ - Whether this is an indexed table: a table that has a single - key dimension called 'Row' corresponds to the row number. - """ - return self.ndims == 1 and self.kdims[0].name == 'Row' @property def to(self): @@ -190,11 +176,6 @@ def to(self): """ return TableConversion(self) - def dframe(self, value_label='data'): - dframe = super(Table, self).dframe(value_label=value_label) - # Drop 'Row' column as it is redundant with dframe index - if self.indexed: del dframe['Row'] - return dframe @@ -208,70 +189,92 @@ class TableConversion(object): def __init__(self, table): self._table = table - def _conversion(self, kdims=None, vdims=None, new_type=None, **kwargs): + def _conversion(self, kdims=None, vdims=None, mdims=None, new_type=None, sort=False, **kwargs): if kdims is None: - kdims = self._table._cached_index_names + kdims = self._table.kdims elif kdims and not isinstance(kdims, list): kdims = [kdims] if vdims is None: - vdims = self._table._cached_value_names - elif vdims and not isinstance(vdims, list): vdims = [vdims] - kdims = [kdim.name if isinstance(kdim, Dimension) else kdim for kdim in kdims] - vdims = [vdim.name if isinstance(vdim, Dimension) else vdim for vdim in vdims] - if (any(kd in self._table._cached_value_names for kd in kdims) or - any(vd in self._table._cached_index_names for vd in vdims)): - new_kdims = [kd for kd in 
self._table._cached_index_names - if kd not in kdims and kd not in vdims] + kdims - selected = self._table.reindex(new_kdims, vdims) - else: - selected = self._table.select(**{'value': vdims}) - all_dims = selected.dimensions(label=True) - invalid = [dim for dim in kdims+vdims if dim not in all_dims] - if invalid: - raise Exception("Dimensions %r could not be found during conversion to %s new_type" % - (invalid, new_type.__name__)) - group_dims = [dim for dim in selected._cached_index_names if not dim in kdims+vdims] - - params = dict({'kdims': [selected.get_dimension(kd) for kd in kdims], - 'vdims': [selected.get_dimension(vd) for vd in vdims]}, - **kwargs) + vdims = self._table.vdims + if mdims is None: + mdims = [d for d in self._table.kdims if d not in kdims] + if vdims and not isinstance(vdims, list): vdims = [vdims] + + selected = self._table.reindex(mdims+kdims, vdims) + params = {'kdims': [selected.get_dimension(kd) for kd in kdims], + 'vdims': [selected.get_dimension(vd) for vd in vdims], + 'label': selected.label} + if selected.group != selected.params()['group'].default: + params['group'] = selected.group + params.update(kwargs) if len(kdims) == selected.ndims: - return new_type(selected, **params) - return selected.groupby(group_dims, container_type=HoloMap, group_type=new_type, **params) + element = new_type(selected, **params) + return element.sort() if sort else element + group = selected.groupby(mdims, container_type=HoloMap, group_type=new_type, **params) + if sort: + return group.map(lambda x: x.sort(), [new_type]) + else: + group - def bars(self, kdims=None, vdims=None, **kwargs): + def bars(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Bars - return self._conversion(kdims, vdims, Bars, **kwargs) + return self._conversion(kdims, vdims, mdims, Bars, **kwargs) + + def bivariate(self, kdims=None, vdims=None, mdims=None, **kwargs): + from ..interface.seaborn import Bivariate + return self._convert(kdims, vdims, mdims, 
Bivariate, **kwargs) - def curve(self, kdims=None, vdims=None, **kwargs): + def curve(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Curve - return self._conversion(kdims, vdims, Curve, **kwargs) + return self._conversion(kdims, vdims, mdims, Curve, sort=True, **kwargs) - def heatmap(self, kdims=None, vdims=None, **kwargs): + def distribution(self, dim, mdims=[], **kwargs): + from ..interface.seaborn import Distribution + if mdims: + reindexed = self._table.reindex(mdims+[dim]) + return reindexed.groupby(mdims, HoloMap, Distribution, **kwargs) + else: + table = self._table + params = dict(kdims=[table.get_dimension(dim)], + label=table.label) + if table.group != table.params()['group'].default: + params['group'] = table.group + return Distribution((table.dimension_values(dim),), + **dict(params, **kwargs)) + + def heatmap(self, kdims=None, vdims=None, mdims=None, **kwargs): from .raster import HeatMap - return self._conversion(kdims, vdims, HeatMap, **kwargs) + return self._conversion(kdims, vdims, mdims, HeatMap, **kwargs) - def points(self, kdims=None, vdims=None, **kwargs): + def points(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Points - return self._conversion(kdims, vdims, Points, **kwargs) + return self._conversion(kdims, vdims, mdims, Points, **kwargs) - def scatter(self, kdims=None, vdims=None, **kwargs): + def scatter(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import Scatter - return self._conversion(kdims, vdims, Scatter, **kwargs) + return self._conversion(kdims, vdims, mdims, Scatter, **kwargs) - def scatter3d(self, kdims=None, vdims=None, **kwargs): + def scatter3d(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Scatter3D - return self._conversion(kdims, vdims, Scatter3D, **kwargs) + return self._conversion(kdims, vdims, mdims, Scatter3D, **kwargs) - def raster(self, kdims=None, vdims=None, **kwargs): + def trisurface(self, kdims=None, vdims=None, 
mdims=None, **kwargs): + from .chart3d import Trisurface + return self._conversion(kdims, vdims, mdims, Trisurface, **kwargs) + + def raster(self, kdims=None, vdims=None, mdims=None, **kwargs): from .raster import Raster heatmap = self.heatmap(kdims, vdims, **kwargs) return Raster(heatmap.data, **dict(self._table.get_param_values(onlychanged=True))) - def surface(self, kdims=None, vdims=None, **kwargs): + def regression(self, kdims=None, vdims=None, mdims=None, **kwargs): + return self._convert(kdims, vdims, mdims, Regression, **kwargs) + + def surface(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart3d import Surface heatmap = self.heatmap(kdims, vdims, **kwargs) return Surface(heatmap.data, **dict(self._table.get_param_values(onlychanged=True))) - def vectorfield(self, kdims=None, vdims=None, **kwargs): + def vectorfield(self, kdims=None, vdims=None, mdims=None, **kwargs): from .chart import VectorField return self._conversion(kdims, vdims, VectorField, **kwargs) + diff --git a/holoviews/element/util.py b/holoviews/element/util.py index 65e2529531..7d6d828c99 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -1,5 +1,22 @@ import numpy as np +try: + import dask +except: + dask = None + +def toarray(v, index_value=False): + """ + Interface helper function to turn dask Arrays into numpy arrays as + necessary. If index_value is True, a value is returned instead of + an array holding a single value. 
+ """ + if dask and isinstance(v, dask.array.Array): + arr = v.compute() + return arr[()] if index_value else arr + else: + return v + def compute_edges(edges): """ Computes edges from a number of bin centers, diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py index 0156ebf2d5..3439f75e13 100644 --- a/holoviews/interface/pandas.py +++ b/holoviews/interface/pandas.py @@ -18,12 +18,14 @@ import param -from ..core import ViewableElement, NdMapping, NdOverlay,\ - NdLayout, GridSpace, Element, HoloMap -from ..element import Chart, Table, Curve, Scatter, Bars, Points, VectorField, HeatMap, Scatter3D, Surface +from ..core import ViewableElement, NdMapping, Columns, NdOverlay,\ + NdLayout, GridSpace, NdElement, HoloMap +from ..core.data import DFColumns +from ..element import (Chart, Table, Curve, Scatter, Bars, Points, + VectorField, HeatMap, Scatter3D, Surface) -class DataFrameView(Element): +class DataFrameView(Columns): """ DataFrameView provides a convenient compatibility wrapper around Pandas DataFrames. It provides several core functions: @@ -84,43 +86,24 @@ def __init__(self, data, dimensions={}, kdims=None, clone_override=False, dims[list(data.columns).index(name)] = dim ViewableElement.__init__(self, data, kdims=dims, **params) - self.data.columns = self._cached_index_names + self.interface = DFColumns + self.data.columns = self.dimensions('key', True) - def __getitem__(self, key): - """ - Allows slicing and selecting along the DataFrameView dimensions. - """ - if key is (): - return self - else: - if len(key) <= self.ndims: - return self.select(**dict(zip(self._cached_index_names, key))) - else: - raise Exception('Selection contains %d dimensions, DataFrameView ' - 'only has %d index dimensions.' % (self.ndims, len(key))) - - - def select(self, selection_specs=None, **select): - """ - Allows slice and select individual values along the DataFrameView - dimensions. Supply the dimensions and values or slices as - keyword arguments. 
- """ - df = self.data - for dim, k in select.items(): - if isinstance(k, slice): - df = df[(k.start < df[dim]) & (df[dim] < k.stop)] - else: - df = df[df[dim] == k] - return self.clone(df) - + def groupby(self, dimensions, container_type=NdMapping): + invalid_dims = [d for d in dimensions if d not in self.dimensions()] + if invalid_dims: + raise Exception('Following dimensions could not be found %s.' + % invalid_dims) - def dimension_values(self, dim): - if dim in self.data.columns: - return np.array(self.data[dim]) - else: - return super(DataFrameView, self).dimension_values(dim) + index_dims = [self.get_dimension(d) for d in dimensions] + view_dims = [d for d in self.kdims if d not in dimensions] + mapping_data = [] + for k, v in self.data.groupby([self.get_dimension(d).name for d in dimensions]): + data = v.drop(dimensions, axis=1) + mapping_data.append((k, self.clone(data, kdims=[self.get_dimension(d) + for d in data.columns]))) + return container_type(mapping_data, kdims=index_dims) def apply(self, name, *args, **kwargs): @@ -131,60 +114,6 @@ def apply(self, name, *args, **kwargs): return self.clone(getattr(self.data, name)(*args, **kwargs), clone_override=True) - - def dframe(self): - """ - Returns a copy of the internal dframe. - """ - return self.data.copy() - - - def aggregate(self, dimensions=[], function=None, **reductions): - """ - The aggregate function accepts either a list of Dimensions - and a function to apply to find the aggregate across - those Dimensions or a list of dimension/function pairs - to apply one by one. 
- """ - if not dimensions and not reductions: - raise Exception("Supply either a list of Dimensions or" - "reductions as keyword arguments") - reduced = self.data - dfnumeric = reduced.applymap(np.isreal).all(axis=0) - unreducable = list(dfnumeric[dfnumeric == False].index) - if dimensions: - if not function: - raise Exception("Supply a function to reduce the Dimensions with") - reduced = reduced.groupby(dimensions+unreducable, as_index=True).aggregate(function) - reduced_indexes = [reduced.index.names.index(d) for d in unreducable if d not in dimensions] - reduced = reduced.reset_index(level=reduced_indexes) - if reductions: - for dim, fn in reductions.items(): - reduced = reduced.groupby(dim, as_index=True).aggregate(fn) - reduced_indexes = [reduced.index.names.index(d) for d in unreducable] - reduced = reduced.reset_index(level=reduced_indexes) - kdims = [self.get_dimension(d) for d in reduced.columns] - return self.clone(reduced, kdims=kdims) - - - def groupby(self, dimensions, container_type=NdMapping): - invalid_dims = list(set(dimensions) - set(self._cached_index_names)) - if invalid_dims: - raise Exception('Following dimensions could not be found %s.' - % invalid_dims) - - index_dims = [self.get_dimension(d) for d in dimensions] - mapping = container_type(None, kdims=index_dims) - view_dims = set(self._cached_index_names) - set(dimensions) - view_dims = [self.get_dimension(d) for d in view_dims] - for k, v in self.data.groupby(dimensions): - data = v.drop(dimensions, axis=1) - mapping[k] = self.clone(data, - kdims=[self.get_dimension(d) - for d in data.columns]) - return mapping - - def overlay(self, dimensions): return self.groupby(dimensions, NdOverlay) @@ -245,6 +174,12 @@ def _convert(self, kdims=[], vdims=[], mdims=[], reduce_fn=None, supplied the data is aggregated for each group along the key_dimensions. Also supports a dropna option. 
""" + + # Deprecation warning + self.warning("The DFrame conversion interface is deprecated " + "and has been superseded by a real integration " + "with pandas.") + if not isinstance(kdims, list): kdims = [kdims] if not isinstance(vdims, list): vdims = [vdims] @@ -270,7 +205,7 @@ def _convert(self, kdims=[], vdims=[], mdims=[], reduce_fn=None, groups = NdMapping({0: self}) mdims = ['Default'] create_kwargs = dict(kdims=key_dims, vdims=val_dims, - view_type=view_type) + view_type=view_type) create_kwargs.update(kwargs) # Convert each element in the HoloMap diff --git a/holoviews/interface/seaborn.py b/holoviews/interface/seaborn.py index a3675b4d53..02bef64cdc 100644 --- a/holoviews/interface/seaborn.py +++ b/holoviews/interface/seaborn.py @@ -67,10 +67,6 @@ def reduce(self, dimensions=[], function=None, **reduce_map): raise NotImplementedError('Reduction of TimeSeries not ' 'implemented.') - @property - def ylabel(self): - return str(self.vdims[0]) - class Bivariate(Chart): @@ -103,29 +99,25 @@ class Distribution(Chart): vdims = param.List(default=[Dimension('Frequency')]) - def _validate_data(self, data): - data = np.expand_dims(data, 1) if data.ndim == 1 else data - if not data.shape[1] == 1: - raise ValueError("Distribution only support single dimensional arrays.") - return data - + def __init__(self, data, **params): + super(Distribution, self).__init__(data, **params) + self.data = self.interface.reindex(self, [0], []) def range(self, dimension): dim_idx = self.get_dimension_index(dimension) - dim = self.get_dimension(dim_idx) - if dim.range != (None, None): - return dim.range - elif dim_idx == 0: - return (np.nanmin(self.data), np.nanmax(self.data)) - elif dim_idx == 1: - return (None, None) + if dim_idx == 1: + dim = self.get_dimension(dim_idx) + if dim.range != (None, None): + return dim.range + else: + return (None, None) else: return super(Distribution, self).dimension_values(dimension) def dimension_values(self, dimension): dim_idx = 
self.get_dimension_index(dimension) if dim_idx == 0: - return self.data + return self.interface.values(self, 0) elif dim_idx == 1: return [] else: @@ -202,20 +194,6 @@ def timeseries(self, kdims, vdims, mdims=[], reduce_fn=None, **kwargs): kdims=[self.get_dimension(dim) for dim in kdims], **kwargs) - @property - def ylabel(self): - return self.x2 if self.x2 else self.y - - @property - def ylim(self): - if self._ylim: - return self._ylim - elif self.x2 or self.y: - ydata = self.data[self.x2 if self.x2 else self.y] - return min(ydata), max(ydata) - else: - return None - __all__ = ['DFrame', 'Bivariate', 'Distribution', 'TimeSeries', 'Regression'] diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index b80d03b51f..c10a0a783d 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -6,10 +6,12 @@ import numpy as np import param +from param import _is_number -from ..core import ElementOperation, NdOverlay, Overlay +from ..core import (ElementOperation, NdOverlay, Overlay, GridMatrix, + HoloMap, Columns, Element) from ..core.util import find_minmax, sanitize_identifier -from ..element.chart import Histogram, Curve +from ..element.chart import Histogram, Curve, Scatter from ..element.raster import Raster, Image, RGB, QuadMesh from ..element.path import Contours, Polygons @@ -573,3 +575,77 @@ def _process(self, overlay, key=None): return Curve(np.array(data), group=self.p.group, label=self.get_overlay_label(overlay)) + + + +class gridmatrix(param.ParameterizedFunction): + """ + The gridmatrix operation takes an Element or HoloMap + of Elements as input and creates a GridMatrix object, + which plots each dimension in the Element against + each other dimension. This provides a very useful + overview of high-dimensional data and is inspired + by pandas and seaborn scatter_matrix implementations. 
+ """ + + chart_type = param.Parameter(default=Scatter, doc=""" + The Element type used to display bivariate distributions + of the data.""") + + diagonal_type = param.Parameter(default=Histogram, doc=""" + The Element type along the diagonal, may be a Histogram or any + other plot type which can visualize a univariate distribution.""") + + overlay_dims = param.List(default=[], doc=""" + If a HoloMap is supplied this will allow overlaying one or + more of it's key dimensions.""") + + def __call__(self, data, **params): + p = param.ParamOverrides(self, params) + + if isinstance(data, HoloMap): + ranges = {d.name: data.range(d) for d in data.dimensions()} + data = data.clone({k: GridMatrix(self._process(p, v, ranges)) + for k, v in data.items()}).collate() + if p.overlay_dims: + data = data.map(lambda x: x.overlay(p.overlay_dims), (HoloMap,)) + return data + elif isinstance(data, Element): + data = self._process(p, data) + return GridMatrix(data) + + + def _process(self, p, element, ranges={}): + # Creates a unified Columns.data attribute + # to draw the data from + if isinstance(element.data, np.ndarray): + if 'dataframe' in Columns.datatype: + el_data = element.table('dataframe') + else: + el_data = element.table('dictionary') + el_data = element.data + + # Get dimensions to plot against each other + dims = [d for d in element.dimensions() + if _is_number(element.range(d)[0])] + permuted_dims = [(d1, d2) for d1 in dims + for d2 in dims[::-1]] + + data = {} + for d1, d2 in permuted_dims: + key = (d1.name, d2.name) + if d1 == d2: + if p.diagonal_type is Histogram: + bin_range = ranges.get(d1.name, element.range(d1)) + el = element.hist(dimension=d1.name, + bin_range=bin_range, + adjoin=False) + else: + values = element.dimension_values(d1) + el = p.diagonal_type(values, kdims=[d1]) + else: + el = p.chart_type(el_data, kdims=[d1], + vdims=[d2]) + data[(d1.name, d2.name)] = el + return data + diff --git a/holoviews/plotting/bokeh/__init__.py 
b/holoviews/plotting/bokeh/__init__.py index 8350944638..1eff95d596 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -1,13 +1,14 @@ -from ...core import Store, Overlay, NdOverlay, Layout, AdjointLayout, GridSpace +from ...core import (Store, Overlay, NdOverlay, Layout, AdjointLayout, + GridSpace, NdElement, Columns, GridMatrix) from ...element import (Curve, Points, Scatter, Image, Raster, Path, RGB, Histogram, Spread, HeatMap, Contours, Path, Box, Bounds, Ellipse, Polygons, ErrorBars, Text, HLine, VLine, Spline, - Table, ItemTable, Surface, Scatter3D) + Table, ItemTable, Surface, Scatter3D, Trisurface) from ...core.options import Options, Cycle, OptionTree from ...interface import DFrame from ..plot import PlotSelector -from ..mpl import SurfacePlot, Scatter3DPlot +from ..mpl import SurfacePlot, Scatter3DPlot, TrisurfacePlot from .annotation import TextPlot, LineAnnotationPlot, SplinePlot from .element import OverlayPlot, BokehMPLWrapper, BokehMPLRawWrapper @@ -23,20 +24,26 @@ Store.register({Overlay: OverlayPlot, NdOverlay: OverlayPlot, + GridSpace: GridPlot, + GridMatrix: GridPlot, + AdjointLayout: AdjointLayoutPlot, + Layout: LayoutPlot, + + # Charts Curve: CurvePlot, Points: PointPlot, Scatter: PointPlot, + ErrorBars: ErrorPlot, Spread: SpreadPlot, - HLine: LineAnnotationPlot, - VLine: LineAnnotationPlot, - GridSpace: GridPlot, + + # Rasters Image: RasterPlot, RGB: RGBPlot, Raster: RasterPlot, HeatMap: HeatmapPlot, Histogram: HistogramPlot, - AdjointLayout: AdjointLayoutPlot, - Layout: LayoutPlot, + + # Paths Path: PathPlot, Contours: PathPlot, Path: PathPlot, @@ -44,18 +51,30 @@ Bounds: PathPlot, Ellipse: PathPlot, Polygons: PolygonPlot, - ErrorBars: ErrorPlot, + + # Annotations + HLine: LineAnnotationPlot, + VLine: LineAnnotationPlot, Text: TextPlot, Spline: SplinePlot, + + # Tabular Table: TablePlot, ItemTable: TablePlot, DFrame: TablePlot, + NdElement: TablePlot, + Columns: TablePlot, + + # Wrapped mpl 3d 
plots Surface: PlotSelector(lambda x: 'bokeh', [('mpl', SurfacePlot), ('bokeh', BokehMPLRawWrapper)], True), Scatter3D: PlotSelector(lambda x: 'bokeh', [('mpl', Scatter3DPlot), - ('bokeh', BokehMPLRawWrapper)], True)}, + ('bokeh', BokehMPLRawWrapper)], True), + Trisurface: PlotSelector(lambda x: 'bokeh', + [('mpl', TrisurfacePlot), + ('bokeh', BokehMPLRawWrapper)], True)}, 'bokeh') diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index 9ac1b7e8fb..9861fcb711 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -1,5 +1,5 @@ import numpy as np - +from bokeh.models import Circle import param from ...core import Dimension @@ -8,7 +8,7 @@ from ..util import compute_sizes, get_sideplot_ranges from .element import ElementPlot, line_properties, fill_properties from .path import PathPlot, PolygonPlot -from .util import map_colors, get_cmap +from .util import map_colors, get_cmap, mpl_to_bokeh class PointPlot(ElementPlot): @@ -34,7 +34,8 @@ class PointPlot(ElementPlot): Function applied to size values before applying scaling, to remove values lower than zero.""") - style_opts = (['cmap', 'palette', 'marker', 'size', 's', 'alpha', 'color'] + + style_opts = (['cmap', 'palette', 'marker', 'size', 's', 'alpha', 'color', + 'unselected_color'] + line_properties + fill_properties) _plot_method = 'scatter' @@ -49,25 +50,47 @@ def get_data(self, element, ranges=None): cmap = style.get('palette', style.get('cmap', None)) if self.color_index < len(dims) and cmap: - mapping['color'] = 'color' + map_key = 'color_' + dims[self.color_index] + mapping['color'] = map_key cmap = get_cmap(cmap) - colors = element.data[:, self.color_index] + colors = element.dimension_values(self.color_index) crange = ranges.get(dims[self.color_index], None) - data['color'] = map_colors(colors, crange, cmap) + data[map_key] = map_colors(colors, crange, cmap) if self.size_index < len(dims): - mapping['size'] = 'size' + map_key = 'size_' + 
dims[self.size_index] + mapping['size'] = map_key ms = style.get('size', 1) - sizes = element.data[:, self.size_index] - data['size'] = compute_sizes(sizes, self.size_fn, - self.scaling_factor, ms) - data[dims[0]] = element.data[:, 0] - data[dims[1]] = element.data[:, 1] + sizes = element.dimension_values(self.size_index) + data[map_key] = compute_sizes(sizes, self.size_fn, + self.scaling_factor, ms) + data[dims[0]] = element.dimension_values(0) + data[dims[1]] = element.dimension_values(1) if 'hover' in self.tools: for d in dims[2:]: data[d] = element.dimension_values(d) return data, mapping + def _init_glyph(self, plot, mapping, properties): + """ + Returns a Bokeh glyph object. + """ + properties = mpl_to_bokeh(properties) + unselect_color = properties.pop('unselected_color', None) + if (any(t in self.tools for t in ['box_select', 'lasso_select']) + and unselect_color is not None): + source = properties.pop('source') + color = properties.pop('color', None) + color = mapping.pop('color', color) + properties.pop('legend', None) + unselected = Circle(**dict(properties, fill_color=unselect_color, **mapping)) + selected = Circle(**dict(properties, fill_color=color, **mapping)) + plot.add_glyph(source, selected, selection_glyph=selected, + nonselection_glyph=unselected) + else: + getattr(plot, self._plot_method)(**dict(properties, **mapping)) + + class CurvePlot(ElementPlot): @@ -75,8 +98,11 @@ class CurvePlot(ElementPlot): _plot_method = 'line' def get_data(self, element, ranges=None): - return (dict(x=element.data[:, 0], y=element.data[:, 1]), - dict(x='x', y='y')) + x = element.get_dimension(0).name + y = element.get_dimension(1).name + return ({x: element.dimension_values(0), + y: element.dimension_values(1)}, + dict(x=x, y=y)) class SpreadPlot(PolygonPlot): diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index 9f7a04949b..1ec992e278 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ 
-18,7 +18,7 @@ from ...element import RGB from ..plot import GenericElementPlot, GenericOverlayPlot from .plot import BokehPlot -from .util import mpl_to_bokeh +from .util import mpl_to_bokeh, convert_datetime # Define shared style properties for bokeh plots @@ -170,13 +170,25 @@ def _axes_props(self, plots, subplots, element, ranges): if plot.yaxis[0].axis_label == xlabel: plot_ranges['x_range'] = plot.y_range + if element.get_dimension_type(0) is np.datetime64: + x_axis_type = 'datetime' + else: + x_axis_type = 'log' if self.logx else 'auto' + if element.get_dimension_type(1) is np.datetime64: + y_axis_type = 'datetime' + else: + y_axis_type = 'log' if self.logy else 'auto' + if not 'x_range' in plot_ranges: if 'x_range' in ranges: plot_ranges['x_range'] = ranges['x_range'] else: l, b, r, t = self.get_extents(element, ranges) low, high = (b, t) if self.invert_axes else (l, r) - if low == high: + if x_axis_type == 'datetime': + low = convert_datetime(low) + high = convert_datetime(high) + elif low == high and low is not None: offset = low*0.1 if low else 0.5 low -= offset high += offset @@ -192,7 +204,10 @@ def _axes_props(self, plots, subplots, element, ranges): else: l, b, r, t = self.get_extents(element, ranges) low, high = (l, r) if self.invert_axes else (b, t) - if low == high: + if y_axis_type == 'datetime': + low = convert_datetime(low) + high = convert_datetime(high) + elif low == high and low is not None: offset = low*0.1 if low else 0.5 low -= offset high += offset @@ -205,8 +220,6 @@ def _axes_props(self, plots, subplots, element, ranges): end=yrange.start) else: plot_ranges['y_range'] = yrange[::-1] - x_axis_type = 'log' if self.logx else 'auto' - y_axis_type = 'log' if self.logy else 'auto' return (x_axis_type, y_axis_type), (xlabel, ylabel, zlabel), plot_ranges @@ -379,6 +392,8 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): # Get element key and ranges for frame element = self.hmap.last key = self.keys[-1] + 
self.current_frame = element + self.current_key = key ranges = self.compute_ranges(self.hmap, key, ranges) ranges = util.match_spec(element, ranges) @@ -396,6 +411,7 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): properties = self._glyph_properties(plot, element, source, ranges) self._init_glyph(plot, mapping, properties) glyph = plot.renderers[-1].glyph + self.handles['glyph_renderer'] = plot.renderers[-1] self.handles['glyph'] = glyph # Update plot, source and glyph diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index c6d8b43af9..57477a5929 100644 --- a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -1,21 +1,23 @@ +from collections import defaultdict +from itertools import groupby import numpy as np import param from bokeh.io import gridplot, vplot, hplot from bokeh.models import ColumnDataSource -from bokeh.models.widgets import Panel, Tabs +from bokeh.models.widgets import Panel, Tabs, DataTable from ...core import OrderedDict, CompositeOverlay, Element from ...core import Store, Layout, AdjointLayout, NdLayout, Empty, GridSpace, HoloMap from ...core.options import Compositor from ...core import traversal from ...core.util import basestring -from ..plot import Plot, GenericCompositePlot, GenericLayoutPlot +from ..plot import Plot, DimensionedPlot, GenericCompositePlot, GenericLayoutPlot from .renderer import BokehRenderer from .util import layout_padding -class BokehPlot(Plot): +class BokehPlot(DimensionedPlot): """ Plotting baseclass for the Bokeh backends, implementing the basic plotting interface for Bokeh based plots. @@ -81,6 +83,33 @@ def _fontsize(self, key, label='fontsize', common=True): for k, v in size.items()} + def sync_sources(self): + """ + Syncs data sources between Elements, which draw data + from the same object. 
+ """ + get_sources = lambda x: (id(x.current_frame.data), x) + filter_fn = lambda x: (x.current_frame and + not isinstance(x.current_frame.data, np.ndarray) + and 'source' in x.handles) + data_sources = self.traverse(get_sources, [filter_fn]) + grouped_sources = groupby(sorted(data_sources), lambda x: x[0]) + for gid, group in grouped_sources: + group = list(group) + if len(group) > 1: + source_data = {} + for _, plot in group: + source_data.update(plot.handles['source'].data) + new_source = ColumnDataSource(source_data) + for _, plot in group: + renderer = plot.handles['glyph_renderer'] + if 'data_source' in renderer.properties(): + renderer.update(data_source=new_source) + else: + renderer.update(source=new_source) + plot.handles['source'] = new_source + + class GridPlot(BokehPlot, GenericCompositePlot): """ @@ -88,6 +117,11 @@ class GridPlot(BokehPlot, GenericCompositePlot): object. """ + shared_datasource = param.Boolean(default=True, doc=""" + Whether Elements drawing the data from the same object should + share their Bokeh data source allowing for linked brushing + and other linked behaviors.""") + def __init__(self, layout, ranges=None, keys=None, dimensions=None, layout_num=1, **params): if not isinstance(layout, GridSpace): @@ -131,17 +165,17 @@ def _create_subplots(self, layout, ranges): # Create axes kwargs = {} if c == 0 and r != 0: - kwargs['xaxis'] = 'left-bare' + kwargs['xaxis'] = 'bottom-bare' kwargs['width'] = 175 if c != 0 and r == 0 and not layout.ndims == 1: - kwargs['yaxis'] = 'bottom-bare' + kwargs['yaxis'] = 'left-bare' kwargs['height'] = 175 if c == 0 and r == 0: kwargs['width'] = 175 kwargs['height'] = 175 if r != 0 and c != 0: - kwargs['xaxis'] = 'left-bare' - kwargs['yaxis'] = 'bottom-bare' + kwargs['xaxis'] = 'bottom-bare' + kwargs['yaxis'] = 'left-bare' if 'width' not in kwargs: kwargs['width'] = 125 @@ -182,6 +216,8 @@ def initialize_plot(self, ranges=None, plots=[]): passed_plots.append(None) self.handles['plot'] = 
gridplot(plots[::-1]) self.handles['plots'] = plots + if self.shared_datasource: + self.sync_sources() self.drawn = True return self.handles['plot'] @@ -209,6 +245,11 @@ class LayoutPlot(BokehPlot, GenericLayoutPlot): shared_axes = param.Boolean(default=True, doc=""" Whether axes should be shared across plots""") + shared_datasource = param.Boolean(default=True, doc=""" + Whether Elements drawing the data from the same object should + share their Bokeh data source allowing for linked brushing + and other linked behaviors.""") + tabs = param.Boolean(default=False, doc=""" Whether to display overlaid plots in separate panes""") @@ -422,6 +463,9 @@ def initialize_plot(self, ranges=None): self.handles['plot'] = layout_plot self.handles['plots'] = plots + if self.shared_datasource: + self.sync_sources() + self.drawn = True return self.handles['plot'] diff --git a/holoviews/plotting/bokeh/raster.py b/holoviews/plotting/bokeh/raster.py index 2ef2950cb8..b96666d198 100644 --- a/holoviews/plotting/bokeh/raster.py +++ b/holoviews/plotting/bokeh/raster.py @@ -51,9 +51,10 @@ def _glyph_properties(self, plot, element, source, ranges): def _update_glyph(self, glyph, properties, mapping): allowed_properties = glyph.properties() - cmap = properties.pop('color_mapper') - glyph.color_mapper.low = cmap.low - glyph.color_mapper.high = cmap.high + cmap = properties.pop('color_mapper', None) + if cmap: + glyph.color_mapper.low = cmap.low + glyph.color_mapper.high = cmap.high merged = dict(properties, **mapping) glyph.set(**{k: v for k, v in merged.items() if k in allowed_properties}) @@ -95,7 +96,7 @@ class HeatmapPlot(ElementPlot): def _axes_props(self, plots, subplots, element, ranges): labels = self._axis_labels(element, plots) - xvals, yvals = element.dense_keys() + xvals, yvals = [element.dimension_values(i, True) for i in range(2)] plot_ranges = {'x_range': [str(x) for x in xvals], 'y_range': [str(y) for y in yvals]} return ('auto', 'auto'), labels, plot_ranges @@ -106,8 +107,9 @@ 
def get_data(self, element, ranges=None): cmap = style.get('palette', style.get('cmap', None)) cmap = get_cmap(cmap) x, y, z = element.dimensions(label=True) - zvals = np.rot90(element.data, 3).flatten() + zvals = element.dimension_values(z) colors = map_colors(zvals, ranges[z], cmap) - xvals, yvals = zip(*product(*element.dense_keys())) + xvals, yvals = [[str(v) for v in element.dimension_values(i)] + for i in range(2)] return ({x: xvals, y: yvals, z: zvals, 'color': colors}, {'x': x, 'y': y, 'fill_color': 'color', 'height': 1, 'width': 1}) diff --git a/holoviews/plotting/bokeh/tabular.py b/holoviews/plotting/bokeh/tabular.py index 1e4f074527..d3001a20e9 100644 --- a/holoviews/plotting/bokeh/tabular.py +++ b/holoviews/plotting/bokeh/tabular.py @@ -18,7 +18,7 @@ class TablePlot(BokehPlot, GenericElementPlot): def get_data(self, element, ranges=None): dims = element.dimensions() - return ({d.name: element.dimension_values(d.name) for d in dims}, + return ({d.name: element.dimension_values(d) for d in dims}, {d.name: d.name for d in dims}) @@ -29,6 +29,8 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): # Get element key and ranges for frame element = self.hmap.last key = self.keys[-1] + self.current_frame = element + self.current_key = key data, mapping = self.get_data(element, ranges) if source is None: @@ -41,6 +43,7 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): table = DataTable(source=source, columns=columns, height=self.height, width=self.width, **properties) self.handles['plot'] = table + self.handles['glyph_renderer'] = table self.drawn = True return table diff --git a/holoviews/plotting/bokeh/util.py b/holoviews/plotting/bokeh/util.py index 8f074103e6..edce38c62f 100644 --- a/holoviews/plotting/bokeh/util.py +++ b/holoviews/plotting/bokeh/util.py @@ -114,3 +114,7 @@ def layout_padding(plots): p.ygrid.grid_line_color = None expanded_plots[r].append(p) return expanded_plots + + +def 
convert_datetime(time): + return time.astype('datetime64[s]').astype(float)*1000 diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index e3633ff641..02b24a77cb 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -12,7 +12,7 @@ from matplotlib import rc_params_from_file -from ...core import Layout, NdOverlay, Collator +from ...core import Layout, NdOverlay, Collator, GridMatrix from ...core.options import Cycle, Palette, Options from ...element import * # pyflakes:ignore (API import) from ..plot import PlotSelector @@ -111,6 +111,7 @@ def grid_selector(grid): # General plots GridSpace: GridPlot, + GridMatrix: GridPlot, NdLayout: LayoutPlot, Layout: LayoutPlot, AdjointLayout: AdjointLayoutPlot, @@ -127,6 +128,8 @@ def grid_selector(grid): # Tabular plots ItemTable: TablePlot, Table: TablePlot, + NdElement: TablePlot, + Columns: TablePlot, Collator: TablePlot, # Raster plots @@ -181,6 +184,9 @@ def grid_selector(grid): options.RGB = Options('style', interpolation='nearest') # Composites options.Layout = Options('plot', sublabel_format='{Alpha}') +options.GridMatrix = Options('plot', fig_size=160, shared_xaxis=True, + shared_yaxis=True, xaxis=None, yaxis=None) + # Annotations options.VLine = Options('style', color=Cycle()) options.HLine = Options('style', color=Cycle()) diff --git a/holoviews/plotting/mpl/chart.py b/holoviews/plotting/mpl/chart.py index 2e4659efe1..a0ff298319 100644 --- a/holoviews/plotting/mpl/chart.py +++ b/holoviews/plotting/mpl/chart.py @@ -53,8 +53,8 @@ def _cyclic_curves(self, curveview): """ Mutate the lines object to generate a rotated cyclic curves. 
""" - x_values = list(curveview.data[:, 0]) - y_values = list(curveview.data[:, 1]) + x_values = list(curveview.dimension_values(0)) + y_values = list(curveview.dimension_values(1)) if self.center_cyclic: rotate_n = self.peak_argmax+len(x_values)/2 y_values = self._rotate(y_values, n=rotate_n) @@ -128,7 +128,8 @@ def initialize_plot(self, ranges=None): # Create line segments and apply style style = self.style[self.cyclic_index] legend = element.label if self.show_legend else '' - line_segment = axis.plot(data[:, 0], data[:, 1], label=legend, + line_segment = axis.plot(element.dimension_values(0), + element.dimension_values(1), label=legend, zorder=self.zorder, **style)[0] self.handles['artist'] = line_segment @@ -136,12 +137,11 @@ def initialize_plot(self, ranges=None): def update_handles(self, axis, element, key, ranges=None): - data = element.data artist = self.handles['artist'] if self.cyclic_range is not None: data = self._cyclic_curves(element) - artist.set_xdata(data[:, 0]) - artist.set_ydata(data[:, 1]) + artist.set_xdata(element.dimension_values(0)) + artist.set_ydata(element.dimension_values(1)) @@ -172,8 +172,8 @@ def initialize_plot(self, ranges=None): error_kwargs = dict(self.style[self.cyclic_index], fmt='none', zorder=self.zorder) error_kwargs['yerr'] = element.data[:, 2:4].T - _, (bottoms, tops), verts = axis.errorbar(element.data[:, 0], - element.data[:, 1], + _, (bottoms, tops), verts = axis.errorbar(element.dimension_values(0), + element.dimension_values(1), **error_kwargs) self.handles['bottoms'] = bottoms self.handles['tops'] = tops @@ -238,9 +238,10 @@ def initialize_plot(self, ranges=None): def update_handles(self, axis, element, key, ranges=None): if 'paths' in self.handles: self.handles['paths'].remove() - paths = axis.fill_between(element.data[:, 0], - element.data[:, 1]-element.data[:, 2], - element.data[:, 1]+element.data[:, 3], + yvals = element.data[:, 1] + paths = axis.fill_between(element.dimension_values(0), + 
yvals-element.dimension_values(2), + yvals+element.dimension_values(3), zorder=self.zorder, label=element.label if self.show_legend else None, **self.style[self.cyclic_index]) @@ -547,10 +548,10 @@ def initialize_plot(self, ranges=None): ranges = self.compute_ranges(self.hmap, self.keys[-1], ranges) ranges = match_spec(points, ranges) - ndims = points.data.shape[1] - xs = points.data[:, 0] if len(points.data) else [] - ys = points.data[:, 1] if len(points.data) else [] - cs = points.data[:, self.color_index] if self.color_index < ndims else None + ndims = points.shape[1] + xs = points.dimension_values(0) if len(points.data) else [] + ys = points.dimension_values(1) if len(points.data) else [] + cs = points.dimension_values(self.color_index) if self.color_index < ndims else None style = self.style[self.cyclic_index] if self.size_index < ndims and self.scaling_factor > 1: @@ -576,21 +577,21 @@ def initialize_plot(self, ranges=None): def _compute_size(self, element, opts): - sizes = element.data[:, self.size_index] + sizes = element.dimension_values(self.size_index) ms = opts.pop('s') if 's' in opts else plt.rcParams['lines.markersize'] return compute_sizes(sizes, self.size_fn, self.scaling_factor, ms) def update_handles(self, axis, element, key, ranges=None): paths = self.handles['artist'] - paths.set_offsets(element.data[:, 0:2]) - ndims = element.data.shape[1] + paths.set_offsets(element.array(dimensions=[0, 1])) + ndims = element.shape[1] dims = element.dimensions(label=True) if self.size_index < ndims: opts = self.style[self.cyclic_index] paths.set_sizes(self._compute_size(element, opts)) if self.color_index < ndims: - cs = element.data[:, self.color_index] + cs = element.dimension_values(self.color_index) val_dim = dims[self.color_index] paths.set_clim(ranges[val_dim]) paths.set_array(cs) @@ -649,10 +650,10 @@ def _get_map_info(self, vmap): def _get_info(self, vfield, input_scale, ranges): - xs = vfield.data[:, 0] if len(vfield.data) else [] - ys = 
vfield.data[:, 1] if len(vfield.data) else [] - radians = vfield.data[:, 2] if len(vfield.data) else [] - magnitudes = vfield.data[:, 3] if vfield.data.shape[1]>=4 else np.array([1.0] * len(xs)) + xs = vfield.dimension_values(0) if len(vfield.data) else [] + ys = vfield.dimension_values(1) if len(vfield.data) else [] + radians = vfield.dimension_values(2) if len(vfield.data) else [] + magnitudes = vfield.dimension_values(3) if vfield.data.shape[1]>=4 else np.array([1.0] * len(xs)) colors = magnitudes if self.color_dim == 'magnitude' else radians if vfield.data.shape[1] >= 4: @@ -691,7 +692,7 @@ def initialize_plot(self, ranges=None): ranges = match_spec(vfield, ranges) xs, ys, angles, lens, colors, scale = self._get_info(vfield, input_scale, ranges) - args = (xs, ys, lens, [0.0] * len(vfield.data)) + args = (xs, ys, lens, [0.0] * len(vfield)) args = args + (colors,) if colorized else args if not self.arrow_heads: @@ -923,11 +924,12 @@ def _create_bars(self, axis, element): label_key[idx] = stk style_key[idx] = stk_name val_key[si] = stk_name - val = element.get(tuple(val_key), (np.NaN,)) + vals = element.sample([tuple(val_key)]).dimension_values(element.vdims[0].name) + val = float(vals[0]) if len(vals) else np.NaN label = ', '.join(label_key) style = dict(style_opts, label='' if label in labels else label, **dict(zip(sopts, color_groups[tuple(style_key)]))) - bar = axis.bar([xpos], val, width=width, bottom=prev, + bar = axis.bar([xpos], [val], width=width, bottom=prev, **style) # Update variables diff --git a/holoviews/plotting/mpl/chart3d.py b/holoviews/plotting/mpl/chart3d.py index 0ff31ceb3e..5d8212d906 100644 --- a/holoviews/plotting/mpl/chart3d.py +++ b/holoviews/plotting/mpl/chart3d.py @@ -126,11 +126,9 @@ def initialize_plot(self, ranges=None): return self._finalize_axis(key, ranges=ranges) def update_handles(self, axis, points, key, ranges=None): - ndims = points.data.shape[1] - xs = points.data[:, 0] if len(points.data) else [] - ys = points.data[:, 1] if 
len(points.data) else [] - zs = points.data[:, 2] if len(points.data) else [] - cs = points.data[:, self.color_index] if self.color_index < ndims else None + ndims = points.shape[1] + xs, ys, zs = (points.dimension_values(i) for i in range(3)) + cs = points.dimension_values(self.color_index) if self.color_index < ndims else None style = self.style[self.cyclic_index] if self.size_index < ndims and self.scaling_factor > 1: diff --git a/holoviews/plotting/mpl/element.py b/holoviews/plotting/mpl/element.py index 167b339bb7..c4bed9ebb7 100644 --- a/holoviews/plotting/mpl/element.py +++ b/holoviews/plotting/mpl/element.py @@ -7,7 +7,8 @@ import param from ...core import util -from ...core import OrderedDict, Collator, NdOverlay, HoloMap, CompositeOverlay, Element3D +from ...core import (OrderedDict, Collator, NdOverlay, HoloMap, + CompositeOverlay, Element3D, Columns, NdElement) from ...element import Table, ItemTable, Raster from ..plot import GenericElementPlot, GenericOverlayPlot from .plot import MPLPlot @@ -102,7 +103,7 @@ class ElementPlot(GenericElementPlot, MPLPlot): # Element Plots should declare the valid style options for matplotlib call style_opts = [] - _suppressed = [Table, Collator, ItemTable] + _suppressed = [Table, NdElement, Collator, Columns, ItemTable] def __init__(self, element, **params): super(ElementPlot, self).__init__(element, **params) diff --git a/holoviews/plotting/mpl/raster.py b/holoviews/plotting/mpl/raster.py index 537a9d7d2b..7591c34fa0 100644 --- a/holoviews/plotting/mpl/raster.py +++ b/holoviews/plotting/mpl/raster.py @@ -104,7 +104,8 @@ def initialize_plot(self, ranges=None): def _compute_ticks(self, element, ranges): if isinstance(element, HeatMap): xdim, ydim = element.kdims - dim1_keys, dim2_keys = element.dense_keys() + dim1_keys, dim2_keys = [element.dimension_values(i, True) + for i in range(2)] num_x, num_y = len(dim1_keys), len(dim2_keys) x0, y0, x1, y1 = element.extents xstep, ystep = ((x1-x0)/num_x, (y1-y0)/num_y) @@ -120,23 
+121,16 @@ def _compute_ticks(self, element, ranges): def _annotate_values(self, element): axis = self.handles['axis'] val_dim = element.vdims[0] - dim1_keys, dim2_keys = element.dense_keys() - num_x, num_y = len(dim1_keys), len(dim2_keys) + d1keys, d2keys, vals = [element.dimension_values(i) for i in range(3)] + d1uniq, d2uniq = [element.dimension_values(i, True) for i in range(2)] + num_x, num_y = len(d1uniq), len(d2uniq) xstep, ystep = 1.0/num_x, 1.0/num_y xpos = np.linspace(xstep/2., 1.0-xstep/2., num_x) ypos = np.linspace(ystep/2., 1.0-ystep/2., num_y) - coords = product(dim1_keys, dim2_keys) plot_coords = product(xpos, ypos) - for plot_coord, coord in zip(plot_coords, coords): - if isinstance(element, HeatMap): - val = element._data.get(coord, np.NaN) - val = val[0] if isinstance(val, tuple) else val - else: - val = element[coord] - val = val_dim.type(val) if val_dim.type else val - val = val[0] if isinstance(val, tuple) else val - text = val_dim.pprint_value(val) - text = '' if val is np.nan else text + for plot_coord, v in zip(plot_coords, vals): + text = val_dim.pprint_value(v) + text = '' if v is np.nan else text if plot_coord not in self.handles['annotations']: annotation = axis.annotate(text, xy=plot_coord, xycoords='axes fraction', diff --git a/holoviews/plotting/mpl/seaborn.py b/holoviews/plotting/mpl/seaborn.py index 538bf5632e..8759c420a6 100644 --- a/holoviews/plotting/mpl/seaborn.py +++ b/holoviews/plotting/mpl/seaborn.py @@ -177,7 +177,7 @@ def initialize_plot(self, ranges=None): def _update_plot(self, axis, view): label = view.label if self.overlaid == 1 else '' - sns.distplot(view.data, ax=axis, label=label, **self.style) + sns.distplot(view.dimension_values(0), ax=axis, label=label, **self.style) diff --git a/tests/testcharts.py b/tests/testcharts.py deleted file mode 100644 index a1547d82f0..0000000000 --- a/tests/testcharts.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Tests for the Chart Element types. 
-""" - -import numpy as np -from holoviews import OrderedDict, Chart, Curve, ItemTable -from holoviews.element.comparison import ComparisonTestCase - -class ChartTest(ComparisonTestCase): - """ - Test for the Chart baseclass methods. - """ - - def setUp(self): - self.xs = range(11) - self.ys = np.linspace(0, 1, 11) - self.chart = Chart(zip(self.xs, self.ys)) - self.curve = Curve(zip(self.xs, self.ys)) - - def test_yvalue_constructor(self): - ys = np.linspace(0, 1, 11) - Chart(ys) - - def test_chart_index(self): - self.assertEqual(self.chart[5], self.ys[5]) - - def test_chart_slice(self): - chart_slice = Curve(zip(range(5, 9), np.linspace(0.5,0.8, 4))) - self.assertEqual(self.curve[5:9], chart_slice) - - def test_chart_closest(self): - closest = self.chart.closest([0.51, 1, 9.9]) - self.assertEqual(closest, [1., 1., 10.]) - - def test_chart_reduce(self): - mean = self.chart.reduce(x=np.mean) - itable = ItemTable(OrderedDict([('y', np.mean(self.ys))])) - self.assertEqual(mean, itable) - - def test_chart_sample(self): - samples = self.chart.sample([0, 5, 10]).values() - self.assertEqual(samples, [(0,), (0.5,), (1,)]) diff --git a/tests/testcolumns.py b/tests/testcolumns.py new file mode 100644 index 0000000000..a1aa2b7163 --- /dev/null +++ b/tests/testcolumns.py @@ -0,0 +1,473 @@ +""" +Tests for the Columns Element types. +""" + +import pandas as pd + +import numpy as np +from holoviews import OrderedDict, Columns, Curve, ItemTable, NdElement, HoloMap +from holoviews.element.comparison import ComparisonTestCase + + +class ColumnsNdElementTest(ComparisonTestCase): + """ + Test for the Chart baseclass methods. 
+ """ + + def setUp(self): + self.datatype = Columns.datatype + Columns.datatype = ['dictionary', 'array'] + self.xs = range(11) + self.ys = np.linspace(0, 1, 11) + self.zs = np.sin(self.xs) + self.keys1 = [('M',10), ('M',16), ('F',12)] + self.values1 = [(15, 0.8), (18, 0.6), (10, 0.8)] + self.kdims = ['Gender', 'Age'] + self.vdims = ['Weight', 'Height'] + self.columns = Columns(dict(zip(self.xs, self.ys)), + kdims=['x'], vdims=['y']) + + def tearDown(self): + Columns.datatype = self.datatype + + def test_columns_sort_vdim(self): + columns = Columns(OrderedDict(zip(self.xs, -self.ys)), + kdims=['x'], vdims=['y']) + columns_sorted = Columns(OrderedDict(zip(self.xs[::-1], -self.ys[::-1])), + kdims=['x'], vdims=['y']) + self.assertEqual(columns.sort('y'), columns_sorted) + + def test_columns_sort_heterogeneous_string(self): + columns = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + keys = [('F',12), ('M',10), ('M',16)] + values = [(10, 0.8), (15, 0.8), (18, 0.6)] + columns_sorted = Columns(zip(keys, values), + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(columns.sort(), columns_sorted) + + def test_columns_shape(self): + self.assertEqual(self.columns.shape, (11, 2)) + + def test_columns_range(self): + self.assertEqual(self.columns.range('y'), (0., 1.)) + + def test_columns_odict_construct(self): + columns = Columns(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B']) + self.assertTrue(isinstance(columns.data, NdElement)) + + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + + def test_columns_dict_construct(self): + self.assertTrue(isinstance(self.columns.data, NdElement)) + + def test_columns_ndelement_construct(self): + columns = Columns(NdElement(zip(self.xs, self.ys))) + self.assertTrue(isinstance(columns.data, NdElement)) + + def test_columns_items_construct(self): + columns = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, 
vdims=self.vdims) + self.assertTrue(isinstance(columns.data, NdElement)) + + def test_columns_sample(self): + samples = self.columns.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) + + def test_columns_index_row_gender(self): + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + indexed = Columns(OrderedDict([(('F', 12), (10, 0.8))]), + kdims=self.kdims, vdims=self.vdims) + row = table['F',:] + self.assertEquals(row, indexed) + + def test_columns_index_rows_gender(self): + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + row = table['M',:] + indexed = Columns(OrderedDict([(('M', 10), (15, 0.8)), + (('M', 16), (18, 0.6))]), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(row, indexed) + + def test_columns_index_row_age(self): + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + indexed = Columns(OrderedDict([(('F', 12), (10, 0.8))]), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(table[:, 12], indexed) + + def test_columns_index_item_table(self): + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + indexed = Columns(OrderedDict([(('F', 12), (10, 0.8))]), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(table['F', 12], indexed) + + + def test_columns_index_value1(self): + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(table['F', 12, 'Weight'], 10) + + def test_columns_index_value2(self): + table = Columns(zip(self.keys1, self.values1), + kdims=self.kdims, vdims=self.vdims) + self.assertEquals(table['F', 12, 'Height'], 0.8) + + def test_columns_getitem_column(self): + self.compare_arrays(self.columns['y'], self.ys) + + def test_columns_add_dimensions_value(self): + table = self.columns.add_dimension('z', 1, 0) + self.assertEqual(table.kdims[1], 'z') + 
self.compare_arrays(table.dimension_values('z'), np.zeros(len(table))) + + def test_columns_add_dimensions_values(self): + table = self.columns.add_dimension('z', 1, range(1,12)) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_collapse(self): + collapsed = HoloMap({i: Columns(dict(zip(self.xs, self.ys*i)), kdims=['x'], vdims=['y']) + for i in range(10)}, kdims=['z']).collapse('z', np.mean) + self.compare_columns(collapsed, Columns(zip(zip(self.xs), self.ys*4.5), + kdims=['x'], vdims=['y'])) + + def test_columns_1d_reduce(self): + self.assertEqual(self.columns.reduce('x', np.mean), np.float64(0.5)) + + def test_columns_2d_reduce(self): + columns = Columns(zip(zip(self.xs, self.ys), self.zs), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.array(0.12828985192891)) + + def test_columns_2d_partial_reduce(self): + columns = Columns(zip(zip(self.xs, self.ys), self.zs), + kdims=['x', 'y'], vdims=['z']) + reduced = Columns(zip(zip(self.xs), self.zs), + kdims=['x'], vdims=['z']) + self.assertEqual(columns.reduce(['y'], np.mean), reduced) + + def test_columns_heterogeneous_reduce(self): + columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, + vdims=self.vdims) + reduced = Columns(zip([k[1:] for k in self.keys1], self.values1), + kdims=self.kdims[1:], vdims=self.vdims) + self.assertEqual(columns.reduce(['Gender'], np.mean), reduced) + + def test_columns_heterogeneous_reduce2d(self): + columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, + vdims=self.vdims) + reduced = Columns([((), (14.333333333333334, 0.73333333333333339))], kdims=[], vdims=self.vdims) + self.assertEqual(columns.reduce(function=np.mean), reduced) + + def test_column_heterogeneous_aggregate(self): + columns = Columns(zip(self.keys1, self.values1), kdims=self.kdims, + vdims=self.vdims) + aggregated = Columns(OrderedDict([('M', (16.5, 
0.7)), ('F', (10., 0.8))]), + kdims=self.kdims[:1], vdims=self.vdims) + self.compare_columns(columns.aggregate(['Gender'], np.mean), aggregated) + + def test_columns_2d_aggregate_partial(self): + columns = Columns(zip(zip(self.xs, self.ys), self.zs), + kdims=['x', 'y'], vdims=['z']) + reduced = Columns(zip(zip(self.xs), self.zs), + kdims=['x'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), reduced) + + def test_columns_array(self): + self.assertEqual(self.columns.array(), np.column_stack([self.xs, self.ys])) + + +class ColumnsNdArrayTest(ComparisonTestCase): + + def setUp(self): + self.xs = range(11) + self.ys = np.linspace(0, 1, 11) + self.zs = np.sin(self.xs) + self.columns = Columns((self.xs, self.ys), kdims=['x'], vdims=['y']) + + def test_columns_shape(self): + self.assertEqual(self.columns.shape, (11, 2)) + + def test_columns_range(self): + self.assertEqual(self.columns.range('y'), (0., 1.)) + + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + + def test_columns_values_construct(self): + columns = Columns(self.ys) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_tuple_construct(self): + columns = Columns((self.xs, self.ys)) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_array_construct(self): + columns = Columns(np.column_stack([self.xs, self.ys])) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_tuple_list_construct(self): + columns = Columns(zip(self.xs, self.ys)) + self.assertTrue(isinstance(columns.data, np.ndarray)) + + def test_columns_sort_vdim(self): + columns = Columns((self.xs, -self.ys), kdims=['x'], vdims=['y']) + columns_sorted = Columns((self.xs[::-1], -self.ys[::-1]), + kdims=['x'], vdims=['y']) + self.assertEqual(columns.sort('y'), columns_sorted) + + def test_columns_index(self): + self.assertEqual(self.columns[5], self.ys[5]) + + def test_columns_slice(self): + 
columns_slice = Columns(zip(range(5, 9), np.linspace(0.5,0.8, 4)), + kdims=['x'], vdims=['y']) + self.assertEqual(self.columns[5:9], columns_slice) + + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + + def test_columns_getitem_column(self): + self.compare_arrays(self.columns['y'], self.ys) + + def test_columns_sample(self): + samples = self.columns.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) + + def test_columns_add_dimensions_value(self): + table = Columns((self.xs, self.ys), + kdims=['x'], vdims=['y']) + table = table.add_dimension('z', 1, 0) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.zeros(len(table))) + + def test_columns_add_dimensions_values(self): + table = Columns((self.xs, self.ys), + kdims=['x'], vdims=['y']) + table = table.add_dimension('z', 1, range(1,12)) + self.assertEqual(table.kdims[1], 'z') + self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_collapse(self): + collapsed = HoloMap({i: Columns((self.xs, self.ys*i), kdims=['x'], vdims=['y']) + for i in range(10)}, kdims=['z']).collapse('z', np.mean) + self.compare_columns(collapsed, Columns((self.xs, self.ys*4.5), kdims=['x'], vdims=['y'])) + + def test_columns_1d_reduce(self): + columns = Columns((self.xs, self.ys), kdims=['x'], vdims=['y']) + self.assertEqual(columns.reduce('x', np.mean), np.float64(0.5)) + + def test_columns_2d_reduce(self): + columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.array(0.12828985192891)) + + def test_columns_2d_partial_reduce(self): + columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['y'], np.mean), + Columns((self.xs, self.zs), kdims=['x'], vdims=['z'])) + + def 
test_columns_2d_aggregate_partial(self): + columns = Columns((self.xs, self.ys, self.zs), kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), + Columns((self.xs, self.zs), kdims=['x'], vdims=['z'])) + + def test_columns_array(self): + self.assertEqual(self.columns.array(), np.column_stack([self.xs, self.ys])) + + +class ColumnsDFrameTest(ComparisonTestCase): + + def setUp(self): + self.datatype = Columns.datatype + Columns.datatype = ['dataframe'] + self.column_data = [('M',10, 15, 0.8), ('M',16, 18, 0.6), + ('F',12, 10, 0.8)] + self.kdims = ['Gender', 'Age'] + self.vdims = ['Weight', 'Height'] + self.xs = range(11) + self.ys = np.linspace(0, 1, 11) + self.zs = np.sin(self.xs) + self.columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys}), + kdims=['x'], vdims=['y']) + + def tearDown(self): + Columns.datatype = self.datatype + + def test_columns_range(self): + self.assertEqual(self.columns.range('y'), (0., 1.)) + + def test_columns_shape(self): + self.assertEqual(self.columns.shape, (11, 2)) + + def test_columns_closest(self): + closest = self.columns.closest([0.51, 1, 9.9]) + self.assertEqual(closest, [1., 1., 10.]) + + def test_columns_sample(self): + samples = self.columns.sample([0, 5, 10]).dimension_values('y') + self.assertEqual(samples, np.array([0, 0.5, 1])) + + def test_columns_df_construct(self): + self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) + + def test_columns_tuple_list_construct(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + self.assertTrue(isinstance(self.columns.data, pd.DataFrame)) + + def test_columns_slice(self): + data = [('x', range(5, 9)), ('y', np.linspace(0.5, 0.8, 4))] + columns_slice = Columns(pd.DataFrame.from_items(data), + kdims=['x'], vdims=['y']) + self.assertEqual(self.columns[5:9], columns_slice) + + def test_columns_index_row_gender(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns['F',:] + 
self.assertEquals(type(row), Columns) + self.compare_columns(row, Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_rows_gender(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns['M',:] + self.assertEquals(type(row), Columns) + self.compare_columns(row, Columns(self.column_data[:2], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_row_age(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns[:, 12] + self.assertEquals(type(row), Columns) + self.compare_columns(row, Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_single_row(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + row = columns['F', 12] + self.assertEquals(type(row), Columns) + self.compare_columns(row, Columns(self.column_data[2:], + kdims=self.kdims, + vdims=self.vdims)) + + def test_columns_index_value1(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + self.assertEquals(columns['F', 12, 'Weight'], 10) + + def test_columns_index_value2(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + self.assertEquals(columns['F', 12, 'Height'], 0.8) + + def test_columns_sort_vdim(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': -self.ys}), + kdims=['x'], vdims=['y']) + columns_sorted = Columns(pd.DataFrame({'x': self.xs[::-1], 'y': -self.ys[::-1]}), + kdims=['x'], vdims=['y']) + self.assertEqual(columns.sort('y'), columns_sorted) + + def test_columns_sort_heterogeneous_string(self): + columns = Columns(self.column_data, kdims=self.kdims, vdims=self.vdims) + columns_sorted = Columns([self.column_data[i] for i in [2, 0, 1]], + kdims=self.kdims, vdims=self.vdims) + self.assertEqual(columns.sort(), columns_sorted) + + def test_columns_add_dimensions_value(self): + columns = 
self.columns.add_dimension('z', 1, 0) + self.assertEqual(columns.kdims[1], 'z') + self.compare_arrays(columns.dimension_values('z'), np.zeros(len(columns))) + + def test_columns_add_dimensions_values(self): + columns = self.columns.add_dimension('z', 1, range(1,12)) + self.assertEqual(columns.kdims[1], 'z') + self.compare_arrays(columns.dimension_values('z'), np.array(list(range(1,12)))) + + def test_columns_getitem_column(self): + self.compare_arrays(self.columns['y'], self.ys) + + def test_columns_collapse(self): + collapsed = HoloMap({i: Columns(pd.DataFrame({'x': self.xs, 'y': self.ys*i}), kdims=['x'], vdims=['y']) + for i in range(10)}, kdims=['z']).collapse('z', np.mean) + self.compare_columns(collapsed, Columns(pd.DataFrame({'x': self.xs, 'y': self.ys*4.5}), kdims=['x'], vdims=['y'])) + + def test_columns_1d_reduce(self): + self.assertEqual(self.columns.reduce('x', np.mean), np.float64(0.5)) + + def test_columns_2d_reduce(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)), + np.array(0.12828985192891)) + + def test_columns_2d_partial_reduce(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.reduce(['y'], np.mean), + Columns(pd.DataFrame({'x': self.xs, 'z': self.zs}), + kdims=['x'], vdims=['z'])) + + def test_columns_heterogeneous_reduce(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + reduced_data = pd.DataFrame([(10, 15, 0.8), (12, 10, 0.8), (16, 18, 0.6)], + columns=columns.dimensions(label=True)[1:]) + reduced = Columns(reduced_data, kdims=self.kdims[1:], + vdims=self.vdims) + self.assertEqual(columns.reduce(['Gender'], np.mean), reduced) + + def test_columns_heterogeneous_reduce2d(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + reduced_data = 
pd.DataFrame([d[1:] for d in self.column_data], + columns=columns.dimensions(label=True)[1:]) + reduced = Columns(pd.DataFrame([(14.333333333333334, 0.73333333333333339)], columns=self.vdims), + kdims=[], vdims=self.vdims) + self.assertEqual(columns.reduce(function=np.mean), reduced) + + + def test_columns_groupby(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + cols = self.kdims + self.vdims + group1 = pd.DataFrame(self.column_data[:2], columns=cols) + group2 = pd.DataFrame(self.column_data[2:], columns=cols) + grouped = HoloMap({'M': Columns(group1, kdims=['Age'], vdims=self.vdims), + 'F': Columns(group2, kdims=['Age'], vdims=self.vdims)}, + kdims=['Gender']) + self.assertEqual(columns.groupby(['Gender']), grouped) + + def test_columns_heterogeneous_aggregate(self): + columns = Columns(self.column_data, kdims=self.kdims, + vdims=self.vdims) + aggregated = Columns(pd.DataFrame([('F', 10., 0.8), ('M', 16.5, 0.7)], + columns=['Gender']+self.vdims), + kdims=self.kdims[:1], vdims=self.vdims) + self.compare_columns(columns.aggregate(['Gender'], np.mean), aggregated) + + def test_columns_2d_partial_reduce(self): + columns = Columns(pd.DataFrame({'x': self.xs, 'y': self.ys, 'z': self.zs}), + kdims=['x', 'y'], vdims=['z']) + self.assertEqual(columns.aggregate(['x'], np.mean), + Columns(pd.DataFrame({'x': self.xs, 'z': self.zs}), + kdims=['x'], vdims=['z'])) + + def test_columns_array(self): + self.assertEqual(self.columns.array(), np.column_stack([self.xs, self.ys])) diff --git a/tests/testcomparisonchart.py b/tests/testcomparisonchart.py index cb6ae5080a..5eb7c7c2bd 100644 --- a/tests/testcomparisonchart.py +++ b/tests/testcomparisonchart.py @@ -22,8 +22,8 @@ def test_curves_unequal(self): try: self.assertEqual(self.curve1, self.curve2) except AssertionError as e: - if not str(e).startswith("Curve data not almost equal to 6 decimals"): - raise self.failureException("Curve data mismatch error not raised.") + if not 
str(e).startswith("Curve not of matching length."): + raise self.failureException("Curve mismatch error not raised.") @@ -55,8 +55,8 @@ def test_bars_unequal_1(self): try: self.assertEqual(self.bars1, self.bars2) except AssertionError as e: - if not str(e) == '(16,) != (17,)': - raise Exception('Bars mismatched data error not raised.') + if not 'not almost equal' in str(e): + raise Exception('Bars mismatched data error not raised. %s' % e) def test_bars_unequal_keydims(self): try: @@ -135,14 +135,14 @@ def test_scatter_unequal_data_shape(self): try: self.assertEqual(self.scatter1, self.scatter2) except AssertionError as e: - if not str(e).startswith("Scatter data not almost equal to 6 decimals"): + if not str(e).startswith("Scatter not of matching length."): raise self.failureException("Scatter data mismatch error not raised.") def test_scatter_unequal_data_values(self): try: self.assertEqual(self.scatter1, self.scatter3) except AssertionError as e: - if not str(e).startswith("Scatter data not almost equal to 6 decimals"): + if not str(e).startswith("Scatter not almost equal to 6 decimals"): raise self.failureException("Scatter data mismatch error not raised.") @@ -170,14 +170,14 @@ def test_points_unequal_data_shape(self): try: self.assertEqual(self.points1, self.points2) except AssertionError as e: - if not str(e).startswith("Points objects have different numbers of points."): + if not str(e).startswith("Points not of matching length."): raise self.failureException("Points count mismatch error not raised.") def test_points_unequal_data_values(self): try: self.assertEqual(self.points1, self.points3) except AssertionError as e: - if not str(e).startswith("Points data not almost equal to 6 decimals"): + if not str(e).startswith("Points not almost equal to 6 decimals"): raise self.failureException("Points data mismatch error not raised.") @@ -206,6 +206,6 @@ def test_vfield_unequal_1(self): try: self.assertEqual(self.vfield1, self.vfield2) except AssertionError as e: 
- if not str(e).startswith("VectorField data not almost equal to 6 decimals"): + if not str(e).startswith("VectorField not almost equal to 6 decimals"): raise self.failureException("VectorField data mismatch error not raised.") diff --git a/tests/testcomparisonraster.py b/tests/testcomparisonraster.py index 319cc3b5c6..1b709b4c9f 100644 --- a/tests/testcomparisonraster.py +++ b/tests/testcomparisonraster.py @@ -101,7 +101,7 @@ def test_unequal_arrays(self): self.assertEqual(self.mat1, self.mat2) raise AssertionError("Array mismatch not raised") except AssertionError as e: - if not str(e).startswith('Image data not almost equal to 6 decimals\n'): + if not str(e).startswith('Image not almost equal to 6 decimals\n'): raise self.failureException("Image data mismatch error not raised.") def test_bounds_mismatch(self): @@ -124,8 +124,8 @@ def test_element_mismatch(self): try: self.assertEqual(self.overlay1_depth2, self.overlay2_depth2) except AssertionError as e: - if not str(e).startswith('Image data not almost equal to 6 decimals\n'): - raise self.failureException("Image data mismatch error not raised.") + if not str(e).startswith('Image not almost equal to 6 decimals\n'): + raise self.failureException("Image mismatch error not raised.") @@ -167,5 +167,5 @@ def test_element_mismatch(self): self.assertEqual(self.map1_1D, self.map4_1D) raise AssertionError("Pane mismatch in array data not raised.") except AssertionError as e: - if not str(e).startswith('Image data not almost equal to 6 decimals\n'): + if not str(e).startswith('Image not almost equal to 6 decimals\n'): raise self.failureException("Image mismatch error not raised.") diff --git a/tests/testtabular.py b/tests/testtabular.py deleted file mode 100644 index eb26af67ec..0000000000 --- a/tests/testtabular.py +++ /dev/null @@ -1,68 +0,0 @@ -""" -Unit tests of tabular elements -""" - -from collections import OrderedDict -from holoviews import Table, ItemTable -from holoviews.element.comparison import 
ComparisonTestCase - -class TestTable(ComparisonTestCase): - - - def setUp(self): - self.keys1 = [('M',10), ('M',16), ('F',12)] - self.values1 = [(15, 0.8), (18, 0.6), (10, 0.8)] - self.key_dims1 = ['Gender', 'Age'] - self.val_dims1 = ['Weight', 'Height'] - - def test_table_init(self): - self.table1 =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - - def test_table_index_row_gender(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - row = table['F',:] - self.assertEquals(type(row), Table) - self.assertEquals(row.data, OrderedDict([(('F', 12), (10, 0.8))])) - - def test_table_index_rows_gender(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - row = table['M',:] - self.assertEquals(type(row), Table) - self.assertEquals(row.data, - OrderedDict([(('M', 10), (15, 0.8)), (('M', 16), (18, 0.6))])) - - def test_table_index_row_age(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - row = table[:, 12] - self.assertEquals(type(row), Table) - self.assertEquals(row.data, OrderedDict([(('F', 12), (10, 0.8))])) - - def test_table_index_item_table(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - itemtable = table['F', 12] - self.assertEquals(type(itemtable), ItemTable) - self.assertEquals(itemtable.data, OrderedDict([('Weight', 10), ('Height', 0.8)])) - - - def test_table_index_value1(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - self.assertEquals(table['F', 12, 'Weight'], 10) - - def test_table_index_value2(self): - table =Table(zip(self.keys1, self.values1), - kdims = self.key_dims1, - vdims = self.val_dims1) - self.assertEquals(table['F', 12, 'Height'], 0.8) -