Skip to content

Commit

Permalink
Merge pull request #542 from ioam/dense_interface
Browse files Browse the repository at this point in the history
Adding a general interface for N-dimensional gridded data
  • Loading branch information
jlstevens committed Mar 14, 2016
2 parents 913faac + 1b8d27a commit d4779cb
Show file tree
Hide file tree
Showing 12 changed files with 492 additions and 76 deletions.
382 changes: 351 additions & 31 deletions holoviews/core/data.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion holoviews/core/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ def select(self, selection_specs=None, **kwargs):
return selection


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
"""
Returns the values along the specified dimension. This method
must be implemented for all Dimensioned type.
Expand Down
7 changes: 4 additions & 3 deletions holoviews/core/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,15 +481,16 @@ def aggregate(self, dimensions, function, **kwargs):
return self.clone(rows, kdims=grouped.kdims)


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
dim = self.get_dimension(dim, strict=True)
value_dims = self.dimensions('value', label=True)
if dim.name in value_dims:
index = value_dims.index(dim.name)
vals = np.array([v[index] for v in self.data.values()])
return unique_array(vals) if unique else vals
return vals if expanded else unique_array(vals)
else:
return NdMapping.dimension_values(self, dim.name, unique)
return NdMapping.dimension_values(self, dim.name,
expanded, flat)


def values(self):
Expand Down
11 changes: 6 additions & 5 deletions holoviews/core/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ def get(self, key, default=None):
return self.data[key] if key in self.data else default


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
dimension = self.get_dimension(dimension, strict=True).name
return self.main.dimension_values(dimension, unique)
return self.main.dimension_values(dimension, expanded, flat)


def __getitem__(self, key):
Expand Down Expand Up @@ -433,17 +433,18 @@ def clone(self, *args, **overrides):
return clone


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
"Returns the values along the specified dimension."
dimension = self.get_dimension(dimension, strict=True).name
all_dims = self.traverse(lambda x: [d.name for d in x.dimensions()])
if dimension in chain.from_iterable(all_dims):
values = [el.dimension_values(dimension) for el in self
if dimension in el.dimensions(label=True)]
vals = np.concatenate(values)
return unique_array(vals) if unique else vals
return vals if expanded else unique_array(vals)
else:
return super(Layout, self).dimension_values(dimension, unique)
return super(Layout, self).dimension_values(dimension,
expanded, flat)


def cols(self, ncols):
Expand Down
6 changes: 3 additions & 3 deletions holoviews/core/ndmapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def drop_dimension(self, dimensions):
kdims=dims)


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
"Returns the values along the specified dimension."
dimension = self.get_dimension(dimension, strict=True).name
if dimension in self.kdims:
Expand All @@ -352,9 +352,9 @@ def dimension_values(self, dimension, unique=False):
values = [el.dimension_values(dimension) for el in self
if dimension in el.dimensions()]
vals = np.concatenate(values)
return util.unique_array(vals) if unique else vals
return vals if expanded else util.unique_array(vals)
else:
return super(MultiDimensionalMapping, self).dimension_values(dimension, unique)
return super(MultiDimensionalMapping, self).dimension_values(dimension, expanded, flat)


def reindex(self, kdims=[], force=False):
Expand Down
6 changes: 3 additions & 3 deletions holoviews/core/overlay.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,20 @@ def hist(self, index=0, adjoin=True, dimension=None, **kwargs):
return layout


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
values = []
found = False
for el in self:
if dimension in el.dimensions(label=True):
values.append(el.dimension_values(dimension))
found = True
if not found:
return super(CompositeOverlay, self).dimension_values(dimension, unique)
return super(CompositeOverlay, self).dimension_values(dimension, expanded, flat)
values = [v for v in values if v is not None and len(v)]
if not values:
return np.array()
vals = np.concatenate(values)
return unique_array(vals) if unique else vals
return vals if expanded else unique_array(vals)


class Overlay(Layout, CompositeOverlay):
Expand Down
14 changes: 14 additions & 0 deletions holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import string, fnmatch
import unicodedata
from collections import defaultdict
from functools import reduce

import numpy as np
import param
Expand Down Expand Up @@ -861,3 +862,16 @@ def groupby_python(self_or_cls, ndmapping, dimensions, container_type,
return container_type(groups, kdims=dimensions)


def cartesian_product(arrays):
    """
    Computes the cartesian product of a list of arrays.

    Each entry of ``arrays`` is passed to ``np.ix_`` and is therefore
    treated as one axis of an open mesh. The result has one row per
    combination and one column per input array; the first array
    varies slowest and the last array varies fastest.

    The output uses the common dtype of all inputs (as computed by
    ``np.result_type``) rather than the dtype of the first array, so
    that mixing e.g. integer and float inputs does not truncate the
    float values.

    Returns an array of shape (number of combinations, len(arrays)).
    """
    broadcastable = np.ix_(*arrays)
    broadcasted = np.broadcast_arrays(*broadcastable)
    rows, cols = broadcasted[0].size, len(broadcasted)
    # Promote to a dtype able to hold the values of every input
    dtype = np.result_type(*[a.dtype for a in broadcasted])
    # One buffer row per input array; transposed on return so that
    # each output row is a single combination
    out = np.empty((cols, rows), dtype=dtype)
    for i, a in enumerate(broadcasted):
        out[i] = a.reshape(-1)
    return out.T
50 changes: 27 additions & 23 deletions holoviews/element/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def sample(self, samples=[], **sample_values):
sample[sample_ind] = self._coord2matrix(coord_fn(sample_coord))[abs(sample_ind-1)]

# Sample data
x_vals = self.dimension_values(other_dimension[0].name, unique=True)
x_vals = self.dimension_values(other_dimension[0].name, False)
ydata = self._zdata[sample[::-1]]
if hasattr(self, 'bounds') and sample_ind == 0: ydata = ydata[::-1]
data = list(zip(x_vals, ydata))
Expand All @@ -154,7 +154,7 @@ def reduce(self, dimensions=None, function=None, **reduce_map):
dimension = dims[0]
other_dimension = [d for d in self.kdims if d.name != dimension]
oidx = self.get_dimension_index(other_dimension[0])
x_vals = self.dimension_values(other_dimension[0].name, unique=True)
x_vals = self.dimension_values(other_dimension[0].name, False)
reduced = function(self._zdata, axis=oidx)
data = zip(x_vals, reduced if not oidx else reduced[::-1])
params = dict(dict(self.get_param_values(onlychanged=True)),
Expand All @@ -164,18 +164,18 @@ def reduce(self, dimensions=None, function=None, **reduce_map):
return Table(data, **params)


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
"""
The set of samples available along a particular dimension.
"""
dim_idx = self.get_dimension_index(dim)
if unique and dim_idx == 0:
if not expanded and dim_idx == 0:
return np.array(range(self.data.shape[1]))
elif unique and dim_idx == 1:
elif not expanded and dim_idx == 1:
return np.array(range(self.data.shape[0]))
elif dim_idx in [0, 1]:
D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]]
return D1.flatten() if dim_idx == 0 else D2.flatten()
values = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]][dim_idx]
return values.flatten() if flat else values
elif dim_idx == 2:
return toarray(self.data.T).flatten()
else:
Expand Down Expand Up @@ -338,20 +338,20 @@ def range(self, dimension):
super(QuadMesh, self).range(dimension)


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
idx = self.get_dimension_index(dimension)
data = self.data[idx]
if idx in [0, 1]:
if not self._grid:
return data.flatten()
odim = 1 if unique else self.data[2].shape[idx]
odim = self.data[2].shape[idx] if expanded else 1
vals = np.tile(np.convolve(data, np.ones((2,))/2, mode='valid'), odim)
if idx:
return np.sort(vals)
else:
return vals
elif idx == 2:
return data.flatten()
return data.flatten() if flat else data
else:
return super(QuadMesh, self).dimension_values(idx)

Expand Down Expand Up @@ -388,8 +388,8 @@ def __init__(self, data, extents=None, **params):


def _compute_raster(self):
d1keys = self.dimension_values(0, True)
d2keys = self.dimension_values(1, True)
d1keys = self.dimension_values(0, False)
d2keys = self.dimension_values(1, False)
coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys]
dtype = 'dataframe' if pd else 'dictionary'
dense_data = Columns(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype])
Expand Down Expand Up @@ -438,8 +438,8 @@ def __setstate__(self, state):
super(HeatMap, self).__setstate__(state)

def dense_keys(self):
d1keys = self.dimension_values(0, True)
d2keys = self.dimension_values(1, True)
d1keys = self.dimension_values(0, False)
d2keys = self.dimension_values(1, False)
return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys]))


Expand Down Expand Up @@ -603,7 +603,7 @@ def _coord2matrix(self, coord):
return self.sheet2matrixidx(*coord)


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
"""
The set of samples available along a particular dimension.
"""
Expand All @@ -615,13 +615,16 @@ def dimension_values(self, dim, unique=False):
d2_half_unit = (t - b)/dim2/2.
d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1)
d2lin = np.linspace(b+d2_half_unit, t-d2_half_unit, dim2)
if unique:
return d2lin if dim_idx else d1lin
if expanded:
values = np.meshgrid(d2lin, d1lin)[abs(dim_idx-1)]
return values.flatten() if flat else values
else:
Y, X = np.meshgrid(d2lin, d1lin)
return Y.flatten() if dim_idx else X.flatten()
return d2lin if dim_idx else d1lin
elif dim_idx == 2:
return np.flipud(self.data).T.flatten()
# Raster arrays are stored with different orientation
# than expanded column format, reorient before expanding
data = np.flipud(self.data).T
return data.flatten() if flat else data
else:
super(Image, self).dimension_values(dim)

Expand Down Expand Up @@ -703,14 +706,15 @@ def load_image(cls, filename, height=1, array=False, bounds=None, bare=False, **
return rgb


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
"""
The set of samples available along a particular dimension.
"""
dim_idx = self.get_dimension_index(dim)
if self.ndims <= dim_idx < len(self.dimensions()):
return np.flipud(self.data[:,:,dim_idx-self.ndims]).T.flatten()
return super(RGB, self).dimension_values(dim, unique=True)
data = np.flipud(self.data[:,:,dim_idx-self.ndims]).T
return data.flatten() if flat else data
return super(RGB, self).dimension_values(dim, expanded, flat)


def __init__(self, data, **params):
Expand Down
2 changes: 1 addition & 1 deletion holoviews/element/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def collapse_data(cls, data, function, **kwargs):
return OrderedDict(zip(data[0].keys(), function(groups, axis=-1, **kwargs)))


def dimension_values(self, dimension):
def dimension_values(self, dimension, expanded=True, flat=True):
dimension = self.get_dimension(dimension, strict=True).name
if dimension in self.dimensions('value', label=True):
return np.array([self.data.get(dimension, np.NaN)])
Expand Down
4 changes: 2 additions & 2 deletions holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ def _process(self, element, key=None):
data = [element.data]

elif isinstance(element, QuadMesh):
data = (element.dimension_values(0, True),
element.dimension_values(1, True),
data = (element.dimension_values(0, False),
element.dimension_values(1, False),
element.data[2])
contour_set = contour_fn(*data, extent=extent,
levels=self.p.levels)
Expand Down
4 changes: 2 additions & 2 deletions holoviews/plotting/mpl/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _annotate_plot(self, ax, annotations):
def _annotate_values(self, element):
val_dim = element.vdims[0]
vals = np.rot90(element.raster, 3).flatten()
d1uniq, d2uniq = [np.unique(element.dimension_values(i)) for i in range(2)]
d1uniq, d2uniq = [element.dimension_values(i, False) for i in range(2)]
num_x, num_y = len(d1uniq), len(d2uniq)
xstep, ystep = 1.0/num_x, 1.0/num_y
xpos = np.linspace(xstep/2., 1.0-xstep/2., num_x)
Expand All @@ -127,7 +127,7 @@ def _annotate_values(self, element):

def _compute_ticks(self, element, ranges):
xdim, ydim = element.kdims
dim1_keys, dim2_keys = [element.dimension_values(i, True)
dim1_keys, dim2_keys = [element.dimension_values(i, False)
for i in range(2)]
num_x, num_y = len(dim1_keys), len(dim2_keys)
x0, y0, x1, y1 = element.extents
Expand Down
80 changes: 78 additions & 2 deletions tests/testcolumns.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ def test_columns_dataframe_init_ht(self):

# Test literal formats

def test_columns_uniq_dimvals_ht(self):
self.assertEqual(self.table.dimension_values('Gender', unique=True),
def test_columns_expanded_dimvals_ht(self):
self.assertEqual(self.table.dimension_values('Gender', expanded=False),
np.array(['M', 'F']))

def test_columns_implicit_indexing_init(self):
Expand Down Expand Up @@ -399,3 +399,79 @@ def test_columns_double_zip_init(self):
kdims=self.kdims, vdims=self.vdims)
self.assertTrue(isinstance(columns.data, NdElement))


class GridColumnsTest(HomogeneousColumnTypes, ComparisonTestCase):
    """
    Test of the Columns interface with the available datatypes
    restricted to 'grid' (see setUp).
    """

    def setUp(self):
        # Remember the current datatype list before overriding it.
        # NOTE(review): no tearDown restoring it is visible in this
        # class - confirm that restore_datatype is applied elsewhere.
        self.restore_datatype = Columns.datatype
        Columns.datatype = ['grid']
        self.data_instance_type = dict
        self.init_data()

    def init_data(self):
        # Eleven samples along x with two derived value columns
        self.xs = range(11)
        self.xs_2 = [el**2 for el in self.xs]

        self.y_ints = [i*2 for i in range(11)]
        self.columns_hm = Columns((self.xs, self.y_ints),
                                  kdims=['x'], vdims=['y'])

    def test_columns_array_init_hm(self):
        "Tests support for arrays (homogeneous)"
        # With only the 'grid' datatype enabled, a column-stacked array
        # is expected to be rejected
        exception = "None of the available storage backends "\
            "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Columns(np.column_stack([self.xs, self.xs_2]),
                    kdims=['x'], vdims=['x2'])

    def test_columns_dataframe_init_hm(self):
        "Tests support for homogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        # DataFrame input is likewise expected to be rejected
        exception = "None of the available storage backends "\
            "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Columns(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}),
                    kdims=['x'], vdims=['x2'])

    def test_columns_ndelement_init_hm(self):
        "Tests support for homogeneous NdElement (backwards compatibility)"
        # NdElement input is likewise expected to be rejected
        exception = "None of the available storage backends "\
            "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Columns(NdElement(zip(self.xs, self.xs_2),
                              kdims=['x'], vdims=['x2']))

    def test_columns_2D_aggregate_partial_hm(self):
        # Aggregating over 'x' with np.mean should match the mean of
        # the 2D value array along axis 1
        array = np.random.rand(11, 11)
        columns = Columns({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        self.assertEqual(columns.aggregate(['x'], np.mean),
                         Columns({'x':self.xs, 'z': np.mean(array, axis=1)},
                                 kdims=['x'], vdims=['z']))

    def test_columns_2D_reduce_hm(self):
        # Reducing both key dimensions should match the overall mean
        array = np.random.rand(11, 11)
        columns = Columns({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        self.assertEqual(np.array(columns.reduce(['x', 'y'], np.mean)),
                         np.mean(array))

    def test_columns_add_dimensions_value_hm(self):
        # Adding a key dimension is expected to raise for this datatype
        with self.assertRaisesRegexp(Exception, 'Cannot add key dimension to a dense representation.'):
            self.columns_hm.add_dimension('z', 1, 0)

    def test_columns_add_dimensions_values_hm(self):
        # Adding a value dimension (vdim=True) with explicit values
        table = self.columns_hm.add_dimension('z', 1, range(1,12), vdim=True)
        self.assertEqual(table.vdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12))))

    def test_columns_sort_vdim_hm(self):
        # Sorting is expected to raise for this datatype
        exception = ('Compressed format cannot be sorted, either instantiate '
                     'in the desired order or use the expanded format.')
        with self.assertRaisesRegexp(Exception, exception):
            self.columns_hm.sort('y')

0 comments on commit d4779cb

Please sign in to comment.