Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding a general interface for N-dimensional gridded data #542

Merged
merged 36 commits into from
Mar 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
6236444
Added validation method to DictColumns interface
philippjfr Mar 4, 2016
f00c306
Fix for scalar columns in DictColumns
philippjfr Mar 4, 2016
9589707
Added initial dense Columns interface
philippjfr Mar 7, 2016
0645535
Consistently added expanded keyword to dimension_values method
philippjfr Mar 8, 2016
ead988f
Renamed NdArrayColumns to GridColumns
philippjfr Mar 8, 2016
f26d95f
Cleanup and minor fixes in core.data module
philippjfr Mar 8, 2016
03c1e22
Added validation to detect dense formats in existing interfaces
philippjfr Mar 8, 2016
81bb78f
Added default arguments to interface values method
philippjfr Mar 8, 2016
c5f4f61
Updated Column interface unit test
philippjfr Mar 8, 2016
3b5cada
Reverted change to DictColumns interface
philippjfr Mar 8, 2016
631e91c
Small fixes for NdElement interface
philippjfr Mar 8, 2016
49e729a
Changed GridColumns format to expand vdims
philippjfr Mar 9, 2016
7d5dc26
Improved GridColumns validation
philippjfr Mar 9, 2016
232b372
Fixed scalar return values from GridColumn slicing
philippjfr Mar 10, 2016
d6e3440
Ensured GridColumns aggregate returns at least 1D array
philippjfr Mar 10, 2016
e30bc53
Implemented GridColumns add_dimension and sort methods
philippjfr Mar 10, 2016
46c5e8a
Added unit tests for GridColumns interface
philippjfr Mar 10, 2016
034e659
Added missing import in core.util
philippjfr Mar 10, 2016
84a3667
Renamed Columns interface reshape method to init
philippjfr Mar 14, 2016
5780d1d
Removed stray GridColumns.add_dimension method
philippjfr Mar 14, 2016
4d0e547
Renamed check_dense to expanded_format and improved validation
philippjfr Mar 14, 2016
a38409b
Renamed GridColumns coord_mask to key_select_mask
philippjfr Mar 14, 2016
487e667
Added comment for Image dimension_values method
philippjfr Mar 14, 2016
5468271
Enforced samples have uniform length on GridColumns
philippjfr Mar 14, 2016
626dd07
Allowed returning non-flat key dimensions from gridded Elements
philippjfr Mar 14, 2016
b664343
Allowed dropping constant dimensions via GridColumns.reindex
philippjfr Mar 14, 2016
fb8a2f2
Improved error message on GridColumns.sort
philippjfr Mar 14, 2016
0af0d1e
Disabled support for expanding vdims in GridColumns
philippjfr Mar 14, 2016
4ff50a4
Small fixes for NdColumns and DFColumns constructors
philippjfr Mar 14, 2016
675dd4c
Updated GridColumns unit test
philippjfr Mar 14, 2016
b853d12
Updated GridColumns value slicing exception
philippjfr Mar 14, 2016
0ffcbf2
Fixed ArrayColumns init bug
philippjfr Mar 14, 2016
c509a07
Fixed inverted Image.dimension_values
philippjfr Mar 14, 2016
2a640c6
Renamed expanded_format method to expanded
jlstevens Mar 14, 2016
c109675
Updated the class docstring for GridColumns
jlstevens Mar 14, 2016
1b8d27a
Added 'grid' interface to default datatype list
jlstevens Mar 14, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
382 changes: 351 additions & 31 deletions holoviews/core/data.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion holoviews/core/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ def select(self, selection_specs=None, **kwargs):
return selection


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
"""
Returns the values along the specified dimension. This method
must be implemented for all Dimensioned type.
Expand Down
7 changes: 4 additions & 3 deletions holoviews/core/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,15 +482,16 @@ def aggregate(self, dimensions, function, **kwargs):
return self.clone(rows, kdims=grouped.kdims)


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
dim = self.get_dimension(dim, strict=True)
value_dims = self.dimensions('value', label=True)
if dim.name in value_dims:
index = value_dims.index(dim.name)
vals = np.array([v[index] for v in self.data.values()])
return unique_array(vals) if unique else vals
return vals if expanded else unique_array(vals)
else:
return NdMapping.dimension_values(self, dim.name, unique)
return NdMapping.dimension_values(self, dim.name,
expanded, flat)


def values(self):
Expand Down
11 changes: 6 additions & 5 deletions holoviews/core/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ def get(self, key, default=None):
return self.data[key] if key in self.data else default


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
dimension = self.get_dimension(dimension, strict=True).name
return self.main.dimension_values(dimension, unique)
return self.main.dimension_values(dimension, expanded, flat)


def __getitem__(self, key):
Expand Down Expand Up @@ -433,17 +433,18 @@ def clone(self, *args, **overrides):
return clone


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
"Returns the values along the specified dimension."
dimension = self.get_dimension(dimension, strict=True).name
all_dims = self.traverse(lambda x: [d.name for d in x.dimensions()])
if dimension in chain.from_iterable(all_dims):
values = [el.dimension_values(dimension) for el in self
if dimension in el.dimensions(label=True)]
vals = np.concatenate(values)
return unique_array(vals) if unique else vals
return vals if expanded else unique_array(vals)
else:
return super(Layout, self).dimension_values(dimension, unique)
return super(Layout, self).dimension_values(dimension,
expanded, flat)


def cols(self, ncols):
Expand Down
6 changes: 3 additions & 3 deletions holoviews/core/ndmapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def drop_dimension(self, dimensions):
kdims=dims)


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
"Returns the values along the specified dimension."
dimension = self.get_dimension(dimension, strict=True).name
if dimension in self.kdims:
Expand All @@ -352,9 +352,9 @@ def dimension_values(self, dimension, unique=False):
values = [el.dimension_values(dimension) for el in self
if dimension in el.dimensions()]
vals = np.concatenate(values)
return util.unique_array(vals) if unique else vals
return vals if expanded else util.unique_array(vals)
else:
return super(MultiDimensionalMapping, self).dimension_values(dimension, unique)
return super(MultiDimensionalMapping, self).dimension_values(dimension, expanded, flat)


def reindex(self, kdims=[], force=False):
Expand Down
6 changes: 3 additions & 3 deletions holoviews/core/overlay.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,20 @@ def hist(self, index=0, adjoin=True, dimension=None, **kwargs):
return layout


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
values = []
found = False
for el in self:
if dimension in el.dimensions(label=True):
values.append(el.dimension_values(dimension))
found = True
if not found:
return super(CompositeOverlay, self).dimension_values(dimension, unique)
return super(CompositeOverlay, self).dimension_values(dimension, expanded, flat)
values = [v for v in values if v is not None and len(v)]
if not values:
return np.array()
vals = np.concatenate(values)
return unique_array(vals) if unique else vals
return vals if expanded else unique_array(vals)


class Overlay(Layout, CompositeOverlay):
Expand Down
14 changes: 14 additions & 0 deletions holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import string, fnmatch
import unicodedata
from collections import defaultdict
from functools import reduce

import numpy as np
import param
Expand Down Expand Up @@ -861,3 +862,16 @@ def groupby_python(self_or_cls, ndmapping, dimensions, container_type,
return container_type(groups, kdims=dimensions)


def cartesian_product(arrays):
    """
    Computes the cartesian product of a list of arrays.

    Each input is treated as a 1D sequence of coordinates along its
    own axis. The result is a 2D array with one column per input
    array and one row per combination of values, with the last input
    varying fastest.

    The output dtype is the common (promoted) dtype of all inputs, so
    mixing e.g. integer and float arrays does not truncate the float
    values. If NumPy cannot find a common dtype, the dtype of the
    first array is used.
    """
    broadcastable = np.ix_(*arrays)
    broadcasted = np.broadcast_arrays(*broadcastable)
    rows, cols = broadcasted[0].size, len(broadcasted)
    try:
        dtype = np.result_type(*[a.dtype for a in broadcasted])
    except TypeError:
        # No common dtype available; keep the first array's dtype
        dtype = broadcasted[0].dtype
    # Fill one flat buffer column by column, then reshape so that
    # each row holds one combination of the input values
    out = np.empty(rows * cols, dtype=dtype)
    start, end = 0, rows
    for a in broadcasted:
        out[start:end] = a.reshape(-1)
        start, end = end, end + rows
    return out.reshape(cols, rows).T
50 changes: 27 additions & 23 deletions holoviews/element/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def sample(self, samples=[], **sample_values):
sample[sample_ind] = self._coord2matrix(coord_fn(sample_coord))[abs(sample_ind-1)]

# Sample data
x_vals = self.dimension_values(other_dimension[0].name, unique=True)
x_vals = self.dimension_values(other_dimension[0].name, False)
ydata = self._zdata[sample[::-1]]
if hasattr(self, 'bounds') and sample_ind == 0: ydata = ydata[::-1]
data = list(zip(x_vals, ydata))
Expand All @@ -154,7 +154,7 @@ def reduce(self, dimensions=None, function=None, **reduce_map):
dimension = dims[0]
other_dimension = [d for d in self.kdims if d.name != dimension]
oidx = self.get_dimension_index(other_dimension[0])
x_vals = self.dimension_values(other_dimension[0].name, unique=True)
x_vals = self.dimension_values(other_dimension[0].name, False)
reduced = function(self._zdata, axis=oidx)
data = zip(x_vals, reduced if not oidx else reduced[::-1])
params = dict(dict(self.get_param_values(onlychanged=True)),
Expand All @@ -164,18 +164,18 @@ def reduce(self, dimensions=None, function=None, **reduce_map):
return Table(data, **params)


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
"""
The set of samples available along a particular dimension.
"""
dim_idx = self.get_dimension_index(dim)
if unique and dim_idx == 0:
if not expanded and dim_idx == 0:
return np.array(range(self.data.shape[1]))
elif unique and dim_idx == 1:
elif not expanded and dim_idx == 1:
return np.array(range(self.data.shape[0]))
elif dim_idx in [0, 1]:
D1, D2 = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]]
return D1.flatten() if dim_idx == 0 else D2.flatten()
values = np.mgrid[0:self.data.shape[1], 0:self.data.shape[0]][dim_idx]
return values.flatten() if flat else values
elif dim_idx == 2:
return toarray(self.data.T).flatten()
else:
Expand Down Expand Up @@ -338,20 +338,20 @@ def range(self, dimension):
super(QuadMesh, self).range(dimension)


def dimension_values(self, dimension, unique=False):
def dimension_values(self, dimension, expanded=True, flat=True):
idx = self.get_dimension_index(dimension)
data = self.data[idx]
if idx in [0, 1]:
if not self._grid:
return data.flatten()
odim = 1 if unique else self.data[2].shape[idx]
odim = self.data[2].shape[idx] if expanded else 1
vals = np.tile(np.convolve(data, np.ones((2,))/2, mode='valid'), odim)
if idx:
return np.sort(vals)
else:
return vals
elif idx == 2:
return data.flatten()
return data.flatten() if flat else data
else:
return super(QuadMesh, self).dimension_values(idx)

Expand Down Expand Up @@ -388,8 +388,8 @@ def __init__(self, data, extents=None, **params):


def _compute_raster(self):
d1keys = self.dimension_values(0, True)
d2keys = self.dimension_values(1, True)
d1keys = self.dimension_values(0, False)
d2keys = self.dimension_values(1, False)
coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys]
dtype = 'dataframe' if pd else 'dictionary'
dense_data = Columns(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype])
Expand Down Expand Up @@ -438,8 +438,8 @@ def __setstate__(self, state):
super(HeatMap, self).__setstate__(state)

def dense_keys(self):
d1keys = self.dimension_values(0, True)
d2keys = self.dimension_values(1, True)
d1keys = self.dimension_values(0, False)
d2keys = self.dimension_values(1, False)
return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys]))


Expand Down Expand Up @@ -603,7 +603,7 @@ def _coord2matrix(self, coord):
return self.sheet2matrixidx(*coord)


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
"""
The set of samples available along a particular dimension.
"""
Expand All @@ -615,13 +615,16 @@ def dimension_values(self, dim, unique=False):
d2_half_unit = (t - b)/dim2/2.
d1lin = np.linspace(l+d1_half_unit, r-d1_half_unit, dim1)
d2lin = np.linspace(b+d2_half_unit, t-d2_half_unit, dim2)
if unique:
return d2lin if dim_idx else d1lin
if expanded:
values = np.meshgrid(d2lin, d1lin)[abs(dim_idx-1)]
return values.flatten() if flat else values
else:
Y, X = np.meshgrid(d2lin, d1lin)
return Y.flatten() if dim_idx else X.flatten()
return d2lin if dim_idx else d1lin
elif dim_idx == 2:
return np.flipud(self.data).T.flatten()
# Raster arrays are stored with different orientation
# than expanded column format, reorient before expanding
data = np.flipud(self.data).T
return data.flatten() if flat else data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The values are the flattened, transposed flipud of the data array? This may well be correct but we will need to know why this is necessary and document it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add some comments.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

else:
super(Image, self).dimension_values(dim)

Expand Down Expand Up @@ -703,14 +706,15 @@ def load_image(cls, filename, height=1, array=False, bounds=None, bare=False, **
return rgb


def dimension_values(self, dim, unique=False):
def dimension_values(self, dim, expanded=True, flat=True):
"""
The set of samples available along a particular dimension.
"""
dim_idx = self.get_dimension_index(dim)
if self.ndims <= dim_idx < len(self.dimensions()):
return np.flipud(self.data[:,:,dim_idx-self.ndims]).T.flatten()
return super(RGB, self).dimension_values(dim, unique=True)
data = np.flipud(self.data[:,:,dim_idx-self.ndims]).T
return data.flatten() if flat else data
return super(RGB, self).dimension_values(dim, expanded, flat)


def __init__(self, data, **params):
Expand Down
2 changes: 1 addition & 1 deletion holoviews/element/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def collapse_data(cls, data, function, **kwargs):
return OrderedDict(zip(data[0].keys(), function(groups, axis=-1, **kwargs)))


def dimension_values(self, dimension):
def dimension_values(self, dimension, expanded=True, flat=True):
dimension = self.get_dimension(dimension, strict=True).name
if dimension in self.dimensions('value', label=True):
return np.array([self.data.get(dimension, np.NaN)])
Expand Down
4 changes: 2 additions & 2 deletions holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ def _process(self, element, key=None):
data = [element.data]

elif isinstance(element, QuadMesh):
data = (element.dimension_values(0, True),
element.dimension_values(1, True),
data = (element.dimension_values(0, False),
element.dimension_values(1, False),
element.data[2])
contour_set = contour_fn(*data, extent=extent,
levels=self.p.levels)
Expand Down
4 changes: 2 additions & 2 deletions holoviews/plotting/mpl/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _annotate_plot(self, ax, annotations):
def _annotate_values(self, element):
val_dim = element.vdims[0]
vals = np.rot90(element.raster, 3).flatten()
d1uniq, d2uniq = [np.unique(element.dimension_values(i)) for i in range(2)]
d1uniq, d2uniq = [element.dimension_values(i, False) for i in range(2)]
num_x, num_y = len(d1uniq), len(d2uniq)
xstep, ystep = 1.0/num_x, 1.0/num_y
xpos = np.linspace(xstep/2., 1.0-xstep/2., num_x)
Expand All @@ -127,7 +127,7 @@ def _annotate_values(self, element):

def _compute_ticks(self, element, ranges):
xdim, ydim = element.kdims
dim1_keys, dim2_keys = [element.dimension_values(i, True)
dim1_keys, dim2_keys = [element.dimension_values(i, False)
for i in range(2)]
num_x, num_y = len(dim1_keys), len(dim2_keys)
x0, y0, x1, y1 = element.extents
Expand Down
80 changes: 78 additions & 2 deletions tests/testcolumns.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ def test_columns_dataframe_init_ht(self):

# Test literal formats

def test_columns_uniq_dimvals_ht(self):
self.assertEqual(self.table.dimension_values('Gender', unique=True),
def test_columns_expanded_dimvals_ht(self):
self.assertEqual(self.table.dimension_values('Gender', expanded=False),
np.array(['M', 'F']))

def test_columns_implicit_indexing_init(self):
Expand Down Expand Up @@ -399,3 +399,79 @@ def test_columns_double_zip_init(self):
kdims=self.kdims, vdims=self.vdims)
self.assertTrue(isinstance(columns.data, NdElement))


class GridColumnsTest(HomogeneousColumnTypes, ComparisonTestCase):
    """
    Tests for Columns when 'grid' is the only enabled datatype.
    """

    def setUp(self):
        # Keep the previous datatype list around before overriding it
        # (presumably restored by a tearDown outside this class - confirm)
        self.restore_datatype = Columns.datatype
        Columns.datatype = ['grid']
        self.data_instance_type = dict
        self.init_data()

    def init_data(self):
        self.xs = range(11)
        self.xs_2 = [x**2 for x in self.xs]

        self.y_ints = [n*2 for n in range(11)]
        self.columns_hm = Columns((self.xs, self.y_ints),
                                  kdims=['x'], vdims=['y'])

    def test_columns_array_init_hm(self):
        "A column-stacked array is rejected when only 'grid' is enabled"
        message = ("None of the available storage backends "
                   "were able to support the supplied data format.")
        stacked = np.column_stack([self.xs, self.xs_2])
        with self.assertRaisesRegexp(Exception, message):
            Columns(stacked, kdims=['x'], vdims=['x2'])

    def test_columns_dataframe_init_hm(self):
        "A homogeneous DataFrame is rejected when only 'grid' is enabled"
        if pd is None:
            raise SkipTest("Pandas not available")
        message = ("None of the available storage backends "
                   "were able to support the supplied data format.")
        with self.assertRaisesRegexp(Exception, message):
            Columns(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}),
                    kdims=['x'], vdims=['x2'])

    def test_columns_ndelement_init_hm(self):
        "A homogeneous NdElement is rejected when only 'grid' is enabled"
        message = ("None of the available storage backends "
                   "were able to support the supplied data format.")
        with self.assertRaisesRegexp(Exception, message):
            Columns(NdElement(zip(self.xs, self.xs_2),
                              kdims=['x'], vdims=['x2']))

    def test_columns_2D_aggregate_partial_hm(self):
        "Aggregating over 'x' matches the mean of the array along axis 1"
        z_values = np.random.rand(11, 11)
        gridded = Columns({'x':self.xs, 'y':self.y_ints, 'z': z_values},
                          kdims=['x', 'y'], vdims=['z'])
        aggregated = gridded.aggregate(['x'], np.mean)
        expected = Columns({'x':self.xs, 'z': np.mean(z_values, axis=1)},
                           kdims=['x'], vdims=['z'])
        self.assertEqual(aggregated, expected)

    def test_columns_2D_reduce_hm(self):
        "Reducing both key dimensions matches the mean of the whole array"
        z_values = np.random.rand(11, 11)
        gridded = Columns({'x':self.xs, 'y':self.y_ints, 'z': z_values},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = gridded.reduce(['x', 'y'], np.mean)
        self.assertEqual(np.array(reduced), np.mean(z_values))

    def test_columns_add_dimensions_value_hm(self):
        "Adding a key dimension raises an exception"
        message = 'Cannot add key dimension to a dense representation.'
        with self.assertRaisesRegexp(Exception, message):
            self.columns_hm.add_dimension('z', 1, 0)

    def test_columns_add_dimensions_values_hm(self):
        "Adding a value dimension appends it and exposes its values"
        extended = self.columns_hm.add_dimension('z', 1, range(1,12), vdim=True)
        self.assertEqual(extended.vdims[1], 'z')
        expected = np.array(list(range(1,12)))
        self.compare_arrays(extended.dimension_values('z'), expected)

    def test_columns_sort_vdim_hm(self):
        "Sorting raises an exception"
        message = ('Compressed format cannot be sorted, either instantiate '
                   'in the desired order or use the expanded format.')
        with self.assertRaisesRegexp(Exception, message):
            self.columns_hm.sort('y')