From 3b9b4ea06f62ec891e1a3e2be7badb998efed204 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 14 May 2017 20:57:08 +0100
Subject: [PATCH 01/20] Small fix for auto-indexing
---
holoviews/core/data/array.py | 11 ++++++-----
holoviews/core/data/dictionary.py | 2 +-
holoviews/core/data/pandas.py | 4 ++--
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py
index 69a55c2864..722abe399f 100644
--- a/holoviews/core/data/array.py
+++ b/holoviews/core/data/array.py
@@ -54,18 +54,19 @@ def init(cls, eltype, data, kdims, vdims):
except:
data = None
+ if kdims is None:
+ kdims = eltype.kdims
+ if vdims is None:
+ vdims = eltype.vdims
+
if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']:
raise ValueError("ArrayInterface interface could not handle input type.")
elif data.ndim == 1:
- if eltype._auto_indexable_1d:
+ if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1:
data = np.column_stack([np.arange(len(data)), data])
else:
data = np.atleast_2d(data).T
- if kdims is None:
- kdims = eltype.kdims
- if vdims is None:
- vdims = eltype.vdims
return data, {'kdims':kdims, 'vdims':vdims}, {}
@classmethod
diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py
index 7668f54898..812e88cc6b 100644
--- a/holoviews/core/data/dictionary.py
+++ b/holoviews/core/data/dictionary.py
@@ -49,7 +49,7 @@ def init(cls, eltype, data, kdims, vdims):
data = {d: data[d] for d in dimensions}
elif isinstance(data, np.ndarray):
if data.ndim == 1:
- if eltype._auto_indexable_1d:
+ if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1:
data = np.column_stack([np.arange(len(data)), data])
else:
data = np.atleast_2d(data).T
diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py
index cee7e6e2fe..0e9e0e96d8 100644
--- a/holoviews/core/data/pandas.py
+++ b/holoviews/core/data/pandas.py
@@ -61,8 +61,8 @@ def init(cls, eltype, data, kdims, vdims):
data = cyODict(((c, col) for c, col in zip(columns, column_data)))
elif isinstance(data, np.ndarray):
if data.ndim == 1:
- if eltype._auto_indexable_1d:
- data = (range(len(data)), data)
+ if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1:
+ data = (np.arange(len(data)), data)
else:
data = np.atleast_2d(data).T
else:
From c2bc41be3a1801dd44ca06df8a6e3a66a1f63bf4 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 14 May 2017 20:59:18 +0100
Subject: [PATCH 02/20] Added iloc tabular indexing interface
---
holoviews/core/data/__init__.py | 10 +-
holoviews/core/data/array.py | 19 +++
holoviews/core/data/dask.py | 26 +++-
holoviews/core/data/dictionary.py | 25 ++++
holoviews/core/data/grid.py | 23 ++++
holoviews/core/data/interface.py | 47 ++++++++
holoviews/core/data/ndelement.py | 27 ++++-
holoviews/core/data/pandas.py | 21 ++++
tests/testdataset.py | 194 ++++++++++++++++++++++++++++--
9 files changed, 380 insertions(+), 12 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index b7baec5abe..90dd463a6e 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -9,7 +9,7 @@
import param
from ..dimension import redim
-from .interface import Interface
+from .interface import Interface, TabularIndex
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface
@@ -624,6 +624,14 @@ def to(self):
return self._conversion_interface(self)
+ @property
+ def iloc(self):
+ """
+ Returns a TabularIndex, providing a convenient interface to
+ slice and index into the Dataset using row and column indices.
+ """
+ return TabularIndex(self)
+
# Aliases for pickle backward compatibility
Columns = Dataset
diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py
index 722abe399f..073258a246 100644
--- a/holoviews/core/data/array.py
+++ b/holoviews/core/data/array.py
@@ -233,4 +233,23 @@ def aggregate(cls, dataset, dimensions, function, **kwargs):
return np.atleast_2d(rows)
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Index the dataset's array by integer position.
+
+        ``index`` is a ``(rows, cols)`` pair. Columns given as a
+        scalar or an iterable are resolved through
+        ``dataset.get_dimension_index``; a slice is used as-is. A
+        scalar row combined with a scalar column returns the single
+        array entry, any other combination returns the selected
+        sub-array (promoted to a column if it is one-dimensional).
+        """
+        rows, cols = index
+        scalar_rows = np.isscalar(rows)
+        if np.isscalar(cols):
+            if isinstance(cols, util.basestring):
+                cols = dataset.get_dimension_index(cols)
+            if scalar_rows:
+                return dataset.data[rows, cols]
+            cols = [dataset.get_dimension_index(cols)]
+        elif not isinstance(cols, slice):
+            cols = [dataset.get_dimension_index(d) for d in cols]
+
+        # Wrap scalar rows so that the row axis survives the indexing
+        if scalar_rows:
+            rows = [rows]
+        selection = dataset.data[rows, :][:, cols]
+        return np.atleast_2d(selection).T if selection.ndim == 1 else selection
+
Interface.register(ArrayInterface)
diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py
index 88e4ca21a9..984dbd8efc 100644
--- a/holoviews/core/data/dask.py
+++ b/holoviews/core/data/dask.py
@@ -12,7 +12,7 @@
from .. import util
from ..element import Element
-from ..ndmapping import NdMapping, item_check
+from ..ndmapping import NdMapping, item_check, OrderedDict
from .interface import Interface
from .pandas import PandasInterface
@@ -241,6 +241,30 @@ def dframe(cls, columns, dimensions):
def nonzero(cls, dataset):
return True
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Dask does not support iloc, therefore iloc will execute
+        the call graph and lose the laziness of the operation.
+
+        ``index`` is a ``(rows, cols)`` pair. A scalar row with a
+        scalar column returns a single value, anything else a tuple
+        with one array of selected rows per selected column.
+        """
+        rows, cols = index
+        if isinstance(cols, slice):
+            names = [d.name for d in dataset.dimensions()][cols]
+        elif np.isscalar(cols):
+            names = [dataset.get_dimension(cols).name]
+        else:
+            names = [dataset.get_dimension(d).name for d in cols]
+
+        # A single value is only returned for a scalar row and column
+        scalar = np.isscalar(cols) and np.isscalar(rows)
+        if np.isscalar(rows):
+            rows = [rows]
+
+        data = OrderedDict(
+            (name, dataset.data[name].compute().iloc[rows].values)
+            for name in names)
+        if scalar:
+            return data[names[0]][0]
+        return tuple(data.values())
Interface.register(DaskInterface)
diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py
index 812e88cc6b..e912aa3a9b 100644
--- a/holoviews/core/data/dictionary.py
+++ b/holoviews/core/data/dictionary.py
@@ -261,4 +261,29 @@ def aggregate(cls, dataset, kdims, function, **kwargs):
return aggregated
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Select rows and columns of the dictionary data by position.
+
+        ``index`` is a ``(rows, cols)`` pair; the columns are resolved
+        to dimensions of the dataset. A scalar row with a scalar
+        column returns a single value, anything else an OrderedDict
+        holding the selected rows of every selected column.
+        """
+        rows, cols = index
+        if isinstance(cols, slice):
+            dims = dataset.dimensions()[cols]
+        elif np.isscalar(cols):
+            dims = [dataset.get_dimension(cols, strict=True)]
+        else:
+            dims = [dataset.get_dimension(d, strict=True) for d in cols]
+
+        # A single value is only returned for a scalar row and column
+        scalar = np.isscalar(cols) and np.isscalar(rows)
+        if np.isscalar(rows):
+            rows = [rows]
+
+        new_data = OrderedDict((d, values[rows])
+                               for d, values in dataset.data.items()
+                               if d in dims)
+        if scalar:
+            return new_data[dims[0].name][0]
+        return new_data
+
+
Interface.register(DictInterface)
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
index 45fecb8d5b..1c42d511c0 100644
--- a/holoviews/core/data/grid.py
+++ b/holoviews/core/data/grid.py
@@ -391,5 +391,28 @@ def sort(cls, dataset, by=[]):
raise Exception('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Select rows and columns of the gridded data by position.
+
+        ``index`` is a ``(rows, cols)`` pair; the columns are resolved
+        to dimensions of the dataset and the rows are applied to the
+        result of ``dataset.dimension_values`` for each of them. A
+        scalar row with a scalar column returns a single value,
+        anything else a tuple with one entry per selected column.
+        """
+        rows, cols = index
+        if isinstance(cols, slice):
+            dims = dataset.dimensions()[cols]
+        elif np.isscalar(cols):
+            dims = [dataset.get_dimension(cols, strict=True)]
+        else:
+            dims = [dataset.get_dimension(d, strict=True) for d in cols]
+
+        # A single value is only returned for a scalar row and column
+        scalar = np.isscalar(cols) and np.isscalar(rows)
+        if np.isscalar(rows):
+            rows = [rows]
+
+        new_data = [dataset.dimension_values(d)[rows] for d in dims]
+        if scalar:
+            return new_data[0][0]
+        return tuple(new_data)
+
Interface.register(GridInterface)
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
index 6b7d528da3..f25fbea38a 100644
--- a/holoviews/core/data/interface.py
+++ b/holoviews/core/data/interface.py
@@ -6,6 +6,53 @@
from .. import util
+class TabularIndex(object):
+ """
+ TabularIndex is small wrapper object that allows row, column
+ based indexing into a Dataset using the ``iloc`` property.
+ It supports the usual numpy and pandas iloc indexing semantics
+ including integer indices, slices, lists and arrays of values.
+ """
+
+ def __init__(self, dataset):
+ self.dataset = dataset
+
+    def __getitem__(self, index):
+        """
+        Index the wrapped Dataset by ``(rows, cols)`` position.
+
+        A single index selects rows across all columns and an
+        Ellipsis in the row position selects every row. Scalar
+        results from the interface are returned as-is; anything else
+        is wrapped in a clone of the Dataset restricted to the
+        selected dimensions and to non-gridded datatypes.
+
+        Raises an IndexError if more than two indices are supplied.
+        """
+        index = util.wrap_tuple(index)
+        if len(index) == 1:
+            index = (index[0], slice(None))
+        elif len(index) > 2:
+            raise IndexError('Tabular index not understood, index '
+                             'must be at most length 2.')
+
+        rows, cols = index
+        if rows is Ellipsis:
+            rows = slice(None)
+        data = self.dataset.interface.iloc(self.dataset, (rows, cols))
+        kdims = self.dataset.kdims
+        vdims = self.dataset.vdims
+        if np.isscalar(data):
+            return data
+
+        # Check the type before comparing: an array of column indices
+        # compared against slice(None) yields an element-wise result
+        # that cannot be used as a condition.
+        all_cols = isinstance(cols, slice) and cols == slice(None)
+        if not all_cols:
+            if isinstance(cols, slice):
+                dims = self.dataset.dimensions()[cols]
+            elif np.isscalar(cols):
+                dims = [self.dataset.get_dimension(cols)]
+            else:
+                dims = [self.dataset.get_dimension(d) for d in cols]
+            # Keep only the selected dimensions, preserving their role
+            kdims = [d for d in dims if d in kdims]
+            vdims = [d for d in dims if d in vdims]
+
+        datatype = [dt for dt in self.dataset.datatype
+                    if dt in Interface.interfaces and
+                    not Interface.interfaces[dt].gridded]
+        if not datatype: datatype = ['dataframe', 'dictionary']
+        return self.dataset.clone(data, kdims=kdims, vdims=vdims,
+                                  datatype=datatype)
+
+
class Interface(param.Parameterized):
interfaces = {}
diff --git a/holoviews/core/data/ndelement.py b/holoviews/core/data/ndelement.py
index 066b0dba34..dcc6e03bf4 100644
--- a/holoviews/core/data/ndelement.py
+++ b/holoviews/core/data/ndelement.py
@@ -8,7 +8,7 @@
from .interface import Interface
from ..dimension import Dimension, Dimensioned
from ..element import NdElement
-from ..ndmapping import item_check
+from ..ndmapping import item_check, OrderedDict
from .. import util
@@ -141,5 +141,30 @@ def unpack_scalar(cls, columns, data):
else:
return data
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Select rows and columns of the NdElement data by position.
+
+        The data is taken from ``dataset.columns()`` and ``index`` is
+        a ``(rows, cols)`` pair whose columns are resolved to
+        dimensions of the dataset. A scalar row with a scalar column
+        returns a single value, anything else an OrderedDict holding
+        the selected rows of every selected column.
+        """
+        columns = dataset.columns()
+        rows, cols = index
+        if isinstance(cols, slice):
+            dims = dataset.dimensions()[cols]
+        elif np.isscalar(cols):
+            dims = [dataset.get_dimension(cols, strict=True)]
+        else:
+            dims = [dataset.get_dimension(d, strict=True) for d in cols]
+
+        # A single value is only returned for a scalar row and column
+        scalar = np.isscalar(cols) and np.isscalar(rows)
+        if np.isscalar(rows):
+            rows = [rows]
+
+        new_data = OrderedDict((d, values[rows])
+                               for d, values in columns.items()
+                               if d in dims)
+        if scalar:
+            return new_data[dims[0].name][0]
+        return new_data
+
Interface.register(NdElementInterface)
diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py
index 0e9e0e96d8..c49d6ac01a 100644
--- a/holoviews/core/data/pandas.py
+++ b/holoviews/core/data/pandas.py
@@ -252,4 +252,25 @@ def dframe(cls, columns, dimensions):
return columns.data.copy()
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Select rows and columns of the DataFrame by position.
+
+        ``index`` is a ``(rows, cols)`` pair. Columns are resolved to
+        dimension names and then to their position among the
+        DataFrame columns. A scalar row with a scalar column returns
+        a single value, anything else the result of ``DataFrame.iloc``
+        for the selected rows and columns.
+        """
+        rows, cols = index
+        columns = list(dataset.data.columns)
+        if isinstance(cols, slice):
+            names = [d.name for d in dataset.dimensions()][cols]
+        elif np.isscalar(cols):
+            names = [dataset.get_dimension(cols).name]
+        else:
+            names = [dataset.get_dimension(d).name for d in cols]
+        positions = [columns.index(name) for name in names]
+
+        if np.isscalar(cols) and np.isscalar(rows):
+            return dataset.data.iloc[rows, positions[0]]
+        # Wrap a scalar row so that a DataFrame is returned
+        if np.isscalar(rows):
+            rows = [rows]
+        return dataset.data.iloc[rows, positions]
+
+
Interface.register(PandasInterface)
diff --git a/tests/testdataset.py b/tests/testdataset.py
index 87bb4ee86b..99835b14eb 100644
--- a/tests/testdataset.py
+++ b/tests/testdataset.py
@@ -59,10 +59,10 @@ def setUp(self):
self.data_instance_type = None
def init_column_data(self):
- self.xs = range(11)
- self.xs_2 = [el**2 for el in self.xs]
+ self.xs = np.array(range(11))
+ self.xs_2 = self.xs**2
- self.y_ints = [i*2 for i in range(11)]
+ self.y_ints = self.xs*2
self.dataset_hm = Dataset((self.xs, self.y_ints),
kdims=['x'], vdims=['y'])
self.dataset_hm_alias = Dataset((self.xs, self.y_ints),
@@ -248,6 +248,89 @@ def test_dataset_array_ht(self):
self.assertEqual(self.dataset_hm.array(),
np.column_stack([self.xs, self.y_ints]))
+ # Tabular indexing
+
+ def test_dataset_iloc_slice_rows(self):
+ sliced = self.dataset_hm.iloc[1:4]
+ table = Dataset({'x': self.xs[1:4], 'y': self.y_ints[1:4]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_slice_rows_slice_cols(self):
+ sliced = self.dataset_hm.iloc[1:4, 1:]
+ table = Dataset({'y': self.y_ints[1:4]}, kdims=[], vdims=['y'],
+ datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_slice_rows_list_cols(self):
+ sliced = self.dataset_hm.iloc[1:4, [0, 1]]
+ table = Dataset({'x': self.xs[1:4], 'y': self.y_ints[1:4]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_slice_rows_index_cols(self):
+ sliced = self.dataset_hm.iloc[1:4, 1]
+ table = Dataset({'y': self.y_ints[1:4]}, kdims=[], vdims=['y'],
+ datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows(self):
+ sliced = self.dataset_hm.iloc[[0, 2]]
+ table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows_list_cols(self):
+ sliced = self.dataset_hm.iloc[[0, 2], [0, 1]]
+ table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows_list_cols_by_name(self):
+ sliced = self.dataset_hm.iloc[[0, 2], ['x', 'y']]
+ table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows_slice_cols(self):
+ sliced = self.dataset_hm.iloc[[0, 2], slice(0, 2)]
+ table = Dataset({'x': self.xs[[0, 2]], 'y': self.y_ints[[0, 2]]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_index_rows_index_cols(self):
+ indexed = self.dataset_hm.iloc[1, 1]
+ self.assertEqual(indexed, self.y_ints[1])
+
+ def test_dataset_iloc_index_rows_slice_cols(self):
+ indexed = self.dataset_hm.iloc[1, :2]
+ table = Dataset({'x':self.xs[[1]], 'y':self.y_ints[[1]]},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(indexed, table)
+
+ def test_dataset_iloc_list_cols(self):
+ sliced = self.dataset_hm.iloc[:, [0, 1]]
+ table = Dataset({'x':self.xs, 'y':self.y_ints},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_cols_by_name(self):
+ sliced = self.dataset_hm.iloc[:, ['x', 'y']]
+ table = Dataset({'x':self.xs, 'y':self.y_ints},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_ellipsis_list_cols(self):
+ sliced = self.dataset_hm.iloc[..., [0, 1]]
+ table = Dataset({'x':self.xs, 'y':self.y_ints},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_ellipsis_list_cols_by_name(self):
+ sliced = self.dataset_hm.iloc[..., ['x', 'y']]
+ table = Dataset({'x':self.xs, 'y':self.y_ints},
+ kdims=['x'], vdims=['y'], datatype=['dictionary'])
+ self.assertEqual(sliced, table)
class HeterogeneousColumnTypes(HomogeneousColumnTypes):
@@ -258,8 +341,8 @@ class HeterogeneousColumnTypes(HomogeneousColumnTypes):
def init_column_data(self):
self.kdims = ['Gender', 'Age']
self.vdims = ['Weight', 'Height']
- self.gender, self.age = ['M','M','F'], [10,16,12]
- self.weight, self.height = [15,18,10], [0.8,0.6,0.8]
+ self.gender, self.age = np.array(['M','M','F']), np.array([10,16,12])
+ self.weight, self.height = np.array([15,18,10]), np.array([0.8,0.6,0.8])
self.table = Dataset({'Gender':self.gender, 'Age':self.age,
'Weight':self.weight, 'Height':self.height},
kdims=self.kdims, vdims=self.vdims)
@@ -551,6 +634,91 @@ def test_dataset_value_dim_scalar_index(self):
row = self.table['M', 10, 'Weight']
self.assertEquals(row, 15)
+ # Tabular indexing
+
+ def test_dataset_iloc_slice_rows(self):
+ sliced = self.table.iloc[1:2]
+ table = Dataset({'Gender':self.gender[1:2], 'Age':self.age[1:2],
+ 'Weight':self.weight[1:2], 'Height':self.height[1:2]},
+ kdims=self.kdims, vdims=self.vdims)
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_slice_rows_slice_cols(self):
+ sliced = self.table.iloc[1:2, 1:3]
+ table = Dataset({'Age':self.age[1:2], 'Weight':self.weight[1:2]},
+ kdims=self.kdims[1:], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_slice_rows_list_cols(self):
+ sliced = self.table.iloc[1:2, [1, 3]]
+ table = Dataset({'Age':self.age[1:2], 'Height':self.height[1:2]},
+ kdims=self.kdims[1:], vdims=self.vdims[1:])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_slice_rows_index_cols(self):
+ sliced = self.table.iloc[1:2, 2]
+ table = Dataset({'Weight':self.weight[1:2]}, kdims=[], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows(self):
+ sliced = self.table.iloc[[0, 2]]
+ table = Dataset({'Gender':self.gender[[0, 2]], 'Age':self.age[[0, 2]],
+ 'Weight':self.weight[[0, 2]], 'Height':self.height[[0, 2]]},
+ kdims=self.kdims, vdims=self.vdims)
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows_list_cols(self):
+ sliced = self.table.iloc[[0, 2], [0, 2]]
+ table = Dataset({'Gender':self.gender[[0, 2]], 'Weight':self.weight[[0, 2]]},
+ kdims=self.kdims[:1], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows_list_cols_by_name(self):
+ sliced = self.table.iloc[[0, 2], ['Gender', 'Weight']]
+ table = Dataset({'Gender':self.gender[[0, 2]], 'Weight':self.weight[[0, 2]]},
+ kdims=self.kdims[:1], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_rows_slice_cols(self):
+ sliced = self.table.iloc[[0, 2], slice(1, 3)]
+ table = Dataset({'Age':self.age[[0, 2]], 'Weight':self.weight[[0, 2]]},
+ kdims=self.kdims[1:], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_index_rows_index_cols(self):
+ indexed = self.table.iloc[1, 1]
+ self.assertEqual(indexed, self.age[1])
+
+ def test_dataset_iloc_index_rows_slice_cols(self):
+ indexed = self.table.iloc[1, 1:3]
+ table = Dataset({'Age':self.age[[1]], 'Weight':self.weight[[1]]},
+ kdims=self.kdims[1:], vdims=self.vdims[:1])
+ self.assertEqual(indexed, table)
+
+ def test_dataset_iloc_list_cols(self):
+ sliced = self.table.iloc[:, [0, 2]]
+ table = Dataset({'Gender':self.gender, 'Weight':self.weight},
+ kdims=self.kdims[:1], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_list_cols_by_name(self):
+ sliced = self.table.iloc[:, ['Gender', 'Weight']]
+ table = Dataset({'Gender':self.gender, 'Weight':self.weight},
+ kdims=self.kdims[:1], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_ellipsis_list_cols(self):
+ sliced = self.table.iloc[..., [0, 2]]
+ table = Dataset({'Gender':self.gender, 'Weight':self.weight},
+ kdims=self.kdims[:1], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
+ def test_dataset_iloc_ellipsis_list_cols_by_name(self):
+ sliced = self.table.iloc[..., ['Gender', 'Weight']]
+ table = Dataset({'Gender':self.gender, 'Weight':self.weight},
+ kdims=self.kdims[:1], vdims=self.vdims[:1])
+ self.assertEqual(sliced, table)
+
# Casting
def test_dataset_array_ht(self):
@@ -562,6 +730,9 @@ class ArrayDatasetTest(HomogeneousColumnTypes, ComparisonTestCase):
"""
Test of the ArrayDataset interface.
"""
+
+ datatype = 'array'
+
def setUp(self):
self.restore_datatype = Dataset.datatype
Dataset.datatype = ['array']
@@ -574,6 +745,8 @@ class DFDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase):
Test of the pandas DFDataset interface.
"""
+ datatype = 'dataframe'
+
def setUp(self):
if pd is None:
raise SkipTest("Pandas not available")
@@ -588,6 +761,8 @@ class DaskDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase):
Test of the pandas DaskDataset interface.
"""
+ datatype = 'dask'
+
def setUp(self):
if dd is None:
raise SkipTest("dask not available")
@@ -630,6 +805,8 @@ class DictDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase):
Test of the generic dictionary interface.
"""
+ datatype = 'dictionary'
+
def setUp(self):
self.restore_datatype = Dataset.datatype
Dataset.datatype = ['dictionary']
@@ -698,10 +875,9 @@ def setUp(self):
self.init_data()
def init_column_data(self):
- self.xs = range(11)
- self.xs_2 = [el**2 for el in self.xs]
-
- self.y_ints = [i*2 for i in range(11)]
+ self.xs = np.arange(11)
+ self.xs_2 = self.xs**2
+ self.y_ints = self.xs*2
self.dataset_hm = Dataset((self.xs, self.y_ints),
kdims=['x'], vdims=['y'])
self.dataset_hm_alias = Dataset((self.xs, self.y_ints),
From 48d7a06ce6eef17b85eae95777f42bbf83d3fb8e Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 12:21:31 +0100
Subject: [PATCH 03/20] Small docstring improvements
---
holoviews/core/data/__init__.py | 4 +++-
holoviews/core/data/interface.py | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index 90dd463a6e..f82c2e9a64 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -628,7 +628,9 @@ def to(self):
def iloc(self):
"""
Returns a TabularIndex, providing a convenient interface to
- slice and index into the Dataset using row and column indices.
+ slice and index into the Dataset using row and column indices,
+ allow selection by integer index, slice and list of integer
+ indices and boolean arrays.
"""
return TabularIndex(self)
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
index f25fbea38a..9a0d888a6c 100644
--- a/holoviews/core/data/interface.py
+++ b/holoviews/core/data/interface.py
@@ -9,7 +9,7 @@
class TabularIndex(object):
"""
TabularIndex is small wrapper object that allows row, column
- based indexing into a Dataset using the ``iloc`` property.
+ based indexing into a Dataset using the ``.iloc`` property.
It supports the usual numpy and pandas iloc indexing semantics
including integer indices, slices, lists and arrays of values.
"""
From ddff78553cac14c897ca5edcdb62b58a3c5f13ae Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 12:25:20 +0100
Subject: [PATCH 04/20] Updated Point selection example to use .iloc
---
examples/streams/bokeh/point_selection1D.ipynb | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/examples/streams/bokeh/point_selection1D.ipynb b/examples/streams/bokeh/point_selection1D.ipynb
index 33f092775f..1e1b84e332 100644
--- a/examples/streams/bokeh/point_selection1D.ipynb
+++ b/examples/streams/bokeh/point_selection1D.ipynb
@@ -42,12 +42,12 @@
"\n",
"# Write function that uses the selection indices to slice points and compute stats\n",
"def selected_info(index):\n",
- " arr = points.array()[index]\n",
+ " selected = points.iloc[index]\n",
" if index:\n",
- " label = 'Mean x, y: %.3f, %.3f' % tuple(arr.mean(axis=0))\n",
+ " label = 'Mean x, y: %.3f, %.3f' % tuple(selected.array().mean(axis=0))\n",
" else:\n",
" label = 'No selection'\n",
- " return points.clone(arr, label=label)(style=dict(color='red'))\n",
+ " return selected.relabel(label)(style=dict(color='red'))\n",
"\n",
"# Combine points and DynamicMap\n",
"points + hv.DynamicMap(selected_info, streams=[selection])"
From 0fcf16156b234cac40227a4f9d454a3b9121d2e6 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 12:32:02 +0100
Subject: [PATCH 05/20] Renamed TabularIndex object to iloc
---
holoviews/core/data/__init__.py | 6 +++---
holoviews/core/data/interface.py | 10 +++++-----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index f82c2e9a64..3f1a1bb718 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -9,7 +9,7 @@
import param
from ..dimension import redim
-from .interface import Interface, TabularIndex
+from .interface import Interface, iloc
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface
@@ -627,12 +627,12 @@ def to(self):
@property
def iloc(self):
"""
- Returns a TabularIndex, providing a convenient interface to
+ Returns an iloc object, providing a convenient interface to
slice and index into the Dataset using row and column indices,
allow selection by integer index, slice and list of integer
indices and boolean arrays.
"""
- return TabularIndex(self)
+ return iloc(self)
# Aliases for pickle backward compatibility
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
index 9a0d888a6c..645d96d48d 100644
--- a/holoviews/core/data/interface.py
+++ b/holoviews/core/data/interface.py
@@ -6,12 +6,12 @@
from .. import util
-class TabularIndex(object):
+class iloc(object):
"""
- TabularIndex is small wrapper object that allows row, column
- based indexing into a Dataset using the ``.iloc`` property.
- It supports the usual numpy and pandas iloc indexing semantics
- including integer indices, slices, lists and arrays of values.
+ iloc is a small wrapper object that allows row, column based
+ indexing into a Dataset using the ``.iloc`` property. It supports
+ the usual numpy and pandas iloc indexing semantics including
+ integer indices, slices, lists and arrays of values.
"""
def __init__(self, dataset):
From c93995c9767aee262090745c3a4e034c1d168e35 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 14:27:04 +0100
Subject: [PATCH 06/20] Added ndloc indexing interface
---
holoviews/core/data/__init__.py | 7 ++++-
holoviews/core/data/grid.py | 49 +++++++++++++++++++++++++++++++-
holoviews/core/data/image.py | 7 +++++
holoviews/core/data/interface.py | 21 ++++++++++++++
4 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index 3f1a1bb718..f5be2c39bc 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -9,7 +9,7 @@
import param
from ..dimension import redim
-from .interface import Interface, iloc
+from .interface import Interface, iloc, ndloc
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface
@@ -635,6 +635,11 @@ def iloc(self):
return iloc(self)
+ @property
+ def ndloc(self):
+ """
+ Returns an ndloc object wrapping this Dataset, which can be
+ indexed with square brackets.
+ """
+ return ndloc(self)
+
+
# Aliases for pickle backward compatibility
Columns = Dataset
ArrayColumns = ArrayInterface
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
index 1c42d511c0..a66949aaa8 100644
--- a/holoviews/core/data/grid.py
+++ b/holoviews/core/data/grid.py
@@ -1,4 +1,4 @@
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict, defaultdict
+try:
+    from collections.abc import Iterable
+except ImportError:  # Python 2 has no collections.abc
+    from collections import Iterable
try:
import itertools.izip as zip
@@ -167,6 +167,53 @@ def canonicalize(cls, dataset, data, coord_dims=None):
return data
+    @classmethod
+    def invert_index(cls, index, length):
+        """
+        Mirror a positional index along an axis of the given length.
+
+        Position ``i`` maps to ``length - 1 - i``, i.e. to the same
+        element once the axis has been reversed. Negative positions
+        count from the end, as in regular indexing.
+
+        Scalars return a scalar, iterables return a list of mirrored
+        positions in the order given and slices return a slice that
+        selects the mirrored positions in ascending order. Any other
+        index type yields None.
+        """
+        if np.isscalar(index):
+            if index < 0:
+                index += length
+            return length - 1 - index
+        elif isinstance(index, slice):
+            # slice.indices resolves open-ended (None) and negative
+            # bounds, which cannot be mirrored arithmetically.
+            start, stop, step = index.indices(length)
+            positions = range(start, stop, step)
+            if len(positions) == 0:
+                return slice(0, 0)
+            low, high = sorted((positions[0], positions[-1]))
+            return slice(length - 1 - high, length - low, abs(step))
+        elif isinstance(index, Iterable):
+            return [length - 1 - (ind + length if ind < 0 else ind)
+                    for ind in index]
+
+
+ @classmethod
+ def ndloc(cls, dataset, indices):
+ """
+ Index the gridded dataset by position. The supplied indices are
+ paired with the key dimensions in reverse order. Returns a single
+ value when every key dimension received a scalar index and there
+ is exactly one value dimension, otherwise a tuple with one entry
+ per dimension, ordered as dataset.dimensions().
+ """
+ selected = {}
+ adjusted_inds = []
+ all_scalar = True
+ # Pair each supplied index with a key dimension, last kdim first
+ for kd, ind in zip(dataset.kdims[::-1], indices):
+ coords = cls.coords(dataset, kd.name)
+ # Strictly decreasing coordinates: map the index via invert_index
+ if np.all(coords[1:] < coords[:-1]):
+ ind = cls.invert_index(ind, len(coords))
+ # Scalars are wrapped in a list before indexing the coordinates
+ if np.isscalar(ind):
+ ind = [ind]
+ else:
+ all_scalar = False
+ selected[kd.name] = coords[ind]
+ adjusted_inds.append(ind)
+ # Key dimensions without an index keep all of their coordinates
+ for kd in dataset.kdims:
+ if kd.name not in selected:
+ coords = cls.coords(dataset, kd.name)
+ selected[kd.name] = coords
+ all_scalar = False
+ for vd in dataset.vdims:
+ arr = dataset.dimension_values(vd, flat=False)
+ # All-scalar index on a single value dimension: return the value
+ if all_scalar and len(dataset.vdims) == 1:
+ return arr[tuple(ind[0] for ind in adjusted_inds)]
+ selected[vd.name] = arr[tuple(adjusted_inds)]
+ return tuple(selected[d.name] for d in dataset.dimensions())
+
+
@classmethod
def values(cls, dataset, dim, expanded=True, flat=True):
dim = dataset.get_dimension(dim, strict=True)
diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py
index 81a8bbb837..70ac560ffd 100644
--- a/holoviews/core/data/image.py
+++ b/holoviews/core/data/image.py
@@ -89,6 +89,13 @@ def reindex(cls, dataset, kdims=None, vdims=None):
return data[..., inds] if len(inds) > 1 else data[..., inds[0]]
return data
+    @classmethod
+    def coords(cls, dataset, dim, ordered=False, expanded=False):
+        """
+        Return the coordinates along ``dim``: the expanded grid
+        coordinates when ``expanded`` is set, the unexpanded values
+        otherwise. ``ordered`` is accepted but not used here.
+        """
+        dimension = dataset.get_dimension(dim, strict=True)
+        if not expanded:
+            return cls.values(dataset, dimension, expanded=False)
+        return util.expand_grid_coords(dataset, dimension)
+
@classmethod
def range(cls, obj, dim):
dim_idx = obj.get_dimension_index(dim)
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
index 645d96d48d..6c0a49ae14 100644
--- a/holoviews/core/data/interface.py
+++ b/holoviews/core/data/interface.py
@@ -53,6 +53,27 @@ def __getitem__(self, index):
datatype=datatype)
+class ndloc(object):
+    """
+    ndloc is a small wrapper object that allows indexing into a
+    gridded Dataset using the ``.ndloc`` property. The indices are
+    handed to the ``ndloc`` method of the Dataset's interface.
+    """
+
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    def __getitem__(self, indices):
+        dataset = self.dataset
+        interface = dataset.interface
+        indices = util.wrap_tuple(indices)
+        if not interface.gridded:
+            raise IndexError('Cannot use ndloc on non nd-dimensional datastructure')
+
+        selected = interface.ndloc(dataset, indices)
+        if np.isscalar(selected):
+            return selected
+
+        # Restrict the clone to the current and other gridded datatypes
+        gridded = [dt for dt in dataset.datatype
+                   if dt in Interface.interfaces
+                   and Interface.interfaces[dt].gridded]
+        # Datasets exposing bounds are cloned with bounds=None
+        params = {'bounds': None} if hasattr(dataset, 'bounds') else {}
+        return dataset.clone(selected, datatype=[interface.datatype]+gridded,
+                             **params)
+
+
class Interface(param.Parameterized):
interfaces = {}
From 0d2a9c14c4632905b6bd05027dcfb2d0badf49c0 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 16:48:09 +0100
Subject: [PATCH 07/20] Implemented Image indexing using ndloc
---
holoviews/core/util.py | 2 +-
holoviews/element/raster.py | 28 ++++++++++++++++++----------
tests/testdataset.py | 6 +++---
3 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
index e8770dd233..b93f89e1da 100644
--- a/holoviews/core/util.py
+++ b/holoviews/core/util.py
@@ -1449,7 +1449,7 @@ def bound_range(vals, density):
"""
low, high = vals.min(), vals.max()
invert = False
- if vals[0] > vals[1]:
+ if len(vals) > 1 and vals[0] > vals[1]:
invert = True
if not density:
density = round(1./((high-low)/(len(vals)-1)), sys.float_info.dig)
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index d629a3fc57..d4c3e58fab 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -7,7 +7,7 @@
from ..core.data import ImageInterface
from ..core import Dimension, Element2D, Overlay, Dataset
from ..core.boundingregion import BoundingRegion, BoundingBox
-from ..core.sheetcoords import SheetCoordinateSystem
+from ..core.sheetcoords import SheetCoordinateSystem, Slice
from ..core.util import max_range
from .chart import Curve
from .tabular import Table
@@ -298,21 +298,29 @@ def select(self, selection_specs=None, **selection):
coords = tuple(selection[kd.name] if kd.name in selection else slice(None)
for kd in self.kdims)
+ shape = self.interface.shape(self, gridded=True)
if any([isinstance(el, slice) for el in coords]):
- shape = self.interface.shape(self, gridded=True)
bounds = compute_slice_bounds(coords, self, shape[:2])
xdim, ydim = self.kdims
l, b, r, t = bounds.lbrt()
- selection = {xdim.name: slice(l, r), ydim.name: slice(b, t)}
- else:
- selection = {kd.name: c for kd, c in zip(self.kdims, self.closest(coords))}
- data = self.interface.select(self, **selection)
- if isinstance(data, np.ndarray) and data.ndim == 1:
- return self.clone([tuple(data)], kdims=[], new_type=Dataset)
- elif np.isscalar(data):
- return data
+ # Situate resampled region into overall slice
+ y0, y1, x0, x1 = Slice(bounds, self)
+ y0, y1 = shape[0]-y1, shape[0]-y0
+ selection = (slice(y0, y1), slice(x0, x1))
+ sliced = True
+ else:
+ y, x = self.sheet2matrixidx(coords[0], coords[1])
+ y = shape[0]-y-1
+ selection = (y, x)
+ sliced = False
+
+ data = self.interface.ndloc(self, selection)
+ if not sliced:
+ if np.isscalar(data):
+ return data
+ return self.clone(data[self.ndims:], kdims=[], new_type=Dataset)
else:
return self.clone(data, xdensity=self.xdensity,
ydensity=self.ydensity, bounds=bounds)
diff --git a/tests/testdataset.py b/tests/testdataset.py
index 99835b14eb..4d7d10345b 100644
--- a/tests/testdataset.py
+++ b/tests/testdataset.py
@@ -884,9 +884,9 @@ def init_column_data(self):
kdims=[('x', 'X')], vdims=[('y', 'Y')])
def init_grid_data(self):
- self.grid_xs = [0, 1]
- self.grid_ys = [0.1, 0.2, 0.3]
- self.grid_zs = [[0, 1], [2, 3], [4, 5]]
+ self.grid_xs = np.array([0, 1])
+ self.grid_ys = np.array([0.1, 0.2, 0.3])
+ self.grid_zs = np.array([[0, 1], [2, 3], [4, 5]])
self.dataset_grid = self.eltype((self.grid_xs, self.grid_ys,
self.grid_zs), kdims=['x', 'y'],
vdims=['z'])
From 969c06cc017614844d015729729e0c9c0876512d Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 17:28:18 +0100
Subject: [PATCH 08/20] Implemented Image.sample on top of ndloc interface
---
holoviews/core/data/__init__.py | 4 ++-
holoviews/element/raster.py | 46 +++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index f5be2c39bc..8966efb682 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -433,7 +433,9 @@ def sample(self, samples=[], closest=True, **kwargs):
else:
selection = tuple(selection.columns(kdims+self.vdims).values())
- return self.clone(selection, kdims=kdims, new_type=new_type)
+ datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict']))
+ return self.clone(selection, kdims=kdims, new_type=new_type,
+ datatype=datatype)
lens = set(len(util.wrap_tuple(s)) for s in samples)
if len(lens) > 1:
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index d4c3e58fab..93a4d6fd03 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -326,6 +326,52 @@ def select(self, selection_specs=None, **selection):
ydensity=self.ydensity, bounds=bounds)
+ def sample(self, samples=[], **kwargs):
+ """
+ Allows sampling of an Image as an iterator of coordinates
+ matching the key dimensions, returning a new object containing
+ just the selected samples. Alternatively may supply kwargs to
+ sample a coordinate on an object. On an Image the coordinates
+ are continuously indexed and will always snap to the nearest
+ coordinate.
+ """
+ if kwargs and samples:
+ raise Exception('Supply explicit list of samples or kwargs, not both.')
+ elif kwargs:
+ sample = [slice(None) for _ in range(self.ndims)]
+ for dim, val in kwargs.items():
+ sample[self.get_dimension_index(dim)] = val
+ samples = [tuple(sample)]
+
+ # If a 1D cross-section of 2D space return Curve
+ if len(samples) == 1:
+ dims = [kd for kd, v in zip(self.kdims, samples[0]) if not np.isscalar(v)]
+ if len(dims) == 1:
+ kdims = [self.get_dimension(kd) for kd in dims]
+ sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])}
+ dims = [kd for kd, v in sel.items() if not np.isscalar(v)]
+ selection = self.select(**sel)
+ selection = tuple(selection.columns(kdims+self.vdims).values())
+ datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict']))
+ return self.clone(selection, kdims=kdims, new_type=Curve,
+ datatype=datatype)
+ else:
+ new_type = Table
+ kdims = self.kdims
+ else:
+ new_type = Dataset
+ kdims = self.kdims
+
+ xs, ys = [], []
+ for s in samples:
+ if len(s) > 1:
+ y, x = self.sheet2matrixidx(*s)
+ xs.append(x)
+ ys.append(y)
+ data = self.interface.ndloc(self, (ys, xs))
+ return self.clone(data, new_type=Dataset, datatype=['dataframe', 'dict'])
+
+
def closest(self, coords=[], **kwargs):
"""
Given a single coordinate or multiple coordinates as
From 2b727850415ff4254698726732e6086cd116e3e8 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 18:55:55 +0100
Subject: [PATCH 09/20] Fixed bug in Dataset unit test setup
---
tests/testdataset.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/tests/testdataset.py b/tests/testdataset.py
index 4d7d10345b..4822b84709 100644
--- a/tests/testdataset.py
+++ b/tests/testdataset.py
@@ -872,7 +872,8 @@ def setUp(self):
self.restore_datatype = Dataset.datatype
Dataset.datatype = ['grid']
self.data_instance_type = dict
- self.init_data()
+ self.init_column_data()
+ self.init_grid_data()
def init_column_data(self):
self.xs = np.arange(11)
@@ -1308,10 +1309,10 @@ class XArrayDaskArrayDatasetTest(XArrayDatasetTest):
def init_column_data(self):
import dask.array
- self.xs = range(11)
- self.xs_2 = [el**2 for el in self.xs]
+ self.xs = np.array(range(11))
+ self.xs_2 = self.xs**2
- self.y_ints = [i*2 for i in range(11)]
+ self.y_ints = self.xs*2
dask_y = dask.array.from_array(np.array(self.y_ints), 2)
self.dataset_hm = Dataset((self.xs, dask_y),
kdims=['x'], vdims=['y'])
From 9638fb892abfc615ed2d1d5edc40973330f305cb Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 19:01:54 +0100
Subject: [PATCH 10/20] Fixed closest bug in Image.sample
---
holoviews/element/raster.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index 93a4d6fd03..adbfd7cf32 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -335,6 +335,7 @@ def sample(self, samples=[], **kwargs):
are continuously indexed and will always snap to the nearest
coordinate.
"""
+ kwargs = {k: v for k, v in kwargs.items() if k != 'closest'}
if kwargs and samples:
raise Exception('Supply explicit list of samples or kwargs, not both.')
elif kwargs:
From 0da749b76618117fa9d03d23a00adb9a628ef75f Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 20:05:16 +0100
Subject: [PATCH 11/20] Fixed Image.sample y-coord index
---
holoviews/element/raster.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index adbfd7cf32..fa7e241179 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -345,6 +345,7 @@ def sample(self, samples=[], **kwargs):
samples = [tuple(sample)]
# If a 1D cross-section of 2D space return Curve
+ shape = self.interface.shape(self, gridded=True)
if len(samples) == 1:
dims = [kd for kd, v in zip(self.kdims, samples[0]) if not np.isscalar(v)]
if len(dims) == 1:
@@ -368,9 +369,9 @@ def sample(self, samples=[], **kwargs):
if len(s) > 1:
y, x = self.sheet2matrixidx(*s)
xs.append(x)
- ys.append(y)
+ ys.append(shape[0]-y)
data = self.interface.ndloc(self, (ys, xs))
- return self.clone(data, new_type=Dataset, datatype=['dataframe', 'dict'])
+ return self.clone(data, new_type=Table, datatype=['dataframe', 'dict'])
def closest(self, coords=[], **kwargs):
From 4af6c8c098db0b0e3db81249c3f4b95fae26002a Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 21:27:13 +0100
Subject: [PATCH 12/20] Minor fixes for sampling
---
doc/Tutorials/Introduction.ipynb | 2 +-
holoviews/element/raster.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/Tutorials/Introduction.ipynb b/doc/Tutorials/Introduction.ipynb
index 237b10a1a9..38e14b72cc 100644
--- a/doc/Tutorials/Introduction.ipynb
+++ b/doc/Tutorials/Introduction.ipynb
@@ -550,7 +550,7 @@
"source": [
"print(rgb_parrot)\n",
"print(rgb_parrot[0,0])\n",
- "print(rgb_parrot[0,0][0])"
+ "print(rgb_parrot[0,0].iloc[0, 0])"
]
},
{
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index fa7e241179..af805d9fcc 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -369,7 +369,7 @@ def sample(self, samples=[], **kwargs):
if len(s) > 1:
y, x = self.sheet2matrixidx(*s)
xs.append(x)
- ys.append(shape[0]-y)
+ ys.append(shape[0]-y-1)
data = self.interface.ndloc(self, (ys, xs))
return self.clone(data, new_type=Table, datatype=['dataframe', 'dict'])
From 3f5ba0509b3a7d6405d3d5ce5831b32df59be7d0 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 22:17:05 +0100
Subject: [PATCH 13/20] Added Image sampling test
---
tests/testimageinterfaces.py | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/tests/testimageinterfaces.py b/tests/testimageinterfaces.py
index 698ef195b7..9e6924b5b5 100644
--- a/tests/testimageinterfaces.py
+++ b/tests/testimageinterfaces.py
@@ -2,7 +2,7 @@
from nose.plugins.attrib import attr
import numpy as np
-from holoviews import Dimension, Image, Curve, RGB, HSV, Dataset
+from holoviews import Dimension, Image, Curve, RGB, HSV, Dataset, Table
from holoviews.element.comparison import ComparisonTestCase
from .testdataset import DatatypeContext
@@ -118,6 +118,18 @@ def test_sample_ycoord(self):
self.assertEqual(self.image.sample(y=5),
Curve((xs, zs), kdims=['x'], vdims=['z']))
+ def test_sample_coords(self):
+ arr = np.arange(10)*np.arange(5)[np.newaxis].T
+ xs = np.linspace(0.12, 0.81, 10)
+ ys = np.linspace(0.12, 0.391, 5)
+ img = Image((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype])
+ sampled = img.sample([(0.15, 0.15), (0.15, 0.4), (0.8, 0.4), (0.8, 0.15)])
+ self.assertIsInstance(sampled, Table)
+ yidx = [0, 4, 4, 0]
+ xidx = [0, 0, 9, 9]
+ table = Table((xs[xidx], ys[yidx], arr[yidx, xidx]), kdims=['x', 'y'], vdims=['z'])
+ self.assertEqual(sampled, table)
+
def test_reduce_to_scalar(self):
self.assertEqual(self.image.reduce(['x', 'y'], function=np.mean),
20.25)
From 788cea806401e5cb20b3685b38caa479072c4d98 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 23:00:47 +0100
Subject: [PATCH 14/20] Small fix for ndloc
---
holoviews/core/data/interface.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
index 6c0a49ae14..c34c828cac 100644
--- a/holoviews/core/data/interface.py
+++ b/holoviews/core/data/interface.py
@@ -66,12 +66,10 @@ def __getitem__(self, indices):
selected = self.dataset.interface.ndloc(ds, indices)
if np.isscalar(selected):
return selected
- datatype = [dt for dt in ds.datatype if dt in Interface.interfaces and
- Interface.interfaces[dt].gridded]
params = {}
if hasattr(ds, 'bounds'):
params['bounds'] = None
- return self.dataset.clone(selected, datatype=[ds.interface.datatype]+datatype, **params)
+ return self.dataset.clone(selected, datatype=[ds.interface.datatype]+ds.datatype, **params)
class Interface(param.Parameterized):
From bdf2ad310505593507d0c941a548d79a900b28ca Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 23:01:24 +0100
Subject: [PATCH 15/20] Vectorized Image.sample
---
holoviews/element/raster.py | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
index af805d9fcc..cb07d6ebbd 100644
--- a/holoviews/element/raster.py
+++ b/holoviews/element/raster.py
@@ -364,13 +364,10 @@ def sample(self, samples=[], **kwargs):
new_type = Dataset
kdims = self.kdims
- xs, ys = [], []
- for s in samples:
- if len(s) > 1:
- y, x = self.sheet2matrixidx(*s)
- xs.append(x)
- ys.append(shape[0]-y-1)
- data = self.interface.ndloc(self, (ys, xs))
+ xs, ys = zip(*samples)
+ yidx, xidx = self.sheet2matrixidx(np.array(xs), np.array(ys))
+ yidx = shape[0]-yidx-1
+ data = self.interface.ndloc(self, (yidx, xidx))
return self.clone(data, new_type=Table, datatype=['dataframe', 'dict'])
From 6571609d4fc2f19a046fff5cfa22b78c8fc5c79d Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 23:01:52 +0100
Subject: [PATCH 16/20] Added ndloc unit tests
---
tests/testdataset.py | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/tests/testdataset.py b/tests/testdataset.py
index 4822b84709..56af76ef4a 100644
--- a/tests/testdataset.py
+++ b/tests/testdataset.py
@@ -911,6 +911,44 @@ def test_canonical_vdim(self):
self.assertEqual(dataset.dimension_values('z', flat=False),
canonical)
+ def test_dataset_ndloc_index(self):
+ xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5)
+ arr = np.arange(10)*np.arange(5)[np.newaxis].T
+ ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype])
+ self.assertEqual(ds.ndloc[0,0], arr[0, 0])
+
+ def test_dataset_ndloc_index2(self):
+ xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5)
+ arr = np.arange(10)*np.arange(5)[np.newaxis].T
+ ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype])
+ self.assertEqual(ds.ndloc[4, 9], arr[4, 9])
+
+ def test_dataset_ndloc_slice(self):
+ xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5)
+ arr = np.arange(10)*np.arange(5)[np.newaxis].T
+ ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype])
+ sliced = Dataset((xs[2:5], ys[1:], arr[1:, 2:5]), kdims=['x', 'y'], vdims=['z'],
+ datatype=[self.datatype])
+ self.assertEqual(ds.ndloc[1:, 2:5], sliced)
+
+ def test_dataset_ndloc_lists(self):
+ xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5)
+ arr = np.arange(10)*np.arange(5)[np.newaxis].T
+ ds = Dataset((xs, ys, arr), kdims=['x', 'y'], vdims=['z'], datatype=[self.datatype, 'dictionary'])
+ sliced = Dataset((xs[[1, 2, 3]], ys[[0, 1, 2]], arr[[0, 1, 2], [1, 2, 3]]), kdims=['x', 'y'], vdims=['z'],
+ datatype=['dictionary'])
+ self.assertEqual(ds.ndloc[[0, 1, 2], [1, 2, 3]], sliced)
+
+ def test_dataset_ndloc_slice_two_vdims(self):
+ xs, ys = np.linspace(0.12, 0.81, 10), np.linspace(0.12, 0.391, 5)
+ arr = np.arange(10)*np.arange(5)[np.newaxis].T
+ arr2 = (np.arange(10)*np.arange(5)[np.newaxis].T)[::-1]
+ ds = Dataset((xs, ys, arr, arr2), kdims=['x', 'y'], vdims=['z', 'z2'], datatype=[self.datatype, 'dictionary'])
+ sliced = Dataset((xs[[1, 2, 3]], ys[[0, 1, 2]], arr[[0, 1, 2], [1, 2, 3]],
+ arr2[[0, 1, 2], [1, 2, 3]]), kdims=['x', 'y'], vdims=['z', 'z2'],
+ datatype=['dictionary'])
+ self.assertEqual(ds.ndloc[[0, 1, 2], [1, 2, 3]], sliced)
+
def test_dataset_dim_vals_grid_kdims_xs(self):
self.assertEqual(self.dataset_grid.dimension_values(0, expanded=False),
np.array([0, 1]))
@@ -1248,6 +1286,9 @@ def test_dataset_groupby_drop_dims_with_vdim(self):
def test_dataset_groupby_drop_dims_dynamic_with_vdim(self):
raise SkipTest("Not supported")
+ def test_dataset_ndloc_slice_two_vdims(self):
+ raise SkipTest("Not supported")
+
@attr(optional=1)
class XArrayDatasetTest(GridDatasetTest):
From 8607c20bd83f7f1da0afa579c775f2a94139a792 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 23:27:51 +0100
Subject: [PATCH 17/20] Simplified decimate operation using iloc
---
holoviews/operation/element.py | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py
index 7530791dfe..47bc2d00ae 100644
--- a/holoviews/operation/element.py
+++ b/holoviews/operation/element.py
@@ -597,17 +597,7 @@ def _process_layer(self, element, key=None):
if len(sliced) > self.p.max_samples:
prng = np.random.RandomState(self.p.random_seed)
- length = len(sliced)
- if element.interface is PandasInterface:
- data = sliced.data.sample(self.p.max_samples,
- random_state=prng)
- else:
- inds = prng.choice(length, self.p.max_samples, False)
- if isinstance(element.interface, DictInterface):
- data = {k: v[inds] for k, v in sliced.data.items()}
- else:
- data = sliced.data[inds, :]
- sliced = element.clone(data)
+ return sliced.iloc[prng.choice(len(sliced), self.p.max_samples, False)]
return sliced
def _process(self, element, key=None):
From 922accd701e5b122f6a97c5ddd96f646f022d4cb Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sun, 18 Jun 2017 23:37:00 +0100
Subject: [PATCH 18/20] Use iloc in Tabular.pprint_cell
---
holoviews/core/element.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/holoviews/core/element.py b/holoviews/core/element.py
index 55bfd64f52..1e424c5375 100644
--- a/holoviews/core/element.py
+++ b/holoviews/core/element.py
@@ -228,8 +228,7 @@ def pprint_cell(self, row, col):
return self.kdims[col].pprint_label
else:
dim = self.get_dimension(col)
- values = self[dim.name]
- return dim.pprint_value(values[row-1])
+ return dim.pprint_value(self.iloc[row-1, col])
def cell_type(self, row, col):
From 5a1d93062dd87c21ef57eaefd77968a056a6516c Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Mon, 19 Jun 2017 00:34:08 +0100
Subject: [PATCH 19/20] Improved iloc and ndloc docstrings
---
holoviews/core/data/__init__.py | 46 +++++++++++++++++++++++++++++---
holoviews/core/data/interface.py | 10 ++++++-
2 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index 8966efb682..d8d2836483 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -629,16 +629,54 @@ def to(self):
@property
def iloc(self):
"""
- Returns an iloc object, providing a convenient interface to
- slice and index into the Dataset using row and column indices,
- allow selection by integer index, slice and list of integer
- indices and boolean arrays.
+ Returns an iloc object providing a convenient interface to
+ slice and index into the Dataset using row and column indices.
+ Allow selection by integer index, slice and list of integer
+ indices and boolean arrays, e.g.:
+
+ Examples:
+
+ * Index the first row and column:
+
+ dataset.iloc[0, 0]
+
+ * Select rows 1 and 2 with a slice:
+
+ dataset.iloc[1:3, :]
+
+ * Select with a list of integer coordinates:
+
+ dataset.iloc[[0, 2, 3]]
"""
return iloc(self)
@property
def ndloc(self):
+ """
+ Returns an ndloc object providing nd-array like indexing for
+ gridded datasets. Follows NumPy array indexing conventions,
+ allowing for indexing, slicing and selecting a list of indices
+ on multi-dimensional arrays using integer indices. The order
+ of array indices is inverted Dataset key dimensions, e.g. an
+ Image with key dimensions 'x' and 'y' can be indexed with
+ ``image.ndloc[iy, ix]``, where ``iy`` and ``ix`` are integer
+ indices along the y and x dimensions.
+
+ Examples:
+
+ * Index value in 2D array:
+
+ dataset.ndloc[3, 1]
+
+ * Slice along y-axis of 2D array:
+
+ dataset.ndloc[2:5, :]
+
+ * Select with integer coordinates along x- and y-axes:
+
+ dataset.ndloc[[1, 2, 3], [0, 2, 3]]
+ """
return ndloc(self)
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
index c34c828cac..8694e95260 100644
--- a/holoviews/core/data/interface.py
+++ b/holoviews/core/data/interface.py
@@ -11,7 +11,8 @@ class iloc(object):
iloc is small wrapper object that allows row, column based
indexing into a Dataset using the ``.iloc`` property. It supports
the usual numpy and pandas iloc indexing semantics including
- integer indices, slices, lists and arrays of values.
+ integer indices, slices, lists and arrays of values. For more
+ information see the ``Dataset.iloc`` property docstring.
"""
def __init__(self, dataset):
@@ -54,6 +55,13 @@ def __getitem__(self, index):
class ndloc(object):
+ """
+ ndloc is a small wrapper object that allows ndarray-like indexing
+ for gridded Datasets using the ``.ndloc`` property. It supports
+ the standard NumPy ndarray indexing semantics including
+ integer indices, slices, lists and arrays of values. For more
+ information see the ``Dataset.ndloc`` property docstring.
+ """
def __init__(self, dataset):
self.dataset = dataset
From 3ecff25bc06acc8c1487e364204f22dd90257607 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Mon, 19 Jun 2017 02:13:36 +0100
Subject: [PATCH 20/20] Small docstring fixes for iloc and ndloc
---
holoviews/core/data/__init__.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index d8d2836483..4944bb4780 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -632,7 +632,7 @@ def iloc(self):
Returns an iloc object providing a convenient interface to
slice and index into the Dataset using row and column indices.
Allow selection by integer index, slice and list of integer
- indices and boolean arrays, e.g.:
+ indices and boolean arrays.
Examples:
@@ -658,10 +658,10 @@ def ndloc(self):
gridded datasets. Follows NumPy array indexing conventions,
allowing for indexing, slicing and selecting a list of indices
on multi-dimensional arrays using integer indices. The order
- of array indices is inverted Dataset key dimensions, e.g. an
- Image with key dimensions 'x' and 'y' can be indexed with
- ``image.ndloc[iy, ix]``, where ``iy`` and ``ix`` are integer
- indices along the y and x dimensions.
+ of array indices is inverted relative to the Dataset key
+ dimensions, e.g. an Image with key dimensions 'x' and 'y' can
+ be indexed with ``image.ndloc[iy, ix]``, where ``iy`` and
+ ``ix`` are integer indices along the y and x dimensions.
Examples:
@@ -673,7 +673,7 @@ def ndloc(self):
dataset.ndloc[2:5, :]
- * Select with integer coordinates along x- and y-axes:
+ * Vectorized (non-orthogonal) indexing along x- and y-axes:
dataset.ndloc[[1, 2, 3], [0, 2, 3]]
"""