Added iloc tabular indexing interface

holoviz · May 14, 2017 · fd350c3 · fd350c3
1 parent f4a27cc
commit fd350c3
Show file tree

Hide file tree

Showing 9 changed files with 380 additions and 11 deletions.
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
@@ -9,7 +9,7 @@
 import param
 
 from ..dimension import redim
-from .interface import Interface
+from .interface import Interface, TabularIndex
 from .array import ArrayInterface
 from .dictionary import DictInterface
 from .grid import GridInterface
@@ -616,6 +616,14 @@ def to(self):
         return self._conversion_interface(self)
 
 
+    @property
+    def iloc(self):
+        """
+        Returns a TabularIndex wrapping this Dataset, providing a
+        convenient interface to slice and index into the Dataset
+        using row and column indices, e.g. ``dataset.iloc[rows]``
+        or ``dataset.iloc[rows, cols]``.
+        """
+        return TabularIndex(self)
+
 
 # Aliases for pickle backward compatibility
 Columns      = Dataset

diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py
@@ -233,4 +233,23 @@ def aggregate(cls, dataset, dimensions, function, **kwargs):
         return np.atleast_2d(rows)
 
 
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Positional indexing into dataset.data using a (rows, cols)
+        pair. A scalar row together with a scalar column yields the
+        single array element, otherwise the selected sub-array is
+        returned, with any 1D result reshaped into a column.
+        """
+        row_sel, col_sel = index
+        scalar_col = np.isscalar(col_sel)
+        if scalar_col and isinstance(col_sel, util.basestring):
+            # A dimension name is resolved to its column position
+            col_sel = dataset.get_dimension_index(col_sel)
+        if scalar_col and np.isscalar(row_sel):
+            return dataset.data[row_sel, col_sel]
+
+        if scalar_col:
+            col_sel = [dataset.get_dimension_index(col_sel)]
+        elif not isinstance(col_sel, slice):
+            col_sel = [dataset.get_dimension_index(d) for d in col_sel]
+        if np.isscalar(row_sel):
+            row_sel = [row_sel]
+
+        # Rows are selected first and columns second, so that list
+        # selections are not paired up element-wise by the array
+        selected = dataset.data[row_sel, :][:, col_sel]
+        if selected.ndim == 1:
+            return np.atleast_2d(selected).T
+        return selected
+
 Interface.register(ArrayInterface)
diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py
@@ -12,7 +12,7 @@
 
 from .. import util
 from ..element import Element
-from ..ndmapping import NdMapping, item_check
+from ..ndmapping import NdMapping, item_check, OrderedDict
 from .interface import Interface
 from .pandas import PandasInterface
 
@@ -241,6 +241,30 @@ def dframe(cls, columns, dimensions):
     def nonzero(cls, dataset):
         return True
 
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Positional (rows, cols) indexing for dask backed data. Dask
+        does not support iloc, so every selected column is computed
+        before it is indexed and the laziness of the operation is
+        lost. Returns a tuple of column arrays, or a single value
+        when both the row and the column index are scalar.
+        """
+        row_sel, col_sel = index
+        scalar = np.isscalar(col_sel) and np.isscalar(row_sel)
+        if isinstance(col_sel, slice):
+            names = [d.name for d in dataset.dimensions()][col_sel]
+        elif np.isscalar(col_sel):
+            names = [dataset.get_dimension(col_sel).name]
+        else:
+            names = [dataset.get_dimension(d).name for d in col_sel]
+        if np.isscalar(row_sel):
+            row_sel = [row_sel]
+
+        # Keyed by column name, so a name selected twice appears once
+        columns = OrderedDict(
+            (name, dataset.data[name].compute().iloc[row_sel].values)
+            for name in names)
+        if scalar:
+            return columns[names[0]][0]
+        return tuple(columns.values())
 
 
 Interface.register(DaskInterface)
diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py
@@ -261,4 +261,29 @@ def aggregate(cls, dataset, kdims, function, **kwargs):
         return aggregated
 
 
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Positional (rows, cols) indexing on dictionary data. Returns
+        an OrderedDict holding the selected rows of every selected
+        column, in the order the columns appear in dataset.data, or
+        a single value when both indices are scalar.
+        """
+        row_sel, col_sel = index
+        scalar = np.isscalar(col_sel) and np.isscalar(row_sel)
+        if np.isscalar(col_sel):
+            dims = [dataset.get_dimension(col_sel, strict=True)]
+        elif isinstance(col_sel, slice):
+            dims = dataset.dimensions()[col_sel]
+        else:
+            dims = [dataset.get_dimension(d, strict=True) for d in col_sel]
+
+        if np.isscalar(row_sel):
+            row_sel = [row_sel]
+
+        selected = OrderedDict(
+            (key, column[row_sel])
+            for key, column in dataset.data.items() if key in dims)
+
+        if scalar:
+            return selected[dims[0].name][0]
+        return selected
+
+
 Interface.register(DictInterface)
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
@@ -391,5 +391,28 @@ def sort(cls, dataset, by=[]):
             raise Exception('Compressed format cannot be sorted, either instantiate '
                             'in the desired order or use the expanded format.')
 
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Positional (rows, cols) indexing. The values of each selected
+        dimension are obtained through dataset.dimension_values and
+        then indexed by row, giving a tuple with one entry per
+        dimension, or a single value when both indices are scalar.
+        """
+        row_sel, col_sel = index
+        scalar = np.isscalar(col_sel) and np.isscalar(row_sel)
+        if np.isscalar(col_sel):
+            dims = [dataset.get_dimension(col_sel, strict=True)]
+        elif isinstance(col_sel, slice):
+            dims = dataset.dimensions()[col_sel]
+        else:
+            dims = [dataset.get_dimension(d, strict=True) for d in col_sel]
+
+        if np.isscalar(row_sel):
+            row_sel = [row_sel]
+
+        columns = [dataset.dimension_values(dim)[row_sel] for dim in dims]
+
+        if scalar:
+            return columns[0][0]
+        return tuple(columns)
+
 
 Interface.register(GridInterface)
diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
@@ -6,6 +6,53 @@
 from .. import util
 
 
+class TabularIndex(object):
+    """
+    TabularIndex is a small wrapper object that allows row, column
+    based indexing into a Dataset using the ``iloc`` property.
+    It supports the usual numpy and pandas iloc indexing semantics
+    including integer indices, slices, lists and arrays of values.
+    """
+
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    def __getitem__(self, index):
+        """
+        Indexes the wrapped Dataset by a row index or by a
+        (rows, cols) tuple. An Ellipsis in either position selects
+        everything along that axis. If the interface returns a
+        scalar it is returned directly, otherwise a clone of the
+        Dataset holding the selected data is returned, with its
+        key and value dimensions restricted to the selected columns.
+
+        Raises an IndexError if more than two indices are supplied.
+        """
+        index = util.wrap_tuple(index)
+        if len(index) == 1:
+            index = (index[0], slice(None))
+        elif len(index) > 2:
+            raise IndexError('Tabular index not understood, index '
+                             'must be at most length 2.')
+
+        rows, cols = index
+        if rows is Ellipsis:
+            rows = slice(None)
+        if cols is Ellipsis:
+            cols = slice(None)
+        data = self.dataset.interface.iloc(self.dataset, (rows, cols))
+        kdims = self.dataset.kdims
+        vdims = self.dataset.vdims
+        if np.isscalar(data):
+            return data
+
+        # Only compare against the full slice when cols really is a
+        # slice: comparing an array of column indices to a slice is
+        # evaluated element-wise and has no unambiguous truth value.
+        all_columns = isinstance(cols, slice) and cols == slice(None)
+        if not all_columns:
+            if isinstance(cols, slice):
+                dims = self.dataset.dimensions()[cols]
+            elif np.isscalar(cols):
+                dims = [self.dataset.get_dimension(cols)]
+            else:
+                dims = [self.dataset.get_dimension(d) for d in cols]
+            kdims = [d for d in dims if d in kdims]
+            vdims = [d for d in dims if d in vdims]
+
+        # Keep only the registered, non-gridded datatypes for the
+        # clone and fall back to defaults if none of them remain.
+        datatype = [dt for dt in self.dataset.datatype
+                    if dt in Interface.interfaces and
+                    not Interface.interfaces[dt].gridded]
+        if not datatype:
+            datatype = ['dataframe', 'dictionary']
+        return self.dataset.clone(data, kdims=kdims, vdims=vdims,
+                                  datatype=datatype)
+
+
 class Interface(param.Parameterized):
 
     interfaces = {}

diff --git a/holoviews/core/data/ndelement.py b/holoviews/core/data/ndelement.py
@@ -8,7 +8,7 @@
 from .interface import Interface
 from ..dimension import Dimension, Dimensioned
 from ..element import NdElement
-from ..ndmapping import item_check
+from ..ndmapping import item_check, OrderedDict
 from .. import util
 
 
@@ -141,5 +141,30 @@ def unpack_scalar(cls, columns, data):
         else:
             return data
 
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Positional (rows, cols) indexing on the columns returned by
+        dataset.columns(). Returns an OrderedDict holding the
+        selected rows of every selected column, or a single value
+        when both indices are scalar.
+        """
+        all_columns = dataset.columns()
+        row_sel, col_sel = index
+        scalar = np.isscalar(col_sel) and np.isscalar(row_sel)
+        if np.isscalar(col_sel):
+            dims = [dataset.get_dimension(col_sel, strict=True)]
+        elif isinstance(col_sel, slice):
+            dims = dataset.dimensions()[col_sel]
+        else:
+            dims = [dataset.get_dimension(d, strict=True) for d in col_sel]
+
+        if np.isscalar(row_sel):
+            row_sel = [row_sel]
+
+        selected = OrderedDict(
+            (key, column[row_sel])
+            for key, column in all_columns.items() if key in dims)
+
+        if scalar:
+            return selected[dims[0].name][0]
+        return selected
+
 
 Interface.register(NdElementInterface)
diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py
@@ -246,4 +246,25 @@ def dframe(cls, columns, dimensions):
             return columns.data.copy()
 
 
+    @classmethod
+    def iloc(cls, dataset, index):
+        """
+        Positional (rows, cols) indexing on dataset.data. Columns
+        may be given as a slice over the dataset dimensions or as
+        anything dataset.get_dimension resolves; they are mapped to
+        the matching column positions of the data before indexing.
+        A scalar row with a scalar column returns a single value.
+        """
+        row_sel, col_sel = index
+        frame = dataset.data
+        frame_columns = list(frame.columns)
+        scalar = np.isscalar(col_sel) and np.isscalar(row_sel)
+        if isinstance(col_sel, slice):
+            names = [d.name for d in dataset.dimensions()][col_sel]
+        elif np.isscalar(col_sel):
+            names = [dataset.get_dimension(col_sel).name]
+        else:
+            names = [dataset.get_dimension(d).name for d in col_sel]
+        positions = [frame_columns.index(name) for name in names]
+
+        if scalar:
+            return frame.iloc[row_sel, positions[0]]
+        if np.isscalar(row_sel):
+            row_sel = [row_sel]
+        return frame.iloc[row_sel, positions]
+
+
 Interface.register(PandasInterface)