Skip to content

Commit

Permalink
Added iloc tabular indexing interface
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr committed May 14, 2017
1 parent f4a27cc commit fd350c3
Show file tree
Hide file tree
Showing 9 changed files with 380 additions and 11 deletions.
10 changes: 9 additions & 1 deletion holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import param

from ..dimension import redim
from .interface import Interface
from .interface import Interface, TabularIndex
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface
Expand Down Expand Up @@ -616,6 +616,14 @@ def to(self):
return self._conversion_interface(self)


@property
def iloc(self):
    """
    Positional indexer for this Dataset.

    Each access wraps the Dataset in a fresh TabularIndex, which
    lets callers slice and index into it using row and column
    indices.
    """
    indexer = TabularIndex(self)
    return indexer


# Aliases for pickle backward compatibility
Columns = Dataset
Expand Down
19 changes: 19 additions & 0 deletions holoviews/core/data/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,4 +233,23 @@ def aggregate(cls, dataset, dimensions, function, **kwargs):
return np.atleast_2d(rows)


@classmethod
def iloc(cls, dataset, index):
    """
    Select rows and columns of ``dataset.data`` by position.

    ``index`` is a ``(rows, cols)`` pair. A scalar string column is
    translated to its integer position via
    ``dataset.get_dimension_index``; a non-slice, non-scalar column
    selector is translated element by element.

    When both rows and cols are scalar the single addressed element
    is returned. Otherwise the selected sub-array is returned, with
    a one-dimensional result promoted to a column.
    """
    row_sel, col_sel = index
    single_row = np.isscalar(row_sel)

    if np.isscalar(col_sel):
        if isinstance(col_sel, util.basestring):
            col_sel = dataset.get_dimension_index(col_sel)
        if single_row:
            # Scalar row and scalar column: hand back the bare element.
            return dataset.data[row_sel, col_sel]
        col_sel = [dataset.get_dimension_index(col_sel)]
    elif not isinstance(col_sel, slice):
        col_sel = [dataset.get_dimension_index(dim) for dim in col_sel]

    if single_row:
        row_sel = [row_sel]

    # Index rows first and columns second so that two list selectors
    # are applied independently instead of being paired up.
    row_subset = dataset.data[row_sel, :]
    subset = row_subset[:, col_sel]
    return np.atleast_2d(subset).T if subset.ndim == 1 else subset

Interface.register(ArrayInterface)
26 changes: 25 additions & 1 deletion holoviews/core/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from .. import util
from ..element import Element
from ..ndmapping import NdMapping, item_check
from ..ndmapping import NdMapping, item_check, OrderedDict
from .interface import Interface
from .pandas import PandasInterface

Expand Down Expand Up @@ -241,6 +241,30 @@ def dframe(cls, columns, dimensions):
def nonzero(cls, dataset):
return True

@classmethod
def iloc(cls, dataset, index):
    """
    Dask does not support iloc, therefore iloc will execute
    the call graph and lose the laziness of the operation.

    ``index`` is a ``(rows, cols)`` pair. Columns are resolved to
    dimension names, each named column is computed and then indexed
    by row position. A scalar row together with a scalar column
    yields a single value; otherwise a tuple of value arrays, one
    per selected column, is returned.
    """
    row_sel, col_sel = index
    scalar_result = False
    if isinstance(col_sel, slice):
        all_names = [dim.name for dim in dataset.dimensions()]
        names = all_names[col_sel]
    elif np.isscalar(col_sel):
        scalar_result = np.isscalar(row_sel)
        names = [dataset.get_dimension(col_sel).name]
    else:
        names = [dataset.get_dimension(dim).name for dim in col_sel]

    if np.isscalar(row_sel):
        row_sel = [row_sel]

    # Each column is computed on its own before positional row
    # indexing is applied to the computed result.
    computed = OrderedDict(
        (name, dataset.data[name].compute().iloc[row_sel].values)
        for name in names)

    if scalar_result:
        return computed[names[0]][0]
    return tuple(computed.values())


Interface.register(DaskInterface)
25 changes: 25 additions & 0 deletions holoviews/core/data/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,4 +261,29 @@ def aggregate(cls, dataset, kdims, function, **kwargs):
return aggregated


@classmethod
def iloc(cls, dataset, index):
    """
    Select rows and columns of a dictionary-backed dataset by
    position.

    ``index`` is a ``(rows, cols)`` pair. Columns are resolved to
    dimensions of the dataset; every entry of ``dataset.data`` whose
    key matches one of them is indexed by ``rows`` and collected, in
    the order of ``dataset.data``, into a new OrderedDict. A scalar
    row combined with a scalar column returns the single value
    instead.
    """
    row_sel, col_sel = index
    scalar_result = False
    if isinstance(col_sel, slice):
        dims = dataset.dimensions()[col_sel]
    elif np.isscalar(col_sel):
        scalar_result = np.isscalar(row_sel)
        dims = [dataset.get_dimension(col_sel, strict=True)]
    else:
        dims = [dataset.get_dimension(dim, strict=True) for dim in col_sel]

    if np.isscalar(row_sel):
        row_sel = [row_sel]

    selected = OrderedDict(
        (key, column[row_sel])
        for key, column in dataset.data.items()
        if key in dims)

    if scalar_result:
        return selected[dims[0].name][0]
    return selected


Interface.register(DictInterface)
23 changes: 23 additions & 0 deletions holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,5 +391,28 @@ def sort(cls, dataset, by=[]):
raise Exception('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')

@classmethod
def iloc(cls, dataset, index):
    """
    Select rows and columns of a gridded dataset by position.

    ``index`` is a ``(rows, cols)`` pair. Columns are resolved to
    dimensions of the dataset, the values of each dimension are
    fetched with ``dataset.dimension_values`` and indexed by
    ``rows``. A scalar row combined with a scalar column returns the
    single value; otherwise a tuple with one array per selected
    column is returned.
    """
    row_sel, col_sel = index
    scalar_result = False
    if isinstance(col_sel, slice):
        dims = dataset.dimensions()[col_sel]
    elif np.isscalar(col_sel):
        scalar_result = np.isscalar(row_sel)
        dims = [dataset.get_dimension(col_sel, strict=True)]
    else:
        dims = [dataset.get_dimension(dim, strict=True) for dim in col_sel]

    if np.isscalar(row_sel):
        row_sel = [row_sel]

    columns = [dataset.dimension_values(dim)[row_sel] for dim in dims]

    if scalar_result:
        return columns[0][0]
    return tuple(columns)


Interface.register(GridInterface)
47 changes: 47 additions & 0 deletions holoviews/core/data/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@
from .. import util


class TabularIndex(object):
    """
    TabularIndex is a small wrapper object that allows row, column
    based indexing into a Dataset using the ``iloc`` property.
    It supports the usual numpy and pandas iloc indexing semantics
    including integer indices, slices, lists and arrays of values.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __getitem__(self, index):
        """
        Index the wrapped dataset by ``rows`` or by ``(rows, cols)``.

        A single index selects rows across all columns. The actual
        selection is delegated to the ``iloc`` method of the dataset's
        interface. A scalar result is returned as is; any other result
        is wrapped in a clone of the dataset whose key and value
        dimensions are restricted to the selected columns.

        Raises IndexError if more than two indices are supplied.
        """
        index = util.wrap_tuple(index)
        if len(index) == 1:
            index = (index[0], slice(None))
        elif len(index) > 2:
            raise IndexError('Tabular index not understood, index '
                             'must be at most length 2.')

        rows, cols = index
        if rows is Ellipsis:
            rows = slice(None)
        data = self.dataset.interface.iloc(self.dataset, (rows, cols))
        if np.isscalar(data):
            return data

        kdims = self.dataset.kdims
        vdims = self.dataset.vdims
        # Only a slice can mean "all columns". Checking the type first
        # avoids evaluating ``cols == slice(None)`` on a numpy array,
        # which is an elementwise comparison whose truth value is
        # ambiguous.
        all_columns = isinstance(cols, slice) and cols == slice(None)
        if not all_columns:
            if isinstance(cols, slice):
                dims = self.dataset.dimensions()[cols]
            elif np.isscalar(cols):
                dims = [self.dataset.get_dimension(cols)]
            else:
                dims = [self.dataset.get_dimension(d) for d in cols]
            kdims = [d for d in dims if d in kdims]
            vdims = [d for d in dims if d in vdims]

        # Keep only the registered, non-gridded datatypes for the
        # clone, falling back to the defaults below if none remain.
        datatype = [dt for dt in self.dataset.datatype
                    if dt in Interface.interfaces and
                    not Interface.interfaces[dt].gridded]
        if not datatype:
            datatype = ['dataframe', 'dictionary']
        return self.dataset.clone(data, kdims=kdims, vdims=vdims,
                                  datatype=datatype)


class Interface(param.Parameterized):

interfaces = {}
Expand Down
27 changes: 26 additions & 1 deletion holoviews/core/data/ndelement.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .interface import Interface
from ..dimension import Dimension, Dimensioned
from ..element import NdElement
from ..ndmapping import item_check
from ..ndmapping import item_check, OrderedDict
from .. import util


Expand Down Expand Up @@ -141,5 +141,30 @@ def unpack_scalar(cls, columns, data):
else:
return data

@classmethod
def iloc(cls, dataset, index):
    """
    Select rows and columns of an NdElement-backed dataset by
    position.

    ``index`` is a ``(rows, cols)`` pair. The column mapping is
    obtained from ``dataset.columns()``; every entry whose key
    matches one of the resolved dimensions is indexed by ``rows``
    and collected, in the order of that mapping, into a new
    OrderedDict. A scalar row combined with a scalar column returns
    the single value instead.
    """
    column_data = dataset.columns()
    row_sel, col_sel = index
    scalar_result = False
    if isinstance(col_sel, slice):
        dims = dataset.dimensions()[col_sel]
    elif np.isscalar(col_sel):
        scalar_result = np.isscalar(row_sel)
        dims = [dataset.get_dimension(col_sel, strict=True)]
    else:
        dims = [dataset.get_dimension(dim, strict=True) for dim in col_sel]

    if np.isscalar(row_sel):
        row_sel = [row_sel]

    selected = OrderedDict(
        (key, column[row_sel])
        for key, column in column_data.items()
        if key in dims)

    if scalar_result:
        return selected[dims[0].name][0]
    return selected


Interface.register(NdElementInterface)
21 changes: 21 additions & 0 deletions holoviews/core/data/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,25 @@ def dframe(cls, columns, dimensions):
return columns.data.copy()


@classmethod
def iloc(cls, dataset, index):
    """
    Select rows and columns of a DataFrame-backed dataset by
    position.

    ``index`` is a ``(rows, cols)`` pair. Columns are resolved to
    dimension names and then to their integer positions among the
    DataFrame's columns before being passed to ``DataFrame.iloc``. A
    scalar row combined with a scalar column returns the single
    value; otherwise the result of indexing ``DataFrame.iloc`` with
    list or slice selectors is returned.
    """
    row_sel, col_sel = index
    frame = dataset.data
    frame_columns = list(frame.columns)
    scalar_result = False
    if isinstance(col_sel, slice):
        names = [dim.name for dim in dataset.dimensions()][col_sel]
    elif np.isscalar(col_sel):
        scalar_result = np.isscalar(row_sel)
        names = [dataset.get_dimension(col_sel).name]
    else:
        names = [dataset.get_dimension(dim).name for dim in col_sel]

    # Dimension order need not match the DataFrame column order, so
    # names are mapped to DataFrame column positions.
    positions = [frame_columns.index(name) for name in names]

    if scalar_result:
        return frame.iloc[row_sel, positions[0]]
    if np.isscalar(row_sel):
        row_sel = [row_sel]
    return frame.iloc[row_sel, positions]


Interface.register(PandasInterface)
Loading

0 comments on commit fd350c3

Please sign in to comment.