Deprioritized array interface #2338

Merged · 6 commits · Feb 20, 2018
4 changes: 2 additions & 2 deletions doc/Tutorials/Columnar_Data.ipynb
@@ -46,7 +46,7 @@
"metadata": {},
"outputs": [],
"source": [
"xs = range(10)\n",
"xs = np.arange(10)\n",
"ys = np.exp(xs)\n",
"\n",
"table = hv.Table((xs, ys), kdims=['x'], vdims=['y'])\n",
@@ -185,7 +185,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(type(hv.Scatter((xs, ys), datatype=['array']).data))\n",
"print(type(hv.Scatter((xs.astype('float64'), ys), datatype=['array']).data))\n",
"print(type(hv.Scatter((xs, ys), datatype=['dictionary']).data))\n",
"print(type(hv.Scatter((xs, ys), datatype=['dataframe']).data))"
]
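
The notebook change above replaces range(10) with np.arange(10) and casts the x-values to float64 in the 'array' example. A minimal sketch of why that cast is needed, assuming only the numpy and holoviews imports the notebook already uses: the array interface now refuses columns whose dtype kinds differ, so an integer x column next to a float y column would no longer back a Scatter with a plain ndarray.

import numpy as np
import holoviews as hv

xs = np.arange(10)   # integer dtype (kind 'i')
ys = np.exp(xs)      # float dtype (kind 'f')

# The 'array' interface packs every column into one ndarray, so the columns
# must share a dtype kind; casting xs to float64 satisfies that requirement.
scatter = hv.Scatter((xs.astype('float64'), ys), datatype=['array'])
print(type(scatter.data))   # expected: <class 'numpy.ndarray'>
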
2 changes: 1 addition & 1 deletion doc/nbpublisher
Submodule nbpublisher updated 1 file: nbtest.py (+54 −8)
6 changes: 3 additions & 3 deletions examples/user_guide/07-Tabular_Datasets.ipynb
@@ -73,7 +73,7 @@
"metadata": {},
"outputs": [],
"source": [
"xs = range(10)\n",
"xs = np.arange(10)\n",
"ys = np.exp(xs)\n",
"\n",
"table = hv.Table((xs, ys), 'x', 'y')\n",
@@ -201,7 +201,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Note these include grid based datatypes, which are covered in [Gridded Datasets](http://holoviews.org/user_guide/Gridded_Datasets.html). To select a particular storage format explicitly, supply one or more allowed datatypes:"
"Note these include grid based datatypes, which are covered in [Gridded Datasets](http://holoviews.org/user_guide/Gridded_Datasets.html). To select a particular storage format explicitly, supply one or more allowed datatypes (note that the 'array' interface only supports data with matching types):"
]
},
{
@@ -210,7 +210,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(type(hv.Scatter((xs, ys), datatype=['array']).data))\n",
"print(type(hv.Scatter((xs.astype('float64'), ys), datatype=['array']).data))\n",
"print(type(hv.Scatter((xs, ys), datatype=['dictionary']).data))\n",
"print(type(hv.Scatter((xs, ys), datatype=['dataframe']).data))"
]
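
The edited markdown cell notes that a storage format can be requested explicitly and that the 'array' interface only accepts columns of matching types. A short sketch of that usage, assuming pandas is installed as the notebook does; the datatype keyword and the three backends are exactly those exercised in the cell above.

import numpy as np
import holoviews as hv

xs = np.arange(10).astype('float64')
ys = np.exp(xs)

# Request each storage backend explicitly and inspect the container it produces.
for dt in ['array', 'dictionary', 'dataframe']:
    scatter = hv.Scatter((xs, ys), datatype=[dt])
    print(dt, '->', type(scatter.data))
# Omitting datatype falls back to the default priority list, in which this PR
# moves 'array' behind the other interfaces.
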
7 changes: 5 additions & 2 deletions holoviews/core/data/__init__.py
@@ -17,12 +17,12 @@
from .multipath import MultiInterface # noqa (API import)
from .image import ImageInterface # noqa (API import)

datatypes = ['array', 'dictionary', 'grid']
datatypes = ['dictionary', 'grid']

try:
import pandas as pd # noqa (Availability import)
from .pandas import PandasInterface
datatypes = ['array', 'dataframe', 'dictionary', 'grid', 'ndelement']
datatypes = ['dataframe', 'dictionary', 'grid', 'ndelement', 'array']
DFColumns = PandasInterface
except ImportError:
pass
@@ -53,6 +53,9 @@
except ImportError:
pass

if 'array' not in datatypes:
datatypes.append('array')

from ..dimension import Dimension, process_dimensions
from ..element import Element
from ..ndmapping import OrderedDict
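
The module-level datatypes list defines the default order in which interfaces are tried when no datatype is given; this hunk drops 'array' from the head of the list, and the new block at the end guarantees it is still registered, just at the lowest priority. A minimal sketch of inspecting the result (the printed order assumes pandas is the only optional backend present; interfaces imported later in this file may insert further entries).

from holoviews.core.data import datatypes

# With only pandas among the optional backends this is
# ['dataframe', 'dictionary', 'grid', 'ndelement', 'array'];
# other optional interfaces may add more entries ahead of 'array'.
print(datatypes)

# The fallback appended in this hunk ensures 'array' is always available.
assert 'array' in datatypes
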
12 changes: 7 additions & 5 deletions holoviews/core/data/array.py
@@ -33,18 +33,20 @@ def init(cls, eltype, data, kdims, vdims):
d for d in kdims + vdims]
if ((isinstance(data, dict) or util.is_dataframe(data)) and
all(d in data for d in dimensions)):
dataset = [data[d] for d in dimensions]
dataset = [d if isinstance(d, np.ndarray) else np.asarray(data[d]) for d in dimensions]
if len(set(d.dtype.kind for d in dataset)) > 1:
raise ValueError('ArrayInterface expects all columns to be of the same dtype')
data = np.column_stack(dataset)
elif isinstance(data, dict) and not all(d in data for d in dimensions):
dict_data = sorted(data.items())
dataset = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v))
for k, v in dict_data))
data = np.column_stack(dataset)
elif isinstance(data, tuple):
data = [np.asarray(d) for d in data]
if any(arr.ndim > 1 for arr in data):
raise ValueError('ArrayInterface expects data to be of flat shape.')
if cls.expanded(data):
data = [d if isinstance(d, np.ndarray) else np.asarray(d) for d in data]
if len(set(d.dtype.kind for d in data)) > 1:
raise ValueError('ArrayInterface expects all columns to be of the same dtype')
elif cls.expanded(data):
data = np.column_stack(data)
else:
raise ValueError('ArrayInterface expects data to be of uniform shape.')
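
The new checks in ArrayInterface.init guard against NumPy's silent promotion to a common dtype when heterogeneous columns are stacked into a single array. A standalone sketch of the behaviour being guarded against (plain NumPy, independent of HoloViews):

import numpy as np

ints = np.arange(3)                  # dtype kind 'i'
floats = np.exp(ints)                # dtype kind 'f'
strings = np.array(['a', 'b', 'c'])  # dtype kind 'U'

# column_stack promotes everything to one dtype: the integers become floats
# here, and every value becomes a string below. Raising on mismatched dtype
# kinds avoids silently changing the column types.
print(np.column_stack([ints, floats]).dtype)   # float64
print(np.column_stack([ints, strings]).dtype)  # a Unicode string dtype
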
6 changes: 4 additions & 2 deletions holoviews/element/util.py
@@ -178,6 +178,7 @@ def _aggregate_dataset(self, obj, xcoords, ycoords):
xdim, ydim = dim_labels[:2]
shape = (len(ycoords), len(xcoords))
nsamples = np.product(shape)
grid_data = {xdim: xcoords, ydim: ycoords}

ys, xs = cartesian_product([ycoords, xcoords], copy=True)
data = {xdim: xs, ydim: ys}
@@ -189,15 +190,16 @@ def _aggregate_dataset(self, obj, xcoords, ycoords):
dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype])
concat_data = obj.interface.concatenate([dense_data, obj], datatype=[dtype])
reindexed = concat_data.reindex([xdim, ydim], vdims)
if pd:
if not reindexed:
agg = reindexed
elif pd:
df = PandasInterface.as_dframe(reindexed)
df = df.groupby([xdim, ydim], sort=False).first().reset_index()
agg = reindexed.clone(df)
else:
agg = reindexed.aggregate([xdim, ydim], reduce_fn)

# Convert data to a gridded dataset
grid_data = {xdim: xcoords, ydim: ycoords}
for vdim in vdims:
grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape)
return agg.clone(grid_data, kdims=[xdim, ydim], vdims=vdims,
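
Besides short-circuiting when reindexed is empty, the pandas branch relies on groupby(...).first() to collapse the dense NaN grid and the original samples into one value per (x, y) cell. A small pandas-only sketch of that pattern with hypothetical column names; first() keeps the first non-null value per group, so real samples win over the NaN placeholders concatenated in front of them.

import numpy as np
import pandas as pd

# One (x, y) cell appears twice: first as a NaN placeholder from the dense
# grid, later with a real sample; the other two cells appear once each.
df = pd.DataFrame({'x': [0, 0, 1, 0],
                   'y': [0, 1, 0, 0],
                   'z': [np.nan, 2.0, 3.0, 9.0]})

# groupby(...).first() returns the first non-null z per (x, y), so the real
# sample (9.0) replaces the NaN placeholder for cell (0, 0).
deduped = df.groupby(['x', 'y'], sort=False).first().reset_index()
print(deduped)
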
2 changes: 2 additions & 0 deletions holoviews/operation/element.py
@@ -692,6 +692,8 @@ def _process_layer(self, element, key=None):
if self.p.interpolation not in INTERPOLATE_FUNCS:
return element
x, y = element.dimension_values(0), element.dimension_values(1)
if 'f' in (x.dtype.kind, y.dtype.kind):
x, y = x.astype('float'), y.astype('float')
array = INTERPOLATE_FUNCS[self.p.interpolation](x, y)
dvals = tuple(element.dimension_values(d) for d in element.dimensions()[2:])
return element.clone((array[0, :].astype(x.dtype), array[1, :].astype(y.dtype))+dvals)
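
The added lines cast both coordinate arrays to float whenever either of them already has a floating dtype, so the interpolation functions receive a uniform float dtype instead of a mixed int/float pair. A tiny sketch of the same dtype.kind test on hypothetical inputs:

import numpy as np

x = np.arange(5)              # dtype kind 'i'
y = np.linspace(0.0, 1.0, 5)  # dtype kind 'f'

# Same condition as the added lines: if either array is floating, cast both.
if 'f' in (x.dtype.kind, y.dtype.kind):
    x, y = x.astype('float'), y.astype('float')
print(x.dtype, y.dtype)       # float64 float64
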
14 changes: 14 additions & 0 deletions tests/core/data/testdataset.py
@@ -849,6 +849,20 @@ def test_dataset_simple_dict_sorted(self):
self.assertEqual(dataset, Dataset([(i, i) for i in range(1, 4)],
kdims=['x'], vdims=['y']))

def test_dataset_sort_hm(self):
ds = Dataset(([2, 2, 1], [2,1,2], [1, 2, 3]),
kdims=['x', 'y'], vdims=['z']).sort()
ds_sorted = Dataset(([1, 2, 2], [2, 1, 2], [3, 2, 1]),
kdims=['x', 'y'], vdims=['z'])
self.assertEqual(ds.sort(), ds_sorted)

def test_dataset_sort_reverse_hm(self):
ds = Dataset(([2, 1, 2, 1], [2, 2, 1, 1], [0, 1, 2, 3]),
kdims=['x', 'y'], vdims=['z'])
ds_sorted = Dataset(([2, 2, 1, 1], [2, 1, 2, 1], [0, 2, 1, 3]),
kdims=['x', 'y'], vdims=['z'])
self.assertEqual(ds.sort(reverse=True), ds_sorted)



class DFDatasetTest(HeterogeneousColumnTypes, ComparisonTestCase):
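
The two new tests pin down lexicographic ordering over the key dimensions: sort by x, then y, with the value column carried along, and the reverse of that order for reverse=True. A NumPy-only sketch of the ordering test_dataset_sort_hm expects; this is not necessarily how Dataset.sort is implemented internally.

import numpy as np

x = np.array([2, 2, 1])
y = np.array([2, 1, 2])
z = np.array([1, 2, 3])

# lexsort treats the last key as primary, so (y, x) sorts by x, then y.
order = np.lexsort((y, x))
print(x[order], y[order], z[order])   # [1 2 2] [2 1 2] [3 2 1]
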