Skip to content

Commit

Permalink
Merge pull request #414 from achilleas-k/fix/data-view-write
Browse files Browse the repository at this point in the history
Fix DataView reading and writing
  • Loading branch information
jgrewe authored Oct 14, 2019
2 parents ac0d807 + 8deb1fc commit d6df55e
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 152 deletions.
24 changes: 15 additions & 9 deletions nixio/data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from . import util
from .compression import Compression

from .exceptions import InvalidUnit
from .exceptions import InvalidUnit, IncompatibleDimensions
from .section import Section


Expand Down Expand Up @@ -296,15 +296,21 @@ def unit(self, u):
def get_slice(self, positions, extents=None, mode=DataSliceMode.Index):
datadim = len(self.shape)
if not len(positions) == datadim:
raise IndexError("Number of positions given ({}) does not match "
"number of data dimensions ({})".format(
len(positions), datadim
))
raise IncompatibleDimensions(
"Number of positions given ({}) does not match "
"number of data dimensions ({})".format(
len(positions), datadim
),
"DataArray.get_slice"
)
if extents and not len(extents) == datadim:
raise IndexError("Number of extents given ({}) does not match "
"number of data dimensions ({})".format(
len(extents), datadim
))
raise IncompatibleDimensions(
"Number of extents given ({}) does not match "
"number of data dimensions ({})".format(
len(extents), datadim
),
"DataArray.get_slice"
)
if mode == DataSliceMode.Index:
sl = tuple(slice(p, p+e) for p, e in zip(positions, extents))
return DataView(self, sl)
Expand Down
85 changes: 0 additions & 85 deletions nixio/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,91 +116,6 @@ def append(self, data, axis=0):
sl = tuple(slice(o, c+o) for o, c in zip(offset, count))
self._write_data(data, sl)

@staticmethod
def __index_to_tuple(index):
    """
    Normalise a single index object into a tuple of indices.

    A tuple is returned unchanged, an int or slice is wrapped in a
    one-element tuple and Ellipsis becomes the empty tuple.
    Any other kind of index raises IndexError.
    """
    kind = type(index)

    if kind is tuple:
        return index
    if kind in (int, slice):
        return (index,)
    if index is Ellipsis:
        return ()
    raise IndexError("Unsupported index")

@staticmethod
def __complete_slices(shape, index):
    """
    Turn the index for one dimension into a slice with explicit bounds.

    shape: length of the dimension
    index: a slice without step, an int, or None

    Missing slice bounds are filled in from the dimension length and
    negative values are counted from the end. An int selects a single
    element and None selects the whole dimension.
    Raises IndexError for stepped slices and for any other index type.
    """
    kind = type(index)

    if kind is slice:
        if index.step is not None:
            raise IndexError("Invalid index, stepping unsupported")
        lower, upper = index.start, index.stop
        if lower is None:
            lower = 0
        elif lower < 0:
            lower = shape + lower
        if upper is None:
            upper = shape
        elif upper < 0:
            upper = shape + upper
        return slice(lower, upper, index.step)

    if kind is int:
        # negative positions are relative to the end of the dimension
        position = shape + index if index < 0 else index
        return slice(position, position + 1)

    if index is None:
        return slice(0, shape)

    raise IndexError("Invalid index")

@staticmethod
def __fill_none(shape, index, to_replace=1):
    """
    Return a tuple of Nones that pads index up to the length of shape.

    to_replace is the number of entries of index that the padding
    substitutes (1 when an Ellipsis is being replaced, 0 when the padding
    is only appended).
    """
    missing = len(shape) - len(index) + to_replace
    return (None,) * missing

def __tuple_to_count_offset_shape(self, index):
    """
    Convert an index tuple into count, offset and result shape.

    Precondition: index is a tuple with at least one element.

    Returns a (count, offset, shape) triple. count is a tuple with the
    number of selected elements per dimension, offset holds the start
    position per dimension and shape is a list of the counts without the
    dimensions that were indexed with a plain int.
    """
    dims = self.shape
    pad = self.__fill_none

    # a leading Ellipsis is replaced by the required number of Nones
    if index[0] is Ellipsis:
        index = pad(dims, index) + index[1:]
    # a trailing Ellipsis is just cut away; the padding appended further
    # down lets __complete_slices do the right thing
    if index[-1] is Ellipsis:
        index = index[:-1]

    # Ellipsis in the middle of the tuple: only one *can* be handled, any
    # further one is rejected by __complete_slices as an invalid index
    if Ellipsis in index:
        where = index.index(Ellipsis)
        index = index[:where] + pad(dims, index) + index[where+1:]

    # in python3 map does not work with None, so the index has to be
    # filled up to the number of dimensions before mapping over it
    index = index + pad(dims, index, to_replace=0)

    bounds = [(sl.start, sl.stop)
              for sl in map(self.__complete_slices, dims, index)]
    count = tuple(stop - start for start, stop in bounds)
    offset = list(zip(*bounds))[0]

    # drop all counts that came from single ints
    # NB: when only ints were given, e.g. (int, ), the result is empty,
    # which is intended because it indicates a scalar result
    shape = [cnt for idx, cnt in zip(index, count) if type(idx) is not int]

    return count, offset, shape

def _write_data(self, data, sl=None):
dataset = self._h5group.get_dataset("data")
dataset.write_data(data, sl)
Expand Down
170 changes: 115 additions & 55 deletions nixio/data_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,44 @@
# Redistribution and use in source and binary forms, with or without
# modification, are permitted under the terms of the BSD License. See
# LICENSE file in the root of the Project.
import numpy as np
from numbers import Integral
try:
from collections.abc import Iterable
except ImportError:
from collections import Iterable
from .data_set import DataSet
from .exceptions import OutOfBounds
from .exceptions import OutOfBounds, IncompatibleDimensions


class DataView(DataSet):

def __init__(self, da, sl):
self.array = da
self._h5group = self.array._h5group
self._slice = sl
def __init__(self, da, slices):
if len(slices) != len(da.shape):
# This is always checked by the calling function, but we repeat
# the check here for future bug catching
raise IncompatibleDimensions(
"Number of dimensions for DataView does not match underlying "
"DataArray: {} != {}".format(len(slices), len(da.shape)),
"DataView"
)

if any(s.stop > e for s, e in zip(sl, self.array.data_extent)):
if any(s.stop > e for s, e in zip(slices, da.data_extent)):
raise OutOfBounds(
"Trying to create DataView which is out of bounds"
"Trying to create DataView which is out of bounds of the "
"underlying DataArray"
)

# Simplify all slices
slices = tuple(slice(*sl.indices(dimlen))
for sl, dimlen in zip(slices, da.shape))

self.array = da
self._h5group = self.array._h5group
self._slices = slices

@property
def data_extent(self):
return tuple(s.stop - s.start for s in self._slice)
return tuple(s.stop - s.start for s in self._slices)

@data_extent.setter
def data_extent(self, v):
Expand All @@ -39,51 +53,97 @@ def data_extent(self, v):
def data_type(self):
return self.array.data_type

def _write_data(self, data, count, offset):
    """
    Write data into the region of the underlying array covered by the view.

    When count is empty the view's own count (self._count) is used. The
    offset is converted with self._transform_coordinates before the write
    is delegated to the parent class, whose result is returned.
    """
    effective_count = count if count else self._count
    absolute_offset = self._transform_coordinates(effective_count, offset)
    parent = super(DataView, self)
    return parent._write_data(data, effective_count, absolute_offset)
def _write_data(self, data, sl=None):
    """
    Write data into the part of the underlying array covered by the view.

    data: the values to write
    sl: index or slices relative to the DataView; None (the default)
        addresses the whole view

    The index is translated with self._transform_coordinates, which may
    raise for an invalid or out-of-range index, before the write is
    delegated to the parent class.
    """
    # Compare against None explicitly: a plain truth test would treat the
    # valid index 0 as "no index given" and write to the whole view.
    if sl is None:
        tsl = self._slices
    else:
        tsl = self._transform_coordinates(sl)
    super(DataView, self)._write_data(data, tsl)

def _read_data(self, sl=None):
dvslices = self._slice
# complete DataView slices (turn Nones into values)
dvslices = tuple(slice(*dv.indices(l)) for dv, l in
zip(dvslices, self.array.shape))
sup = super(DataView, self)
if sl is None or sl == slice(None, None, None):
# full DataView: pass dvslices directly
return sup._read_data(dvslices)
if isinstance(sl, int):
# single value or dimension, offset by DataView start on first dim
readslice = dvslices[0].start + sl
return sup._read_data(readslice)
if isinstance(sl, Iterable):
# combine slices
readslice = list()
for readi, datai in zip(sl, dvslices):
if readi is None:
readslice.append(datai)
elif isinstance(readi, int):
readslice.append(datai.start + readi)
elif isinstance(readi, slice):
start = datai.start + (readi.start or 0)
stop = (datai.start + readi.stop
if readi.stop else datai.stop)
readslice.append(slice(start, stop, readi.step))
return sup._read_data(tuple(readslice))

# something else? Just read the underlying data then slice it
# probably inefficient, but correct
return sup._read_data(dvslices).read_data(sl)

def _transform_coordinates(self, count, offset):
if not offset:
if np.any(np.greater(count, self._count)):
raise OutOfBounds("Trying to access data outside of range")
return self._offset
else:
co = tuple(c + o for c, o in zip(count, offset))
if any(c > sc for c, sc in zip(co, self._count)):
raise OutOfBounds("Trying to access data outside of range")
return tuple(so + o for so, o in zip(self._offset, offset))
tsl = self._slices
if sl:
tsl = self._transform_coordinates(sl)
return super(DataView, self)._read_data(tsl)

def _transform_coordinates(self, user_slices):
    """
    Translate indices given relative to the DataView into the equivalent
    indices of the underlying DataArray.

    user_slices: an index, a slice, or a tuple of those; it is expanded to
    one entry per dimension with self._expand_user_slices first.

    Returns a tuple with one int or slice per dimension.
    Raises OutOfBounds if a translated index falls outside of the DataView,
    TypeError for entries that are neither integers nor slices and
    ValueError for negative steps. HDF5 hyperslabs don't support negative
    steps, so they are rejected here with the same message to keep the
    stack trace short.
    """
    oob = OutOfBounds("Trying to access data outside of range of DataView")

    def shift_slice(requested, view):
        """Translate the slice for one dimension (the view's step is 1)."""
        length = view.stop - view.start
        begin, end, stride = requested.indices(length)
        if end < 0:  # special case for None stop
            end = length + end
        shifted = slice(view.start + begin, view.start + end, stride)
        if shifted.stop > view.stop:
            raise oob
        if shifted.step < 0:
            raise ValueError("Step must be >= 1")
        if shifted.start < view.start:
            raise oob
        return shifted

    def shift_index(position, view):
        """Translate the integer index for one dimension."""
        if position < 0:
            target = view.stop + position
        else:
            target = position + view.start
        if not view.start <= target < view.stop:
            raise oob
        return target

    view_slices = self._slices
    expanded = self._expand_user_slices(user_slices)
    translated = []
    for requested, view in zip(expanded, view_slices):
        if isinstance(requested, Integral):
            translated.append(shift_index(requested, view))
        elif isinstance(requested, slice):
            translated.append(shift_slice(requested, view))
        else:
            raise TypeError("Data indices must be integers or slices, "
                            "not {}".format(type(requested)))

    return tuple(translated)

def _expand_user_slices(self, user_slices):
    """
    Given the user-supplied slices or indices, expands an Ellipsis if
    present and returns the same objects in a tuple padded with
    slice(None) to match the dimensionality of the DataView.

    user_slices: a single index object or a tuple of index objects

    Raises IndexError if more than one Ellipsis is given or if more
    indices are supplied than the DataView has dimensions.
    """
    # Only a tuple is a multi-dimensional index. Any other object (list,
    # array, string, ...) is treated as one index entry, so it is returned
    # as-is instead of failing here on a missing method or on
    # concatenation with the padding.
    if not isinstance(user_slices, tuple):
        user_slices = (user_slices,)

    ndim = len(self.data_extent)

    # Look for Ellipsis by identity so that entries are never compared
    # with "=="
    ellipses = [pos for pos, item in enumerate(user_slices)
                if item is Ellipsis]
    if len(ellipses) > 1:
        raise IndexError(
            "an index can only have a single ellipsis ('...')"
        )

    # The Ellipsis itself does not address a dimension
    nindexed = len(user_slices) - len(ellipses)
    if nindexed > ndim:
        # Without this check the surplus indices would be dropped without
        # any error
        raise IndexError(
            "too many indices for DataView: DataView is {}-dimensional, "
            "but {} were indexed".format(ndim, nindexed)
        )

    padding = (slice(None),) * (ndim - nindexed)
    if ellipses:
        # expand slices at Ellipsis index
        expidx = ellipses[0]
        return user_slices[:expidx] + padding + user_slices[expidx+1:]

    # expand slices at the end
    return user_slices + padding
7 changes: 4 additions & 3 deletions nixio/test/test_data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import unittest
import numpy as np
import nixio as nix
from nixio.exceptions import IncompatibleDimensions
from .tmp import TempDir


Expand Down Expand Up @@ -460,11 +461,11 @@ def test_get_slice(self):
mode=nix.DataSliceMode.Data)
np.testing.assert_almost_equal(data, dslice)

with self.assertRaises(IndexError):
with self.assertRaises(IncompatibleDimensions):
da2d.get_slice((0, 0, 0), (10, 10, 10))

with self.assertRaises(IndexError):
with self.assertRaises(IncompatibleDimensions):
da2d.get_slice((0, 0), (10,))

with self.assertRaises(IndexError):
with self.assertRaises(IncompatibleDimensions):
da3d.get_slice((0, 0, 0), (3, 9, 40, 1))
Loading

0 comments on commit d6df55e

Please sign in to comment.