Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
more support for boolean indexing and boolean assign
Browse files Browse the repository at this point in the history
  • Loading branch information
Alicia1529 committed Feb 24, 2020
1 parent f83b206 commit 041b70c
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 163 deletions.
74 changes: 52 additions & 22 deletions python/mxnet/ndarray/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from ..base import ctypes2buffer
from ..runtime import Features
from ..context import Context, current_context
from ..util import is_np_array
from . import _internal
from . import op
from ._internal import NDArrayBase
Expand Down Expand Up @@ -521,7 +522,7 @@ def __setitem__(self, key, value):
return

else:
key = indexing_key_expand_implicit_axes(key, self.shape)
key, _ = indexing_key_expand_implicit_axes(key, self.shape)
slc_key = tuple(idx for idx in key if idx is not None)

if len(slc_key) < self.ndim:
Expand Down Expand Up @@ -2574,9 +2575,12 @@ def asscalar(self):
>>> type(x.asscalar())
<type 'numpy.int32'>
"""
if self.shape != (1,):
if self.size != 1:
raise ValueError("The current array is not a scalar")
return self.asnumpy()[0]
if self.ndim == 1:
return self.asnumpy()[0]
else:
return self.asnumpy()[()]

def astype(self, dtype, copy=True):
"""Returns a copy of the array after casting to a specified type.
Expand Down Expand Up @@ -2943,6 +2947,15 @@ def _scatter_set_nd(self, value_nd, indices):
lhs=self, rhs=value_nd, indices=indices, shape=self.shape, out=self
)

def check_boolean_array_dimension(array_shape, axis, bool_shape):
    """Validate that a boolean index fits the array axes it is applied to.

    The boolean index covers ``len(bool_shape)`` consecutive axes of the indexed
    array, starting at ``axis``. Each of those axes must have the same size as
    the corresponding dimension of the boolean index.

    Parameters
    ----------
    array_shape : tuple of int
        Shape of the array being indexed.
    axis : int
        First axis of the array that the boolean index applies to.
    bool_shape : tuple of int
        Shape of the boolean index.

    Raises
    ------
    IndexError
        If the boolean index would extend past the last axis of the array, or
        if any of its dimensions differs in size from the matching array axis.
    """
    num_indexed = axis + len(bool_shape)
    # Report an over-long boolean index explicitly instead of letting the
    # tuple lookup below fail with an opaque "tuple index out of range".
    if num_indexed > len(array_shape):
        raise IndexError('too many indices for array: array is {}-dimensional, '
                         'but {} were indexed'.format(len(array_shape), num_indexed))
    for offset, bool_size in enumerate(bool_shape):
        array_size = array_shape[axis + offset]
        if array_size != bool_size:
            raise IndexError('boolean index did not match indexed array along axis {};'
                             ' size is {} but corresponding boolean size is {}'
                             .format(axis + offset, array_size, bool_size))

def indexing_key_expand_implicit_axes(key, shape):
"""Make implicit axes explicit by adding ``slice(None)``.
Expand All @@ -2966,6 +2979,13 @@ def indexing_key_expand_implicit_axes(key, shape):
ell_idx = None
num_none = 0
nonell_key = []
# `prepend` is defined to handle zero-dim boolean index
# For 0-d boolean indices: A new axis is added,
# but at the same time no axis is "used". So if we have True,
# we add a new axis (a bit like with np.newaxis). If it is
# False, we add a new axis, but this axis has 0 entries.
prepend = -1
axis = 0
for i, idx in enumerate(key):
if idx is Ellipsis:
if ell_idx is not None:
Expand All @@ -2974,14 +2994,36 @@ def indexing_key_expand_implicit_axes(key, shape):
)
ell_idx = i
else:
# convert primitive type boolean value to mx.np.bool type
# otherwise will be treated as 1/0
if isinstance(idx, bool):
idx = array(idx, dtype=np.bool_)
if idx is None:
num_none += 1
if isinstance(idx, NDArrayBase) and idx.ndim == 0 and idx.dtype != np.bool_:
if isinstance(idx, NDArrayBase) and idx.ndim == 0 and idx.dtype == np.bool_:
if not idx: # array(False) has priority
prepend = 0
else:
prepend = np.newaxis
elif isinstance(idx, NDArrayBase) and idx.ndim == 0 and idx.dtype != np.bool_:
# This handles ndarray of zero dim. e.g array(1)
# while avoiding conversion of zero-dim boolean arrays
nonell_key.append(idx.item())
# float type will be converted to int
nonell_key.append(int(idx.item()))
axis += 1
elif isinstance(idx, NDArrayBase) and idx.dtype == np.bool_:
check_boolean_array_dimension(shape, axis, idx.shape)
# If the whole array is False and npx.set_np() has not been called,
# a shape-inference error would be thrown, so fail early with a clear message.
if not is_np_array():
raise ValueError('Cannot perform boolean indexing in legacy mode. Please activate'
' numpy semantics by calling `npx.set_np()` in the global scope'
' before calling this function.')
nonell_key.extend(idx.nonzero())
axis += idx.ndim
else:
nonell_key.append(idx)
axis += 1

nonell_key = tuple(nonell_key)

Expand All @@ -2995,7 +3037,7 @@ def indexing_key_expand_implicit_axes(key, shape):
(slice(None),) * ell_ndim +
nonell_key[ell_idx:])

return expanded_key
return expanded_key, prepend


def _int_to_slice(idx):
Expand Down Expand Up @@ -3053,32 +3095,20 @@ def _is_advanced_index(idx):
def get_indexing_dispatch_code(key):
"""Returns a dispatch code for calling basic or advanced indexing functions."""
assert isinstance(key, tuple)
num_bools = 0
basic_indexing = True

for idx in key:
if isinstance(idx, (NDArray, np.ndarray, list, tuple)):
if isinstance(idx, (NDArray, np.ndarray, list, tuple, range)):
if isinstance(idx, tuple) and len(idx) == 0:
return _NDARRAY_EMPTY_TUPLE_INDEXING
if getattr(idx, 'dtype', None) == np.bool_:
num_bools += 1
basic_indexing = False
elif isinstance(idx, range):
basic_indexing = False
return _NDARRAY_BOOLEAN_INDEXING
return _NDARRAY_ADVANCED_INDEXING
elif not (isinstance(idx, (py_slice, integer_types)) or idx is None):
raise ValueError(
'NDArray does not support slicing with key {} of type {}.'
''.format(idx, type(idx))
)
if basic_indexing and num_bools == 0:
return _NDARRAY_BASIC_INDEXING
elif not basic_indexing and num_bools == 0:
return _NDARRAY_ADVANCED_INDEXING
elif num_bools == 1:
return _NDARRAY_BOOLEAN_INDEXING
else:
raise TypeError('ndarray indexing does not more than one boolean ndarray'
' in a tuple of complex indices.')
return _NDARRAY_BASIC_INDEXING


def _get_index_range(start, stop, length, step=1):
Expand Down
164 changes: 33 additions & 131 deletions python/mxnet/numpy/multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from ..base import check_call, _LIB, NDArrayHandle, c_array
from ..base import mx_real_t, c_array_buf, mx_uint, numeric_types, integer_types
from ..context import Context
from ..util import set_module, wrap_np_unary_func, wrap_np_binary_func
from ..util import set_module, wrap_np_unary_func, wrap_np_binary_func, is_np_array
from ..context import current_context
from ..ndarray import numpy as _mx_nd_np
from ..ndarray.numpy import _internal as _npi
Expand Down Expand Up @@ -405,124 +405,6 @@ def _set_np_advanced_indexing(self, key, value):
value_nd = self._prepare_value_nd(value, bcast_shape=vshape, squeeze_axes=new_axes)
self._scatter_set_nd(value_nd, idcs)

def _check_boolean_indexing_type(self, key):
    """Classify a boolean-indexing key and locate its boolean mask.

    Handles keys of the form ``arr[bool, :, :]``, ``arr[:, bool, :]`` and
    ``arr[1, bool, 4]``: one boolean array mixed either with integers only or
    with full slices (``slice(None, None, None)``) only.

    Parameters
    ----------
    key : sequence
        Indexing key. Boolean ``numpy.ndarray`` elements are replaced in place
        by arrays created on ``self.ctx``.

    Returns
    -------
    tuple
        ``(bool_type, bool_position)``. ``bool_type`` is
        ``_NDARRAY_INT_BOOLEAN_INDEXING`` when no full slice appears in the key
        and ``_NDARRAY_SLICE_BOOLEAN_INDEXING`` when no integer appears.
        ``bool_position`` is the position of the last boolean array seen, or
        ``None`` if there was none.

    Raises
    ------
    TypeError
        If an element is not a boolean array, an integer or a full slice.
    NotImplementedError
        If the key mixes integers and full slices.
    """
    dim = len(key)
    rest_int = True
    rest_full_slice = True
    pos = None
    for idx in range(dim):
        if isinstance(key[idx], _np.ndarray) and key[idx].dtype == _np.bool_:
            # NOTE(review): this in-place conversion needs a mutable ``key``;
            # with a tuple key it raises TypeError. Fixing that would require
            # returning the converted key to the callers - confirm before changing.
            key[idx] = array(key[idx], dtype='bool', ctx=self.ctx)
        if isinstance(key[idx], ndarray) and key[idx].dtype == _np.bool_:
            pos = idx
        elif isinstance(key[idx], integer_types):
            rest_full_slice = False
        elif isinstance(key[idx], py_slice) and key[idx] == slice(None, None, None):
            rest_int = False
        # not arr[:, bool, :] format slicing or not arr[3,bool,4]
        else:
            # Report the offending key element and its type, not the loop
            # position (which is always an int).
            raise TypeError('ndarray boolean indexing does not support slicing '
                            'with key {} of type {}'.format(key[idx], type(key[idx]))
                            )

    if rest_int:
        return _NDARRAY_INT_BOOLEAN_INDEXING, pos
    elif rest_full_slice:
        return _NDARRAY_SLICE_BOOLEAN_INDEXING, pos
    raise NotImplementedError("Do not support {} as key for boolean indexing".format(key))

@staticmethod
def _calculate_new_idx(key, shape, mask_pos, mask_ndim): # pylint: disable=redefined-outer-name
new_idx = 0
step = 1
for idx in range(len(key)-1, mask_pos, -1):
new_idx += key[idx]*step
step *= shape[idx+mask_ndim-1]
return new_idx

def _get_np_boolean_indexing(self, key):
    """Return ``self[key]`` for a key that contains a boolean mask.

    ``key`` is wrapped in a tuple when it is not one already, classified by
    ``_check_boolean_indexing_type``, and then handled by one of two paths:
    mask combined with full slices, or mask combined with integers.

    Raises
    ------
    IndexError
        If the mask's shape differs from the axes of ``self`` it is aligned
        with, or if an integer after the mask is not smaller than the size of
        the axis it indexes (only the upper bound is checked here).
    NotImplementedError
        If the key classification is neither of the two handled kinds.
    """
    if not isinstance(key, tuple):
        key = (key,)
    bool_type, pos = self._check_boolean_indexing_type(key)

    from functools import reduce
    mask_shape = key[pos].shape
    mask_ndim = len(mask_shape)
    ndim = len(self.shape)  # pylint: disable=redefined-outer-name, unused-variable
    # The mask is aligned with the axes pos .. pos + mask_ndim - 1 of self and
    # must match their sizes exactly.
    for i in range(mask_ndim):
        if key[pos].shape[i] != self.shape[pos + i]:
            raise IndexError('boolean index did not match indexed array along axis {};'
                             ' size is {} but corresponding boolean size is {}'
                             .format(pos + i, self.shape[pos + i], key[pos].shape[i]))
    # Axes of self that come after the ones covered by the mask.
    remaining_idces = pos + mask_ndim
    remaining_shapes = self.shape[remaining_idces:]
    # presumably flattens the mask to 1-D; _reshape_view is defined elsewhere - TODO confirm
    mask = _reshape_view(key[pos], -1)

    if bool_type == _NDARRAY_SLICE_BOOLEAN_INDEXING:
        data = _reshape_view(self, -1, *remaining_shapes)
        # if mask is at the beginning, then the scale is one
        scale = reduce(lambda x, y: x * y, self.shape[:pos], 1)
        # When axes precede the mask, the mask is stacked once per element of
        # those leading axes before being applied.
        keys = mask if scale == 1 else _reshape_view(_npi.stack(*[mask for i in range(scale)]), -1)
        all_shapes = self.shape[:pos] + remaining_shapes
        return _reshape_view(_npi.boolean_mask(data, keys), -1, *all_shapes)

    elif bool_type == _NDARRAY_INT_BOOLEAN_INDEXING:
        # Apply the integer indices that precede the mask, one at a time.
        out = self
        for idx in range(pos):
            out = out[key[idx]]
        data = _reshape_view(out, -1, *remaining_shapes)
        after_mask = _reshape_view(_npi.boolean_mask(data, mask), -1, *remaining_shapes)
        # Nothing follows the mask in the key: the masked result is final.
        if pos == len(key) - 1:
            return after_mask
        # check boundary
        for idx in range(pos+1, len(key)):
            if key[idx] >= self.shape[idx+mask_ndim-1]:
                raise IndexError('index {} on a dimension of {}'
                                 .format(key[idx], self.shape[idx+mask_ndim-1]))
        implicit_idces = len(key)+mask_ndim-1  # idces not explicitly shown in the key
        implicit_shape = self.shape[implicit_idces:]
        # Product of the sizes of the axes addressed by the integers after the mask.
        new_dim = reduce(lambda x, y: x * y, self.shape[pos+mask_ndim:implicit_idces], 1)
        # Flat offset of those trailing integers within the new_dim-sized axis.
        new_idx = self._calculate_new_idx(key, self.shape, pos, mask_ndim)
        after_reshape = _reshape_view(after_mask, -1, new_dim, *implicit_shape)
        return _reshape_view(_npi.take(after_reshape, array([new_idx]), axis=1), -1, *implicit_shape)

    raise NotImplementedError("This boolean indexing type is not supported.")

def _set_np_boolean_indexing(self, key, value):
    """Assign ``value`` to the elements of ``self`` selected by a boolean mask.

    ``key`` is wrapped in a tuple when it is not one already and classified by
    ``_check_boolean_indexing_type``. The assignment is done through
    ``_npi.boolean_mask_assign_scalar`` for numeric values or
    ``_npi.boolean_mask_assign_tensor`` for ``ndarray`` values.

    Raises
    ------
    IndexError
        If the mask's shape differs from the axes of ``self`` it is aligned with.
    NotImplementedError
        If the mask is mixed with integers but is not the last element of the
        key, or if ``value`` is neither a numeric type nor an ``ndarray``.
    """
    if not isinstance(key, tuple):
        key = (key,)
    bool_type, pos = self._check_boolean_indexing_type(key)

    mask = key[pos]
    mask_shape = mask.shape
    mask_ndim = len(mask_shape)
    # The mask must match the sizes of the axes of self starting at axis pos.
    for i in range(mask_ndim):
        if mask_shape[i] != self.shape[pos + i]:
            raise IndexError('boolean index did not match indexed array along axis {};'
                             ' size is {} but corresponding boolean size is {}'
                             .format(pos + i, self.shape[pos + i], mask_shape[i]))

    data = self  # when bool_type == _NDARRAY_SLICE_BOOLEAN_INDEXING
    if bool_type == _NDARRAY_INT_BOOLEAN_INDEXING:
        if pos != len(key) - 1:
            raise NotImplementedError('only support boolean array at the end of the idces '
                                      'when it is mixed with integers')
        # Apply each leading integer index in turn and shift the mask's start
        # axis left by one for each of them.
        # NOTE(review): indentation was reconstructed; confirm that `pos -= 1`
        # belongs inside this loop. Presumably `data` is a view so the
        # assignment below reaches `self` - TODO confirm.
        for idx in range(pos):
            data = data[key[idx]]
            pos -= 1

    if isinstance(value, numeric_types):
        # A Python bool is converted to int before being passed to the operator.
        _npi.boolean_mask_assign_scalar(data=data, mask=mask,
                                        value=int(value) if isinstance(value, bool) else value,
                                        start_axis=pos, out=data)
    elif isinstance(value, ndarray):
        _npi.boolean_mask_assign_tensor(data=data, mask=mask, value=value, start_axis=pos, out=data)
    else:
        raise NotImplementedError('type %s is not supported.'%(type(value)))

# pylint: disable=too-many-return-statements
def __getitem__(self, key):
"""Return self[key].
Expand Down Expand Up @@ -663,7 +545,7 @@ def __getitem__(self, key):
ndim = self.ndim # pylint: disable=redefined-outer-name
shape = self.shape # pylint: disable=redefined-outer-name
if isinstance(key, bool): # otherwise will be treated as 0 and 1
key = array(key, dtype=_np.bool)
key = array(key, dtype=_np.bool, ctx=self.ctx)
if isinstance(key, list):
try:
new_key = _np.array(key)
Expand Down Expand Up @@ -703,17 +585,30 @@ def __getitem__(self, key):
elif key.step == 0:
raise ValueError("slice step cannot be zero")

key_before_expaned = key
key = indexing_key_expand_implicit_axes(key, self.shape)
# `prepend` is defined to handle zero-dim boolean index
# For 0-d boolean indices: A new axis is added,
# but at the same time no axis is "used". So if we have True,
# we add a new axis (a bit like with np.newaxis). If it is
# False, we add a new axis, but this axis has 0 entries.
key, prepend = indexing_key_expand_implicit_axes(key, self.shape)
indexing_dispatch_code = get_indexing_dispatch_code(key)
if indexing_dispatch_code == _NDARRAY_BASIC_INDEXING:
return self._get_np_basic_indexing(key)
elif indexing_dispatch_code == _NDARRAY_EMPTY_TUPLE_INDEXING:
if indexing_dispatch_code == _NDARRAY_EMPTY_TUPLE_INDEXING:
# won't be affected by zero-dim boolean indices
return self._get_np_empty_tuple_indexing(key)
elif indexing_dispatch_code == _NDARRAY_BASIC_INDEXING:
if prepend == 0:
return empty((0,) + self._get_np_basic_indexing(key).shape,
dtype=self.dtype, ctx=self.ctx)
if prepend == _np.newaxis:
key = (_np.newaxis,) + key
return self._get_np_basic_indexing(key)
elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING:
if prepend == 0:
return empty((0,) + self._get_np_adanced_indexing(key).shape,
dtype=self.dtype, ctx=self.ctx)
if prepend == _np.newaxis:
key = (_np.newaxis,) + key
return self._get_np_advanced_indexing(key)
elif indexing_dispatch_code == _NDARRAY_BOOLEAN_INDEXING:
return self._get_np_boolean_indexing(key_before_expaned)
else:
raise RuntimeError

Expand Down Expand Up @@ -788,8 +683,14 @@ def __setitem__(self, key, value):
else:
raise ValueError('setting an array element with a sequence.')
else:
key_before_expaned = key
key = indexing_key_expand_implicit_axes(key, self.shape)
# `prepend` is defined to handle zero-dim boolean index
# For 0-d boolean indices: A new axis is added,
# but at the same time no axis is "used". So if we have True,
# we add a new axis (a bit like with np.newaxis). If it is
# False, we add a new axis, but this axis has 0 entries.
key, prepend = indexing_key_expand_implicit_axes(key, self.shape)
if prepend == 0:
return # no action is needed
slc_key = tuple(idx for idx in key if idx is not None)
if len(slc_key) < self.ndim:
raise RuntimeError(
Expand All @@ -809,8 +710,6 @@ def __setitem__(self, key, value):
pass # no action needed
elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING:
self._set_np_advanced_indexing(key, value)
elif indexing_dispatch_code == _NDARRAY_BOOLEAN_INDEXING:
return self._set_np_boolean_indexing(key_before_expaned, value)
else:
raise ValueError(
'Indexing NDArray with index {} of type {} is not supported'
Expand Down Expand Up @@ -896,6 +795,9 @@ def __mul__(self, other):
def __neg__(self):
    """x.__neg__() <=> -x, computed as ``x.__mul__(-1.0)``."""
    return self.__mul__(-1.0)

def __invert__(self):
    """x.__invert__() <=> ~x, delegating to ``invert``."""
    return invert(self)

def __imul__(self, other):
"""x.__imul__(y) <=> x *= y"""
if not self.writable:
Expand Down
2 changes: 1 addition & 1 deletion src/operator/numpy/np_nonzero_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void NonzeroForwardCPU(const nnvm::NodeAttrs& attrs,
CHECK_LE(in.shape().ndim(), MAXDIM) << "ndim of input cannot larger than " << MAXDIM;
// 0-dim
if (0 == in.shape().ndim()) {
MSHADOW_TYPE_SWITCH(in.dtype(), DType, {
MSHADOW_TYPE_SWITCH_WITH_BOOL(in.dtype(), DType, {
DType* in_dptr = in.data().dptr<DType>();
if (*in_dptr) {
mxnet::TShape s(2, 1);
Expand Down
2 changes: 1 addition & 1 deletion src/operator/tensor/indexing_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ void GatherNDForwardCPU(const nnvm::NodeAttrs& attrs,
strides[i] = stride;
mshape[i] = dshape[i];
}
MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, { // output data type switch
MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[0].type_flag_, DType, { // output data type switch
MSHADOW_TYPE_SWITCH(inputs[1].type_flag_, IType, { // indices data type switch
// check whether indices are out of bound
IType* idx_ptr = inputs[1].dptr<IType>();
Expand Down
2 changes: 1 addition & 1 deletion src/operator/tensor/indexing_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ void GatherNDForwardGPU(const nnvm::NodeAttrs& attrs,
strides[i] = stride;
mshape[i] = dshape[i];
}
MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, { // output data type switch
MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[0].type_flag_, DType, { // output data type switch
MSHADOW_TYPE_SWITCH(inputs[1].type_flag_, IType, { // indices data type switch
// check whether indices are out of bound
IType* idx_ptr = inputs[1].dptr<IType>();
Expand Down
Loading

0 comments on commit 041b70c

Please sign in to comment.