From 318d9c7f8f5434bdff37ddae6bfd7b03b7e1dede Mon Sep 17 00:00:00 2001 From: Hao Jin Date: Wed, 25 Dec 2019 16:40:11 +0800 Subject: [PATCH 01/17] Fix reshape interoperability test (#17155) * fix reshape interoperability test * fix for scipy import --- ci/docker/install/requirements | 4 ++-- tests/python/unittest/test_metric.py | 5 +++-- tests/python/unittest/test_numpy_interoperability.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/docker/install/requirements b/ci/docker/install/requirements index cbfc521e2c08..fd716f5fa815 100644 --- a/ci/docker/install/requirements +++ b/ci/docker/install/requirements @@ -26,8 +26,8 @@ h5py==2.8.0rc1 mock==2.0.0 nose==1.3.7 nose-timer==0.7.3 -numpy>1.16.0,<2.0.0 +numpy>1.16.0,<1.18.0 pylint==2.3.1; python_version >= '3.0' requests<2.19.0,>=2.18.4 -scipy==1.0.1 +scipy==1.2.1 six==1.11.0 diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index a1e5128d8ac6..e7273fba35d5 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -18,6 +18,7 @@ import mxnet as mx import numpy as np import scipy +from scipy.stats import pearsonr import json import math from common import with_seed @@ -267,7 +268,7 @@ def test_pearsonr(): pred1 = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) label1 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) pearsonr_expected_np = np.corrcoef(pred1.asnumpy().ravel(), label1.asnumpy().ravel())[0, 1] - pearsonr_expected_scipy, _ = scipy.stats.pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) + pearsonr_expected_scipy, _ = pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) macro_pr = mx.metric.create('pearsonr', average='macro') micro_pr = mx.metric.create('pearsonr', average='micro') @@ -289,7 +290,7 @@ def test_pearsonr(): label12 = mx.nd.array([[1, 0], [0, 1], [0, 1], [1, 0], [0, 1], [0, 1]]) pearsonr_expected_np = np.corrcoef(pred12.asnumpy().ravel(), label12.asnumpy().ravel())[0, 1] - pearsonr_expected_scipy, _ = scipy.stats.pearsonr(pred12.asnumpy().ravel(), label12.asnumpy().ravel()) + pearsonr_expected_scipy, _ = pearsonr(pred12.asnumpy().ravel(), label12.asnumpy().ravel()) macro_pr.reset() micro_pr.update([label2], [pred2]) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index fcdf547bfbec..9b445044a3c1 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -758,7 +758,7 @@ def _add_workload_reshape(): # OpArgMngr.add_workload('reshape', b, (2, 2), order='F') # Items are not equal with order='F' a = np.array(_np.ones((0, 2))) - OpArgMngr.add_workload('reshape', a, -1, 2) + OpArgMngr.add_workload('reshape', a, (-1, 2)) def _add_workload_rint(array_pool): From 410165b0a5f903edf500786d4c1b973e389c7b57 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Wed, 25 Dec 2019 13:18:32 -0800 Subject: [PATCH 02/17] [CD] enable s3 publish for nightly builds in cd (#17112) * enable s3 publish for nightly builds in cd * pass credential through env * confine credential variables to subprocess --- cd/python/pypi/Jenkins_pipeline.groovy | 3 ++- cd/python/pypi/pypi_publish.py | 21 ++++++++++----------- ci/docker/runtime_functions.sh | 9 +++++++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/cd/python/pypi/Jenkins_pipeline.groovy b/cd/python/pypi/Jenkins_pipeline.groovy index e9f172a570fe..fa9300db3ca0 100644 --- a/cd/python/pypi/Jenkins_pipeline.groovy +++ 
b/cd/python/pypi/Jenkins_pipeline.groovy @@ -27,7 +27,7 @@ // This is a temporary solution until we are confident with the packages generated by CI // This should be removed in the not too distant future. // We only skip the publish step so we can still QA the other variants. -pypi_releases = ["cu92", "cu92mkl"] +pypi_releases = [] def get_pipeline(mxnet_variant) { def node_type = mxnet_variant.startsWith('cu') ? NODE_LINUX_GPU : NODE_LINUX_CPU @@ -72,6 +72,7 @@ def push(mxnet_variant) { } else { echo "Temporarily skipping publishing PyPI package for '${mxnet_variant}'." } + sh "./ci/docker/runtime_functions.sh cd_s3_publish" } } diff --git a/cd/python/pypi/pypi_publish.py b/cd/python/pypi/pypi_publish.py index 7e09f644c734..2729068dd503 100755 --- a/cd/python/pypi/pypi_publish.py +++ b/cd/python/pypi/pypi_publish.py @@ -35,10 +35,8 @@ def post_wheel(path): logging.info('Posting {} to PyPI'.format(path)) pypi_credentials = get_secret() - cmd = 'python3 -m twine upload --username {} --password {} {}'.format( - pypi_credentials['username'], - pypi_credentials['password'], - path) + cmd = 'python3 -m twine upload {}'.format(path) + version = os.path.basename(path).split('-')[1] # The PyPI credentials for DEV has username set to 'skipPublish' # This way we do not attempt to publish the PyPI package @@ -47,14 +45,15 @@ def post_wheel(path): print('In DEV account, skipping publish') print('Would have run: {}'.format(cmd)) return 0 - else: + elif any(test_version_mark in version for test_version_mark in ['a', 'b', 'dev']): print('Skipping publishing nightly builds to Pypi.') print('See https://github.com/pypa/pypi-support/issues/50 for details') return 0 - - # DO NOT PRINT CMD IN THIS BLOCK, includes password - p = subprocess.run(cmd.split(' '), - stdout=subprocess.PIPE) + else: + env = os.environ.copy() + env['TWINE_USERNAME'] = pypi_credentials['username'] + env['TWINE_PASSWORD'] = pypi_credentials['password'] + p = subprocess.run(cmd.split(' '), stdout=subprocess.PIPE, env=env) logging.info(p.stdout) return p.returncode @@ -85,7 +84,7 @@ def get_secret(): raise e else: return json.loads(get_secret_value_response['SecretString']) - - + + if __name__ == '__main__': sys.exit(post_wheel(sys.argv[1])) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index b658f953a78a..e078b2a8f89c 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -2065,6 +2065,15 @@ cd_pypi_publish() { ./cd/python/pypi/pypi_publish.py `readlink -f wheel_build/dist/*.whl` } +cd_s3_publish() { + set -ex + pip3 install --user awscli + filepath=$(readlink -f wheel_build/dist/*.whl) + filename=$(basename $file_path) + variant=$(echo $filename | cut -d'-' -f1 | cut -d'_' -f2 -s) + aws s3 cp --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers,full=id=43f628fab72838a4f0b929d7f1993b14411f4b0294b011261bc6bd3e950a6822 s3://apache-mxnet/dist/${variant}/${filename} +} + build_static_scala_mkl() { set -ex pushd . 
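The S3 destination in `cd_s3_publish` above is keyed entirely off the wheel filename: the first dash-separated field of the filename is the package name, and its underscore-separated suffix (if any) becomes the variant folder under `dist/`. A minimal sketch of how that `cut` pipeline behaves, using illustrative nightly wheel names rather than real build artifacts:

```
# Illustrative wheel names; actual nightly artifacts follow the same naming pattern.
filename=mxnet_cu101mkl-1.6.0b20191226-py2.py3-none-manylinux1_x86_64.whl
echo $filename | cut -d'-' -f1                     # -> mxnet_cu101mkl
echo $filename | cut -d'-' -f1 | cut -d'_' -f2 -s  # -> cu101mkl  (variant folder on S3)

# A plain CPU wheel has no underscore in its first field; with -s, cut prints
# nothing for lines without the delimiter, so the variant resolves to empty.
filename=mxnet-1.6.0b20191226-py2.py3-none-manylinux1_x86_64.whl
echo $filename | cut -d'-' -f1 | cut -d'_' -f2 -s  # -> (empty)
```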
From 2551a9d8c8a4f5fd73c98e56ff79ab5410053d0e Mon Sep 17 00:00:00 2001 From: Hao Jin Date: Thu, 26 Dec 2019 07:07:30 +0800 Subject: [PATCH 03/17] fix norm sparse fallback (#17149) --- src/operator/tensor/broadcast_reduce_norm_value.cc | 2 +- src/operator/tensor/broadcast_reduce_norm_value.cu | 2 +- src/operator/tensor/broadcast_reduce_op.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/tensor/broadcast_reduce_norm_value.cc b/src/operator/tensor/broadcast_reduce_norm_value.cc index 4cd92d44997e..9acc157f8eca 100644 --- a/src/operator/tensor/broadcast_reduce_norm_value.cc +++ b/src/operator/tensor/broadcast_reduce_norm_value.cc @@ -40,7 +40,7 @@ void L2NormComputeEx(const nnvm::NodeAttrs& attrs, const NormParam& param = nnvm::get(attrs.parsed); mshadow::Stream* s = ctx.get_stream(); const NDArrayStorageType istype = inputs[0].storage_type(); - const mxnet::TShape axis = param.axis.has_value() ? param.axis.value() : mxnet::TShape(); + const mxnet::TShape axis = param.axis.has_value() ? param.axis.value() : mxnet::TShape(0, -1); if ((istype == kRowSparseStorage || istype == kCSRStorage) && axis.ndim() == 0 && param.ord == 2) { // l2 norm on the entire array diff --git a/src/operator/tensor/broadcast_reduce_norm_value.cu b/src/operator/tensor/broadcast_reduce_norm_value.cu index 188c93e61221..735c3d7faec9 100644 --- a/src/operator/tensor/broadcast_reduce_norm_value.cu +++ b/src/operator/tensor/broadcast_reduce_norm_value.cu @@ -39,7 +39,7 @@ void L2NormComputeEx(const nnvm::NodeAttrs& attrs, const NormParam& param = nnvm::get(attrs.parsed); mshadow::Stream* s = ctx.get_stream(); const NDArrayStorageType istype = inputs[0].storage_type(); - const mxnet::TShape axis = param.axis.has_value() ? param.axis.value() : mxnet::TShape(); + const mxnet::TShape axis = param.axis.has_value() ? param.axis.value() : mxnet::TShape(0, -1); if ((istype == kRowSparseStorage || istype == kCSRStorage) && axis.ndim() == 0 && param.ord == 2) { // l2 norm on the entire array diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index 27e22491ca35..799f86544160 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -1152,7 +1152,7 @@ inline bool LpNormStorageType(const nnvm::NodeAttrs& attrs, DispatchMode::kFCompute); } if (param.ord == 2) { - const mxnet::TShape axis = param.axis.has_value() ? param.axis.value() : mxnet::TShape(); + const mxnet::TShape axis = param.axis.has_value() ? 
param.axis.value() : mxnet::TShape(0, -1); if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && axis.ndim() == 0 && param.ord == 2) { // l2 norm: rsp/csr, axis = () -> dns From 872b533c45a627c79e8be9800bdcadebd77b28af Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 26 Dec 2019 10:56:21 +0800 Subject: [PATCH 04/17] randn implemented (#17141) --- python/mxnet/numpy/random.py | 43 +++++++++++++++++++++++++- tests/python/unittest/test_numpy_op.py | 22 +++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/python/mxnet/numpy/random.py b/python/mxnet/numpy/random.py index ebc24de63282..95719a005cec 100644 --- a/python/mxnet/numpy/random.py +++ b/python/mxnet/numpy/random.py @@ -20,7 +20,7 @@ from __future__ import absolute_import from ..ndarray import numpy as _mx_nd_np -__all__ = ["randint", "uniform", "normal", "choice", "rand", "multinomial", "shuffle"] +__all__ = ["randint", "uniform", "normal", "choice", "rand", "multinomial", "shuffle", "randn"] def randint(low, high=None, size=None, dtype=None, ctx=None, out=None): @@ -357,3 +357,44 @@ def shuffle(x): [0., 1., 2.]]) """ _mx_nd_np.random.shuffle(x) + + +def randn(*size, **kwargs): + r"""Return a sample (or samples) from the "standard normal" distribution. + If positive, int_like or int-convertible arguments are provided, + `randn` generates an array of shape ``(d0, d1, ..., dn)``, filled + with random floats sampled from a univariate "normal" (Gaussian) + distribution of mean 0 and variance 1 (if any of the :math:`d_i` are + floats, they are first converted to integers by truncation). A single + float randomly sampled from the distribution is returned if no + argument is provided. + This is a convenience function. If you want an interface that takes a + tuple as the first argument, use `numpy.random.standard_normal` instead. + Parameters + ---------- + d0, d1, ..., dn : int, optional + The dimensions of the returned array, should be all positive. + If no argument is given a single Python float is returned. + Returns + ------- + Z : ndarray + A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from + the standard normal distribution, or a single such float if + no parameters were supplied. + Notes + ----- + For random samples from :math:`N(\mu, \sigma^2)`, use: + ``sigma * np.random.randn(...) + mu`` + Examples + -------- + >>> np.random.randn() + 2.1923875335537315 #random + Two-by-four array of samples from N(3, 6.25): + >>> 2.5 * np.random.randn(2, 4) + 3 + array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], #random + [ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) #random + """ + output_shape = () + for s in size: + output_shape += (s,) + return _mx_nd_np.random.normal(0, 1, size=output_shape, **kwargs) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index af9228d45991..4bbf9b8040e2 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -3089,6 +3089,28 @@ def hybrid_forward(self, F, x): assert out.shape == expected_shape +@with_seed() +@use_np +def test_np_randn(): + # Test shapes. 
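+ # Only the returned array's shape is asserted; each dtype is passed through as a kwarg to confirm it is accepted.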
+ shapes = [ + (3, 3), + (3, 4), + (0, 0), + (3, 3, 3), + (0, 0, 0), + (2, 2, 4, 3), + (2, 2, 4, 3), + (2, 0, 3, 0), + (2, 0, 2, 3) + ] + dtypes = ['float16', 'float32', 'float64'] + for dtype in dtypes: + for shape in shapes: + data_mx = np.random.randn(*shape, dtype=dtype) + assert data_mx.shape == shape + + @with_seed() @use_np def test_random_seed(): From 8f9ae1c883acec8c9e12b149fa44d10737a39de1 Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Thu, 26 Dec 2019 13:20:00 +0800 Subject: [PATCH 05/17] update mkldnn to v1.1.2 (#17165) --- 3rdparty/mkldnn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn index 52c3052df8ec..cb2cc7ac17ff 160000 --- a/3rdparty/mkldnn +++ b/3rdparty/mkldnn @@ -1 +1 @@ -Subproject commit 52c3052df8ec1d5b8b45cb6c350a952840eabd42 +Subproject commit cb2cc7ac17ff4e2ef50805c7048d33256d82be4d From e15c778258f78eacb75865aff9b7fe0b75d6291f Mon Sep 17 00:00:00 2001 From: Yiyan66 <57363390+Yiyan66@users.noreply.github.com> Date: Thu, 26 Dec 2019 14:59:37 +0800 Subject: [PATCH 06/17] [numpy] fix argsort typo (#17150) * return * fix symbol --- python/mxnet/numpy/multiarray.py | 2 +- python/mxnet/symbol/numpy/_symbol.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 4910b4d6b925..a1b0e016445a 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -1377,7 +1377,7 @@ def argsort(self, axis=-1, kind=None, order=None): # pylint: disable=arguments- The arguments are the same as for :py:func:`argsort`, with this array as data. """ - raise argsort(self, axis=axis, kind=kind, order=order) + return argsort(self, axis=axis, kind=kind, order=order) def argmax_channel(self, *args, **kwargs): """Convenience fluent method for :py:func:`argmax_channel`. diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 6efc333cc16c..3ee385660715 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -491,7 +491,7 @@ def argsort(self, axis=-1, kind=None, order=None): # pylint: disable=arguments- The arguments are the same as for :py:func:`argsort`, with this array as data. """ - raise argsort(self, axis=axis, kind=kind, order=order) + return argsort(self, axis=axis, kind=kind, order=order) def argmax_channel(self, *args, **kwargs): """Convenience fluent method for :py:func:`argmax_channel`. From 07913f9beb07cb130900567bfe86e8305b84fd3c Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Thu, 26 Dec 2019 15:30:35 +0800 Subject: [PATCH 07/17] fix broken link (#17130) --- example/quantization/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/quantization/README.md b/example/quantization/README.md index 8cdc1bb7e06f..b934a811f31d 100644 --- a/example/quantization/README.md +++ b/example/quantization/README.md @@ -9,7 +9,7 @@ This folder contains examples of quantizing a FP32 model with Intel® MKL-DNN or

Model Quantization with Intel® MKL-DNN

-Intel® MKL-DNN supports quantization with subgraph features on Intel® CPU Platform and can bring performance improvements on the [Intel® Xeon® Scalable Platform](https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-platform.html). A new quantization script `imagenet_gen_qsym_mkldnn.py` has been designed to launch quantization for image-classification models with Intel® MKL-DNN. This script integrates with [Gluon-CV modelzoo](https://gluon-cv.mxnet.io/model_zoo/classification.html), so that more pre-trained models can be downloaded from Gluon-CV and then converted for quantization. To apply quantization flow to your project directly, please refer [Quantize custom models with MKL-DNN backend](https://mxnet.apache.org/tutorials/mkldnn/mkldnn_quantization.html). +Intel® MKL-DNN supports quantization with subgraph features on Intel® CPU Platform and can bring performance improvements on the [Intel® Xeon® Scalable Platform](https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-platform.html). A new quantization script `imagenet_gen_qsym_mkldnn.py` has been designed to launch quantization for image-classification models with Intel® MKL-DNN. This script integrates with [Gluon-CV modelzoo](https://gluon-cv.mxnet.io/model_zoo/classification.html), so that more pre-trained models can be downloaded from Gluon-CV and then converted for quantization. To apply quantization flow to your project directly, please refer [Quantize custom models with MKL-DNN backend](https://mxnet.apache.org/api/python/docs/tutorials/performance/backend/mkldnn/mkldnn_quantization.html). ``` usage: imagenet_gen_qsym_mkldnn.py [-h] [--model MODEL] [--epoch EPOCH] From d26dd15a1b074b0fef3ef43e660679c0b696887a Mon Sep 17 00:00:00 2001 From: Yiyan66 <57363390+Yiyan66@users.noreply.github.com> Date: Thu, 26 Dec 2019 17:14:01 +0800 Subject: [PATCH 08/17] [numpy] add op round (#17175) * add round * sanity * space --- python/mxnet/ndarray/numpy/_op.py | 27 +++++++++++++--- python/mxnet/numpy/multiarray.py | 23 ++++++++++--- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/_symbol.py | 30 +++++++++++++---- .../unittest/test_numpy_interoperability.py | 5 +++ tests/python/unittest/test_numpy_op.py | 32 +++++++++++++++++++ 6 files changed, 103 insertions(+), 15 deletions(-) diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 02e42145fb18..e380b4937168 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -37,10 +37,10 @@ 'logspace', 'expand_dims', 'tile', 'arange', 'array_split', 'split', 'vsplit', 'concatenate', 'append', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', - 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', - 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', - 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'nonzero', 'shares_memory', - 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount'] + 'blackman', 'flip', 'around', 'round', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', + 'unique', 'lcm', 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', + 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 
'einsum', 'true_divide', 'nonzero', + 'shares_memory', 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount'] @set_module('mxnet.ndarray.numpy') @@ -4737,6 +4737,25 @@ def around(x, decimals=0, out=None, **kwargs): raise TypeError('type {} not supported'.format(str(type(x)))) +@set_module('mxnet.ndarray.numpy') +def round(x, decimals=0, out=None, **kwargs): + r""" + round_(a, decimals=0, out=None) + Round an array to the given number of decimals. + + See Also + -------- + around : equivalent function; see for details. + """ + from ...numpy import ndarray + if isinstance(x, numeric_types): + return _np.around(x, decimals, **kwargs) + elif isinstance(x, ndarray): + return _npi.around(x, decimals, out=out, **kwargs) + else: + raise TypeError('type {} not supported'.format(str(type(x)))) + + @set_module('mxnet.ndarray.numpy') @wrap_np_binary_func def arctan2(x1, x2, out=None, **kwargs): diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index a1b0e016445a..22094a1621d2 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -55,9 +55,9 @@ 'tensordot', 'eye', 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', 'array_split', 'split', 'vsplit', 'concatenate', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', - 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'arctan2', 'hypot', - 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', 'take', - 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', + 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'round', 'arctan2', + 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', + 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'nonzero', 'shares_memory', 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount'] @@ -1558,13 +1558,13 @@ def norm(self, *args, **kwargs): """ raise AttributeError('mxnet.numpy.ndarray object has no attribute norm') - def round(self, *args, **kwargs): + def round(self, decimals=0, out=None, **kwargs): # pylint: disable=arguments-differ """Convenience fluent method for :py:func:`round`. The arguments are the same as for :py:func:`round`, with this array as data. """ - raise NotImplementedError + return round(self, decimals=decimals, out=out, **kwargs) def rint(self, *args, **kwargs): """Convenience fluent method for :py:func:`rint`. @@ -6456,6 +6456,19 @@ def around(x, decimals=0, out=None, **kwargs): return _mx_nd_np.around(x, decimals, out=out, **kwargs) +@set_module('mxnet.numpy') +def round(x, decimals=0, out=None, **kwargs): + r""" + round_(a, decimals=0, out=None) + Round an array to the given number of decimals. + + See Also + -------- + around : equivalent function; see for details. 
+ """ + return _mx_nd_np.around(x, decimals, out=out, **kwargs) + + @set_module('mxnet.numpy') @wrap_np_binary_func def arctan2(x1, x2, out=None, **kwargs): diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index c7e9dd1398eb..9aa755fb436e 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -86,6 +86,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'argmin', 'argmax', 'around', + 'round', 'argsort', 'append', 'broadcast_arrays', diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 3ee385660715..0b341b804758 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -45,10 +45,10 @@ 'logspace', 'expand_dims', 'tile', 'arange', 'array_split', 'split', 'vsplit', 'concatenate', 'append', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', - 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', - 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', - 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'shares_memory', - 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount'] + 'blackman', 'flip', 'around', 'round', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', + 'unique', 'lcm', 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', + 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', + 'shares_memory', 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount'] @set_module('mxnet.symbol.numpy') @@ -665,13 +665,13 @@ def norm(self, *args, **kwargs): """ raise AttributeError('_Symbol object has no attribute norm') - def round(self, *args, **kwargs): + def round(self, decimals=0, out=None, **kwargs): # pylint: disable=arguments-differ """Convenience fluent method for :py:func:`round`. The arguments are the same as for :py:func:`round`, with this array as data. """ - raise NotImplementedError + return round(self, decimals=decimals, out=out, **kwargs) def rint(self, *args, **kwargs): """Convenience fluent method for :py:func:`rint`. @@ -4524,6 +4524,24 @@ def around(x, decimals=0, out=None, **kwargs): raise TypeError('type {} not supported'.format(str(type(x)))) +@set_module('mxnet.symbol.numpy') +def round(x, decimals=0, out=None, **kwargs): + r""" + round_(a, decimals=0, out=None) + Round an array to the given number of decimals. + + See Also + -------- + around : equivalent function; see for details. 
+ """ + if isinstance(x, numeric_types): + return _np.around(x, decimals, **kwargs) + elif isinstance(x, _Symbol): + return _npi.around(x, decimals, out=out, **kwargs) + else: + raise TypeError('type {} not supported'.format(str(type(x)))) + + @set_module('mxnet.symbol.numpy') @wrap_np_binary_func def arctan2(x1, x2, out=None, **kwargs): diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 9b445044a3c1..3d26ee28b22e 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -594,6 +594,10 @@ def _add_workload_around(): OpArgMngr.add_workload('around', np.array([1.56, 72.54, 6.35, 3.25]), decimals=1) +def _add_workload_round(): + OpArgMngr.add_workload('round', np.array([1.56, 72.54, 6.35, 3.25]), decimals=1) + + def _add_workload_argsort(): for dtype in [np.int32, np.float32]: a = np.arange(101, dtype=dtype) @@ -1442,6 +1446,7 @@ def _prepare_workloads(): _add_workload_argmin() _add_workload_argmax() _add_workload_around() + _add_workload_round() _add_workload_argsort() _add_workload_append() _add_workload_bincount() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 4bbf9b8040e2..3f9f1d6677cc 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -4435,6 +4435,38 @@ def hybrid_forward(self, F, x): assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) +@with_seed() +@use_np +def test_np_round(): + class TestRound(HybridBlock): + def __init__(self, decimals): + super(TestRound, self).__init__() + self.decimals = decimals + + def hybrid_forward(self, F, x): + return F.np.round(x, self.decimals) + + shapes = [(), (1, 2, 3), (1, 0)] + types = ['int32', 'int64', 'float32', 'float64'] + for hybridize in [True, False]: + for oneType in types: + rtol, atol = 1e-3, 1e-5 + for shape in shapes: + for d in range(-5, 6): + test_round = TestRound(d) + if hybridize: + test_round.hybridize() + x = rand_ndarray(shape, dtype=oneType).as_np_ndarray() + np_out = _np.round(x.asnumpy(), d) + mx_out = test_round(x) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + + mx_out = np.round(x, d) + np_out = _np.round(x.asnumpy(), d) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + + @with_seed() @use_np def test_np_nonzero(): From cf81887a99cea3abebbf1c9728e926a0da9f0f1b Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Thu, 26 Dec 2019 18:53:19 +0800 Subject: [PATCH 09/17] Quantized Elemwise Mul Operator (#17147) * add elt-wise mul xinyu * fuse mul dequantize * change to use subgraph * address comments and add tests * fix ut * improve ut * skip pragma omp simd for msvc * fix lint * fix clang error --- .../quantization/quantized_elemwise_mul-inl.h | 64 +++++ .../quantization/quantized_elemwise_mul.cc | 267 ++++++++++++++++++ ...kldnn_elemwisemul_post_quantize_property.h | 222 +++++++++++++++ .../mkldnn/mkldnn_subgraph_property.cc | 2 + .../python/quantization/test_quantization.py | 62 +++- 5 files changed, 616 insertions(+), 1 deletion(-) create mode 100644 src/operator/quantization/quantized_elemwise_mul-inl.h create mode 100644 src/operator/quantization/quantized_elemwise_mul.cc create mode 100644 src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h diff --git a/src/operator/quantization/quantized_elemwise_mul-inl.h b/src/operator/quantization/quantized_elemwise_mul-inl.h 
new file mode 100644 index 000000000000..f58db8a45eea --- /dev/null +++ b/src/operator/quantization/quantized_elemwise_mul-inl.h @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file quantized_elemwise_mul.cc + * \brief CPU Implementation of basic elementwise binary mul operators + */ +#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_MUL_INL_H_ +#define MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_MUL_INL_H_ + +#include "../tensor/elemwise_binary_op-inl.h" + +namespace mxnet { +namespace op { +/* These structure is used for requantization only when fusion */ +struct QuantizeElemwiseMulParam : public dmlc::Parameter { + dmlc::optional min_calib_range; + dmlc::optional max_calib_range; + bool enable_float_output; + DMLC_DECLARE_PARAMETER(QuantizeElemwiseMulParam) { + DMLC_DECLARE_FIELD(min_calib_range) + .set_default(dmlc::optional()) + .describe("The minimum scalar value in the form of float32 obtained " + "through calibration. If present, it will be used to requantize the " + "int8 output data."); + DMLC_DECLARE_FIELD(max_calib_range) + .set_default(dmlc::optional()) + .describe("The maximum scalar value in the form of float32 obtained " + "through calibration. If present, it will be used to requantize the " + "int8 output data."); + DMLC_DECLARE_FIELD(enable_float_output).set_default(false) + .describe("Whether to enable float32 output"); + } +}; + +namespace quantized_elemwise_mul { +enum QuantizedElemwiseMulOpInputs {kLhs, kRhs, kLhsMin, kLhsMax, kRhsMin, kRhsMax}; +enum QuantizedElemwiseMulOpOutputs {kOut, kOutMin, kOutMax}; +enum QuantizedElemwiseMulOpResource {kTempSpace}; +} // namespace quantized_elemwise_mul + + + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_MUL_INL_H_ diff --git a/src/operator/quantization/quantized_elemwise_mul.cc b/src/operator/quantization/quantized_elemwise_mul.cc new file mode 100644 index 000000000000..a752c14837a6 --- /dev/null +++ b/src/operator/quantization/quantized_elemwise_mul.cc @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file quantized_elemwise_mul.cc + * \brief CPU Implementation of basic elementwise binary mul operators + */ +#include +#include "../tensor/elemwise_binary_op-inl.h" +#include "./quantized_elemwise_mul-inl.h" +#include "./quantization_utils.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(QuantizeElemwiseMulParam); + +static std::vector QuantizedElemwiseMulOutputNames(const NodeAttrs &attrs) { + const QuantizeElemwiseMulParam& params = nnvm::get(attrs.parsed); + if (params.enable_float_output) + return std::vector{"output"}; + else + return std::vector{"output", "min_output", "max_output"}; +} + +inline bool QuantizedElemwiseMulOpShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + using namespace mshadow; + const QuantizeElemwiseMulParam& params = nnvm::get(attrs.parsed); + const mxnet::TShape &lshape = (*in_attrs)[quantized_elemwise_mul::kLhs]; + const mxnet::TShape &rshape = (*in_attrs)[quantized_elemwise_mul::kRhs]; + if (!ndim_is_known(lshape) || !ndim_is_known(rshape)) return false; + CHECK_EQ(lshape.ndim(), rshape.ndim()) + << "Currently, quantized elemwise multiply doesn't support broadcast."; + for (int i = 0; i < lshape.ndim(); ++i) { + CHECK_EQ(lshape[i], rshape[i]); + } + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_elemwise_mul::kLhsMin, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_elemwise_mul::kLhsMax, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_elemwise_mul::kRhsMin, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_elemwise_mul::kRhsMax, mxnet::TShape(1, 1)); + + out_attrs->clear(); + SHAPE_ASSIGN_CHECK(*out_attrs, quantized_elemwise_mul::kOut, lshape); + if (!params.enable_float_output) { + SHAPE_ASSIGN_CHECK(*out_attrs, quantized_elemwise_mul::kOutMin, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*out_attrs, quantized_elemwise_mul::kOutMax, mxnet::TShape(1, 1)); + } + return true; +} + +inline bool QuantizedElemwiseMulOpType(const nnvm::NodeAttrs& attrs, + std::vector *in_type, + std::vector *out_type) { + const QuantizeElemwiseMulParam& params = nnvm::get(attrs.parsed); + for (int i = 0; i < 2; ++i) { + if (in_type->at(i) == mshadow::kInt8) { + TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kInt8); + } else { + LOG(ERROR) << "currently, quantized elemwise mul only support int8 inputs."; + } + } + TYPE_ASSIGN_CHECK(*in_type, quantized_elemwise_mul::kLhsMin, mshadow::kFloat32); + TYPE_ASSIGN_CHECK(*in_type, quantized_elemwise_mul::kLhsMax, mshadow::kFloat32); + TYPE_ASSIGN_CHECK(*in_type, quantized_elemwise_mul::kRhsMin, mshadow::kFloat32); + TYPE_ASSIGN_CHECK(*in_type, quantized_elemwise_mul::kRhsMax, mshadow::kFloat32); + + int dtype = mshadow::kInt32; + if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) { + dtype = mshadow::kInt8; + } + if (!params.enable_float_output) { + TYPE_ASSIGN_CHECK(*out_type, quantized_elemwise_mul::kOut, dtype); + TYPE_ASSIGN_CHECK(*out_type, quantized_elemwise_mul::kOutMin, mshadow::kFloat32); + TYPE_ASSIGN_CHECK(*out_type, 
quantized_elemwise_mul::kOutMax, mshadow::kFloat32); + } else { + TYPE_ASSIGN_CHECK(*out_type, quantized_elemwise_mul::kOut, mshadow::kFloat32); + } + return true; +} + +inline bool QuantizedElemwiseMulOpStorageType(const nnvm::NodeAttrs& attrs, + int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + using namespace common; + *dispatch_mode = DispatchMode::kFCompute; + + for (auto &v : *out_attrs) { + v = kDefaultStorage; + if (common::stype_string(v).compare("unknown") == 0) { + return false; + } + } + + for (auto &v : *in_attrs) { + v = kDefaultStorage; + if (common::stype_string(v).compare("unknown") == 0) { + return false; + } + } + return true; +} + +void QuantizedElemwiseMulOpForward(const nnvm::NodeAttrs &attrs, + const OpContext &ctx, + const std::vector &inputs, + const std::vector &req, + const std::vector &outputs) { + const QuantizeElemwiseMulParam& params = nnvm::get(attrs.parsed); + using namespace mxnet_op; + + float lhs_min = inputs[quantized_elemwise_mul::kLhsMin].dptr()[0]; + float lhs_max = inputs[quantized_elemwise_mul::kLhsMax].dptr()[0]; + float rhs_min = inputs[quantized_elemwise_mul::kRhsMin].dptr()[0]; + float rhs_max = inputs[quantized_elemwise_mul::kRhsMax].dptr()[0]; + + float cached_output_min_ = 0.f; + float cached_output_max_ = 0.f; + float out_data_scale = 1.f; + float out_scale = 1.f; + if (!params.enable_float_output) { + float output_data_range = kInt32Range; + // dataA && dataB are int8 + if (outputs[quantized_elemwise_mul::kOut].type_flag_ == mshadow::kInt8) { + output_data_range = kInt8Range; + } else { + output_data_range = kInt32Range; + } + if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) { + cached_output_min_ = params.min_calib_range.value(); + cached_output_max_ = params.max_calib_range.value(); + out_data_scale = output_data_range / MaxAbs(cached_output_min_, cached_output_max_); + auto lhs_scale = kInt8Range / MaxAbs(lhs_min, lhs_max); + auto rhs_scale = kInt8Range / MaxAbs(rhs_min, rhs_max); + out_scale = out_data_scale / lhs_scale / rhs_scale; + } else { + Stream *s = ctx.get_stream(); + if (inputs[quantized_elemwise_mul::kLhs].type_flag_ == mshadow::kInt8 && + inputs[quantized_elemwise_mul::kRhs].type_flag_ == mshadow::kInt8) { + mxnet_op::Kernel::Launch( + s, 1, &cached_output_min_, &cached_output_max_, &lhs_min, &lhs_max, &rhs_min, &rhs_max); + } else { + LOG(ERROR) << "lhs and rhs only support iny8 dtype."; + } + } + } else { + auto lhs_scale = kInt8Range / MaxAbs(lhs_min, lhs_max); + auto rhs_scale = kInt8Range / MaxAbs(rhs_min, rhs_max); + out_scale = 1.0 / lhs_scale / rhs_scale; + } + + size_t out_size = outputs[quantized_elemwise_mul::kOut].Size(); + auto *input_l = inputs[quantized_elemwise_mul::kLhs].dptr(); + auto *input_r = inputs[quantized_elemwise_mul::kRhs].dptr(); + // TODO(Xinyu): a temp solution to enable Elemwise INT8 computation, + // will be refactored after the DNNL primitive is done. 
+ if (!params.enable_float_output) { + if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) { + typedef int8_t out_type; + auto *out_data = outputs[quantized_elemwise_mul::kOut].dptr(); +#if !defined(_MSC_VER) +#pragma omp simd +#endif + for (size_t i = 0; i < out_size; ++i) { + const int8_t a = input_l[i]; + const int8_t b = input_r[i]; + out_data[i] = static_cast(a * b * out_scale); + } + } else { + typedef int32_t out_type; + auto *out_data = outputs[quantized_elemwise_mul::kOut].dptr(); +#if !defined(_MSC_VER) +#pragma omp simd +#endif + for (size_t i = 0; i < out_size; ++i) { + const int8_t a = input_l[i]; + const int8_t b = input_r[i]; + out_data[i] = static_cast(a * b * out_scale); + } + } + } else { + typedef float_t out_type; + auto *out_data = outputs[quantized_elemwise_mul::kOut].dptr(); +#if !defined(_MSC_VER) +#pragma omp simd +#endif + for (size_t i = 0; i < out_size; ++i) { + const int8_t a = input_l[i]; + const int8_t b = input_r[i]; + out_data[i] = static_cast(a * b * out_scale); + } + } + + if (!params.enable_float_output) { + outputs[quantized_elemwise_mul::kOutMin].dptr()[0] = cached_output_min_; + outputs[quantized_elemwise_mul::kOutMax].dptr()[0] = cached_output_max_; + } +} + +NNVM_REGISTER_OP(_contrib_quantized_elemwise_mul) +.describe(R"code(Multiplies arguments int8 element-wise. +)code" ADD_FILELINE) +.set_num_inputs(6) +.set_num_outputs([](const NodeAttrs& attrs) { + const QuantizeElemwiseMulParam& params = nnvm::get(attrs.parsed); + return (!params.enable_float_output) ? 3 : 1; +}) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"lhs", "rhs", "lhs_min", "lhs_max", "rhs_min", "rhs_max"}; + }) +.set_attr("FListOutputNames", QuantizedElemwiseMulOutputNames) +.set_attr("FInferShape", QuantizedElemwiseMulOpShape) +.set_attr("FInferType", QuantizedElemwiseMulOpType) +.set_attr("FInferStorageType", QuantizedElemwiseMulOpStorageType) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", QuantizedElemwiseMulOpForward) +// TODO(Xinyu): a temp solution to enable GluonCV INT8 flow, +// will be reverted after the improvement of CachedOP is done. 
+.set_attr("FGradient", MakeZeroGradNodes) +.set_attr("FNeedRequantize", [](const NodeAttrs& attrs) { return true; }) +.add_argument("lhs", "NDArray-or-Symbol", "first input") +.add_argument("rhs", "NDArray-or-Symbol", "second input") +.add_argument("lhs_min", "NDArray-or-Symbol", "Minimum value of first input.") +.add_argument("lhs_max", "NDArray-or-Symbol", "Maximum value of first input.") +.add_argument("rhs_min", "NDArray-or-Symbol", "Minimum value of second input.") +.add_argument("rhs_max", "NDArray-or-Symbol", "Maximum value of second input.") +.set_attr_parser(ParamParser) +.add_arguments(QuantizeElemwiseMulParam::__FIELDS__()); + +NNVM_REGISTER_OP(elemwise_mul) +.set_attr("FQuantizedOp", [](const NodeAttrs& attrs) { + nnvm::NodePtr node = nnvm::Node::Create(); + node->attrs.op = Op::Get("_contrib_quantized_elemwise_mul"); + node->attrs.name = "quantized_" + attrs.name; + node->attrs.dict = attrs.dict; + if (node->op()->attr_parser != nullptr) { + node->op()->attr_parser(&(node->attrs)); + } + return node; +}); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h b/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h new file mode 100644 index 000000000000..1469395ec169 --- /dev/null +++ b/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file mkldnn_elemwisemul_post_quantize_property.cc + * \brief Partition gragph property for MKLDNN Quantized ElemwiseMul operator + * \author Xinyu Chen +*/ + +#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ +#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ +#if MXNET_USE_MKLDNN == 1 + +#include +#include +#include "../../tensor/elemwise_binary_op-inl.h" +#include "../../quantization/requantize-inl.h" +#include "../common.h" +#include "mkldnn_subgraph_base-inl.h" + +namespace mxnet { +namespace op { + +#define QUANTIZED_ElemwiseMul_NAME "_contrib_quantized_elemwise_mul" + +class ElemwiseMulPostQuantizeSelector : public SubgraphSelector { + public: + /*! \brief pattern match status */ + enum SelectStatus { + kFail = 0, + kStart, + kRequantize, + kSuccess, + }; + + private: + bool disable_all; + bool disable_float_output; + SelectStatus status; + std::vector matched_list; + + public: + explicit ElemwiseMulPostQuantizeSelector(const bool dis_all, + const bool dis_float_output) + : disable_all(dis_all), + disable_float_output(dis_float_output) {} + + bool Select(const nnvm::Node &n) override { + if ((!disable_all) && n.op() == Op::Get(QUANTIZED_ElemwiseMul_NAME)) { + status = disable_all ? 
kSuccess : kStart; + matched_list.clear(); + matched_list.push_back(&n); + return true; + } + return false; + } + + bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override { + return false; + } + + bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override { + if (status == kFail || status == kSuccess || new_node.is_variable()) + return false; + // If n isn't the last matched node, then we encoutered a internal + // branch, we should pop out the node behind n and stop fusion. + if (matched_list.back() != &n) { + if (std::find(matched_list.begin(), matched_list.end(), &n) != + matched_list.end()) { + while (matched_list.back() != &n) { + matched_list.pop_back(); + } + } + + status = kSuccess; + return false; + } + + switch (status) { + case kStart: + if (new_node.op() == Op::Get("_contrib_requantize")) { + auto const ¶m = nnvm::get(new_node.attrs.parsed); + if (param.min_calib_range.has_value() && + param.max_calib_range.has_value()) { + matched_list.push_back(&new_node); + status = kRequantize; + return true; + } + } + case kRequantize: + if ((!disable_float_output) && (new_node.op() == Op::Get("_contrib_dequantize"))) { + matched_list.push_back(&new_node); + status = kSuccess; + return true; + } + default: + status = kSuccess; + return false; + } + } + + std::vector Filter( + const std::vector &candidates) override { + if ((status != kSuccess) || (matched_list.size() <= 1)) { + return std::vector(0); + } else { + std::vector ret; + for (auto i : matched_list) { + auto non_const_i = const_cast(i); + if (std::find(candidates.begin(), candidates.end(), non_const_i) != + candidates.end()) { + ret.push_back(non_const_i); + } + } + return ret; + } + } + + void Reset() override { + CHECK_GE(matched_list.size(), 1); + auto new_selector = ElemwiseMulPostQuantizeSelector(disable_all, disable_float_output); + new_selector.Select(*matched_list[0]); + *this = new_selector; + } +}; + +class ElemwiseMulPostQuantizeProperty : public SubgraphProperty { + public: + ElemwiseMulPostQuantizeProperty() { + disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_MKLDNN_QEM_FUSE_ALL", false); + disable_float_output = dmlc::GetEnv("MXNET_DISABLE_MKLDNN_QEM_FLOAT_OUTPUT", false); + } + + static SubgraphPropertyPtr Create() { + static const std::string &name = "MKLDNN EltwiseMul post-quantization optimization pass"; + auto property = std::make_shared(); + property->SetAttr("property_name", name); + property->SetAttr("inference_only", true); + return property; + } + + nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol &sym, + const int subgraph_id = 0) const override { + nnvm::NodePtr em_node = nullptr; + nnvm::NodePtr requantize_node = nullptr; + nnvm::NodePtr dequantize_node = nullptr; + + DFSVisit(sym.outputs, [&](const nnvm::NodePtr &node) { + if (node->is_variable()) return; + if (node->op() == Op::Get(QUANTIZED_ElemwiseMul_NAME)) { + em_node = node; + } else if (node->op() == Op::Get("_contrib_requantize")) { + requantize_node = node; + } else if (node->op() == Op::Get("_contrib_dequantize")) { + dequantize_node = node; + } + }); + + CHECK_NOTNULL(em_node); + CHECK_NOTNULL(requantize_node); + auto const &requantize_param = + nnvm::get(requantize_node->attrs.parsed); + CHECK(requantize_param.min_calib_range.has_value()); + CHECK(requantize_param.max_calib_range.has_value()); + + // When only fused quantized_elemwise_mul and requantize, set min/max_cablib_range, + // When fused quantized_elemwise_mul + requantize + dequantize, set dequantize flag to true. 
+ if (dequantize_node != nullptr) { + em_node->attrs.dict["enable_float_output"] = "True"; + } else { + em_node->attrs.dict["min_calib_range"] = + std::to_string(requantize_param.min_calib_range.value()); + em_node->attrs.dict["max_calib_range"] = + std::to_string(requantize_param.max_calib_range.value()); + } + em_node->op()->attr_parser(&(em_node->attrs)); + return em_node; + } + + SubgraphSelectorPtr CreateSubgraphSelector() const override { + auto selector = + std::make_shared(disable_fuse_all, + disable_float_output); + return selector; + } + + void ConnectSubgraphOutputs( + const nnvm::NodePtr n, + std::vector *output_entries) const override { + for (size_t i = 0; i < output_entries->size(); ++i) { + auto entry_ptr = output_entries->at(i); + *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0}; + } + } + + private: + bool disable_fuse_all; + bool disable_float_output; +}; + +} // namespace op +} // namespace mxnet + +#endif // if MXNET_USE_MKLDNN == 1 +#endif // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ diff --git a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc index 269017ea6a03..18cd3031ef18 100644 --- a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc +++ b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc @@ -23,6 +23,7 @@ #include "mkldnn_fc_property.h" #include "mkldnn_post_quantize_property.h" #include "mkldnn_fc_post_quantize_property.h" +#include "mkldnn_elemwisemul_post_quantize_property.h" #include "mkldnn_post_quantize_align_scale_property.h" namespace mxnet { @@ -57,6 +58,7 @@ MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_QUANTIZE, SgMKLDNNPostQuantizeProperty); #if MXNET_USE_MKLDNN == 1 MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_QUANTIZE, SgMKLDNNFCPostQuantizeProperty); +MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_QUANTIZE, ElemwiseMulPostQuantizeProperty); MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_QUANTIZE, SgMKLDNNPostQuantizeAlignScaleProperty); #endif // MXNET_USE_MKLDNN == 1 diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index 527737e03cd7..0c40f32d1666 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -341,6 +341,66 @@ def check_quantized_elemwise_add(data_shape, qtype): check_quantized_elemwise_add((3, 4, 56, 56), qtype) check_quantized_elemwise_add((32, 56, 64, 11), qtype) +@with_seed() +def test_quantized_elemwise_mul(): + def check_quantized_elemwise_mul(data_shape, qtype): + if is_test_for_native_cpu(): + print('skipped testing quantized_elemwise_mul for native cpu since it is not supported yet') + return + elif qtype != 'int8': + print('skipped testing quantized_elemwise_mul for not supported data type') + return + elif is_test_for_gpu(): + print('skipped testing quantized_elemwise_mul for gpu since it is not supported yet') + return + + dataA = mx.sym.Variable(name='dataA', shape=data_shape, dtype='float32') + dataB = mx.sym.Variable(name='dataB', shape=data_shape, dtype='float32') + elemwise_mul_fp32 = mx.sym.elemwise_mul(dataA, dataB) + arg_names = elemwise_mul_fp32.list_arguments() + elemwise_mul_fp32_exe = elemwise_mul_fp32.simple_bind(ctx=mx.current_context(), grad_req='null') + if qtype == 'uint8': + data_low = 0.0 + data_high = 255.0 + else: + data_low = -127.0 + data_high = 127.0 + + dataA_val = mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape).astype('int32') + dataB_val = 
mx.nd.random.uniform(low=data_low, high=data_high, shape=data_shape).astype('int32') + elemwise_mul_fp32_exe.arg_dict[arg_names[0]][:] = dataA_val + + elemwise_mul_fp32_exe.arg_dict[arg_names[1]][:] = dataB_val + + output = elemwise_mul_fp32_exe.forward()[0] + + qdataA = mx.sym.Variable(name='qdataA', shape=data_shape, dtype=qtype) + qdataB = mx.sym.Variable(name='qdataB', shape=data_shape, dtype=qtype) + min_dataA = mx.sym.Variable(name='min_dataA') + max_dataA = mx.sym.Variable(name='max_dataA') + min_dataB = mx.sym.Variable(name='min_dataB') + max_dataB = mx.sym.Variable(name='max_dataB') + quantized_elemwise_mul = mx.sym.contrib.quantized_elemwise_mul(qdataA, qdataB, min_dataA, max_dataA, min_dataB, max_dataB) + elemwise_mul_int8_exe = quantized_elemwise_mul.simple_bind(ctx=mx.current_context(), grad_req='null') + qarg_names = quantized_elemwise_mul.list_arguments() + elemwise_mul_int8_exe.arg_dict[qarg_names[0]][:] = elemwise_mul_fp32_exe.arg_dict[arg_names[0]].astype(qtype) + elemwise_mul_int8_exe.arg_dict[qarg_names[1]][:] = elemwise_mul_fp32_exe.arg_dict[arg_names[1]].astype(qtype) + quantized_range = 127.0 + elemwise_mul_int8_exe.arg_dict[qarg_names[2]][:] = data_low + elemwise_mul_int8_exe.arg_dict[qarg_names[3]][:] = data_high + elemwise_mul_int8_exe.arg_dict[qarg_names[4]][:] = data_low + elemwise_mul_int8_exe.arg_dict[qarg_names[5]][:] = data_high + qoutput, min_range, max_range = elemwise_mul_int8_exe.forward() + + fp32_rslt = output.asnumpy() + int8_rslt = qoutput.astype(output.dtype) + assert_almost_equal(fp32_rslt, int8_rslt, atol = 1e-4) + + for qtype in ['int8', 'uint8']: + check_quantized_elemwise_mul((4, 6), qtype) + check_quantized_elemwise_mul((13, 74, 52), qtype) + check_quantized_elemwise_mul((3, 4, 56, 56), qtype) + check_quantized_elemwise_mul((32, 56, 64, 11), qtype) @with_seed() def test_quantized_pooling(): @@ -1005,7 +1065,7 @@ def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape, label_shape=N else: excluded_sym_names = excluded_names + optional_names if name == 'sym4': - excluded_op_names += ['elemwise_add'] + excluded_op_names += ['elemwise_add', 'elemwise_mul'] qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=s, arg_params=arg_params, From dfa3d07a74e59297394c624a3373a335f94a64eb Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Fri, 27 Dec 2019 08:37:48 +0800 Subject: [PATCH 10/17] Further optimization for NCF model (#17148) * further optimization for ncf model * fix benchmark script * enhance model optimizer to support general configurations of mlp and neurf models * fix minor typo --- .../neural_collaborative_filtering/README.md | 36 ++++-- .../benchmark.sh | 114 ++++++++++++++++++ .../neural_collaborative_filtering/convert.py | 2 +- .../core/model.py | 60 ++++++--- .../model_optimizer.py | 81 +++++++++++++ example/neural_collaborative_filtering/ncf.py | 24 ++-- .../neural_collaborative_filtering/train.py | 2 +- 7 files changed, 273 insertions(+), 46 deletions(-) create mode 100644 example/neural_collaborative_filtering/benchmark.sh create mode 100644 example/neural_collaborative_filtering/model_optimizer.py diff --git a/example/neural_collaborative_filtering/README.md b/example/neural_collaborative_filtering/README.md index 819f4d94dff9..00d3ed12295b 100644 --- a/example/neural_collaborative_filtering/README.md +++ b/example/neural_collaborative_filtering/README.md @@ -29,15 +29,6 @@ Author: Dr. 
Xiangnan He (http://www.comp.nus.edu.sg/~xiangnan/) Code Reference: https://github.com/hexiangnan/neural_collaborative_filtering -## Environment Settings -We use MXnet with MKL-DNN as the backend. -- MXNet version: '1.5.1' - -## Install -``` -pip install -r requirements.txt -``` - ## Dataset We provide the processed datasets on [Google Drive](https://drive.google.com/drive/folders/1qACR_Zhc2O2W0RrazzcepM2vJeh0MMdO?usp=sharing): MovieLens 20 Million (ml-20m), you can download directly or @@ -66,7 +57,9 @@ We provide the pretrained ml-20m model on [Google Drive](https://drive.google.co |dtype|HR@10|NDCG@10| |:---:|:--:|:--:| |float32|0.6393|0.3849| -|int8|0.6366|0.3824| +|float32 opt|0.6393|0.3849| +|int8|0.6395|0.3852| +|int8 opt|0.6396|0.3852| ## Training @@ -75,11 +68,20 @@ We provide the pretrained ml-20m model on [Google Drive](https://drive.google.co python train.py # --gpu=0 ``` +## Model Optimizer + +``` +# optimize model +python model_optimizer.py +``` + ## Calibration ``` # neumf calibration on ml-20m dataset python ncf.py --prefix=./model/ml-20m/neumf --calibration +# optimized neumf calibration on ml-20m dataset +python ncf.py --prefix=./model/ml-20m/neumf-opt --calibration ``` ## Evaluation @@ -87,15 +89,25 @@ python ncf.py --prefix=./model/ml-20m/neumf --calibration ``` # neumf float32 inference on ml-20m dataset python ncf.py --batch-size=1000 --prefix=./model/ml-20m/neumf +# optimized neumf float32 inference on ml-20m dataset +python ncf.py --batch-size=1000 --prefix=./model/ml-20m/neumf-opt # neumf int8 inference on ml-20m dataset python ncf.py --batch-size=1000 --prefix=./model/ml-20m/neumf-quantized +# optimized neumf int8 inference on ml-20m dataset +python ncf.py --batch-size=1000 --prefix=./model/ml-20m/neumf-opt-quantized ``` ## Benchmark ``` +usage: bash ./benchmark.sh [[[-p prefix ] [-e epoch] [-d dataset] [-b batch_size] [-i instance] [-c cores/instance]] | [-h]] + # neumf float32 benchmark on ml-20m dataset -python ncf.py --batch-size=1000 --prefix=./model/ml-20m/neumf --benchmark +sh benchmark.sh -p model/ml-20m/neumf +# optimized neumf float32 benchmark on ml-20m dataset +sh benchmark.sh -p model/ml-20m/neumf-opt # neumf int8 benchmark on ml-20m dataset -python ncf.py --batch-size=1000 --prefix=./model/ml-20m/neumf-quantized --benchmark +sh benchmark.sh -p model/ml-20m/neumf-quantized +# optimized neumf int8 benchmark on ml-20m dataset +sh benchmark.sh -p model/ml-20m/neumf-opt-quantized ``` diff --git a/example/neural_collaborative_filtering/benchmark.sh b/example/neural_collaborative_filtering/benchmark.sh new file mode 100644 index 000000000000..60fec746cd20 --- /dev/null +++ b/example/neural_collaborative_filtering/benchmark.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +usage() +{ + echo "usage: bash ./benchmark.sh [[[-p prefix ] [-e epoch] [-d dataset] [-b batch_size] [-i instance] [-c cores/instance]] | [-h]]" +} + +while [ $# -gt 0 ]; do + case "$1" in + --prefix | -p) + shift + PREFIX=$1 + ;; + --epoch | -e) + shift + EPOCH=$1 + ;; + --dataset | -d) + shift + DATASET=$1 + ;; + --batch-size | -b) + shift + BS=$1 + ;; + --instance | -i) + shift + INS=$1 + ;; + --core | -c) + shift + CORES=$1 + ;; + --help | -h) + usage + exit 1 + ;; + *) + usage + exit 1 + esac + shift +done + +NUM_SOCKET=`lscpu | grep 'Socket(s)' | awk '{print $NF}'` +NUM_NUMA_NODE=`lscpu | grep 'NUMA node(s)' | awk '{print $NF}'` +CORES_PER_SOCKET=`lscpu | grep 'Core(s) per socket' | awk '{print $NF}'` +NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET)) +CORES_PER_NUMA=$((NUM_CORES / NUM_NUMA_NODE)) +echo "target machine has $NUM_CORES physical core(s) on $NUM_NUMA_NODE numa nodes of $NUM_SOCKET socket(s)." + +if [ -z $PREFIX ]; then + echo "Error: Need a model prefix." + exit +fi +if [ -z $EPOCH ]; then + echo "Default: set epoch of model parameters to 7." + EPOCH=7 +fi +if [ -z $DATASET ]; then + echo "Default: set dataset to ml-20m." + DATASET='ml-20m' +fi +if [ -z $INS ]; then + echo "Default: launch one instance per physical core." + INS=$NUM_CORES +fi +if [ -z $CORES ]; then + echo "Default: divide full physical cores." + CORES=$((NUM_CORES / $INS)) +fi +if [ -z $BS ]; then + echo "Default: set batch size to 700." + BS=700 +fi + +echo " cores/instance: $CORES" +echo " total instances: $INS" +echo " batch size: $BS" +echo "" + +rm NCF_*.log + +for((i=0;i<$INS;i++)); +do + ((a=$i*$CORES)) + ((b=$a+$CORES-1)) + memid=$((b/CORES_PER_NUMA)) + LOG=NCF_$i.log + echo " $i instance use $a-$b cores with $LOG" + KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0 \ + OMP_NUM_THREADS=$CORES \ + numactl --physcpubind=$a-$b --membind=$memid python ncf.py --batch-size=$BS --dataset=$DATASET --epoch=$EPOCH --benchmark --prefix=$PREFIX 2>&1 | tee $LOG & +done +wait + +grep speed NCF_*.log | awk '{ sum += $(NF-1) }; END { print "Total Performance is " sum " samples/sec"}' diff --git a/example/neural_collaborative_filtering/convert.py b/example/neural_collaborative_filtering/convert.py index 4c64d2cdedab..7fb7f1ede9e4 100644 --- a/example/neural_collaborative_filtering/convert.py +++ b/example/neural_collaborative_filtering/convert.py @@ -38,7 +38,7 @@ def parse_args(): parser = ArgumentParser() parser.add_argument('--dataset', nargs='?', default='ml-20m', choices=['ml-1m', 'ml-20m'], help='The dataset name, temporary support ml-1m and ml-20m.') - parser.add_argument('path', type=str, default = './data/', + parser.add_argument('--path', type=str, default = './data/', help='Path to reviews CSV file from MovieLens') parser.add_argument('-n', '--negatives', type=int, default=999, help='Number of negative samples for each positive' diff --git a/example/neural_collaborative_filtering/core/model.py b/example/neural_collaborative_filtering/core/model.py index b516e5039fed..6c03bb01a357 100644 --- a/example/neural_collaborative_filtering/core/model.py +++ b/example/neural_collaborative_filtering/core/model.py @@ -37,6 +37,27 @@ def _init_weight(self, _, arr): limit = np.sqrt(3. 
/ self._fan_in) mx.random.uniform(-limit, limit, out=arr) +# only for inference model optimize +def mlp_opt(user, item, factor_size, model_layers, max_user, max_item): + user_weight = mx.sym.Variable('fused_mlp_user_weight', init=mx.init.Normal(0.01)) + item_weight = mx.sym.Variable('fused_mlp_item_weight', init=mx.init.Normal(0.01)) + embed_user = mx.sym.Embedding(data=user, weight=user_weight, input_dim=max_user, + output_dim=factor_size * 2, name='fused_embed_user'+str(factor_size)) + embed_item = mx.sym.Embedding(data=item, weight=item_weight, input_dim=max_item, + output_dim=factor_size * 2, name='fused_embed_item'+str(factor_size)) + pre_gemm_concat = embed_user + embed_item + + for i in range(1, len(model_layers)): + if i==1: + pre_gemm_concat = mx.sym.Activation(data=pre_gemm_concat, act_type='relu', name='act_'+str(i-1)) + continue + else: + mlp_weight_init = golorot_uniform(model_layers[i-1], model_layers[i]) + mlp_weight = mx.sym.Variable('fc_{}_weight'.format(i-1), init=mlp_weight_init) + pre_gemm_concat = mx.sym.FullyConnected(data=pre_gemm_concat, weight=mlp_weight, num_hidden=model_layers[i], name='fc_'+str(i-1)) + pre_gemm_concat = mx.sym.Activation(data=pre_gemm_concat, act_type='relu', name='act_'+str(i-1)) + + return pre_gemm_concat def mlp(user, item, factor_size, model_layers, max_user, max_item): user_weight = mx.sym.Variable('mlp_user_weight', init=mx.init.Normal(0.01)) @@ -47,14 +68,11 @@ def mlp(user, item, factor_size, model_layers, max_user, max_item): output_dim=factor_size, name='embed_item'+str(factor_size)) pre_gemm_concat = mx.sym.concat(embed_user, embed_item, dim=1, name='pre_gemm_concat') - for i, layer in enumerate(model_layers): - if i==0: - mlp_weight_init = golorot_uniform(2 * factor_size, model_layers[i]) - else: - mlp_weight_init = golorot_uniform(model_layers[i-1], model_layers[i]) - mlp_weight = mx.sym.Variable('fc_{}_weight'.format(i), init=mlp_weight_init) - pre_gemm_concat = mx.sym.FullyConnected(data=pre_gemm_concat, weight=mlp_weight, num_hidden=layer, name='fc_'+str(i)) - pre_gemm_concat = mx.sym.Activation(data=pre_gemm_concat, act_type='relu', name='act_'+str(i)) + for i in range(1, len(model_layers)): + mlp_weight_init = golorot_uniform(model_layers[i-1], model_layers[i]) + mlp_weight = mx.sym.Variable('fc_{}_weight'.format(i-1), init=mlp_weight_init) + pre_gemm_concat = mx.sym.FullyConnected(data=pre_gemm_concat, weight=mlp_weight, num_hidden=model_layers[i], name='fc_'+str(i-1)) + pre_gemm_concat = mx.sym.Activation(data=pre_gemm_concat, act_type='relu', name='act_'+str(i-1)) return pre_gemm_concat @@ -70,24 +88,34 @@ def gmf(user, item, factor_size, max_user, max_item): return pred def get_model(model_type='neumf', factor_size_mlp=128, factor_size_gmf=64, - model_layers=[256, 128, 64], num_hidden=1, - max_user=138493, max_item=26744): + model_layers=[256, 256, 128, 64], num_hidden=1, + max_user=138493, max_item=26744, opt=False): # input user = mx.sym.Variable('user') item = mx.sym.Variable('item') if model_type == 'mlp': - net = mlp(user=user, item=item, - factor_size=factor_size_mlp, model_layers=model_layers, - max_user=max_user, max_item=max_item) + if opt: + net = mlp_opt(user=user, item=item, + factor_size=factor_size_mlp, model_layers=model_layers, + max_user=max_user, max_item=max_item) + else: + net = mlp(user=user, item=item, + factor_size=factor_size_mlp, model_layers=model_layers, + max_user=max_user, max_item=max_item) elif model_type == 'gmf': net = gmf(user=user, item=item, factor_size=factor_size_gmf, max_user=max_user, 
max_item=max_item) elif model_type == 'neumf': - net_mlp = mlp(user=user, item=item, - factor_size=factor_size_mlp, model_layers=model_layers, - max_user=max_user, max_item=max_item) + if opt: + net_mlp = mlp_opt(user=user, item=item, + factor_size=factor_size_mlp, model_layers=model_layers, + max_user=max_user, max_item=max_item) + else: + net_mlp = mlp(user=user, item=item, + factor_size=factor_size_mlp, model_layers=model_layers, + max_user=max_user, max_item=max_item) net_gmf = gmf(user=user, item=item, factor_size=factor_size_gmf, max_user=max_user, max_item=max_item) diff --git a/example/neural_collaborative_filtering/model_optimizer.py b/example/neural_collaborative_filtering/model_optimizer.py new file mode 100644 index 000000000000..2866ae7e7e05 --- /dev/null +++ b/example/neural_collaborative_filtering/model_optimizer.py @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +import os +import time +import argparse +import logging +import math +import random +import numpy as np +import mxnet as mx +from core.model import get_model +from core.dataset import NCFTrainData + +logging.basicConfig(level=logging.DEBUG) + +parser = argparse.ArgumentParser(description="Run model optimizer.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--path', nargs='?', default='./data/', + help='Input data path.') +parser.add_argument('--dataset', nargs='?', default='ml-20m', + help='The dataset name.') +parser.add_argument('--model-prefix', type=str, default='./model/ml-20m/neumf') +parser.add_argument('--epoch', type=int, default=7, help='parameters epoch') +parser.add_argument('--model-type', type=str, default='neumf', choices=['neumf', 'gmf', 'mlp'], + help="mdoel type") +parser.add_argument('--layers', default='[256, 256, 128, 64]', + help="list of number hiddens of fc layers in mlp model.") +parser.add_argument('--factor-size-gmf', type=int, default=64, + help="outdim of gmf embedding layers.") +parser.add_argument('--num-hidden', type=int, default=1, + help="num-hidden of neumf fc layer") + +head = '%(asctime)-15s %(message)s' +logging.basicConfig(level=logging.INFO, format=head) + +# arg parser +args = parser.parse_args() +logging.info(args) + +model_prefix = args.model_prefix +model_type = args.model_type +model_layers = eval(args.layers) +factor_size_gmf = args.factor_size_gmf +factor_size_mlp = int(model_layers[0]/2) +num_hidden = args.num_hidden +train_dataset = NCFTrainData((args.path + args.dataset + '/train-ratings.csv'), nb_neg=4) +net = get_model(model_type, factor_size_mlp, factor_size_gmf, + model_layers, num_hidden, train_dataset.nb_users, train_dataset.nb_items, opt=True) + +raw_params, _ = mx.model.load_params(model_prefix, args.epoch) +fc_0_weight_split = mx.nd.split(raw_params['fc_0_weight'], 
axis=1, num_outputs=2) +fc_0_left = fc_0_weight_split[0] +fc_0_right = fc_0_weight_split[1] + +user_weight_fusion = mx.nd.FullyConnected(data = raw_params['mlp_user_weight'], weight=fc_0_left, bias=raw_params['fc_0_bias'], no_bias=False, num_hidden=model_layers[0]) +item_weight_fusion = mx.nd.FullyConnected(data = raw_params['mlp_item_weight'], weight=fc_0_right, no_bias=True, num_hidden=model_layers[0]) + +opt_params = raw_params +del opt_params['mlp_user_weight'] +del opt_params['mlp_item_weight'] +del opt_params['fc_0_bias'] +opt_params['fused_mlp_user_weight'] = user_weight_fusion +opt_params['fused_mlp_item_weight'] = item_weight_fusion + +mx.model.save_checkpoint(model_prefix + '-opt', args.epoch, net, opt_params, {}) + diff --git a/example/neural_collaborative_filtering/ncf.py b/example/neural_collaborative_filtering/ncf.py index 0fd9f733a1bd..b01be01bc8d9 100644 --- a/example/neural_collaborative_filtering/ncf.py +++ b/example/neural_collaborative_filtering/ncf.py @@ -42,20 +42,12 @@ help='max number of item index.') parser.add_argument('--batch-size', type=int, default=256, help='number of examples per batch') -parser.add_argument('--model-type', type=str, default='neumf', choices=['neumf', 'gmf', 'mlp'], - help="mdoel type") -parser.add_argument('--layers', default='[256, 128, 64]', - help="list of number hiddens of fc layers in mlp model.") -parser.add_argument('--factor-size-gmf', type=int, default=64, - help="outdim of gmf embedding layers.") -parser.add_argument('--num-hidden', type=int, default=1, - help="num-hidden of neumf fc layer") parser.add_argument('--topk', type=int, default=10, help="topk for accuracy evaluation.") parser.add_argument('--gpu', type=int, default=None, help="index of gpu to run, e.g. 0 or 1. None means using cpu().") parser.add_argument('--benchmark', action='store_true', help="whether to benchmark performance only") -parser.add_argument('--epoch', type=int, default=0, help='model checkpoint index for inference') +parser.add_argument('--epoch', type=int, default=7, help='model checkpoint index for inference') parser.add_argument('--prefix', default='./model/ml-20m/neumf', help="model checkpoint prefix") parser.add_argument('--calibration', action='store_true', help="whether to calibrate model") parser.add_argument('--calib-mode', type=str, choices=['naive', 'entropy'], default='naive', @@ -85,11 +77,6 @@ max_user = args.max_user max_item = args.max_item batch_size = args.batch_size - model_type = args.model_type - model_layers = eval(args.layers) - factor_size_gmf = args.factor_size_gmf - factor_size_mlp = int(model_layers[0]/2) - num_hidden = args.num_hidden benchmark = args.benchmark calibration = args.calibration calib_mode = args.calib_mode @@ -129,7 +116,7 @@ cqsym, cqarg_params, aux_params, collector = quantize_graph(sym=net, arg_params=arg_params, aux_params=aux_params, excluded_sym_names=excluded_sym_names, calib_mode=calib_mode, - quantized_dtype=args.quantized_dtype, logger=logging) + quantized_dtype=quantized_dtype, logger=logging) max_num_examples = num_calib_batches * batch_size mod._exec_group.execs[0].set_monitor_callback(collector.collect, monitor_all=True) num_batches = 0 @@ -144,12 +131,17 @@ % (num_batches, batch_size)) cqsym, cqarg_params, aux_params = calib_graph(qsym=cqsym, arg_params=arg_params, aux_params=aux_params, collector=collector, calib_mode=calib_mode, - quantized_dtype=args.quantized_dtype, logger=logging) + quantized_dtype=quantized_dtype, logger=logging) sym_name = '%s-symbol.json' % (args.prefix + '-quantized') 
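The weight fusion performed by model_optimizer.py above relies on the first fully connected layer acting on the concatenation of the two embeddings: fc_0_weight is split column-wise, each half is pre-multiplied into its embedding table (with fc_0_bias folded into the user side), and the optimized mlp_opt graph then replaces concat followed by fc_0 with a plain elementwise add of the widened embeddings. A small NumPy sketch of that identity, with made-up sizes and assuming MXNet's (num_hidden, input_dim) FullyConnected weight layout:

```
import numpy as np

n_users, n_items, factor, hidden = 5, 7, 4, 8   # hidden corresponds to model_layers[0]
user_emb = np.random.randn(n_users, factor)      # stands in for mlp_user_weight
item_emb = np.random.randn(n_items, factor)      # stands in for mlp_item_weight
w = np.random.randn(hidden, 2 * factor)          # fc_0_weight, stored as (out, in)
b = np.random.randn(hidden)                      # fc_0_bias

u, i = 2, 3
# Original MLP branch: FullyConnected over the concatenated embeddings.
original = np.concatenate([user_emb[u], item_emb[i]]).dot(w.T) + b

# Fused form: fold each half of fc_0_weight into its embedding table.
w_user, w_item = w[:, :factor], w[:, factor:]
fused_user = user_emb.dot(w_user.T) + b          # bias kept on the user table
fused_item = item_emb.dot(w_item.T)
fused = fused_user[u] + fused_item[i]

assert np.allclose(original, fused)
```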
cqsym = cqsym.get_backend_symbol('MKLDNN_QUANTIZE') mx.model.save_checkpoint(args.prefix + '-quantized', args.epoch, cqsym, cqarg_params, aux_params) elif benchmark: logging.info('Benchmarking...') + data = [mx.random.randint(0, 1000, shape=shape, ctx=ctx) for _, shape in mod.data_shapes] + batch = mx.io.DataBatch(data, []) # empty label + for i in range(2000): + mod.forward(batch, is_train=False) + logging.info('Benchmarking...') num_samples = 0 for ib, batch in enumerate(val_iter): if ib == 5: diff --git a/example/neural_collaborative_filtering/train.py b/example/neural_collaborative_filtering/train.py index 0b0cfad1ef39..c68f271a6f0d 100644 --- a/example/neural_collaborative_filtering/train.py +++ b/example/neural_collaborative_filtering/train.py @@ -45,7 +45,7 @@ help="mdoel type") parser.add_argument('--num-negative', type=int, default=4, help="number of negative samples per positive sample while training.") -parser.add_argument('--layers', default='[256, 128, 64]', +parser.add_argument('--layers', default='[256, 256, 128, 64]', help="list of number hiddens of fc layers in mlp model.") parser.add_argument('--factor-size-gmf', type=int, default=64, help="outdim of gmf embedding layers.") From 8a14a6acb9c31b0bfc1395a0bc8e9019c73846fd Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Fri, 27 Dec 2019 08:44:26 +0800 Subject: [PATCH 11/17] [MKL-DNN] Enhance Quantization Method (#17161) * support bert quantization * support bert quantization * fix lint --- python/mxnet/contrib/quantization.py | 55 +++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py index 01051ab7c8e4..a56f8531aa49 100644 --- a/python/mxnet/contrib/quantization.py +++ b/python/mxnet/contrib/quantization.py @@ -27,6 +27,7 @@ import logging import os import shutil +import warnings import numpy as np from ..base import _LIB, check_call, py_str from ..base import c_array, c_str, mx_uint, c_str_array @@ -419,6 +420,7 @@ def __init__(self, calib_data): else: data_example = [data_example] # suppose there must be one label in data_example + # TODO(xinyu-intel): little tricky here, need to refactor. num_data = len(data_example) assert num_data > 0 # here reshape is to handle the 5D/6D input data @@ -426,6 +428,10 @@ def __init__(self, calib_data): data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:]) self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))] self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])] + # data0, data1, ..., label + if num_data >= 3: + self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) + for i, x in enumerate(data_example[0:])] self.batch_size = data_example[0].shape[0] self.reset() @@ -627,8 +633,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params, return qsym, qarg_params, aux_params def quantize_graph(sym, arg_params, aux_params, ctx=cpu(), - excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy', - quantized_dtype='int8', quantize_mode='full', logger=None): + excluded_sym_names=None, excluded_op_names=None, + calib_mode='entropy', quantized_dtype='int8', quantize_mode='full', + LayerOutputCollector=None, logger=None): """User-level API for generating a quantized model from a FP32 model w/o calibration and a collector for naive or entropy calibration. The backend quantized operators are only enabled for Linux systems. 
Please do not run @@ -667,6 +674,8 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(), The mode that quantization pass to apply. Support 'full' and 'smart'. 'full' means quantize all operator if possible. 'smart' means quantization pass will smartly choice which operator should be quantized. + LayerOutputCollector : class + For customize calibration method usage. logger : Object A logging object for printing information during the process of quantization. Returns @@ -711,9 +720,14 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(), if logger: logger.info( 'Create a layer output minmax collector for naive calibration') + elif calib_mode == 'customize' and LayerOutputCollector is not None: + collector = LayerOutputCollector + if logger: + logger.info( + 'Create a customize layer output minmax collector for calibration') else: raise ValueError('unknown calibration mode %s received,' - ' expected `none`, `naive`, or `entropy`' % calib_mode) + ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode) if logger: logger.info('Collector created, please use set_monitor_callback' ' to collect calibration information.') @@ -770,9 +784,11 @@ def calib_graph(qsym, arg_params, aux_params, collector, collector.hist_dict, quantized_dtype, logger=logger) elif calib_mode == 'naive': th_dict = collector.min_max_dict + elif calib_mode == 'customize': + th_dict = collector.min_max_dict else: raise ValueError('unknown calibration mode %s received,' - ' expected `none`, `naive`, or `entropy`' % calib_mode) + ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode) qsym = _calibrate_quantized_sym(qsym, th_dict) else: raise ValueError('please set calibration mode to naive or entropy.') @@ -783,10 +799,10 @@ def calib_graph(qsym, arg_params, aux_params, collector, return qsym, qarg_params, aux_params -def quantize_net(network, quantized_dtype='auto', quantize_mode='full', - exclude_layers=None, exclude_layers_match=None, exclude_operators=None, - calib_data=None, data_shapes=None, calib_mode='none', - num_calib_examples=None, ctx=cpu(), logger=None): +def quantize_net_v2(network, quantized_dtype='auto', quantize_mode='full', + exclude_layers=None, exclude_layers_match=None, exclude_operators=None, + calib_data=None, data_shapes=None, calib_mode='none', + num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=None): """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems. Please do not run inference using the quantized models on Windows for now. @@ -830,6 +846,8 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full', ctx : Context Defines the device that users want to run forward propagation on the calibration dataset for collecting layer output statistics. Currently, only supports single context. + LayerOutputCollector : class + For customize calibration method usage. logger : Object A logging object for printing information during the process of quantization. 
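With the new 'customize' mode, the object passed as LayerOutputCollector is used as the monitor callback during the calibration forward passes and is expected to end up with a min_max_dict mapping layer-output names to (min, max) thresholds, which calib_graph then consumes. A minimal sketch of what such a collector might look like; the class name and the clipping knob are invented for illustration, and the handle unwrapping simply mirrors the built-in collectors in this module:

```
import ctypes
import numpy as np
from mxnet.base import NDArrayHandle, py_str
from mxnet.ndarray import NDArray

class MyMinMaxCollector(object):
    """Hypothetical user-defined collector for calib_mode='customize'."""
    def __init__(self, clip=None):
        self.clip = clip
        self.min_max_dict = {}        # consumed by calib_graph when calib_mode='customize'

    def collect(self, name, arr):     # monitor callback, sees every layer output
        name = py_str(name)
        handle = ctypes.cast(arr, NDArrayHandle)
        data = NDArray(handle, writable=False).asnumpy()
        cur_min, cur_max = float(np.min(data)), float(np.max(data))
        if self.clip is not None:     # optionally clamp extreme activations
            cur_min, cur_max = max(cur_min, -self.clip), min(cur_max, self.clip)
        if name in self.min_max_dict:
            old_min, old_max = self.min_max_dict[name]
            cur_min, cur_max = min(cur_min, old_min), max(cur_max, old_max)
        self.min_max_dict[name] = (cur_min, cur_max)

# Hypothetical use through the renamed Gluon entry point:
# qnet = quantize_net_v2(net, calib_mode='customize', calib_data=calib_loader,
#                        LayerOutputCollector=MyMinMaxCollector(clip=8.0), ctx=mx.cpu())
```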
@@ -906,7 +924,8 @@ def __exit__(self, exc_type, exc_value, traceback): qsym, qarg_params, aux_params, collector = quantize_graph( sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx, excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators, - calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger) + calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, + LayerOutputCollector=LayerOutputCollector, logger=logger) if calib_mode is not None and calib_mode != 'none': if not isinstance(ctx, Context): @@ -915,7 +934,7 @@ def __exit__(self, exc_type, exc_value, traceback): if calib_data is None: raise ValueError( 'calib_data must be provided when calib_mode=%s' % calib_mode) - if calib_mode in ['naive', 'entropy']: + if calib_mode in ['naive', 'entropy', 'customize']: data_names = [pair[0] for pair in calib_data.provide_data] mod = Module(symbol=symnet, context=ctx, data_names=data_names, label_names=None) @@ -956,3 +975,19 @@ def __exit__(self, exc_type, exc_value, traceback): net.collect_params().load(param_name, cast_dtype=True, dtype_source='saved') net.collect_params().reset_ctx(ctx) return net + +def quantize_net(network, quantized_dtype='auto', quantize_mode='full', + exclude_layers=None, exclude_layers_match=None, exclude_operators=None, + calib_data=None, data_shapes=None, calib_mode='none', + num_calib_examples=None, ctx=cpu(), logger=None): + """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration. + Will be deprecated after MXNet 2.0, please use quantize_net_v2. + """ + warnings.warn('WARNING: This will be deprecated after MXNet 2.0, please use quantize_net_v2.') + return quantize_net_v2(network=network, quantized_dtype=quantized_dtype, + quantize_mode=quantize_mode, exclude_layers=exclude_layers, + exclude_layers_match=exclude_layers_match, + exclude_operators=exclude_operators, + calib_data=calib_data, data_shapes=data_shapes, + calib_mode=calib_mode, num_calib_examples=num_calib_examples, + ctx=ctx, LayerOutputCollector=None, logger=logger) From 4fda46bd8c0e9f68376a0b6c69f8027ab227f650 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Fri, 27 Dec 2019 08:45:19 +0800 Subject: [PATCH 12/17] fix py27 quantization (#17153) --- python/mxnet/contrib/quantization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py index a56f8531aa49..ce22fb753ace 100644 --- a/python/mxnet/contrib/quantization.py +++ b/python/mxnet/contrib/quantization.py @@ -613,7 +613,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params, A tuple of quantized symbol, quantized arg_params, and aux_params. 
------- """ - if ctx != cpu(): + if not isinstance(ctx, Context): + raise ValueError('currently only supports single ctx, while received %s' % str(ctx)) + if ctx.device_type != 'cpu': raise ValueError( 'quantize_model_mkldnn only support Intel cpu platform with MKL-DNN Backend') From 2ad3ce408b08456bff0b92a82aa3c484adde29f2 Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Fri, 27 Dec 2019 10:23:41 +0800 Subject: [PATCH 13/17] broadcast_axis optimization (#17091) * broadcast_aixs opt * fix lint --- .../tensor/broadcast_reduce_op_value.cc | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/operator/tensor/broadcast_reduce_op_value.cc b/src/operator/tensor/broadcast_reduce_op_value.cc index 43bfc729329a..31e0dd054afd 100644 --- a/src/operator/tensor/broadcast_reduce_op_value.cc +++ b/src/operator/tensor/broadcast_reduce_op_value.cc @@ -32,6 +32,41 @@ DMLC_REGISTER_PARAMETER(BroadcastAxesParam); DMLC_REGISTER_PARAMETER(BroadcastToParam); DMLC_REGISTER_PARAMETER(BroadcastLikeParam); +template +void BroadcastAxisKer(DType* src, + DType* dst, + index_t outer, + index_t inner, + index_t size) { +#pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) + for (index_t i = 0; i < outer * size; i++) { + const index_t m = i / size; + const index_t n = i % size; + void* offset = reinterpret_cast(dst + m * size * inner + n * inner); + memcpy(offset, reinterpret_cast(src + m * inner), inner * sizeof (DType)); + } +} + +inline void BroadcastAxisComputeCPU(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + const BroadcastAxesParam& param = nnvm::get(attrs.parsed); + if (param.axis.ndim() == 1 && inputs[0].shape_[param.axis[0]] == 1 && req[0] == kWriteTo) { + MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { + auto dst = outputs[0].dptr(); + auto src = inputs[0].dptr(); + index_t outer = inputs[0].shape_.ProdShape(0, param.axis[0]); + index_t inner = inputs[0].shape_.ProdShape(param.axis[0], inputs[0].shape_.ndim()); + BroadcastAxisKer(src, dst, outer, inner, param.size[0]); + }); + } else { + BroadcastComputeImpl(attrs, ctx, inputs, req, outputs, inputs[0].shape_); + } +} + MXNET_OPERATOR_REGISTER_BROADCAST(broadcast_axis) .add_alias("broadcast_axes") .describe(R"code(Broadcasts the input array over particular axes. @@ -59,7 +94,7 @@ Example:: .set_attr_parser(ParamParser) .add_arguments(BroadcastAxesParam::__FIELDS__()) .set_attr("FInferShape", BroadcastAxesShape) -.set_attr("FCompute", BroadcastCompute); +.set_attr("FCompute", BroadcastAxisComputeCPU); MXNET_OPERATOR_REGISTER_BROADCAST(broadcast_to) .describe(R"code(Broadcasts the input array to a new shape. 
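The broadcast_axis change above only takes the memcpy-based fast path when exactly one axis of size 1 is broadcast and the write request is kWriteTo; every other case still falls back to the generic BroadcastComputeImpl. A small example of the shape pattern that exercises the new CPU kernel; the timing loop is only there to drive the operator and makes no performance claims:

```
import time
import mxnet as mx

x = mx.nd.random.uniform(shape=(64, 1, 512))
y = mx.nd.broadcast_axis(x, axis=1, size=128)   # single size-1 axis: hits the new kernel
y.wait_to_read()

start = time.time()
for _ in range(100):
    mx.nd.broadcast_axis(x, axis=1, size=128).wait_to_read()
print('100 broadcasts: %.3f s' % (time.time() - start))
```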
From 37197b6750a09cf0dbad250bad36ab545f462fbb Mon Sep 17 00:00:00 2001 From: Minghao Liu <40382964+Tommliu@users.noreply.github.com> Date: Fri, 27 Dec 2019 11:12:55 +0800 Subject: [PATCH 14/17] any/all (#17087) --- python/mxnet/_numpy_op_doc.py | 101 ++++++++++++++++++ python/mxnet/numpy_dispatch_protocol.py | 2 + src/operator/mshadow_op.h | 8 ++ src/operator/mxnet_op.h | 23 ++++ src/operator/numpy/np_broadcast_reduce_op.h | 53 +++++++++ .../numpy/np_broadcast_reduce_op_boolean.cc | 85 +++++++++++++++ .../numpy/np_broadcast_reduce_op_boolean.cu | 40 +++++++ src/operator/operator_tune.cc | 2 + src/operator/tensor/broadcast_reduce-inl.cuh | 10 ++ src/operator/tensor/broadcast_reduce-inl.h | 12 +++ src/operator/tensor/broadcast_reduce_op.h | 33 ++++++ src/operator/tensor/matrix_op-inl.h | 2 +- .../unittest/test_numpy_interoperability.py | 28 +++++ tests/python/unittest/test_numpy_op.py | 78 ++++++++++++++ 14 files changed, 476 insertions(+), 1 deletion(-) create mode 100644 src/operator/numpy/np_broadcast_reduce_op_boolean.cc create mode 100644 src/operator/numpy/np_broadcast_reduce_op_boolean.cu diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py index 0d0e3b64491b..d1d67be06b05 100644 --- a/python/mxnet/_numpy_op_doc.py +++ b/python/mxnet/_numpy_op_doc.py @@ -20,6 +20,107 @@ """Doc placeholder for numpy ops with prefix _np.""" +def _np_all(a, axis=None, keepdims=False, out=None): + """ + Test whether all array elements along a given axis evaluate to True. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : None or int or tuple of ints, optional + Axis or axes along which a logical AND reduction is performed. + The default (axis = None) is to perform a logical AND over + all the dimensions of the input array. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output and its type is preserved + + Returns + -------- + all : ndarray, bool + A new boolean or array is returned unless out is specified, + in which case a reference to out is returned. + + Examples: + --------- + >>> np.all([[True,False],[True,True]]) + False + + >>> np.all([[True,False],[True,True]], axis=0) + array([ True, False]) + + >>> np.all([-1, 4, 5]) + True + + >>> np.all([1.0, np.nan]) + True + + >>> o=np.array(False) + >>> z=np.all([-1, 4, 5], out=o) + >>> id(z), id(o), z + (28293632, 28293632, array(True)) # may vary + """ + pass + +def _np_any(a, axis=None, keepdims=False, out=None): + """ + Test whether any array element along a given axis evaluates to True. + Returns single boolean unless axis is not None + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : None or int or tuple of ints, optional + Axis or axes along which a logical AND reduction is performed. + The default (axis = None) is to perform a logical AND over + all the dimensions of the input array. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + out : ndarray, optional + Alternate output array in which to place the result. 
It must have + the same shape as the expected output and its type is preserved + + Returns + -------- + any : bool or ndarray + A new boolean or ndarray is returned unless out is specified, + in which case a reference to out is returned. + + Examples: + --------- + >>> np.any([[True, False], [True, True]]) + True + + >>> np.any([[True, False], [False, False]], axis=0) + array([ True, False]) + + >>> np.any([-1, 0, 5]) + True + + >>> np.any(np.nan) + True + + >>> o=np.array(False) + >>> z=np.any([-1, 4, 5], out=o) + >>> z, o + (array(True), array(True)) + >>> # Check now that z is a reference to o + >>> z is o + True + >>> id(z), id(o) # identity of z and o # doctest: +SKIP + (191614240, 191614240) + """ + pass + + def _np_cumsum(a, axis=None, dtype=None, out=None): """ Return the cumulative sum of the elements along a given axis. diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 9aa755fb436e..80b3d66c5e5d 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -83,6 +83,8 @@ def _run_with_array_ufunc_proto(*args, **kwargs): _NUMPY_ARRAY_FUNCTION_LIST = [ + 'all', + 'any', 'argmin', 'argmax', 'around', diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index e3a3c0443428..4176d3a68792 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -1101,6 +1101,14 @@ struct minimum : public mxnet_op::tunable { } }; +/*! \brief boolean any/all kernel that determines whether elem is NonZero */ +struct NonZero { + template + MSHADOW_XINLINE static bool Map(DType a) { + return (a != DType(0)); + } +}; + /*! \brief sum reducer that ignores NaN values in the input */ struct nansum { /*! \brief do reduction into dst */ diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index b15117f9f83b..d7752c4759db 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -1148,6 +1148,29 @@ struct set_to_int : public tunable { */ using set_zero = set_to_int<0>; using set_one = set_to_int<1>; + +/*! + * \brief Set to immediate scalar value kernel + * \tparam val Scalar immediate + */ +template +struct set_to_bool : public tunable { + // mxnet_op version (when used directly with Kernel<>::Launch()) */ + template + MSHADOW_XINLINE static void Map(index_t i, DType *out) { + out[i] = DType(val); + } + // mshadow_op version (when used with op_with_req<>) + MSHADOW_XINLINE static int Map() { + return val; + } +}; + +/*! + * \brief Special-case kernel shortcut for setting to true and false + */ +using set_true = set_to_bool; +using set_false = set_to_bool; } // namespace mxnet_op } // namespace op diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h index 7d0025a62ad2..0efe2c2aa3df 100644 --- a/src/operator/numpy/np_broadcast_reduce_op.h +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -86,6 +86,21 @@ struct NumpyReduceAxesNoDTypeParam : public dmlc::Parameter { + dmlc::optional> axis; + bool keepdims; + DMLC_DECLARE_PARAMETER(NumpyReduceAxesBoolParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional>()) + .describe("Axis or axes along which a sum is performed. The default, axis=None, will sum " + "all of the elements of the input array. 
If axis is negative it counts from the " + "last to the first axis."); + DMLC_DECLARE_FIELD(keepdims).set_default(false) + .describe("If this is set to `True`, the reduced axes are left " + "in the result as dimension with size one."); + } +}; + inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, const dmlc::optional>& axis, bool keepdims) { @@ -173,6 +188,20 @@ inline bool NumpyReduceAxesShape(const nnvm::NodeAttrs& attrs, return shape_is_known(out_attrs->at(0)); } +inline bool NumpyReduceAxesBoolShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + const NumpyReduceAxesBoolParam& param = nnvm::get(attrs.parsed); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, + NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims)); + return shape_is_known(out_attrs->at(0)); +} + inline bool NumpyReduceAxesNoDTypeShape(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { @@ -298,6 +327,30 @@ void NumpyReduceAxesNoDTypeCompute(const nnvm::NodeAttrs& attrs, ReduceAxesComputeImpl(ctx, inputs, req, outputs, small); } +template +void NumpyReduceAxesBoolCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + const NumpyReduceAxesBoolParam& param = nnvm::get(attrs.parsed); + mshadow::Stream* s = ctx.get_stream(); + if (inputs[0].shape_.Size() == 0 && outputs[0].shape_.Size() != 0) { + using namespace mxnet_op; + Kernel::Launch(s, outputs[0].shape_.Size(), outputs[0].dptr()); + return; + } + if (param.axis.has_value() && param.axis.value().ndim() == 0) { + UnaryOp::IdentityCompute(attrs, ctx, inputs, req, outputs); + } + TShape small; + if (param.keepdims) { + small = outputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true); + } + ReduceAxesComputeBoolImpl(ctx, inputs, req, outputs, small); +} template inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/numpy/np_broadcast_reduce_op_boolean.cc b/src/operator/numpy/np_broadcast_reduce_op_boolean.cc new file mode 100644 index 000000000000..7529c0d4e1d3 --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_boolean.cc @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_broadcast_reduce_op_boolean.cc + * \brief CPU Implementation of broadcast and reduce functions based on boolean. 
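+ * Registers the _np_any and _np_all operators; both infer a boolean output dtype and use zero gradients.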
+ */ + +#include "./np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +inline bool NumpyReduceAxesBoolType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kBool); + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(NumpyReduceAxesBoolParam); + +NNVM_REGISTER_OP(_np_any) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("THasDeterministicOutput", true) +.set_attr("FInferShape", NumpyReduceAxesBoolShape) +.set_attr("FInferType", NumpyReduceAxesBoolType) +.set_attr("FCompute", NumpyReduceAxesBoolCompute) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(NumpyReduceAxesBoolParam::__FIELDS__()); + +NNVM_REGISTER_OP(_np_all) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("THasDeterministicOutput", true) +.set_attr("FInferShape", NumpyReduceAxesBoolShape) +.set_attr("FInferType", NumpyReduceAxesBoolType) +.set_attr("FCompute", NumpyReduceAxesBoolCompute) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(NumpyReduceAxesBoolParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_broadcast_reduce_op_boolean.cu b/src/operator/numpy/np_broadcast_reduce_op_boolean.cu new file mode 100644 index 000000000000..2c206bf88b2f --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_boolean.cu @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_broadcast_reduce_op_boolean.cu + * \brief GPU Implementation of broadcast and reduce functions based on boolean. 
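+ * The GPU registrations below reuse the templated NumpyReduceAxesBoolCompute kernel from np_broadcast_reduce_op.h.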
+ */ + +#include "./np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_any) +.set_attr("FCompute", NumpyReduceAxesBoolCompute); + +NNVM_REGISTER_OP(_np_all) +.set_attr("FCompute", NumpyReduceAxesBoolCompute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc index 7ca594d21e59..9ca291391c5c 100644 --- a/src/operator/operator_tune.cc +++ b/src/operator/operator_tune.cc @@ -404,6 +404,8 @@ IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::smooth_l1_gradient); // NO IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::lcm); // NOLINT() IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_int<0>); // NOLINT() IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_int<1>); // NOLINT() +IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_bool); // NOLINT() +IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_bool); // NOLINT() IMPLEMENT_BLANK_WORKLOAD_FWD(mxnet::op::PopulateFullIdxRspKernel); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::ldexp); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rldexp); // NOLINT() diff --git a/src/operator/tensor/broadcast_reduce-inl.cuh b/src/operator/tensor/broadcast_reduce-inl.cuh index 41940e4b1e07..6cd7dd50657a 100644 --- a/src/operator/tensor/broadcast_reduce-inl.cuh +++ b/src/operator/tensor/broadcast_reduce-inl.cuh @@ -634,6 +634,16 @@ void Reduce(Stream *s, const TBlob& small, const OpReqType req, } } +template +void ReduceBool(Stream *s, const TBlob& small, const OpReqType req, + const Tensor& workspace, const TBlob& big) { + if (req == kNullOp) return; + cudaStream_t stream = Stream::GetStream(s); + ReduceImplConfig config = + ConfigureReduceImpl(small.shape_, big.shape_, NULL, NULL); + ReduceImpl(stream, small, req, big, workspace, config); +} + template void ReduceWithExtraMem(Stream* s, const TBlob& small, const OpReqType req, const Tensor& workspace, const TBlob& big) {}; diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h index 0a20e1263fbf..841fbcd28a68 100644 --- a/src/operator/tensor/broadcast_reduce-inl.h +++ b/src/operator/tensor/broadcast_reduce-inl.h @@ -255,6 +255,18 @@ void Reduce(Stream* s, const TBlob& small, const OpReqType req, } } +template +void ReduceBool(Stream* s, const TBlob& small, const OpReqType req, + const Tensor& workspace, const TBlob& big) { + if (req == kNullOp) return; + Shape rshape, rstride; + diff(small.shape_.get(), big.shape_.get(), &rshape, &rstride); + size_t N = small.shape_.Size(), M = rshape.Size(); + seq_reduce_compute( + N, M, req == kAddTo, big.dptr(), small.dptr(), + big.shape_.get(), small.shape_.get(), rshape, rstride); +} + template void ReduceWithExtraMem(Stream* s, const TBlob& small, const OpReqType req, const Tensor& workspace, const TBlob& big) { diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index 799f86544160..608e44dcfe76 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -636,6 +636,39 @@ void ReduceAxesComputeImpl(const OpContext& ctx, }); } +template +void ReduceAxesComputeBoolImpl(const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs, + const mxnet::TShape& small) { + using namespace mshadow; + using namespace mshadow::expr; + + mxnet::TShape src_shape, dst_shape; + 
BroadcastReduceShapeCompact(inputs[0].shape_, small, &src_shape, &dst_shape); + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[0].type_flag_, DType, { + MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[0].type_flag_, OType, { + const TBlob in_data = inputs[0].reshape(src_shape); + const TBlob out_data = outputs[0].reshape(dst_shape); + BROADCAST_NDIM_SWITCH(dst_shape.ndim(), NDim, { + size_t workspace_size = broadcast::ReduceWorkspaceSize( + s, out_data.shape_, req[0], in_data.shape_); + Tensor workspace = + ctx.requested[0].get_space_typed(Shape1(workspace_size), s); + broadcast::ReduceBool( + s, out_data, req[0], workspace, in_data); + if (normalize) { + auto out = out_data.FlatTo2D(s); + out /= scalar(src_shape.Size()/dst_shape.Size()); + } + }); + }); + }); +} + template void ReduceAxesCompute(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 0c501808a6c0..cd0bd8deeac3 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -1244,7 +1244,7 @@ void SliceAssignScalarOpForward(const nnvm::NodeAttrs& attrs, const int b = begin[i], e = end[i], s = step[i]; SetSliceOpOutputDimSize(data.shape_, i, b, e, s, &vshape); } - MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { + MSHADOW_TYPE_SWITCH_WITH_BOOL(out.type_flag_, DType, { mxnet_op::Kernel, xpu>::Launch(s, vshape.FlatTo2D()[0], out.dptr(), static_cast(param.scalar), req[0], out.shape_.get(), vshape.get(), begin, step); diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 3d26ee28b22e..53f912967cf1 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -59,6 +59,32 @@ def get_workloads(name): return OpArgMngr._args.get(name, None) +def _add_workload_all(): + # check bad element in all positions + for i in range(256-7): + e = np.array([True] * 256, dtype=bool)[7::] + e[i] = False + OpArgMngr.add_workload('all', e) + # big array test for blocked libc loops + for i in list(range(9, 6000, 507)) + [7764, 90021, -10]: + e = np.array([True] * 100043, dtype=bool) + e[i] = False + OpArgMngr.add_workload('all', e) + + +def _add_workload_any(): + # check bad element in all positions + for i in range(256-7): + d = np.array([False] * 256, dtype=bool)[7::] + d[i] = True + OpArgMngr.add_workload('any', d) + # big array test for blocked libc loops + for i in list(range(9, 6000, 507)) + [7764, 90021, -10]: + d = np.array([False] * 100043, dtype=bool) + d[i] = True + OpArgMngr.add_workload('any', d) + + def _add_workload_unravel_index(): OpArgMngr.add_workload('unravel_index', indices=np.array([2],dtype=_np.int64), shape=(2, 2)) OpArgMngr.add_workload('unravel_index', np.array([(2*3 + 1)*6 + 4], dtype=_np.int64), (4, 3, 6)) @@ -1443,6 +1469,8 @@ def _prepare_workloads(): '1x1x0': np.array([[[]]]) } + _add_workload_all() + _add_workload_any() _add_workload_argmin() _add_workload_argmax() _add_workload_around() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 3f9f1d6677cc..42407adddf7c 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -493,6 +493,84 @@ def is_int(dtype): assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) +@with_seed() +@use_np +def test_np_any(): + class TestAny(HybridBlock): + def __init__(self, axis=None, keepdims=False) : + super(TestAny, 
self).__init__() + self._axis = axis + self._keepdims = keepdims + + def hybrid_forward(self, F, a): + return F.np.any(a, axis=self._axis, keepdims=self._keepdims) + + keepdims = [True, False] + axes = [True, False] + shapes = [(), (5, ), (10, ), + (2, 5), (5, 5), (10, 10), + (4, 4, 4), (4, 6, 9), (6, 6, 6), + (7, 8, 9, 10), (7, 9, 11, 13)] + dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64, np.bool] + + combinations = itertools.product([False, True], shapes, dtypes, axes, keepdims) + for hybridize, shape, dtype, axis, keepdim in combinations: + ndim = len(shape) + samples = random.randint(0, ndim) + axis = None if not axis else tuple(random.sample([i for i in range(0, ndim)], samples)) + x = np.random.normal(0, 1.0, size=shape).astype(dtype) + test_any = TestAny(axis=axis, keepdims=keepdim) + if hybridize: + test_any.hybridize() + y = test_any(x) + expected_ret = _np.any(x.asnumpy(), axis=axis, keepdims=keepdim) + assert_almost_equal(y.asnumpy(), expected_ret) + + # test imperative + mx_outs = np.any(x, axis=axis, keepdims=keepdim) + np_outs = _np.any(x.asnumpy(), axis=axis, keepdims=keepdim) + assert_almost_equal(mx_outs.asnumpy(), np_outs) + + +@with_seed() +@use_np +def test_np_all(): + class TestAll(HybridBlock): + def __init__(self, axis=None, keepdims=False) : + super(TestAll, self).__init__() + self._axis = axis + self._keepdims = keepdims + + def hybrid_forward(self, F, a): + return F.np.all(a, axis=self._axis, keepdims=self._keepdims) + + keepdims = [True, False] + axes = [True, False] + shapes = [(), (5, ), (10, ), + (2, 5), (5, 5), (10, 10), + (4, 4, 4), (4, 6, 9), (6, 6, 6), + (7, 8, 9, 10), (7, 9, 11, 13)] + dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64, np.bool] + + combinations = itertools.product([False, True], shapes, dtypes, axes, keepdims) + for hybridize, shape, dtype, axis, keepdim in combinations: + ndim = len(shape) + samples = random.randint(0, ndim) + axis = None if not axis else tuple(random.sample([i for i in range(0, ndim)], samples)) + x = np.random.normal(0, 1.0, size=shape).astype(dtype) + test_all = TestAll(axis=axis, keepdims=keepdim) + if hybridize: + test_all.hybridize() + y = test_all(x) + expected_ret = _np.all(x.asnumpy(), axis=axis, keepdims=keepdim) + assert_almost_equal(y.asnumpy(), expected_ret) + + # test imperative + mx_outs = np.all(x, axis=axis, keepdims=keepdim) + np_outs = _np.all(x.asnumpy(), axis=axis, keepdims=keepdim) + assert_almost_equal(mx_outs.asnumpy(), np_outs) + + @with_seed() @use_np def test_np_max_min(): From 38388ff939511ed136ad9675d06bac7858ff4238 Mon Sep 17 00:00:00 2001 From: dw_sjtu <46704444+sjtuWangDing@users.noreply.github.com> Date: Fri, 27 Dec 2019 11:18:12 +0800 Subject: [PATCH 15/17] fix format (#17100) commit source file --- python/mxnet/ndarray/numpy/linalg.py | 50 +- python/mxnet/numpy/linalg.py | 50 +- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/linalg.py | 50 +- .../numpy/linalg/np_tensorsolve-inl.h | 557 ++++++++++++++++++ src/operator/numpy/linalg/np_tensorsolve.cc | 145 +++++ src/operator/numpy/linalg/np_tensorsolve.cu | 42 ++ .../unittest/test_numpy_interoperability.py | 61 ++ tests/python/unittest/test_numpy_op.py | 154 +++++ 9 files changed, 1107 insertions(+), 3 deletions(-) create mode 100644 src/operator/numpy/linalg/np_tensorsolve-inl.h create mode 100644 src/operator/numpy/linalg/np_tensorsolve.cc create mode 100644 src/operator/numpy/linalg/np_tensorsolve.cu diff --git 
a/python/mxnet/ndarray/numpy/linalg.py b/python/mxnet/ndarray/numpy/linalg.py index 4c49c35b4a44..e4fee158bea4 100644 --- a/python/mxnet/ndarray/numpy/linalg.py +++ b/python/mxnet/ndarray/numpy/linalg.py @@ -21,7 +21,7 @@ from . import _op as _mx_nd_np from . import _internal as _npi -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv', 'tensorsolve'] def norm(x, ord=None, axis=None, keepdims=False): @@ -461,3 +461,51 @@ def tensorinv(a, ind=2): True """ return _npi.tensorinv(a, ind) + + +def tensorsolve(a, b, axes=None): + r""" + Solve the tensor equation ``a x = b`` for x. + It is assumed that all indices of `x` are summed over in the product, + together with the rightmost indices of `a`, as is done in, for example, + ``tensordot(a, x, axes=b.ndim)``. + + Parameters + ---------- + a : ndarray + Coefficient tensor, of shape ``b.shape + Q``. `Q`, a tuple, equals + the shape of that sub-tensor of `a` consisting of the appropriate + number of its rightmost indices, and must be such that + ``prod(Q) == prod(b.shape)`` (in which sense `a` is said to be + 'square'). + b : ndarray + Right-hand tensor, which can be of any shape. + axes : tuple of ints, optional + Axes in `a` to reorder to the right, before inversion. + If None (default), no reordering is done. + + Returns + ------- + x : ndarray, shape Q + + Raises + ------ + MXNetError + If `a` is singular or not 'square' (in the above sense). + + See Also + -------- + numpy.tensordot, tensorinv, numpy.einsum + + Examples + -------- + >>> a = np.eye(2*3*4) + >>> a.shape = (2*3, 4, 2, 3, 4) + >>> b = np.random.randn(2*3, 4) + >>> x = np.linalg.tensorsolve(a, b) + >>> x.shape + (2, 3, 4) + >>> np.allclose(np.tensordot(a, x, axes=3), b) + True + """ + return _npi.tensorsolve(a, b, axes) diff --git a/python/mxnet/numpy/linalg.py b/python/mxnet/numpy/linalg.py index 2ee2d2670693..96fe1d311028 100644 --- a/python/mxnet/numpy/linalg.py +++ b/python/mxnet/numpy/linalg.py @@ -20,7 +20,7 @@ from __future__ import absolute_import from ..ndarray import numpy as _mx_nd_np -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv', 'tensorsolve'] def norm(x, ord=None, axis=None, keepdims=False): @@ -479,3 +479,51 @@ def tensorinv(a, ind=2): True """ return _mx_nd_np.linalg.tensorinv(a, ind) + + +def tensorsolve(a, b, axes=None): + r""" + Solve the tensor equation ``a x = b`` for x. + It is assumed that all indices of `x` are summed over in the product, + together with the rightmost indices of `a`, as is done in, for example, + ``tensordot(a, x, axes=b.ndim)``. + + Parameters + ---------- + a : ndarray + Coefficient tensor, of shape ``b.shape + Q``. `Q`, a tuple, equals + the shape of that sub-tensor of `a` consisting of the appropriate + number of its rightmost indices, and must be such that + ``prod(Q) == prod(b.shape)`` (in which sense `a` is said to be + 'square'). + b : ndarray + Right-hand tensor, which can be of any shape. + axes : tuple of ints, optional + Axes in `a` to reorder to the right, before inversion. + If None (default), no reordering is done. + + Returns + ------- + x : ndarray, shape Q + + Raises + ------ + MXNetError + If `a` is singular or not 'square' (in the above sense). 
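The axes argument deserves a concrete illustration: it reorders the given axes of a to the right before the solve. A short sketch using classic NumPy, whose behaviour the new operator is intended to mirror (the shapes are illustrative only):

```
import numpy as onp   # classic NumPy, used here only to illustrate the contract

a = onp.random.randn(2, 6, 3)                 # axis 1 (length 6 == 2*3) holds the unknowns
b = onp.random.randn(2, 3)
x = onp.linalg.tensorsolve(a, b, axes=(1,))   # move axis 1 to the right before solving
assert x.shape == (6,)

# Equivalent explicit form: reorder first, then solve without axes.
a_reordered = onp.moveaxis(a, 1, -1)          # shape (2, 3, 6) == b.shape + Q
assert onp.allclose(onp.tensordot(a_reordered, x, axes=x.ndim), b)
```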
+ + See Also + -------- + numpy.tensordot, tensorinv, numpy.einsum + + Examples + -------- + >>> a = np.eye(2*3*4) + >>> a.shape = (2*3, 4, 2, 3, 4) + >>> b = np.random.randn(2*3, 4) + >>> x = np.linalg.tensorsolve(a, b) + >>> x.shape + (2, 3, 4) + >>> np.allclose(np.tensordot(a, x, axes=3), b) + True + """ + return _mx_nd_np.linalg.tensorsolve(a, b, axes) diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 80b3d66c5e5d..603b118855a5 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -138,6 +138,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'linalg.inv', 'linalg.solve', 'linalg.tensorinv', + 'linalg.tensorsolve', 'shape', 'trace', 'tril', diff --git a/python/mxnet/symbol/numpy/linalg.py b/python/mxnet/symbol/numpy/linalg.py index a445c79001ec..0bfbb6ee540f 100644 --- a/python/mxnet/symbol/numpy/linalg.py +++ b/python/mxnet/symbol/numpy/linalg.py @@ -22,7 +22,7 @@ from . import _op as _mx_sym_np from . import _internal as _npi -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv', 'tensorsolve'] def norm(x, ord=None, axis=None, keepdims=False): @@ -448,3 +448,51 @@ def tensorinv(a, ind=2): True """ return _npi.tensorinv(a, ind) + + +def tensorsolve(a, b, axes=None): + r""" + Solve the tensor equation ``a x = b`` for x. + It is assumed that all indices of `x` are summed over in the product, + together with the rightmost indices of `a`, as is done in, for example, + ``tensordot(a, x, axes=b.ndim)``. + + Parameters + ---------- + a : ndarray + Coefficient tensor, of shape ``b.shape + Q``. `Q`, a tuple, equals + the shape of that sub-tensor of `a` consisting of the appropriate + number of its rightmost indices, and must be such that + ``prod(Q) == prod(b.shape)`` (in which sense `a` is said to be + 'square'). + b : ndarray + Right-hand tensor, which can be of any shape. + axes : tuple of ints, optional + Axes in `a` to reorder to the right, before inversion. + If None (default), no reordering is done. + + Returns + ------- + x : ndarray, shape Q + + Raises + ------ + MXNetError + If `a` is singular or not 'square' (in the above sense). + + See Also + -------- + numpy.tensordot, tensorinv, numpy.einsum + + Examples + -------- + >>> a = np.eye(2*3*4) + >>> a.shape = (2*3, 4, 2, 3, 4) + >>> b = np.random.randn(2*3, 4) + >>> x = np.linalg.tensorsolve(a, b) + >>> x.shape + (2, 3, 4) + >>> np.allclose(np.tensordot(a, x, axes=3), b) + True + """ + return _npi.tensorsolve(a, b, axes) diff --git a/src/operator/numpy/linalg/np_tensorsolve-inl.h b/src/operator/numpy/linalg/np_tensorsolve-inl.h new file mode 100644 index 000000000000..829a119b64a2 --- /dev/null +++ b/src/operator/numpy/linalg/np_tensorsolve-inl.h @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_tensorsolve-inl.h + * \brief Placeholder for tensor solve + */ +#ifndef MXNET_OPERATOR_NUMPY_LINALG_NP_TENSORSOLVE_INL_H_ +#define MXNET_OPERATOR_NUMPY_LINALG_NP_TENSORSOLVE_INL_H_ + +#include +#include +#include "../../operator_common.h" +#include "../../mshadow_op.h" +#include "../../tensor/la_op.h" +#include "../../tensor/la_op-inl.h" +#include "../np_tensordot_op-inl.h" +#include "./np_solve-inl.h" + +namespace mxnet { +namespace op { + +using namespace mshadow; + +struct TensorsolveParam : public dmlc::Parameter { + mxnet::Tuple a_axes; + DMLC_DECLARE_PARAMETER(TensorsolveParam) { + DMLC_DECLARE_FIELD(a_axes) + .set_default(mxnet::Tuple()) + .describe("Tuple of ints, optional. Axes in a to reorder to the right, before inversion."); + } +}; + +// Fix negative axes. +inline void FixNegativeAxes(mxnet::Tuple *a_axes_param, + const mxnet::TShape& a_shape) { + if (-1 == a_axes_param->ndim()) { return; } + const int a_ndim = a_shape.ndim(); + for (auto& i : *a_axes_param) { + i = (i + a_ndim) % a_ndim; + } +} + +// Get remained axes and axes of a. +inline void GetReorderedAxes(const mxnet::Tuple& a_axes_param, + mxnet::Tuple *a_axes_remained, + mxnet::Tuple *a_axes, + const mxnet::TShape& a_shape) { + std::vector a_axes_vec; + for (int i = 0; i < a_shape.ndim(); ++i) { + a_axes_vec.push_back(i); + } + // Get remained axes and axes. + if (-1 == a_axes_param.ndim()) { + *a_axes_remained = mxnet::Tuple(a_axes_vec); + *a_axes = mxnet::Tuple(a_axes_vec); + return; + } + for (const auto& i : a_axes_param) { + a_axes_vec.erase(std::find(a_axes_vec.begin(), a_axes_vec.end(), i)); + } + *a_axes_remained = mxnet::Tuple(a_axes_vec); + + a_axes_vec.clear(); + for (const auto& i : *a_axes_remained) { + a_axes_vec.push_back(i); + } + for (const auto& i : a_axes_param) { + a_axes_vec.push_back(i); + } + *a_axes = mxnet::Tuple(a_axes_vec); +} + +// Calculate output shape if a and b is tensor +inline mxnet::TShape GetOutShape(const mxnet::TShape& a_shape, + const mxnet::TShape& b_shape) { + const int a_ndim = a_shape.ndim(), b_ndim = b_shape.ndim(); + const int temp = a_ndim > b_ndim ? b_ndim : b_ndim - a_ndim; + mxnet::TShape out_shape(a_ndim - temp, -1); + for (int i = temp; i < a_ndim; ++i) { + out_shape[i - temp] = a_shape[i]; + } + return out_shape; +} + +// Calculates workspace size of tensorsolve forward. +template +size_t TensorsolveForwardWorkspaceSize(const Tuple& a_axes_param, + const TBlob& a, + const TBlob& b, + const TBlob& out, + const std::vector& req) { + if (kNullOp == req[0]) { return 0U; } + + // Zero-size output, no need to launch kernel + if (0U == out.shape_.Size()) { return 0U; } + + const mxnet::TShape& a_shape = a.shape_; + const mxnet::TShape& b_shape = b.shape_; + MSHADOW_SGL_DBL_TYPE_SWITCH(out.type_flag_, DType, { + if (0U == a_shape.Size() || 0U == b_shape.Size()) { + // 0-size input + return 0U; + } else if (0 == a_shape.ndim() || 0 == b_shape.ndim()) { + // At least 1 scalar. 
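+      // Workspace for the scalar case: typed copies of a and b plus an integer pivot buffer for the LAPACK gesv call.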
+ return (a.Size() + b.Size()) * sizeof(DType) + b.Size() * sizeof(int); + } else { + // Two tensors of at least 1 dimensions. + return (2 * a.Size() + b.Size()) * sizeof(DType) + b.Size() * sizeof(int); + } + }); + LOG(FATAL) << "InternalError: cannot reach here"; + return 0U; +} + +template +struct assign_helper { + template + MSHADOW_XINLINE static void Map(int i, const DType *in_data, DType *out_data) { + KERNEL_ASSIGN(out_data[i], req, in_data[i]); + } +}; + +struct tensorsolve { + template + static void op(const Tensor& A, + const Tensor& X, + const Tensor& ipiv, + const OpContext& ctx) { + mshadow::Stream *s = ctx.get_stream(); + linalg_solve(A, X, ipiv, s); // ipiv for work_space in Lapacke_#gesv + } +}; + +template +void TensorsolveOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + + mshadow::Stream *s = ctx.get_stream(); + const TBlob& a = inputs[0]; + const TBlob& b = inputs[1]; + const TBlob& out = outputs[0]; + const mxnet::TShape a_shape = a.shape_; + const mxnet::TShape b_shape = b.shape_; + const mxnet::TShape out_shape = out.shape_; + const TensorsolveParam& param = nnvm::get(attrs.parsed); + mxnet::Tuple a_axes_param = param.a_axes; + FixNegativeAxes(&a_axes_param, a_shape); + + size_t workspace_size = TensorsolveForwardWorkspaceSize(a_axes_param, a, b, out, req); + Tensor workspace = ctx.requested[0].get_space_typed( + Shape1(workspace_size), ctx.get_stream()); + + if (kNullOp == req[0]) { return; } + + // Zero-size output, no need to launch kernel + if (0U == out.shape_.Size()) { return; } + + MSHADOW_SGL_DBL_TYPE_SWITCH(out.type_flag_, DType, { + if (0U == a_shape.Size() || 0U == b_shape.Size()) { // 0-size input + if (req[0] != kAddTo) { + Tensor out_tensor = + out.get_with_shape(Shape1(out.shape_.Size()), s); + out_tensor = static_cast(0); + } + } else if (0U == a_shape.ndim() || 0U == b_shape.ndim()) { // At least 1 scalar. + // Check again + CHECK_EQ(a_shape.Size(), 1U) + << "a's and b's dimensions don't match"; + CHECK_EQ(b_shape.Size(), 1U) + << "a's and b's dimensions don't match"; + + DType* a_ptr = + reinterpret_cast(workspace.dptr_); + DType* b_ptr = + reinterpret_cast(workspace.dptr_+ a.Size() * sizeof(DType)); + int* ipiv_ptr = + reinterpret_cast(workspace.dptr_ + (a.Size() + b.Size()) * sizeof(DType)); + + // Cast type + MSHADOW_TYPE_SWITCH(a.type_flag_, AType, { + mxnet_op::Kernel::Launch( + s, a_shape.Size(), a_ptr, a.dptr()); + }); + MSHADOW_TYPE_SWITCH(b.type_flag_, BType, { + mxnet_op::Kernel::Launch( + s, b_shape.Size(), b_ptr, b.dptr()); + }); + + mxnet::TBlob a_tblob(a_ptr, Shape2(1, 1), a.dev_mask(), a.dev_id()); + mxnet::TBlob b_tblob(b_ptr, Shape2(1, 1), b.dev_mask(), b.dev_id()); + mxnet::TBlob ipiv_tblob(ipiv_ptr, Shape1(1), out.dev_mask(), out.dev_id()); + Tensor a_tensor = a_tblob.get(s); + Tensor b_tensor = b_tblob.get(s); + Tensor ipiv_tensor = ipiv_tblob.get(s); + + // Solve linear equation + laop::op(a_tensor, b_tensor, ipiv_tensor, ctx); + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + mxnet_op::Kernel, xpu>::Launch( + s, out_shape.Size(), b_tensor.dptr_, out.dptr()); + }); + } else { + // Two tensors of at least 1 dimensions. 
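+ // Assumed workspace layout for the tensor case, in the order carved out by the
+ // reinterpret_casts below (and sized by TensorsolveForwardWorkspaceSize above):
+ //   [ a cast to DType | a transposed | b cast to DType | int pivot buffer of b.Size() ]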
+ Tuple a_axes_remained; + Tuple a_axes; + GetReorderedAxes(a_axes_param, &a_axes_remained, &a_axes, a_shape); + mxnet::TShape a_transpose_shape = GetReorderedShape(a_shape, a_axes); + const int N = b_shape.Size(); + + DType* a_ptr = + reinterpret_cast(workspace.dptr_); + DType* a_trans_ptr = + reinterpret_cast(workspace.dptr_ + a.Size() * sizeof(DType)); + DType* b_ptr = + reinterpret_cast(workspace.dptr_ + 2 * a.Size() * sizeof(DType)); + int* ipiv_ptr = + reinterpret_cast(workspace.dptr_ + (2 * a.Size() + b.Size()) * sizeof(DType)); + + // Cast type + MSHADOW_TYPE_SWITCH(a.type_flag_, AType, { + mxnet_op::Kernel::Launch( + s, a_shape.Size(), a_ptr, a.dptr()); + }); + // Cast type + MSHADOW_TYPE_SWITCH(b.type_flag_, BType, { + mxnet_op::Kernel::Launch( + s, b_shape.Size(), b_ptr, b.dptr()); + }); + + mxnet::TBlob a_tblob = + TBlob(a_ptr, a_shape, a.dev_mask(), a.dev_id()); + mxnet::TBlob a_transpose_tblob = + TBlob(a_trans_ptr, a_transpose_shape, a.dev_mask(), a.dev_id()); + mxnet::TBlob b_tblob = + TBlob(b_ptr, b_shape, b.dev_mask(), b.dev_id()); + mxnet::TBlob ipiv_tblob = + TBlob(ipiv_ptr, b_shape, out.dev_mask(), out.dev_id()); + mxnet::op::TransposeImpl(ctx.run_ctx, + a_tblob, // src + a_transpose_tblob, // res + mxnet::TShape(a_axes.begin(), a_axes.end())); + + Tensor a_tensor = + a_tblob.get_with_shape(Shape2(N, N), s); + Tensor ipiv_tensor = + ipiv_tblob.get_with_shape(Shape1(N), s); + Tensor b_tensor = + b_tblob.get_with_shape(Shape2(1, N), s); + Tensor out_tensor = + out.get_with_shape(Shape2(1, N), s); + + a_tblob = a_tblob.reshape(Shape2(N, N)); + a_transpose_tblob = a_transpose_tblob.reshape(Shape2(N, N)); + Tuple a_axes_2D(std::vector{1, 0}); + mxnet::op::TransposeImpl(ctx.run_ctx, + a_transpose_tblob, // src + a_tblob, // res + mxnet::TShape(a_axes_2D.begin(), a_axes_2D.end())); + // Solve linear equation + laop::op(a_tensor, b_tensor, ipiv_tensor, ctx); + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + mxnet_op::Kernel, xpu>::Launch( + s, out_shape.Size(), b_tensor.dptr_, out_tensor.dptr_); + }); + } + }); +} + +// Calculates workspace size of tensordot backward. +template +size_t TensorsolveBackwardWorkspaceSize(const TBlob& out_grad, + const TBlob& a, + const TBlob& b, + const TBlob& x) { + const mxnet::TShape& a_shape = a.shape_; + const mxnet::TShape& b_shape = b.shape_; + const mxnet::TShape& x_shape = x.shape_; + + // Zero-size output, no need to launch kernel + if (0U == a_shape.Size() || 0U == b_shape.Size()) { return 0U; } + + MSHADOW_SGL_DBL_TYPE_SWITCH(out_grad.type_flag_, DType, { + int work_space_size = 0; + if (0U == a_shape.ndim() || 0U == b_shape.ndim()) { + // At least 1 scalar. + work_space_size += sizeof(DType) * a_shape.Size(); // for tensorinv(a) + work_space_size += sizeof(DType) * a_shape.Size(); // for getri work space lu + work_space_size += sizeof(int) * b_shape.Size(); // for getri work space pivot + } else { + // Two tensors of at least 1 dimensions. 
+ work_space_size += sizeof(DType) * a_shape.Size(); // for tensorinv(a) + work_space_size += sizeof(DType) * a_shape.Size(); // for getri work space lu + work_space_size += sizeof(DType) * b_shape.Size(); // for b + work_space_size += sizeof(DType) * x_shape.Size(); // for x + work_space_size += sizeof(DType) * a_shape.Size(); // for grad_a + work_space_size += sizeof(DType) * b_shape.Size(); // for grad_b + work_space_size += sizeof(int) * b_shape.Size(); // for getri work space pivot + } + return work_space_size; + }); + LOG(FATAL) << "InternalError: cannot reach here"; + return 0U; +} + +// Get original axes for tensor a. +inline void GetOriginAxes(const mxnet::TShape& a_shape, + const mxnet::Tuple& a_axes, + mxnet::Tuple *a_origin_axes) { + std::vector a_origin_axes_vec(a_shape.ndim(), -1); + for (int i = 0; i < a_shape.ndim(); ++i) { + a_origin_axes_vec[a_axes[i]] = i; + } + *a_origin_axes = mxnet::Tuple(a_origin_axes_vec); +} + +struct tensorsolve_backward { + template + static void op(const Tensor& dX, + const Tensor& inv_A, + const Tensor& B, + const Tensor& X, + const Tensor& dA, + const Tensor& dB, + const OpContext& ctx) { + // (1) calcualte dB = trans(tensorinv(A)) * dX + // (2) calcualte dA = dB * trans(X) + Stream *s = ctx.get_stream(); + gemm2::op(inv_A, dX, dB, DType(1), true, false, s); + gemm2::op(dB, X, dA, DType(-1), false, true, s); + } +}; + +template +void TensorsolveBackwardImpl(const Tuple& a_axes_param, + const TBlob& out_grad, + const TBlob& a, + const TBlob& b, + const TBlob& x, + const TBlob& grad_a, + const TBlob& grad_b, + const OpContext& ctx, + const std::vector& req, + const Tensor& workspace) { + mshadow::Stream *s = ctx.get_stream(); + const mxnet::TShape& a_shape = a.shape_; + const mxnet::TShape& b_shape = b.shape_; + const mxnet::TShape& x_shape = x.shape_; + + if (kNullOp == req[0] && kNullOp == req[1]) { return; } + + // Zero-size output, no need to launch kernel + if (0U == a_shape.Size() || 0U == b_shape.Size()) { return; } + + MSHADOW_SGL_DBL_TYPE_SWITCH(out_grad.type_flag_, DType, { + if (0 == a_shape.ndim() || 0 == b_shape.ndim()) { + // At least 1 scalar. + CHECK_EQ(a_shape.Size(), 1U) + << "a's and b's dimensions don't match"; + CHECK_EQ(b_shape.Size(), 1U) + << "a's and b's dimensions don't match"; + + // Allocate workspace. + DType *tensorinv_a_ptr = reinterpret_cast(workspace.dptr_); + DType *lu_ptr = reinterpret_cast(workspace.dptr_ + a_shape.Size() * sizeof(DType)); + int *ipiv_ptr = reinterpret_cast(workspace.dptr_ + 2 * a_shape.Size() * sizeof(DType)); + TBlob tensorinv_a(tensorinv_a_ptr, a_shape, xpu::kDevMask); + TBlob lu(lu_ptr, a_shape, xpu::kDevMask); + TBlob ipiv(ipiv_ptr, b_shape, xpu::kDevMask); + + MSHADOW_TYPE_SWITCH(a.type_flag_, AType, { + mxnet_op::Kernel::Launch( + s, a_shape.Size(), + tensorinv_a_ptr, + a.dptr()); + }); + // Calculate tensorinv(a) + Tensor tensorinv_a_tensor = + tensorinv_a.get_with_shape(Shape3(1, 1, 1), s); + Tensor lu_tensor = + lu.get_with_shape(Shape3(1, 1, 1), s); + Tensor ipiv_tensor = + ipiv.get_with_shape(Shape2(1, 1), s); + batch_inverse(tensorinv_a_tensor, lu_tensor, ipiv_tensor, ctx); + + MSHADOW_TYPE_SWITCH(x.type_flag_, XType, { + DType temp1 = (*(tensorinv_a_tensor.dptr_)) * (*(out_grad.dptr())); + DType temp2 = -temp1 * static_cast(*x.dptr()); + ASSIGN_DISPATCH(*grad_b.dptr(), req[1], temp1); + ASSIGN_DISPATCH(*grad_a.dptr(), req[0], temp2); + }); + } else { + // Two tensors of at least 1 dimensions. 
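+ // Assumed workspace layout for the tensor case, in the order carved out by the
+ // reinterpret_casts below (and sized by TensorsolveBackwardWorkspaceSize above):
+ //   [ tensorinv(a) | LU scratch | b | x | grad_a | grad_b | int pivot buffer ]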
+ const int N = b_shape.Size(); + Tuple a_axes_remained; + Tuple a_axes; + Tuple a_origin_axes; + // Use a_axes to transpose (a_shape) --> (a_reordered_shape). + GetReorderedAxes(a_axes_param, &a_axes_remained, &a_axes, a_shape); + // Use a_origin_axes to transpose (a_reordered_shape) --> (a_shape). + GetOriginAxes(a_shape, a_axes, &a_origin_axes); + mxnet::TShape reordered_a_shape = GetReorderedShape(a_shape, a_axes); + + // Allocate workspace. + DType *tensorinv_a_ptr = reinterpret_cast( + workspace.dptr_); + DType *lu_ptr = reinterpret_cast( + workspace.dptr_ + a_shape.Size() * sizeof(DType)); + DType *b_ptr = reinterpret_cast( + workspace.dptr_ + 2 * a_shape.Size() * sizeof(DType)); + DType *x_ptr = reinterpret_cast( + workspace.dptr_ + (2 * a_shape.Size() + b_shape.Size()) * sizeof(DType)); + DType *grad_a_ptr = reinterpret_cast( + workspace.dptr_ + 2 * (a_shape.Size() + b_shape.Size()) * sizeof(DType)); + DType *grad_b_ptr = reinterpret_cast( + workspace.dptr_ + (3 * a_shape.Size() + 2 * b_shape.Size()) * sizeof(DType)); + int *ipiv_ptr = reinterpret_cast( + workspace.dptr_ + 3 * (a_shape.Size() + b_shape.Size()) * sizeof(DType)); + + TBlob tensorinv_a_data(tensorinv_a_ptr, a_shape, xpu::kDevMask); + TBlob lu_data(lu_ptr, a_shape, xpu::kDevMask); + TBlob b_data(b_ptr, b_shape, xpu::kDevMask); + TBlob x_data(x_ptr, x_shape, xpu::kDevMask); + TBlob grad_a_data(grad_a_ptr, reordered_a_shape, xpu::kDevMask); + TBlob grad_b_data(grad_b_ptr, b_shape, xpu::kDevMask); + TBlob ipiv_data(ipiv_ptr, b_shape, xpu::kDevMask); + MSHADOW_TYPE_SWITCH(a.type_flag_, AType, { + mxnet_op::Kernel::Launch( + s, a_shape.Size(), + lu_ptr, + a.dptr()); + }); + MSHADOW_TYPE_SWITCH(b.type_flag_, BType, { + mxnet_op::Kernel::Launch( + s, b_shape.Size(), + b_ptr, + b.dptr()); + }); + MSHADOW_TYPE_SWITCH(x.type_flag_, XType, { + mxnet_op::Kernel::Launch( + s, x_shape.Size(), + x_ptr, + x.dptr()); + }); + // Eg: lu_data(2, 3, 2, 15, 4, 5) -> tensorinv_a_data(3, 4, 5, 15, 2, 2) + tensorinv_a_data = tensorinv_a_data.reshape(reordered_a_shape); + mxnet::op::TransposeImpl(ctx.run_ctx, + lu_data, // src + tensorinv_a_data, // res + mxnet::TShape(a_axes.begin(), a_axes.end())); + + Tensor tensorinv_a_tensor = + tensorinv_a_data.get_with_shape(Shape3(1, N, N), s); + Tensor lu_tensor = + lu_data.get_with_shape(Shape3(1, N, N), s); + Tensor b_tensor = + b_data.get_with_shape(Shape3(1, N, 1), s); + Tensor x_tensor = + x_data.get_with_shape(Shape3(1, N, 1), s); + Tensor grad_a_tensor = + grad_a_data.get_with_shape(Shape3(1, N, N), s); + Tensor grad_b_tensor = + grad_b_data.get_with_shape(Shape3(1, N, 1), s); + Tensor ipiv_tensor = + ipiv_data.get_with_shape(Shape2(1, N), s); + + // Calculate tensorinv(a). + batch_inverse(tensorinv_a_tensor, lu_tensor, ipiv_tensor, ctx); + // No need to transpose tensorinv_a + // because transpose(tensorinv_a).shape == reordered_a_shape. 
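+ // Gradient recap (implemented by the two gemm2 calls in tensorsolve_backward above):
+ //   grad_b = transpose(tensorinv(a)) . out_grad,   grad_a = -grad_b . transpose(x),
+ // evaluated on the flattened (1, N, N) / (1, N, 1) views prepared above.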
+ laop::op(out_grad.get_with_shape(x_tensor.shape_, s), + tensorinv_a_tensor, + b_tensor, + x_tensor, + grad_a_tensor, + grad_b_tensor, + ctx); + // Eg: grad_a_src(3, 4, 5, 15, 2, 2) --> lu_data(2, 3, 2, 15, 4, 5) + mxnet::op::TransposeImpl(ctx.run_ctx, + grad_a_data, // src + lu_data, // res + mxnet::TShape(a_origin_axes.begin(), a_origin_axes.end())); + + MXNET_ASSIGN_REQ_SWITCH(req[1], req_type, { + mxnet_op::Kernel, xpu>::Launch( + s, b_shape.Size(), grad_b_tensor.dptr_, grad_b.dptr()); + }); + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + mxnet_op::Kernel, xpu>::Launch( + s, a_shape.Size(), lu_tensor.dptr_, grad_a.dptr()); + }); + } + }); +} + +template +void TensorsolveOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + CHECK_EQ(inputs.size(), 4U); + CHECK_EQ(outputs.size(), 2U); + CHECK_EQ(req.size(), 2U); + + const TBlob& out_grad = inputs[0]; + const TBlob& a = inputs[1]; + const TBlob& b = inputs[2]; + const TBlob& x = inputs[3]; + const TBlob& grad_a = outputs[0]; + const TBlob& grad_b = outputs[1]; + const mxnet::TShape a_shape = a.shape_; + const mxnet::TShape b_shape = b.shape_; + const TensorsolveParam& param = nnvm::get(attrs.parsed); + mxnet::Tuple a_axes_param = param.a_axes; + FixNegativeAxes(&a_axes_param, a_shape); + + size_t workspace_size = TensorsolveBackwardWorkspaceSize(out_grad, a, b, x); + Tensor workspace = + ctx.requested[0].get_space_typed(Shape1(workspace_size), + ctx.get_stream()); + TensorsolveBackwardImpl(a_axes_param, + out_grad, + a, b, x, + grad_a, grad_b, + ctx, req, + workspace); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_LINALG_NP_TENSORSOLVE_INL_H_ diff --git a/src/operator/numpy/linalg/np_tensorsolve.cc b/src/operator/numpy/linalg/np_tensorsolve.cc new file mode 100644 index 000000000000..1dabcdd0eac4 --- /dev/null +++ b/src/operator/numpy/linalg/np_tensorsolve.cc @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_tensorsolve.cc + * \brief CPU implementation placeholder of Tensor Solve Operator + */ +#include "./np_tensorsolve-inl.h" + +namespace mxnet { +namespace op { + +bool TensorsolveOpShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + + const mxnet::TShape& a_shape = in_attrs->at(0); + const mxnet::TShape& b_shape = in_attrs->at(1); + const int a_ndim = a_shape.ndim(); + const int b_ndim = b_shape.ndim(); + + if (!ndim_is_known(a_shape) || !ndim_is_known(b_shape)) { + return false; + } + + if (0 == a_ndim && 0 == b_ndim) { + // a and b is scalar + SHAPE_ASSIGN_CHECK(*out_attrs, 0, b_shape); + } else if (0 == a_ndim && 0 != b_ndim) { + // a is scalar, b is tensor + CHECK_EQ(b_shape.Size(), 1U) + << "a's and b's dimensions don't match"; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, a_shape); + } else if (0 != a_ndim && 0 == b_ndim) { + // a is tensor, a is scalar + CHECK_EQ(a_shape.Size(), 1U) + << "a's and b's dimensions don't match"; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, a_shape); + } else { + // a and b of at least 1 dimensions. + const TensorsolveParam& param = nnvm::get(attrs.parsed); + mxnet::Tuple a_axes_param = param.a_axes; + FixNegativeAxes(&a_axes_param, a_shape); + + mxnet::Tuple a_axes_remained; + mxnet::Tuple a_axes; + GetReorderedAxes(a_axes_param, &a_axes_remained, &a_axes, a_shape); + mxnet::TShape a_transpose_shape = GetReorderedShape(a_shape, a_axes); + + // Calculate output shape + const int temp = a_ndim > b_ndim ? b_ndim : b_ndim - a_ndim; + int prod_front = 1, prod_back = 1; + mxnet::TShape out_shape(a_ndim - temp > 0 ? a_ndim - temp : 0, -1); + for (int i = 0; i < a_ndim; ++i) { + if (i < temp) { + prod_front *= a_transpose_shape[i]; + } else { + prod_back *= a_transpose_shape[i]; + out_shape[i - temp] = a_transpose_shape[i]; + } + } + CHECK_EQ(prod_front, prod_back) << "a shape must be square."; + CHECK_EQ(prod_back, b_shape.Size()) << "a's and b's dimensions don't match"; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, out_shape); + } + + return shape_is_known(*in_attrs) && shape_is_known(*out_attrs); +} + +inline bool TensorsolveOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + int a_type = in_attrs->at(0); + int b_type = in_attrs->at(1); + // unsupport float16 + CHECK_NE(a_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg"; + CHECK_NE(b_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg"; + if (mshadow::kFloat32 == a_type && mshadow::kFloat32 == b_type) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(1)); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64); + } + return out_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(TensorsolveParam); + +NNVM_REGISTER_OP(_npi_tensorsolve) +.set_attr_parser(mxnet::op::ParamParser) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a", "b"}; + }) +.set_attr("FInferShape", TensorsolveOpShape) +.set_attr("FInferType", TensorsolveOpType) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector(1, ResourceRequest::kTempSpace); + }) +.set_attr("THasDeterministicOutput", true) +.set_attr("FCompute", TensorsolveOpForward) +.set_attr("FGradient", + mxnet::op::ElemwiseGradUseInOut{"_backward_npi_tensorsolve"}) 
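+// ElemwiseGradUseInOut wires out_grad plus both inputs (a, b) and the forward output (x)
+// into _backward_npi_tensorsolve, which therefore takes 4 inputs (see TensorsolveOpBackward).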
+.add_argument("a", "NDArray-or-Symbol", "First input") +.add_argument("b", "NDArray-or-Symbol", "Second input") +.add_arguments(TensorsolveParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_npi_tensorsolve) +.set_attr_parser(mxnet::op::ParamParser) +.set_num_inputs(4) +.set_num_outputs(2) +.set_attr("FResourceRequest", + [](const NodeAttrs& ){ + return std::vector{1, ResourceRequest::kTempSpace}; + }) +.set_attr("TIsBackward", true) +.set_attr("FCompute", TensorsolveOpBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/linalg/np_tensorsolve.cu b/src/operator/numpy/linalg/np_tensorsolve.cu new file mode 100644 index 000000000000..07e2121750d5 --- /dev/null +++ b/src/operator/numpy/linalg/np_tensorsolve.cu @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_tensorsolve.cu + * \brief GPU implementation placeholder of Tensor Solve Operator + */ + +#include +#include "./np_tensorsolve-inl.h" + +namespace mxnet { +namespace op { + +#if MXNET_USE_CUSOLVER == 1 + +NNVM_REGISTER_OP(_npi_tensorsolve) +.set_attr("FCompute", TensorsolveOpForward); + +NNVM_REGISTER_OP(_backward_npi_tensorsolve) +.set_attr("FCompute", TensorsolveOpBackward); + +#endif + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 53f912967cf1..6beb26cdf40a 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -442,6 +442,66 @@ def _add_workload_linalg_tensorinv(): OpArgMngr.add_workload('linalg.tensorinv', np.array(a, dtype=dtype), ind) +def _add_workload_linalg_tensorsolve(): + shapes = [ + # a_shape.ndim <= 6 + # (a_shape, b_shape, axes) + ((1, 1), (1,), None), + ((1, 1), (1, 1, 1, 1, 1), None), + ((4, 4), (4,), None), + ((2, 3, 3, 4, 2), (3, 4), (0, 2, 4)), + ((1, 3, 3, 4, 4), (1, 3, 4), (1, 3)), + ((1, 4, 1, 12, 3), (1, 2, 1, 2, 1, 3, 1), (1, 2, 4)), + ] + dtypes = (np.float32, np.float64) + for dtype in dtypes: + for a_shape, b_shape, axes in shapes: + a_ndim = len(a_shape) + b_ndim = len(b_shape) + a_trans_shape = list(a_shape) + a_axes = list(range(0, a_ndim)) + if axes is not None: + for k in axes: + a_axes.remove(k) + a_axes.insert(a_ndim, k) + for k in range(a_ndim): + a_trans_shape[k] = a_shape[a_axes[k]] + x_shape = a_trans_shape[-(a_ndim - b_ndim):] + prod = 1 + for k in x_shape: + prod *= k + if prod * prod != _np.prod(a_shape): + raise ValueError("a is not square") + if prod != _np.prod(b_shape): + raise ValueError("a's shape and b's shape dismatch") + mat_shape = (prod, prod) + a_trans_shape = tuple(a_trans_shape) + x_shape = tuple(x_shape) + + a_np = _np.eye(prod) + shape = mat_shape + while 1: + # 
generate well-conditioned matrices with small eigenvalues + D = _np.diag(_np.random.uniform(-1.0, 1.0, shape[-1])) + I = _np.eye(shape[-1]).reshape(shape) + v = _np.random.uniform(-1., 1., shape[-1]).reshape(shape[:-1] + (1,)) + v = v / _np.linalg.norm(v, axis=-2, keepdims=True) + v_T = _np.swapaxes(v, -1, -2) + U = I - 2 * _np.matmul(v, v_T) + a = _np.matmul(U, D) + if (_np.linalg.cond(a, 2) < 4): + a_np = a.reshape(a_trans_shape) + break + x_np = _np.random.randn(*x_shape) + b_np = _np.tensordot(a_np, x_np, axes=len(x_shape)) + a_origin_axes = list(range(a_np.ndim)) + if axes is not None: + for k in range(a_np.ndim): + a_origin_axes[a_axes[k]] = k + a_np = a_np.transpose(a_origin_axes) + OpArgMngr.add_workload('linalg.tensorsolve', np.array(a_np, dtype=dtype), np.array(b_np, dtype=dtype), axes) + + def _add_workload_linalg_slogdet(): OpArgMngr.add_workload('linalg.slogdet', np.array(_np.ones((2, 2)), dtype=np.float32)) OpArgMngr.add_workload('linalg.slogdet', np.array(_np.ones((0, 1, 1)), dtype=np.float64)) @@ -1523,6 +1583,7 @@ def _prepare_workloads(): _add_workload_linalg_solve() _add_workload_linalg_det() _add_workload_linalg_tensorinv() + _add_workload_linalg_tensorsolve() _add_workload_linalg_slogdet() _add_workload_trace() _add_workload_tril() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 42407adddf7c..37cdaae328ee 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -4062,6 +4062,160 @@ def get_grad_A(A, ind): check_tensorinv(mx_out, a, ind) +@with_seed() +@use_np +def test_np_linalg_tensorsolve(): + class TestTensorsolve(HybridBlock): + def __init__(self, axes): + super(TestTensorsolve, self).__init__() + self._axes = axes + + def hybrid_forward(self, F, a, b): + return F.np.linalg.tensorsolve(a, b, axes=self._axes) + + def get_tensorsolve_backward(a_np, b_np, mx_out_np, a_axes, a_origin_axes, a_trans_shape): + if (a_np.ndim == 0 or b_np.ndim == 0) or (a_np.ndim == b_np.ndim): + a_shape = a_np.shape + b_shape = b_np.shape + a_np = a_np.reshape((1, 1)) + b_np = b_np.reshape((1,)) + mx_out_np = mx_out_np.reshape((1,)) + dx = _np.ones_like(mx_out_np) + inv_a_temp_np = _np.linalg.inv(a_np) + grad_b = inv_a_temp_np[0][0] * dx[0] + grad_a = -grad_b * mx_out_np[0] + return grad_a.reshape(a_shape), grad_b.reshape(b_shape) + else: + dx = _np.ones_like(mx_out_np) + a_np = a_np.transpose(a_axes) + ind = a_np.ndim - mx_out_np.ndim + tensorinv_a_np = _np.linalg.tensorinv(a_np, ind=ind) + a_trans_axes = list(range(a_np.ndim))[a_np.ndim - ind:] + list(range(a_np.ndim))[:a_np.ndim - ind] + trans_tensorinv_a_np = tensorinv_a_np.transpose(a_trans_axes) + grad_b = _np.tensordot(trans_tensorinv_a_np, dx, axes=dx.ndim) + grad_a = _np.tensordot(grad_b, mx_out_np, axes=0) + grad_a = grad_a.transpose(a_origin_axes) + return -grad_a, grad_b.reshape(b_np.shape) + + def check_tensorsolve(x, a_np, b_np, axes): + try: + x_expected = _np.linalg.tensorsolve(a_np, b_np, axes=axes) + except Exception as e: + print("a:", a_np) + print("a shape:", a_np.shape) + print("b", b_np) + print("b shape:", b_np.shape) + print(e) + else: + assert x.shape == x_expected.shape + assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + + def shapeInfer(a_shape, b_shape, axes=None): + # b_shape - Right-hand tensor shape, which can be of any shape. 
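+ # returns (a_axes, mat_shape, a_trans_shape, x_shape): the transpose order for a,
+ # the square (prod, prod) matrix view handed to the solver, a's transposed shape,
+ # and the expected shape of the solution x (mirrors the return statement below)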
+ a_ndim = len(a_shape) + b_ndim = len(b_shape) + a_trans_shape = list(a_shape) + a_axes = list(range(0, a_ndim)) + if axes is not None: + for k in axes: + a_axes.remove(k) + a_axes.insert(a_ndim, k) + for k in range(a_ndim): + a_trans_shape[k] = a_shape[a_axes[k]] + x_shape = a_trans_shape[-(a_ndim - b_ndim):] + prod = 1 + for k in x_shape: + prod *= k + if prod * prod != _np.prod(a_shape): + raise ValueError("a is not square") + if prod != _np.prod(b_shape): + raise ValueError("a's shape and b's shape dismatch") + return a_axes, (prod, prod), tuple(a_trans_shape), tuple(x_shape) + + def newInvertibleMatrix_2D(shape, max_cond=4): + while 1: + # generate well-conditioned matrices with small eigenvalues + D = _np.diag(_np.random.uniform(-1.0, 1.0, shape[-1])) + I = _np.eye(shape[-1]).reshape(shape) + v = _np.random.uniform(-1., 1., shape[-1]).reshape(shape[:-1] + (1,)) + v = v / _np.linalg.norm(v, axis=-2, keepdims=True) + v_T = _np.swapaxes(v, -1, -2) + U = I - 2 * _np.matmul(v, v_T) + a = _np.matmul(U, D) + if (_np.linalg.cond(a, 2) < max_cond): + return a + + shapes = [ + # a_shape.ndim <= 6, + # (a_shape, b_shape, axes) + ((), (), None), # a.ndim == 0, b.ndim == 0, with axes must be None + ((), (1, 1, 1), None), # a.ndim == 0, b.ndim != 0, with axes must be None + ((1, 1, 1), (), None), # a.ndim != 0, b.ndim == 0, with axes == None + ((1, 1, 1), (), (0, 1, 2)), # a.ndim != 0, b.ndim == 0, with axes != None + ((1, 1, 1), (1, 1, 1), None), # a.ndim != 0, b.ndim != 0, a.ndim == b.ndim with axes == None + ((1, 1, 1), (1, 1, 1), (2, 0, 1)), # a.ndim != 0, b.ndim != 0, a.ndim == b.ndim with axes != None + ((1, 1), (1,), None), # a.ndim != 0, b.ndim != 0, a.ndim > b.ndim + ((1, 1), (1, 1, 1, 1, 1), None), # a.ndim != 0, b.ndim != 0, a.ndim < b.ndim - a.ndim + ((4, 4), (4,), None), + ((6, 2, 3), (6,), None), + ((2, 3, 6), (6,), (0, 1)), + ((3, 4, 2, 3, 2), (3, 4), None), + ((2, 1, 4, 2, 4), (2, 4), (0, 1, 2)), + ((2, 3, 3, 4, 2), (3, 4), (0, 2, 4)), + ((1, 3, 3, 4, 4), (1, 3, 4), (1, 3)), + ((1, 12, 4, 1, 3), (1, 2, 1, 2, 1, 3, 1), None), + ((1, 4, 1, 12, 3), (1, 2, 1, 2, 1, 3, 1), (1, 2, 4)), + ] + dtypes = ['float32', 'float64'] + for hybridize in [True, False]: + for dtype in dtypes: + for a_shape, b_shape, axes in shapes: + rtol = 1e-2 if dtype == 'float32' else 1e-3 + atol = 1e-4 if dtype == 'float32' else 1e-5 + test_tensorsolve = TestTensorsolve(axes) + if hybridize: + test_tensorsolve.hybridize() + + a_axes, mat_shape, a_trans_shape, x_shape = shapeInfer(a_shape, b_shape, axes) + # generate coefficient tensor a and right side tensor b + if (len(a_shape) == 0 or len(b_shape) == 0) or (len(a_shape) == len(b_shape)): + a_np = _np.asarray(1).astype(dtype).reshape(a_shape) + b_np = _np.asarray(2).astype(dtype).reshape(b_shape) + else: + a_np = newInvertibleMatrix_2D(mat_shape, max_cond=3).reshape(a_trans_shape) + x_np = _np.random.randn(*x_shape) + b_np = _np.tensordot(a_np, x_np, axes=len(x_shape)) + + # resume original shape of tensor a + a_origin_axes = list(range(a_np.ndim)) + if axes is not None: + for k in range(a_np.ndim): + a_origin_axes[a_axes[k]] = k + a_np = a_np.transpose(a_origin_axes) + a = np.array(a_np, dtype=dtype).reshape(a_shape) + b = np.array(b_np, dtype=dtype).reshape(b_shape) + a.attach_grad() + b.attach_grad() + + with mx.autograd.record(): + mx_out = test_tensorsolve(a, b) + # check tensorsolve validity + assert mx_out.shape == x_shape + check_tensorsolve(mx_out, a.asnumpy(), b.asnumpy(), axes) + + # check backward + if len(a_shape) != 0 and len(b_shape) != 0: + 
mx.autograd.backward(mx_out) + grad_a_expected, grad_b_expected = get_tensorsolve_backward( + a.asnumpy(), b.asnumpy(), mx_out.asnumpy(), a_axes, a_origin_axes, a_trans_shape) + assert_almost_equal(a.grad.asnumpy(), grad_a_expected, rtol=rtol, atol=atol) + assert_almost_equal(b.grad.asnumpy(), grad_b_expected, rtol=rtol, atol=atol) + + # check imperative once again + mx_out = test_tensorsolve(a, b) + check_tensorsolve(mx_out, a.asnumpy(), b.asnumpy(), axes) + + @with_seed() @use_np def test_np_linalg_det(): From a6ed69e060da3661972f4cd4041baf0a689773a3 Mon Sep 17 00:00:00 2001 From: Minghao Liu <40382964+Tommliu@users.noreply.github.com> Date: Fri, 27 Dec 2019 11:18:37 +0800 Subject: [PATCH 16/17] amax (#17176) --- python/mxnet/_numpy_op_doc.py | 11 +++++++++-- python/mxnet/numpy_dispatch_protocol.py | 1 + .../numpy/np_broadcast_reduce_op_value.cc | 1 + .../unittest/test_numpy_interoperability.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py index d1d67be06b05..6abb96c62c09 100644 --- a/python/mxnet/_numpy_op_doc.py +++ b/python/mxnet/_numpy_op_doc.py @@ -731,7 +731,7 @@ def _np_squeeze(a, axis=None, out=None): pass -def _np_max(a, axis=None, out=None, keepdims=False): +def _np_max(a, axis=None, keepdims=False, out=None): """ Return the maximum of an array or maximum along an axis. @@ -795,7 +795,14 @@ def _np_max(a, axis=None, out=None, keepdims=False): pass -def _np_min(a, axis=None, out=None, keepdims=False): +def _np_amax(a, axis=None, keepdims=False, out=None): + """ + Refer to _np_max + """ + pass + + +def _np_min(a, axis=None, keepdims=False, out=None): """ Return the minimum of an array or minimum along an axis. diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 603b118855a5..96354684241c 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -106,6 +106,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'flip', 'inner', 'max', + 'amax', 'mean', 'min', 'nonzero', diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc index 2a1bc5261701..cf92da52d1f8 100644 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cc +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc @@ -161,6 +161,7 @@ inline bool NumpyReduceAxesNoDTypeType(const nnvm::NodeAttrs& attrs, } NNVM_REGISTER_OP(_np_max) +.add_alias("_np_amax") .describe(R"code()code" ADD_FILELINE) .set_num_inputs(1) .set_num_outputs(1) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 6beb26cdf40a..1a31a0d3e4c2 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -791,6 +791,20 @@ def _add_workload_max(array_pool): OpArgMngr.add_workload('max', array_pool['4x1']) +def _add_workload_amax(array_pool): + a = np.array([3, 4, 5, 10, -3, -5, 6.0]) + b = np.array([[3, 6.0, 9.0], + [4, 10.0, 5.0], + [8, 3.0, 2.0]]) + c = np.array(1) + OpArgMngr.add_workload('amax', array_pool['4x1']) + OpArgMngr.add_workload('amax', a) + OpArgMngr.add_workload('amax', b, axis=0) + OpArgMngr.add_workload('amax', b, axis=1) + OpArgMngr.add_workload('amax', c) + OpArgMngr.add_workload('amax', c, axis=None) + + def _add_workload_min(array_pool): OpArgMngr.add_workload('min', array_pool['4x1']) @@ -1554,6 +1568,7 @@ def _prepare_workloads(): 
_add_workload_fix() _add_workload_flip() _add_workload_max(array_pool) + _add_workload_amax(array_pool) _add_workload_min(array_pool) _add_workload_mean(array_pool) _add_workload_nonzero() From 1cfaf3cbfceaa474d4d706a5f63be62b9f06c0a4 Mon Sep 17 00:00:00 2001 From: JiangZhaoh <54654391+JiangZhaoh@users.noreply.github.com> Date: Fri, 27 Dec 2019 11:42:10 +0800 Subject: [PATCH 17/17] [numpy]Add op delete (#17023) * add_op_delete * modify code * reuse existing function / add licenses / modify alignment * add comment / abstarct basic function * fix windows build error - remove '#' from MSHADOW_TYPE_SWITCH --- python/mxnet/ndarray/numpy/_op.py | 63 +++- python/mxnet/numpy/multiarray.py | 51 ++- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/_symbol.py | 41 ++- src/operator/numpy/np_delete_op-inl.h | 347 ++++++++++++++++++ src/operator/numpy/np_delete_op.cc | 98 +++++ src/operator/numpy/np_delete_op.cu | 35 ++ .../unittest/test_numpy_interoperability.py | 22 ++ tests/python/unittest/test_numpy_op.py | 75 ++++ 9 files changed, 730 insertions(+), 3 deletions(-) create mode 100644 src/operator/numpy/np_delete_op-inl.h create mode 100644 src/operator/numpy/np_delete_op.cc create mode 100644 src/operator/numpy/np_delete_op.cu diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index e380b4937168..c9ffa2328872 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -28,7 +28,7 @@ from . import _internal as _npi from ..ndarray import NDArray -__all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', 'invert', +__all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', 'invert', 'delete', 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'bitwise_not', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', @@ -913,6 +913,67 @@ def mod(x1, x2, out=None, **kwargs): return _ufunc_helper(x1, x2, _npi.mod, _np.mod, _npi.mod_scalar, _npi.rmod_scalar, out) +@set_module('mxnet.ndarray.numpy') +def delete(arr, obj, axis=None): + """ + Return a new array with sub-arrays along an axis deleted. For a one + dimensional array, this returns those entries not returned by + `arr[obj]`. + + Parameters + ---------- + arr : ndarray + Input array. + obj : slice, int or ndarray of ints + Indicate indices of sub-arrays to remove along the specified axis. + axis : int, optional + The axis along which to delete the subarray defined by `obj`. + If `axis` is None, `obj` is applied to the flattened array. + + Returns + ------- + out : ndarray + A copy of `arr` with the elements specified by `obj` removed. Note + that `delete` does not occur in-place. If `axis` is None, `out` is + a flattened array. 
+ + Examples + -------- + >>> arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]]) + >>> arr + array([[ 1., 2., 3., 4.], + [ 5., 6., 7., 8.], + [ 9., 10., 11., 12.]]) + + >>> np.delete(arr, 1, 0) + array([[ 1., 2., 3., 4.], + [ 9., 10., 11., 12.]]) + + >>> np.delete(arr, slice(None, None, 2), 1) + array([[ 2., 4.], + [ 6., 8.], + [10., 12.]]) + + >>> np.delete(arr, np.array([1,3,5]), None) + array([ 1., 3., 5., 7., 8., 9., 10., 11., 12.]) + >>> np.delete(arr, np.array([1,1,5]), None) + array([ 1., 3., 4., 5., 7., 8., 9., 10., 11., 12.]) + """ + if not isinstance(arr, NDArray): + raise TypeError("'arr' can not support type {}".format(str(type(arr)))) + if isinstance(obj, slice): + start = obj.start + stop = obj.stop + step = 1 if obj.step is None else obj.step + return _npi.delete(arr, start=start, stop=stop, step=step, axis=axis) + elif isinstance(obj, integer_types): + return _npi.delete(arr, int_ind=obj, axis=axis) + elif isinstance(obj, NDArray): + return _npi.delete(arr, obj, axis=axis) + else: + raise TypeError("'obj' can not support type {}".format(str(type(obj)))) + + @set_module('mxnet.ndarray.numpy') @wrap_np_binary_func def remainder(x1, x2, out=None): diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 22094a1621d2..a870b9a7632e 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -47,7 +47,7 @@ from ..ndarray.ndarray import _storage_type __all__ = ['ndarray', 'empty', 'array', 'shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', - 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'bitwise_not', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'bitwise_not', 'delete', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'invert', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'histogram', @@ -5848,6 +5848,55 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: return _npi.std(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) +@set_module('mxnet.numpy') +def delete(arr, obj, axis=None): + """ + Return a new array with sub-arrays along an axis deleted. For a one + dimensional array, this returns those entries not returned by + `arr[obj]`. + + Parameters + ---------- + arr : ndarray + Input array. + obj : slice, int or ndarray of ints + Indicate indices of sub-arrays to remove along the specified axis. + axis : int, optional + The axis along which to delete the subarray defined by `obj`. + If `axis` is None, `obj` is applied to the flattened array. + + Returns + ------- + out : ndarray + A copy of `arr` with the elements specified by `obj` removed. Note + that `delete` does not occur in-place. If `axis` is None, `out` is + a flattened array. 
+ + Examples + -------- + >>> arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]]) + >>> arr + array([[ 1., 2., 3., 4.], + [ 5., 6., 7., 8.], + [ 9., 10., 11., 12.]]) + + >>> np.delete(arr, 1, 0) + array([[ 1., 2., 3., 4.], + [ 9., 10., 11., 12.]]) + + >>> np.delete(arr, slice(None, None, 2), 1) + array([[ 2., 4.], + [ 6., 8.], + [10., 12.]]) + + >>> np.delete(arr, np.array([1,3,5]), None) + array([ 1., 3., 5., 7., 8., 9., 10., 11., 12.]) + >>> np.delete(arr, np.array([1,1,5]), None) + array([ 1., 3., 4., 5., 7., 8., 9., 10., 11., 12.]) + """ + return _mx_nd_np.delete(arr, obj, axis=axis) + + @set_module('mxnet.numpy') def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=too-many-arguments """ diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 96354684241c..65486e6e5f37 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -129,6 +129,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'transpose', 'unique', 'unravel_index', + 'delete', 'var', 'vdot', 'vstack', diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 0b341b804758..10751cb0d289 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -36,7 +36,7 @@ except ImportError: from builtins import slice as py_slice -__all__ = ['zeros', 'zeros_like', 'ones', 'ones_like', 'full_like', 'bitwise_not', 'invert', +__all__ = ['zeros', 'zeros_like', 'ones', 'ones_like', 'full_like', 'bitwise_not', 'invert', 'delete', 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', @@ -3161,6 +3161,45 @@ def arange(start, stop=None, step=1, dtype=None, ctx=None): return _npi.arange(start=start, stop=stop, step=step, dtype=dtype, ctx=ctx) +@set_module('mxnet.symbol.numpy') +def delete(arr, obj, axis=None): + """ + Return a new array with sub-arrays along an axis deleted. For a one + dimensional array, this returns those entries not returned by + `arr[obj]`. + + Parameters + ---------- + arr : _Symbol + Input array. + obj : slice, scaler or _Symbol of ints + Indicate indices of sub-arrays to remove along the specified axis. + axis : scaler, optional + The axis along which to delete the subarray defined by `obj`. + If `axis` is None, `obj` is applied to the flattened array. + + Returns + ------- + out : _Symbol + A copy of `arr` with the elements specified by `obj` removed. Note + that `delete` does not occur in-place. If `axis` is None, `out` is + a flattened array. 
+ """ + if not isinstance(arr, Symbol): + raise TypeError("'arr' can not support type {}".format(str(type(arr)))) + if isinstance(obj, slice): + start = obj.start + stop = obj.stop + step = 1 if obj.step is None else obj.step + return _npi.delete(arr, start=start, stop=stop, step=step, axis=axis) + elif isinstance(obj, integer_types): + return _npi.delete(arr, int_ind=obj, axis=axis) + elif isinstance(obj, Symbol): + return _npi.delete(arr, obj, axis=axis) + else: + raise TypeError("'obj' can not support type {}".format(str(type(obj)))) + + # pylint: disable=redefined-outer-name @set_module('mxnet.symbol.numpy') def split(ary, indices_or_sections, axis=0): diff --git a/src/operator/numpy/np_delete_op-inl.h b/src/operator/numpy/np_delete_op-inl.h new file mode 100644 index 000000000000..a144833f3294 --- /dev/null +++ b/src/operator/numpy/np_delete_op-inl.h @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_delete_op-inl.h + * \brief Function definition of delete operators + */ +#ifndef MXNET_OPERATOR_NUMPY_NP_DELETE_OP_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_DELETE_OP_INL_H_ + +#include +#include +#include +#include "../../common/utils.h" +#include "../tensor/sort_op.h" +#include "../tensor/init_op.h" +#include "../operator_common.h" +#include "../mxnet_op.h" +#include "../tensor/broadcast_reduce_op.h" +#ifdef __CUDACC__ +#include +#include +#include +#include +#include +#include +#endif + +namespace mxnet { +namespace op { + +struct NumpyDeleteParam : public dmlc::Parameter { + dmlc::optional start; + dmlc::optional stop; + dmlc::optional step; + dmlc::optional int_ind; + dmlc::optional axis; + DMLC_DECLARE_PARAMETER(NumpyDeleteParam) { + DMLC_DECLARE_FIELD(start) + .set_default(dmlc::optional()) + .describe("If 'obj' is slice, 'start' is one of it's arguments."); + DMLC_DECLARE_FIELD(stop) + .set_default(dmlc::optional()) + .describe("If 'obj' is slice, 'stop' is one of it's arguments."); + DMLC_DECLARE_FIELD(step) + .set_default(dmlc::optional()) + .describe("If 'obj' is slice, 'step' is one of it's arguments."); + DMLC_DECLARE_FIELD(int_ind) + .set_default(dmlc::optional()) + .describe("If 'obj' is int, 'int_ind' is the index before which" + "'values' is inserted"); + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional()) + .describe("Axis along which to insert `values`."); + } +}; + +namespace delete_ { + +enum DeleteOpInputs {kArr, kObj}; +enum DeleteOpOutputs {kOut}; +} // namespace delete_ + +struct SliceToIndices { + /*! + * \brief transfer slice to indices array + */ + template + MSHADOW_XINLINE static void Map(int i, IType* indices, int start, int step) { + indices[i] = start + i * step; + } +}; + +struct IsDeleteCal { + /*! 
+ * \brief indicate which indices need to be deleted in input + * \param N used to check indices legality + * \param is_delete if is_delete[i] == False, index i needn't to be deleted from output + * if is_delete[i] == True, index i need to be deleted from output + * \param indices the indices need to be deleted + */ + template + MSHADOW_XINLINE static void Map(int i, int N, bool* is_delete, const IType* indices) { + if ((indices[i] >= 0) && (indices[i] < N)) { + is_delete[static_cast(indices[i])] = true; + } + } +}; + +struct OutPosCal { + /*! + * \brief map the index from input to output. e.g. + * \example original_position 0 1 2 3 4 + * is_delete F T T F F + * out_position 0 - - 1 2 + */ + MSHADOW_XINLINE static void Map(int i, int64_t* out_pos, const bool* is_delete) { + if (!is_delete[i]) { + int cnt = 0; + for (int j = 0; j < i; ++j) { + if (!is_delete[j]) { + cnt++; + } + } + out_pos[i] = cnt; + } + } +}; + +template +struct DeleteKernel { + /*! + * \brief delete a sub-array from input along an axis according to 'is_delete'. + * \param out_data - output: a new array with sub-arrays along an axis deleted. + * \param in_arr - input: 'arr', original array. + * \param is_delete - mark where will be deleted or be reminded in 'arr' + * \param out_pos - if is_delete[i] is 'false', out_pos[i] indicates its. + * \param arrshape - the shape of 'arr'. + * \param out_stride - the stride of 'out_data'. + * \param axis - delete sub-array along this axis + */ + template + MSHADOW_XINLINE static void Map(int i, DType* out_data, + const DType* in_arr, + const bool* is_delete, + const int64_t* out_pos, + const mshadow::Shape arrshape, + const mshadow::Shape out_stride, + const int axis) { + // i -> position in in_arr's shape + mshadow::Shape arr_idx = mxnet_op::unravel(i, arrshape); + if (!is_delete[arr_idx[axis]]) { + arr_idx[axis] = out_pos[arr_idx[axis]]; + int64_t dest_idx = mxnet_op::dot(arr_idx, out_stride); + KERNEL_ASSIGN(out_data[dest_idx], req, in_arr[i]); + } + } +}; + +/*! + * /brief equals to numpy's slice.indices(range) + * /param pstart - slice.start + * /param pstep - slice.step + * /param pstop - slice.stop + * /return start - slice.indices(range).start + * /return stop - slice.indices(range).stop + * /return step - slice.indices(range).step + * /return tot - total number of slice.indices(range) + */ +inline void SliceIndices(const dmlc::optional& pstart, + const dmlc::optional& pstop, + const dmlc::optional& pstep, + const int range, + int* start, int* stop, int* step, + size_t* tot) { + *step = pstep.has_value() ? pstep.value() : 1; + CHECK_NE(*step, 0) << "'step' can not equal to 0."; + if (pstop.has_value()) { + *stop = pstop.value(); + *stop += (*stop < 0) ? range : 0; + *stop = (*stop < 0) ? ((*step < 0) ? -1 : 0) : *stop; + *stop = (*stop >= range) ? ((*step < 0) ? range - 1 : range) : *stop; + } else { + *stop = (*step > 0) ? range : -1; + } + if (pstart.has_value()) { + *start = pstart.value(); + *start += (*start < 0) ? range : 0; + *start = (*start < 0) ? ((*step < 0) ? -1 : 0) : *start; + *start = (*start >= range) ? ((*step < 0) ? range - 1 : range) : *start; + } else { + *start = (*step > 0) ? 
0 : range - 1; + } + if (*step > 0 && *stop >= *start) { + *tot = static_cast((*stop - *start + *step - 1) / *step); + } else if (*step < 0 && *stop <= *start) { + *tot = static_cast((*stop - *start + *step + 1) / *step); + } +} + +template +void NumpyDeleteCompute(const nnvm::NodeAttrs& attrs, + const OpContext &ctx, + const std::vector &inputs, + const std::vector &req, + const std::vector &outputs) { + using namespace mshadow; + using namespace mxnet_op; + + const NumpyDeleteParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(inputs.size(), + (param.step.has_value() || param.int_ind.has_value()) ? 1U : 2U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + + int ndim = inputs[delete_::kArr].shape().ndim(); + int axis = param.axis.has_value() ? param.axis.value() : -1; + NDArray arr; // original array + + if (!param.axis.has_value()) { + arr = inputs[delete_::kArr].Reshape(Shape1(inputs[delete_::kArr].shape().Size())); + ndim = 1; + axis = -1; + } else { + arr = inputs[delete_::kArr]; + } + + if (ndim == 0) { + const_cast(outputs[delete_::kOut]).Init(arr.shape()); + mxnet_op::copy(s, outputs[delete_::kOut].data(), inputs[delete_::kArr].data()); + return; + } + + axis = CheckAxis(axis, ndim); + int N = (arr.shape())[axis]; + mxnet::TShape outshape(arr.shape()); + // if obj is slice, they're obj's arguments + int start = 0, stop = 0, step = 0; + // total number to be deleted + size_t numtodel = 0; + // if obj is scaler, index is it's value + int index = 0; + + if (param.step.has_value()) { // obj is slice + SliceIndices(param.start, param.stop, param.step, + N, &start, &stop, &step, &numtodel); + if (numtodel == 0) { + const_cast(outputs[delete_::kOut]).Init(arr.shape()); + mxnet_op::copy(s, outputs[delete_::kOut].data(), inputs[delete_::kArr].data()); + return; + } + outshape[axis] -= numtodel; + const_cast(outputs[delete_::kOut]).Init(outshape); + } else if (param.int_ind.has_value()) { // obj is scaler + index = param.int_ind.value(); + CHECK((index >= -1 * N) && (index < N)) + << "index " << index + << " is out of bounds for axis " << axis + << " with size " << N << "\n"; + index += ((index < 0) ? N : 0); + numtodel = static_cast(1); + outshape[axis] -= 1; + const_cast(outputs[delete_::kOut]).Init(outshape); + } else { // obj is tensor + numtodel = inputs[delete_::kObj].shape().Size(); + } + + char* out_pos_ptr = NULL; + char* indices_ptr = NULL; + char* is_delete_ptr = NULL; + MSHADOW_TYPE_SWITCH(((inputs.size() == 2U) ? 
// obj is tensor + inputs[delete_::kObj].dtype() : + mshadow::DataType::kFlag), IType, { + size_t temp_mem_size = sizeof(int64_t) * arr.shape()[axis] + + sizeof(IType) * numtodel + + sizeof(bool) * arr.shape()[axis]; + Tensor temp_mem = + ctx.requested[0].get_space_typed(Shape1(temp_mem_size), s); + out_pos_ptr = temp_mem.dptr_; + indices_ptr = out_pos_ptr + sizeof(int64_t) * arr.shape()[axis]; + is_delete_ptr = indices_ptr + sizeof(IType) * numtodel; + if (param.step.has_value()) { // obj is slice, transfer slice to tensor + Kernel::Launch( + s, numtodel, reinterpret_cast(indices_ptr), start, step); + } else if (param.int_ind.has_value()) { // obj is scaler, copy it to tensor + Fill(s, TBlob(reinterpret_cast(indices_ptr), + Shape1(numtodel), xpu::kDevMask), kWriteTo, index); + } else { // obj is tensor, copy it to a unified tensor + mxnet_op::copy(s, + TBlob(reinterpret_cast(indices_ptr), inputs[delete_::kObj].shape(), + inputs[delete_::kObj].data().dev_mask()), + inputs[delete_::kObj].data()); + } + mxnet_op::Kernel::Launch( + s, arr.shape()[axis], reinterpret_cast(is_delete_ptr)); + // mark which position need to be deleted from input arr + Kernel::Launch( + s, numtodel, N, reinterpret_cast(is_delete_ptr), + reinterpret_cast(indices_ptr)); + // calculate output data's original position in input arr + Kernel::Launch( + s, arr.shape()[axis], reinterpret_cast(out_pos_ptr), + reinterpret_cast(is_delete_ptr)); + }); + + if (inputs.size() == 2U) { // obj is tensor + // get total number of nonredundant indices + #ifdef __CUDACC__ + thrust::device_ptris_delete_dev(reinterpret_cast(is_delete_ptr)); + thrust::device_vectorvec_is_delete(is_delete_dev, is_delete_dev + arr.shape()[axis]); + #else + std::vectorvec_is_delete(reinterpret_cast(is_delete_ptr), + reinterpret_cast(is_delete_ptr) + arr.shape()[axis]); + #endif + numtodel = 0; + for (int i = 0; i < arr.shape()[axis]; ++i) { + if (vec_is_delete[i]) { + numtodel++; + } + } + outshape[axis] -= numtodel; + const_cast(outputs[delete_::kOut]).Init(outshape); + } + + MSHADOW_TYPE_SWITCH(((inputs.size() == 2U) ? // obj is tensor + inputs[delete_::kObj].dtype() : + mshadow::DataType::kFlag), IType, { + MXNET_NDIM_SWITCH(outshape.ndim(), ndim, { + mshadow::Shape out_strides = mxnet_op::calc_stride(outshape.get()); + MSHADOW_TYPE_SWITCH(outputs[delete_::kOut].dtype(), DType, { + MXNET_ASSIGN_REQ_SWITCH(req[delete_::kOut], req_type, { + Kernel, xpu>::Launch( + s, arr.shape().Size(), + outputs[delete_::kOut].data().dptr(), + arr.data().dptr(), + reinterpret_cast(is_delete_ptr), + reinterpret_cast(out_pos_ptr), + arr.shape().get(), + out_strides, axis); + }); + }); + }); + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_DELETE_OP_INL_H_ diff --git a/src/operator/numpy/np_delete_op.cc b/src/operator/numpy/np_delete_op.cc new file mode 100644 index 000000000000..48840bf9d230 --- /dev/null +++ b/src/operator/numpy/np_delete_op.cc @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_delete_op.cc + * \brief CPU Implementation of numpy insert operations + */ + +#include +#include "./np_delete_op-inl.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(NumpyDeleteParam); + +bool NumpyDeleteType(const nnvm::NodeAttrs& attrs, + std::vector *in_type, + std::vector *out_type) { + const NumpyDeleteParam& param = nnvm::get(attrs.parsed); + int insize = (param.step.has_value() || param.int_ind.has_value()) ? 1 : 2; + CHECK_EQ(in_type->size(), insize); + CHECK_EQ(out_type->size(), 1U); + if (insize == 3) { + CHECK_NE((*in_type)[1], -1) << "Index type must be set for insert operator\n"; + CHECK(((*in_type)[1] == mshadow::DataType::kFlag) || + ((*in_type)[1] == mshadow::DataType::kFlag)) + << "Index type only support int32 or int64.\n"; + } + TYPE_ASSIGN_CHECK(*out_type, 0, (*in_type)[0]); + TYPE_ASSIGN_CHECK(*in_type, 0, (*out_type)[0]); + return (*in_type)[0] != -1; +} + +inline bool NumpyDeleteStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + const NumpyDeleteParam& param = nnvm::get(attrs.parsed); + unsigned int insize = (param.step.has_value() || param.int_ind.has_value()) ? 1U : 2U; + CHECK_EQ(in_attrs->size(), insize); + CHECK_EQ(out_attrs->size(), 1U); + for (int &attr : *in_attrs) { + CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported"; + } + for (int &attr : *out_attrs) { + attr = kDefaultStorage; + } + *dispatch_mode = DispatchMode::kFComputeEx; + return true; +} + +NNVM_REGISTER_OP(_npi_delete) +.describe(R"code(Delete values along the given axis before the given indices.)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs([](const NodeAttrs& attrs) { + const NumpyDeleteParam& params = nnvm::get(attrs.parsed); + return (params.step.has_value() || params.int_ind.has_value()) ? 1U : 2U; +}) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + const NumpyDeleteParam& params = nnvm::get(attrs.parsed); + return (params.step.has_value() || params.int_ind.has_value()) ? + std::vector{"arr"} : + std::vector{"arr", "obj"}; +}) +.set_attr("FInferType", NumpyDeleteType) +.set_attr("FComputeEx", NumpyDeleteCompute) +.set_attr("FInferStorageType", NumpyDeleteStorageType) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.add_argument("arr", "NDArray-or-Symbol", "Input ndarray") +.add_argument("obj", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(NumpyDeleteParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_delete_op.cu b/src/operator/numpy/np_delete_op.cu new file mode 100644 index 000000000000..599d01788138 --- /dev/null +++ b/src/operator/numpy/np_delete_op.cu @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
diff --git a/src/operator/numpy/np_delete_op.cu b/src/operator/numpy/np_delete_op.cu
new file mode 100644
index 000000000000..599d01788138
--- /dev/null
+++ b/src/operator/numpy/np_delete_op.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file np_delete_op.cu
+ * \brief GPU Implementation of numpy delete operations
+ */
+
+#include "./np_delete_op-inl.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_delete)
+.set_attr<FComputeEx>("FComputeEx", NumpyDeleteCompute<gpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 1a31a0d3e4c2..bb45fead7cd9 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -959,6 +959,27 @@ def _add_workload_unique():
     # OpArgMngr.add_workload('unique', np.arange(10, dtype=np.uint8).reshape(-1, 2).astype(bool), axis=1)
 
 
+def _add_workload_delete():
+    a = np.arange(5)
+    nd_a = np.arange(5).repeat(2).reshape(1, 5, 2)
+    lims = [-6, -2, 0, 1, 2, 4, 5]
+    steps = [-3, -1, 1, 3]
+    for start in lims:
+        for stop in lims:
+            for step in steps:
+                s = slice(start, stop, step)
+                OpArgMngr.add_workload('delete', a, s)
+                OpArgMngr.add_workload('delete', nd_a, s, axis=1)
+    OpArgMngr.add_workload('delete', a, np.array([]), axis=0)
+    OpArgMngr.add_workload('delete', a, 0)
+    OpArgMngr.add_workload('delete', a, np.array([]))
+    OpArgMngr.add_workload('delete', a, np.array([0, 1]))
+    OpArgMngr.add_workload('delete', a, slice(1, 2))
+    OpArgMngr.add_workload('delete', a, slice(1, -2))
+    k = np.arange(10).reshape(2, 5)
+    OpArgMngr.add_workload('delete', k, slice(60, None), axis=1)
+
+
 def _add_workload_var(array_pool):
     OpArgMngr.add_workload('var', array_pool['4x1'])
     OpArgMngr.add_workload('var', np.array([np.float16(1.)]))
@@ -1590,6 +1611,7 @@ def _prepare_workloads():
     _add_workload_tile()
     _add_workload_transpose()
     _add_workload_unique()
+    _add_workload_delete()
     _add_workload_var(array_pool)
     _add_workload_zeros_like(array_pool)
     _add_workload_linalg_norm()
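The 'delete' workloads registered above are exercised through NumPy's __array_function__ dispatch: with the pinned numpy 1.17.x (or 1.16 with the experimental flag enabled), calling the official numpy function on MXNet ndarrays should route to MXNet's implementation, so the results can be compared against NumPy's own output. A rough illustration of what a single workload amounts to, assuming dispatch is enabled in the test environment:

    import numpy as onp
    from mxnet import np, npx
    npx.set_np()

    a = np.arange(5)
    mx_res = onp.delete(a, slice(1, 3))            # dispatched to MXNet's delete via __array_function__
    np_res = onp.delete(a.asnumpy(), slice(1, 3))  # plain NumPy reference
    assert mx_res.shape == np_res.shape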
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 37cdaae328ee..9f746d4dd47c 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -2832,6 +2832,81 @@ def hybrid_forward(self, F, x):
     assert same(ret_mx.asnumpy(), ret_np)
 
 
+@with_seed()
+@use_np
+def test_np_delete():
+    class TestDelete(HybridBlock):
+        def __init__(self, obj, axis=None):
+            super(TestDelete, self).__init__()
+            self._obj = obj
+            self._axis = axis
+
+        def hybrid_forward(self, F, a):
+            return F.np.delete(a, self._obj, axis=self._axis)
+
+    def GetSize(shp):
+        if len(shp) == 0:
+            return 0
+        else:
+            res = 1
+            shp_list = list(shp)
+            for x in shp:
+                res *= x
+            return res
+
+    def GetDimSize(shp, axis):
+        if axis is None:
+            return GetSize(shp)
+        shp_list = list(shp)
+        return shp_list[axis]
+
+    shape = [(), (0, ), (1, ), (2, 3), (2, 1, 4, 5)]
+    config = []
+    for shp in shape:
+        for ax in range(-1 * len(shp), len(shp), 2):
+            # test slice
+            for st in [-5, -2, 0, 2, 5, None]:
+                for ed in [-5, -2, 0, 2, 5, None]:
+                    for stp in [-5, -2, 2, 5, None]:
+                        config.append(tuple([shp, slice(st, ed, stp), None]))
+                        config.append(tuple([shp, slice(st, ed, stp), ax]))
+            # test integer
+            for idx in range(-1 * GetDimSize(shp, ax), GetDimSize(shp, ax)):
+                config.append(tuple([shp, idx, ax]))
+            # test ndarray indices
+            idx = _np.random.randint(-1 * shp[ax], shp[ax] + 1, size=4).tolist()
+            config.append(tuple([shp, idx, ax]))
+
+    for arr_shape, obj, axis in config:
+        for objtype in ['int32', 'int64']:
+            if type(obj) == list:
+                obj_mxnp = np.array(obj, dtype=objtype)
+                obj_onp = _np.array(obj, dtype=objtype)
+            elif type(obj) == slice:
+                obj_mxnp = obj
+                obj_onp = obj
+            else:
+                obj_mxnp = (_np.int32(obj) if objtype == 'int32' else _np.int64(obj))
+                obj_onp = (_np.int32(obj) if objtype == 'int32' else _np.int64(obj))
+            test_delete = TestDelete(obj=obj_mxnp, axis=axis)
+
+            a = mx.nd.random.uniform(-1.0, 1.0, shape=arr_shape).as_np_ndarray()
+            a.attach_grad()
+            expected_ret = _np.delete(a.asnumpy(), obj_onp, axis=axis)
+
+            with mx.autograd.record():
+                y = test_delete(a)
+
+            assert y.shape == expected_ret.shape
+            assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5)
+
+            # test imperative
+            mx_out = np.delete(a, obj_mxnp, axis=axis)
+            np_out = _np.delete(a.asnumpy(), obj_onp, axis=axis)
+
+            assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
+
+
 @with_seed()
 @use_np
 def test_np_argmin_argmax():