From c1f2cd0c033e68efe78440dde768cd3738c29018 Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava
Date: Thu, 22 Aug 2019 06:49:37 +0000
Subject: [PATCH] Enable large index support for slice and softmax

---
 src/operator/softmax_output-inl.h   | 18 +++++++-------
 src/operator/tensor/matrix_op-inl.h |  6 ++---
 tests/nightly/test_large_vector.py  | 37 ++++++++++-------------------
 3 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index 80ab40ef6c50..db8676c028e4 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -117,9 +117,9 @@ class SoftmaxOutputOp : public Operator {
     CHECK_EQ(out_data.size(), 1U) << "SoftmaxOutput Output: [output]";
     Stream<xpu> *s = ctx.get_stream<xpu>();
     if (param_.multi_output) {
-      int n = in_data[softmaxout_enum::kData].size(0);
-      int k = in_data[softmaxout_enum::kData].size(1);
-      Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data[softmaxout_enum::kData].Size()/n/k));
+      index_t n = in_data[softmaxout_enum::kData].size(0);
+      index_t k = in_data[softmaxout_enum::kData].size(1);
+      Shape<3> s3 = Shape3(n, k, static_cast<index_t>(in_data[softmaxout_enum::kData].Size()/n/k));
       Tensor<xpu, 3, DType> data =
           in_data[softmaxout_enum::kData].get_with_shape<xpu, 3, DType>(s3, s);
       Tensor<xpu, 3, DType> out =
@@ -131,8 +131,8 @@ class SoftmaxOutputOp : public Operator {
       Tensor<xpu, 2, DType> out = out_data[softmaxout_enum::kOut].FlatTo2D<xpu, DType>(s);
       Softmax(out, data);
     } else {
-      int n = in_data[softmaxout_enum::kData].size(0);
-      int k = in_data[softmaxout_enum::kData].Size()/n;
+      index_t n = in_data[softmaxout_enum::kData].size(0);
+      index_t k = in_data[softmaxout_enum::kData].Size()/n;
       Shape<2> s2 = Shape2(n, k);
       Tensor<xpu, 2, DType> data =
           in_data[softmaxout_enum::kData].get_with_shape<xpu, 2, DType>(s2, s);
@@ -171,9 +171,9 @@ class SoftmaxOutputOp : public Operator {
         grad = (out - label) * scalar<DType>(param_.grad_scale);
       }
     } else if (param_.multi_output) {
-      int n = out_data[softmaxout_enum::kOut].size(0);
-      int k = out_data[softmaxout_enum::kOut].size(1);
-      Shape<3> s3 = Shape3(n, k, static_cast<int>(out_data[softmaxout_enum::kOut].Size()/n/k));
+      index_t n = out_data[softmaxout_enum::kOut].size(0);
+      index_t k = out_data[softmaxout_enum::kOut].size(1);
+      Shape<3> s3 = Shape3(n, k, static_cast<index_t>(out_data[softmaxout_enum::kOut].Size()/n/k));
       Shape<2> s2 = Shape2(s3[0], s3[2]);
       Tensor<xpu, 2, DType> label =
           in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 2, DType>(s2, s);
@@ -224,7 +224,7 @@ class SoftmaxOutputOp : public Operator {
       // Tensor<xpu, 2, DType> out = out_data[softmaxout_enum::kOut].FlatTo2D<xpu, DType>(s);
       // Tensor<xpu, 2, DType> grad = in_grad[softmaxout_enum::kData].FlatTo2D<xpu, DType>(s);
     } else {
-      int n = out_data[softmaxout_enum::kOut].size(0);
+      index_t n = out_data[softmaxout_enum::kOut].size(0);
       data_shape = Shape2(n, out_data[softmaxout_enum::kOut].Size()/n);
     }
     Tensor<xpu, 2, DType> label = in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 2, DType>(
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 611dd7287206..58a535353e10 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -732,8 +732,8 @@ inline void GetIndexRange(const mxnet::TShape& dshape,
 }
 
 inline void SetSliceOpOutputDimSize(const mxnet::TShape& dshape,
-                                    const index_t i, const int b,
-                                    const int e, const int s,
+                                    const index_t i, const index_t b,
+                                    const index_t e, const index_t s,
                                     mxnet::TShape* oshape) {
   if (!mxnet::dim_size_is_known(dshape, i)) {
     (*oshape)[i] = -1;
@@ -765,7 +765,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs,
     common::StaticArray<index_t, ndim> begin, end, step;
     GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step);
     for (int i = 0; i < param.begin.ndim(); ++i) {
-      const int b = begin[i], e = end[i], s = step[i];
+      const index_t b = begin[i], e = end[i], s = step[i];
       SetSliceOpOutputDimSize(dshape, i, b, e, s, &oshape);
     }
   })
diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index b45d51d9f1fa..02f05332893f 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 import mxnet as mx
-from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d
+from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, default_context
 from mxnet import gluon, nd
 from tests.python.unittest.common import with_seed
 
@@ -85,11 +85,11 @@ def test_elementwise():
     a = nd.ones(shape=LARGE_X)
     b = nd.ones(shape=LARGE_X)
     res = a + b
-    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
+    assert res[-1].asnumpy() == 2
     res = a + 1
-    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
-    res = nd.sqrt(a + 3)
-    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
+    assert res[-1].asnumpy() == 2
+    res = nd.sqrt(a + 8)
+    assert res[-1].asnumpy() == 3
 
 
 def test_reduce():
@@ -97,13 +97,6 @@ def test_reduce():
     assert nd.sum(a).asnumpy() == a.shape[0] * a.shape[1]
 
 
-def test_FullyConnected():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.ones(shape=(SMALL_Y, SMALL_Y))
-    res = nd.FullyConnected(a, b, num_hidden=b.shape[1], no_bias=True)
-    assert np.sum(res[-1].asnumpy() == SMALL_Y) == b.shape[1]
-
-
 def test_broadcast():
     a = nd.ones(shape=(LARGE_X, SMALL_Y*2))
     b = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
@@ -116,7 +109,7 @@ def test_clip():
     a = nd.arange(0, LARGE_X)
     res = nd.clip(a, a_min=100, a_max=1000)
-    assert np.sum(res[-1].asnumpy() == 1000) == 101
+    assert np.sum(res[-1].asnumpy() == 1000) == 1
 
 
 def test_argmin():
@@ -139,12 +132,6 @@ def test_take():
     assert np.sum(res.asnumpy() == 1) == res.shape[0]
 
 
-def test_slice():
-    a = nd.ones(shape=(2, LARGE_X))
-    res = nd.slice(a, begin=(1, LARGE_X-1000000000), end=(2, LARGE_X))
-    assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
-
-
 def test_slice_assign():
     a = nd.ones(shape=LARGE_X)
     a[LARGE_X-1:LARGE_X] = 1000
@@ -285,16 +272,16 @@ def test_swapaxes():
 
 def test_flip():
     b = nd.arange(0, LARGE_X, dtype=np.int64).reshape(1, LARGE_X)
-    t = nd.flip(b, axis=0)
-    assert t.shape == (LARGE_X, 1)
-    assert t[-1, :].asnumpy() == 0
+    t = nd.flip(b, axis=1)
+    assert t.shape == (1, LARGE_X)
+    assert t[-1, -1].asnumpy() == 0
 
 
 def test_softmax():
-    input_data = mx.nd.ones(2, LARGE_X)
-    true_output = np.full(LARGE_X, 0.5)
+    input_data = nd.ones((2, LARGE_X))
     output = nd.softmax(input_data, axis=0)
-    assert_almost_equal(output.asnumpy(), true_output, rtol=1e-5, atol=1e-5)
+    assert output[0][0] == 0.5
+    assert output[-1][-1] == 0.5
 
 
 def test_argsort():
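
Note (not part of the patch): a minimal sketch of how the slice and softmax paths touched above can be exercised once large index support is enabled. It assumes an MXNet build compiled with int64 tensor support (e.g. USE_INT64_TENSOR_SIZE=1) and enough memory to hold the vector; the LARGE_X constant below is illustrative and only needs to exceed 2**32, similar to the constant used in tests/nightly/test_large_vector.py.

    # Illustrative usage only; assumes a large-tensor-enabled MXNet build.
    from mxnet import nd

    LARGE_X = 4300000000  # > 2**32 elements (illustrative value)

    a = nd.ones(shape=LARGE_X)
    # slice: begin/end indices beyond 2**31 now flow through index_t
    res = nd.slice(a, begin=(LARGE_X - 1000,), end=(LARGE_X,))
    assert res.shape[0] == 1000

    # softmax along axis 0 of a (2, LARGE_X) input: each column of ones maps to 0.5
    data = nd.ones((2, LARGE_X))
    out = nd.softmax(data, axis=0)
    assert out[0][0] == 0.5
    assert out[-1][-1] == 0.5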