From 212a1a58938999c38ef0cdbf797e0f032b71fbc4 Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava
Date: Wed, 14 Aug 2019 22:39:11 +0000
Subject: [PATCH 1/5] Adding tests to verify support for Large Tensors in
 additional Ops along with new C_Apis supporting 64bit indexing

---
 tests/nightly/test_large_vector.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 64bfa8a1d3e9..f8592535a24a 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -25,6 +25,7 @@
 # dimension constants
 LARGE_X = 5000000000
 MEDIUM_X = 1000000000
+SMALL_Y = 1
 
 
 def test_slice():

From 2ad5d4bbe90ebf2d61ca7ac50e62e1240307a0bc Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava
Date: Wed, 21 Aug 2019 02:47:53 +0000
Subject: [PATCH 2/5] removing skipped tests

---
 tests/nightly/test_large_vector.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index f8592535a24a..5516ed6076bd 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -25,6 +25,7 @@
 # dimension constants
 LARGE_X = 5000000000
 MEDIUM_X = 1000000000
+LARGE_Y = 100000
 SMALL_Y = 1
 
 

From 0f6dab872bb145c1ae7e1f3b0d8f37f8b8cac0e7 Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava
Date: Fri, 23 Aug 2019 22:04:12 +0000
Subject: [PATCH 3/5] removing tests not required for vector testing

---
 tests/nightly/test_large_vector.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 5516ed6076bd..64bfa8a1d3e9 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -25,8 +25,6 @@
 # dimension constants
 LARGE_X = 5000000000
 MEDIUM_X = 1000000000
-LARGE_Y = 100000
-SMALL_Y = 1
 
 
 def test_slice():

From ca83e3e311afb5fdfcb35fb18c122eb40e4fa703 Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava
Date: Wed, 21 Aug 2019 08:22:35 +0000
Subject: [PATCH 4/5] Adding more tests for Large Indices and adding support
 for Large Indices in one_hot operator

---
 src/ndarray/ndarray_function.cc     |   2 +-
 src/operator/tensor/indexing_op.h   |  18 +--
 src/operator/tensor/matrix_op-inl.h |   4 +-
 tests/nightly/test_large_vector.py  | 222 +++++++++++++++++++++++++++
 4 files changed, 233 insertions(+), 13 deletions(-)

diff --git a/src/ndarray/ndarray_function.cc b/src/ndarray/ndarray_function.cc
index 335856356534..1a699b12d76d 100644
--- a/src/ndarray/ndarray_function.cc
+++ b/src/ndarray/ndarray_function.cc
@@ -38,7 +38,7 @@ void Copy<cpu, cpu>(const TBlob &from, TBlob *to, RunContext ctx) {
   MSHADOW_TYPE_SWITCH(to->type_flag_, DType, {
     if (to->type_flag_ == from.type_flag_) {
-      const index_t size = from.Size();
+      const index_t size = static_cast<index_t>(from.Size());
       CHECK_EQ(size, to->Size()) << "copying size mismatch, from: " << size * sizeof(DType)
                                  << " bytes, to: " << to->Size() * sizeof(DType) << " bytes.";
       common::ParallelCopy(to->dptr<DType>(), from.dptr<DType>(), size);
     } else {
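[Editor's note, not part of the patch] The ndarray_function.cc hunk above widens the element count passed to common::ParallelCopy to index_t, so a same-dtype copy of more than 2**31 - 1 elements no longer truncates. A minimal sketch of what this enables, assuming MXNet is built with 64-bit tensor size support (the USE_INT64_TENSOR_SIZE build flag) and roughly 10 GB of free memory:

    import mxnet as mx
    import numpy as np

    LARGE_X = 5000000000                     # > 2**31 - 1 elements
    a = mx.nd.ones(LARGE_X, dtype=np.int8)   # int8 keeps this near 5 GB
    b = a.copy()                             # same dtype, ParallelCopy path
    assert b.shape == a.shape and b.size == LARGE_X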
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 84b6a65dd29e..7dfc77d25b27 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1133,7 +1133,7 @@ void BatchTakeOpForward(const nnvm::NodeAttrs& attrs,
  * \brief The parameters of the one_hot operator.
  */
 struct OneHotParam : public dmlc::Parameter<OneHotParam> {
-  int depth;
+  index_t depth;
   double on_value;
   double off_value;
   int axis;
@@ -1153,7 +1153,7 @@ struct OneHotParam : public dmlc::Parameter<OneHotParam> {
   }
 };
 
-inline void GetOneHotParams(const OneHotParam& param, int* depth, double* on_value,
+inline void GetOneHotParams(const OneHotParam& param, index_t* depth, double* on_value,
                             double* off_value, int* dtype) {
   *depth = param.depth;
   CHECK_GE(*depth, 0) << "Dimension size, depth, must be a non-negative integer";
@@ -1172,7 +1172,7 @@ inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs,
   const mxnet::TShape& ishape = (*in_attrs)[0];
   if (!shape_is_known(ishape)) return false;
 
-  int depth = 0;
+  index_t depth = 0;
   double on_value = 1.0;
   double off_value = 0.0;
   int dtype = mshadow::kFloat32;
@@ -1193,7 +1193,7 @@ inline bool OneHotOpType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   CHECK_NE((*in_attrs)[0], -1) << "Index type must be set for one_hot operator";
-  int depth = 0;
+  index_t depth = 0;
   double on_value = 1.0;
   double off_value = 0.0;
   int dtype = -1;
@@ -1207,10 +1207,10 @@ inline bool OneHotOpType(const nnvm::NodeAttrs& attrs,
 template<int req>
 struct one_hot {
   template<typename DType, typename IType>
-  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indices,
-                                  int depth, DType on_value) {
-    int offset = i * depth;
-    int j = static_cast<int>(indices[i]);
+  MSHADOW_XINLINE static void Map(index_t i, DType* out, const IType* indices,
+                                  index_t depth, DType on_value) {
+    index_t offset = i * depth;
+    index_t j = static_cast<index_t>(indices[i]);
     if (j >= 0 && j < depth) {
       KERNEL_ASSIGN(out[offset+j], req, on_value);
     }
@@ -1229,7 +1229,7 @@ void OneHotOpForward(const nnvm::NodeAttrs& attrs,
   // The following line is needed to guard the situation when
   // an output array is empty on GPU. In that case, out.dptr() = 0x0
   if (outputs[0].Size() == 0) return;
-  int depth = 0;
+  index_t depth = 0;
   double on_value = 1.0;
   double off_value = 0.0;
   int dtype = mshadow::kFloat32;
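[Editor's note, not part of the patch] In the one_hot kernel above, the flat output offset is i * depth; with 32-bit int that product overflows as soon as the row index times depth passes 2**31 - 1, which is why i, depth, offset, and j all become index_t. A hedged usage sketch (illustrative only: a two-row float32 output at this depth is roughly 24 GB):

    import mxnet as mx
    import numpy as np

    depth = 3000000000                        # depth > 2**31 - 1
    idx = mx.nd.array([0, depth - 1], dtype=np.int64)
    out = mx.nd.one_hot(idx, depth)
    # row 1 writes at flat offset 1 * depth + (depth - 1), which only
    # fits in 64-bit index arithmetic
    assert out[1][depth - 1] == 1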
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 0d66907ad6cd..fa5f93cefc1a 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -1148,8 +1148,8 @@ void SliceAssignScalarOpForward(const nnvm::NodeAttrs& attrs,
 
 struct SliceAxisParam : public dmlc::Parameter<SliceAxisParam> {
   int axis;
-  int begin;
-  dmlc::optional<int> end;
+  index_t begin;
+  dmlc::optional<index_t> end;
   DMLC_DECLARE_PARAMETER(SliceAxisParam) {
     DMLC_DECLARE_FIELD(axis)
     .describe("Axis along which to be sliced, supports negative indexes.");
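[Editor's note, not part of the patch] The SliceAxisParam hunk above widens begin and end from int to index_t so slice_axis can address positions beyond 2**31 - 1 along one axis. A small sketch under the same large-tensor build assumption (int8 input, about 5 GB):

    import mxnet as mx
    import numpy as np

    LARGE_X = 5000000000
    a = mx.nd.zeros(LARGE_X, dtype=np.int8)
    # begin/end beyond the old 32-bit int range
    s = mx.nd.slice_axis(a, axis=0, begin=2500000000, end=5000000000)
    assert s.shape[0] == LARGE_X // 2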
diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 64bfa8a1d3e9..4e1b48c8d047 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -148,7 +148,6 @@ def test_Dense(ctx=mx.cpu(0)):
 
 def test_argsort():
     b = create_vector(size=LARGE_X)
     s = nd.argsort(b, axis=0, is_ascend=False, dtype=np.int64)
-    mx.nd.waitall()
     assert (s[0].asnumpy() == (LARGE_X - 1)).all()
 
@@ -170,6 +169,227 @@ def test_topk():
     assert val.sum() == (LARGE_X - 1)
 
 
+def test_shape():
+    b = create_vector(size=LARGE_X)
+    # explicit wait_to_read() via the scalar read below
+    assert b[0] == 0
+    assert b.shape[0] == LARGE_X
+
+
+def test_size():
+    b = create_vector(size=LARGE_X)
+    # explicit wait_to_read() via the scalar read below
+    assert b[0] == 0
+    assert b.size == LARGE_X
+
+
+def test_copy():
+    a = nd.ones(LARGE_X)
+    b = a.copy()
+    assert a[0] == b[0]
+    assert b.shape == a.shape
+    assert b.size == LARGE_X
+
+
+def test_copy_to():
+    a = create_vector(size=LARGE_X)
+    # keeping dtype same as input uses parallel copy which is much faster
+    b = nd.zeros(LARGE_X, dtype=np.int64)
+    c = a.copyto(b)
+    assert c is b
+    assert b[-1] == LARGE_X - 1
+    assert b[0] == 0
+
+
+def test_zeros_like():
+    a = nd.ones(LARGE_X)
+    b = nd.zeros_like(a)
+    assert b[-1] == 0
+    assert b.shape == a.shape
+
+
+def test_ones_like():
+    a = nd.zeros(LARGE_X)
+    b = nd.ones_like(a)
+    assert b[-1] == 1
+    assert b.shape == a.shape
+
+
+def test_concat():
+    a = nd.ones(LARGE_X)
+    b = nd.zeros(LARGE_X)
+    c = nd.concat(a, b, dim=0)
+    assert c[0][0] == 1
+    assert c[-1][-1] == 0
+    assert c.shape[0] == (2 * LARGE_X)
+
+
+def test_sum():
+    a = nd.ones(LARGE_X)
+    b = nd.sum(a, axis=0)
+    assert b[0] == LARGE_X
+
+
+def test_prod():
+    a = nd.ones(LARGE_X)
+    b = nd.prod(a, axis=0)
+    assert b[0] == 1
+
+
+def test_min():
+    a = create_vector(size=LARGE_X)
+    b = nd.min(a, axis=0)
+    assert b[0] == 0
+    assert b[-1] == 0
+
+
+def test_max():
+    a = create_vector(size=LARGE_X)
+    b = nd.max(a, axis=0)
+    assert b[0] == (LARGE_X - 1)
+
+
+def test_argmax():
+    a = nd.ones(LARGE_X)
+    b = nd.zeros(LARGE_X)
+    c = nd.concat(a, b, dim=0)
+    d = nd.argmax(c, axis=0)
+    assert c.shape[0] == (2 * LARGE_X)
+    assert d == 0
+
+
+def np_softmax(x, axis=-1, temperature=1.0):
+    x = x - np.max(x, axis=axis, keepdims=True)
+    x = np.exp(x / temperature)
+    x /= np.sum(x, axis=axis, keepdims=True)
+    return x
+
+
+def test_iadd():
+    a = nd.ones(LARGE_X)
+    b = nd.ones(LARGE_X)
+    c = b
+    c += a
+    assert c.shape == a.shape
+    assert c[-1] == 2
+
+
+def test_isub():
+    a = nd.full(LARGE_X, 3)
+    b = nd.ones(LARGE_X)
+    c = a
+    c -= b
+    assert c.shape == a.shape
+    assert c[-1] == 2
+
+
+def test_imul():
+    a = nd.full(LARGE_X, 3)
+    b = nd.ones(LARGE_X)
+    c = b
+    c *= a
+    assert c.shape == a.shape
+    assert c[-1] == 3
+
+
+def test_idiv():
+    a = nd.full(LARGE_X, 4)
+    b = nd.full(LARGE_X, 2)
+    c = a
+    c /= b
+    assert c.shape == a.shape
+    assert c[-1] == 2
+
+
+def test_imod():
+    a = nd.full(LARGE_X, 3)
+    b = nd.full(LARGE_X, 2)
+    c = a
+    c %= b
+    assert c.shape == a.shape
+    assert c[-1] == 1
+
+
+def test_eq():
+    a = nd.full(LARGE_X, 3)
+    b = nd.full(LARGE_X, 3)
+    c = (a == b)
+    assert np.sum(c[0].asnumpy() == 1).all()
+
+
+def test_neq():
+    a = nd.full(LARGE_X, 2)
+    b = nd.full(LARGE_X, 3)
+    c = (a != b)
+    assert np.sum(c[0].asnumpy() == 1).all()
+
+
+def test_lt():
+    a = nd.full(LARGE_X, 2)
+    b = nd.full(LARGE_X, 3)
+    d = (a < b)
+    assert np.sum(d[0].asnumpy() == 1).all()
+
+
+def test_lte():
+    a = nd.full(LARGE_X, 2)
+    b = nd.full(LARGE_X, 3)
+    c = nd.full(LARGE_X, 2)
+    d = (a <= b)
+    assert np.sum(d[0].asnumpy() == 1).all()
+    d = (a <= c)
+    assert np.sum(d[0].asnumpy() == 1).all()
+
+
+def test_gt():
+    a = nd.full(LARGE_X, 3)
+    b = nd.full(LARGE_X, 2)
+    d = (a > b)
+    assert np.sum(d[0].asnumpy() == 1).all()
+
+
+def test_gte():
+    a = nd.full(LARGE_X, 3)
+    b = nd.full(LARGE_X, 2)
+    c = nd.full(LARGE_X, 3)
+    d = (a >= b)
+    assert np.sum(d[0].asnumpy() == 1).all()
+    d = (a >= c)
+    assert np.sum(d[0].asnumpy() == 1).all()
+
+
+def test_slice_like():
+    a = create_vector(size=LARGE_X)
+    b = nd.ones(LARGE_X // 2)
+    c = nd.slice_like(a, b)
+    assert c.shape == b.shape
+    assert c[0] == 0
+    assert c[-1] == (LARGE_X // 2 - 1)
+
+
+def test_slice_axis():
+    a = create_vector(size=LARGE_X)
+    c = nd.slice_axis(a, axis=0, begin=0, end=LARGE_X // 2)
+    assert c.shape[0] == a.shape[0] // 2
+    assert c[-1] == (LARGE_X // 2 - 1)
+
+
+def test_full():
+    a = nd.full(LARGE_X, 3)
+    assert a.shape[0] == LARGE_X
+    assert a[LARGE_X // 2] == 3
+    assert a[-1] == 3
+
+
+def test_one_hot():
+    a = nd.zeros(10)
+    a[0] = 1
+    a[-1] = 1
+    b = nd.one_hot(a, LARGE_X)
+    assert b[0][1] == 1
+    assert b[-1][1] == 1
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()

From b5241d2df136f146c9967a5531e6aa4c1f5088a4 Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava
Date: Fri, 30 Aug 2019 17:16:18 +0000
Subject: [PATCH 5/5] Re-Trigger build
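[Editor's note, not part of the patch] The new tests share one pattern: build a LARGE_X = 5000000000 element vector, apply the operator, and spot-check single elements, since scalar reads such as b[-1] avoid pulling the full multi-gigabyte result into Python. A hedged end-to-end check in the same spirit; mx.nd.arange stands in here for the create_vector helper the test file imports, which is assumed to produce the values 0..LARGE_X-1:

    import mxnet as mx
    import numpy as np

    LARGE_X = 5000000000
    a = mx.nd.arange(0, LARGE_X, dtype=np.int64)   # int64, roughly 40 GB
    assert a[-1] == LARGE_X - 1                    # scalar read only
    s = mx.nd.slice_axis(a, axis=0, begin=0, end=LARGE_X // 2)
    assert s[-1] == LARGE_X // 2 - 1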