From d0804b156bf8c354a512fdc5e75fef397f4a0849 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 26 Apr 2020 17:33:45 -0700
Subject: [PATCH 01/15] update nvidiadocker command & remove cuda compat

---
 ci/build.py                       | 3 ++-
 ci/docker/Dockerfile.build.ubuntu | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 323a4487fafc..72dfef7c5876 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -222,8 +222,9 @@ def container_run(docker_client: SafeDockerClient,
 
     # Equivalent command
     docker_cmd_list = [
-        "nvidia-docker" if nvidia_runtime else "docker",
+        "docker",
         'run',
+        "--gpus all" if nvidia_runtime else "",
         "--cap-add",
         "SYS_PTRACE", # Required by ASAN
         '--rm',
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index c44a5cce8138..63a4fd3a005d 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -164,5 +164,4 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 
 FROM gpu as gpuwithcompatenv
 # TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
-# This should be fixed and deleted.
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
\ No newline at end of file
+# This should be fixed and deleted.
\ No newline at end of file

From 715078a3d8dc47e34dc6dbdbeb0499af75b8f8fb Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Mon, 27 Apr 2020 19:52:24 -0700
Subject: [PATCH 02/15] replace ubuntu_gpu_cu101 with ubuntu_build_cuda since
 cuda compat is no longer used

---
 ci/jenkins/Jenkins_steps.groovy | 36 ++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 28d76beabab2..b7c2763f7ec2 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu() {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib)
           }
         }
@@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() {
         ws('workspace/build-cmake-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false)
             utils.pack_lib('cmake_gpu', mx_cmake_lib_cython)
           }
         }
@@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() {
         ws('workspace/build-cmake-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
           }
         }
       }
@@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() {
             ws('workspace/build-cmake-gpu-no-rtc') {
                 timeout(time: max_time, unit: 'MINUTES') {
                     utils.init_git()
-                    utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                    utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false)
                 }
             }
         }
@@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() {
         ws('workspace/build-cpu-clang10') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false)
             utils.pack_lib('gpu_clang10', mx_lib)
           }
         }
@@ -831,7 +831,7 @@ def test_unix_python3_gpu() {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
+            python3_gpu_ut_cython('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() {
         ws('workspace/ut-python3-gpu-no-tvm-op') {
           try {
             utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
+            python3_gpu_ut_cython('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
               utils.unpack_and_init('gpu', mx_lib)
-              utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
+              utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true)
               utils.publish_test_coverage()
             } finally {
               utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
@@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
-            python3_gpu_ut('ubuntu_gpu_cu101')
+            python3_gpu_ut('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
-            python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
+            python3_gpu_ut_nocudnn('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true)
             utils.publish_test_coverage()
           }
         }
@@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true)
             utils.publish_test_coverage()
           }
         }
@@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true)
             utils.publish_test_coverage()
           }
         }
@@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true)
             utils.publish_test_coverage()
           }
         }
@@ -1192,7 +1192,7 @@ def test_unix_r_gpu() {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }
@@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
             utils.publish_test_coverage()
           }
         }

From 9ad16b7fae5f7559ceb96a0eb8b1e4e2dfac59e0 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Tue, 28 Apr 2020 08:42:51 -0700
Subject: [PATCH 03/15] skip flaky tests

---
 tests/python/unittest/test_numpy_interoperability.py | 2 ++
 tests/python/unittest/test_numpy_ndarray.py          | 1 +
 tests/python/unittest/test_numpy_op.py               | 4 ++++
 3 files changed, 7 insertions(+)

diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 9e94ad303afb..18a54eab6a94 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -3250,6 +3250,7 @@ def test_np_memory_array_function():
         assert op(data_mx, np.ones((5, 0))) == op(data_np, _np.ones((5, 0)))
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_function_protocol
@@ -3257,6 +3258,7 @@ def test_np_array_function_protocol():
     check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_ufunc_protocol
diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py
index 98267e9bc650..15607c54781a 100644
--- a/tests/python/unittest/test_numpy_ndarray.py
+++ b/tests/python/unittest/test_numpy_ndarray.py
@@ -259,6 +259,7 @@ def check_identity_array_creation(shape, dtype):
                 assert type(y[1]) == np.ndarray
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 def test_np_ndarray_binary_element_wise_ops():
     np_op_map = {
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 6a1618e05f2d..2cf63ff0587f 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -636,6 +636,7 @@ def hybrid_forward(self, F, a, b):
                 assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_sum():
@@ -1029,6 +1030,7 @@ def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None):
         assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_mean():
@@ -2220,6 +2222,7 @@ def hybrid_forward(self, F, x):
                 assert same(ret_mx.asnumpy(), ret_np)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_unary_funcs():
@@ -2442,6 +2445,7 @@ def hybrid_forward(self, F, a, *args, **kwargs):
             check_unary_func(func, ref_grad, shape, low, high)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_binary_funcs():

From f4ab73084b531e0303e7ad72aa09eb8d629f91d5 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Tue, 28 Apr 2020 15:56:44 -0700
Subject: [PATCH 04/15] get rid of ubuntu_build_cuda and point ubuntu_cu101 to
 base gpu instead of cuda compat

---
 ci/build.py                          |  2 +-
 ci/docker/Dockerfile.build.ubuntu    |  5 ---
 ci/docker/docker-compose.yml         | 15 ++-------
 ci/jenkins/Jenkins_steps.groovy      | 48 ++++++++++++++--------------
 tests/nightly/JenkinsfileForBinaries |  2 +-
 5 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 83975e674ad8..394abb0d7d6f 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -45,7 +45,7 @@
 # NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose
 DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100',
                             'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu',
-                            'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
+                            'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
                             'publish.test.centos7_gpu')
 # Files for docker compose
 DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/publish.test.centos7'))
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index 63a4fd3a005d..7e4b3d08d0bd 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -160,8 +160,3 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     apt install -y  --no-install-recommends \
         cuda-10-1 && \
     rm -rf /var/lib/apt/lists/*
-
-
-FROM gpu as gpuwithcompatenv
-# TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
-# This should be fixed and deleted.
\ No newline at end of file
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index 5ac660df796b..6971aea852b7 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -103,21 +103,12 @@ services:
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcompatenv
-      args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
-  ubuntu_build_cuda:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcudaruntimelibs
+      target: gpu
       args:
         BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
       cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
+        - build.ubuntu_gpu_cu101:latest
+        - mxnetci/build.ubuntu_gpu_cu101:latest
   ###################################################################################################
   # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems.
   ###################################################################################################
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index b7c2763f7ec2..14bb91f86088 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu() {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib)
           }
         }
@@ -225,7 +225,7 @@ def compile_unix_mkldnn_gpu() {
         ws('workspace/build-mkldnn-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
             utils.pack_lib('mkldnn_gpu', mx_mkldnn_lib)
           }
         }
@@ -239,7 +239,7 @@ def compile_unix_mkldnn_nocudnn_gpu() {
          ws('workspace/build-mkldnn-gpu-nocudnn') {
            timeout(time: max_time, unit: 'MINUTES') {
              utils.init_git()
-             utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
+             utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
              utils.pack_lib('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
            }
          }
@@ -253,7 +253,7 @@ def compile_unix_full_gpu() {
         ws('workspace/build-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7', false)
             utils.pack_lib('gpu', mx_lib_cpp_examples)
           }
         }
@@ -267,7 +267,7 @@ def compile_unix_full_gpu_make() {
         ws('workspace/build-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_make', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_make', false)
             utils.pack_lib('gpu_make', mx_lib_cpp_examples_make)
           }
         }
@@ -281,7 +281,7 @@ def compile_unix_full_gpu_mkldnn_cpp_test() {
         ws('workspace/build-gpu-mkldnn-cpp') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
             utils.pack_lib('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
           }
         }
@@ -295,7 +295,7 @@ def compile_unix_full_gpu_no_tvm_op() {
         ws('workspace/build-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
             utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
           }
         }
@@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() {
         ws('workspace/build-cmake-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
             utils.pack_lib('cmake_gpu', mx_cmake_lib_cython)
           }
         }
@@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() {
         ws('workspace/build-cmake-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
           }
         }
       }
@@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() {
             ws('workspace/build-cmake-gpu-no-rtc') {
                 timeout(time: max_time, unit: 'MINUTES') {
                     utils.init_git()
-                    utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                    utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
                 }
             }
         }
@@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() {
         ws('workspace/build-cpu-clang10') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
             utils.pack_lib('gpu_clang10', mx_lib)
           }
         }
@@ -831,7 +831,7 @@ def test_unix_python3_gpu() {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
-            python3_gpu_ut_cython('ubuntu_build_cuda')
+            python3_gpu_ut_cython('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() {
         ws('workspace/ut-python3-gpu-no-tvm-op') {
           try {
             utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_build_cuda')
+            python3_gpu_ut_cython('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
               utils.unpack_and_init('gpu', mx_lib)
-              utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true)
+              utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
               utils.publish_test_coverage()
             } finally {
               utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
@@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
-            python3_gpu_ut('ubuntu_build_cuda')
+            python3_gpu_ut('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
-            python3_gpu_ut_nocudnn('ubuntu_build_cuda')
+            python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true)
             utils.publish_test_coverage()
           }
         }
@@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true)
             utils.publish_test_coverage()
           }
         }
@@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true)
             utils.publish_test_coverage()
           }
         }
@@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
             utils.publish_test_coverage()
           }
         }
@@ -1192,7 +1192,7 @@ def test_unix_r_gpu() {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }
@@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
             utils.publish_test_coverage()
           }
         }
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 81c022c28990..3f79ebdb7940 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -34,7 +34,7 @@ core_logic: {
       node(NODE_LINUX_CPU) {
         ws('workspace/build-mkldnn-gpu') {
           utils.init_git()
-          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
+          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
           utils.pack_lib('gpu', mx_lib)
         }
       }

From 7fc728211f53ce5470465c50e9d9ceebe668910b Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Thu, 30 Apr 2020 18:23:04 -0700
Subject: [PATCH 05/15] Revert "get rid of ubuntu_build_cuda and point
 ubuntu_cu101 to base gpu instead of cuda compat"

This reverts commit f4ab73084b531e0303e7ad72aa09eb8d629f91d5.
---
 ci/build.py                          |  2 +-
 ci/docker/Dockerfile.build.ubuntu    |  5 +++
 ci/docker/docker-compose.yml         | 15 +++++++--
 ci/jenkins/Jenkins_steps.groovy      | 48 ++++++++++++++--------------
 tests/nightly/JenkinsfileForBinaries |  2 +-
 5 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 394abb0d7d6f..83975e674ad8 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -45,7 +45,7 @@
 # NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose
 DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100',
                             'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu',
-                            'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
+                            'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
                             'publish.test.centos7_gpu')
 # Files for docker compose
 DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/publish.test.centos7'))
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index 7e4b3d08d0bd..63a4fd3a005d 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -160,3 +160,8 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     apt install -y  --no-install-recommends \
         cuda-10-1 && \
     rm -rf /var/lib/apt/lists/*
+
+
+FROM gpu as gpuwithcompatenv
+# TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
+# This should be fixed and deleted.
\ No newline at end of file
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index 6971aea852b7..5ac660df796b 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -103,12 +103,21 @@ services:
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
-      target: gpu
+      target: gpuwithcompatenv
+      args:
+        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
+  ubuntu_build_cuda:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.ubuntu
+      target: gpuwithcudaruntimelibs
       args:
         BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
       cache_from:
-        - build.ubuntu_gpu_cu101:latest
-        - mxnetci/build.ubuntu_gpu_cu101:latest
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
   ###################################################################################################
   # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems.
   ###################################################################################################
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 14bb91f86088..b7c2763f7ec2 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu() {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib)
           }
         }
@@ -225,7 +225,7 @@ def compile_unix_mkldnn_gpu() {
         ws('workspace/build-mkldnn-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
             utils.pack_lib('mkldnn_gpu', mx_mkldnn_lib)
           }
         }
@@ -239,7 +239,7 @@ def compile_unix_mkldnn_nocudnn_gpu() {
          ws('workspace/build-mkldnn-gpu-nocudnn') {
            timeout(time: max_time, unit: 'MINUTES') {
              utils.init_git()
-             utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
+             utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
              utils.pack_lib('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
            }
          }
@@ -253,7 +253,7 @@ def compile_unix_full_gpu() {
         ws('workspace/build-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7', false)
             utils.pack_lib('gpu', mx_lib_cpp_examples)
           }
         }
@@ -267,7 +267,7 @@ def compile_unix_full_gpu_make() {
         ws('workspace/build-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_make', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_make', false)
             utils.pack_lib('gpu_make', mx_lib_cpp_examples_make)
           }
         }
@@ -281,7 +281,7 @@ def compile_unix_full_gpu_mkldnn_cpp_test() {
         ws('workspace/build-gpu-mkldnn-cpp') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
             utils.pack_lib('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
           }
         }
@@ -295,7 +295,7 @@ def compile_unix_full_gpu_no_tvm_op() {
         ws('workspace/build-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
             utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
           }
         }
@@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() {
         ws('workspace/build-cmake-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false)
             utils.pack_lib('cmake_gpu', mx_cmake_lib_cython)
           }
         }
@@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() {
         ws('workspace/build-cmake-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
           }
         }
       }
@@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() {
             ws('workspace/build-cmake-gpu-no-rtc') {
                 timeout(time: max_time, unit: 'MINUTES') {
                     utils.init_git()
-                    utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                    utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false)
                 }
             }
         }
@@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() {
         ws('workspace/build-cpu-clang10') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false)
             utils.pack_lib('gpu_clang10', mx_lib)
           }
         }
@@ -831,7 +831,7 @@ def test_unix_python3_gpu() {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
+            python3_gpu_ut_cython('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() {
         ws('workspace/ut-python3-gpu-no-tvm-op') {
           try {
             utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
+            python3_gpu_ut_cython('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
               utils.unpack_and_init('gpu', mx_lib)
-              utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
+              utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true)
               utils.publish_test_coverage()
             } finally {
               utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
@@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
-            python3_gpu_ut('ubuntu_gpu_cu101')
+            python3_gpu_ut('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
-            python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
+            python3_gpu_ut_nocudnn('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true)
             utils.publish_test_coverage()
           }
         }
@@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true)
             utils.publish_test_coverage()
           }
         }
@@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true)
             utils.publish_test_coverage()
           }
         }
@@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true)
             utils.publish_test_coverage()
           }
         }
@@ -1192,7 +1192,7 @@ def test_unix_r_gpu() {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }
@@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
             utils.publish_test_coverage()
           }
         }
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 3f79ebdb7940..81c022c28990 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -34,7 +34,7 @@ core_logic: {
       node(NODE_LINUX_CPU) {
         ws('workspace/build-mkldnn-gpu') {
           utils.init_git()
-          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
+          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
           utils.pack_lib('gpu', mx_lib)
         }
       }

From 1f698901c343c848bf41b5444d4760632b1a45f5 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 2 May 2020 22:55:26 -0700
Subject: [PATCH 06/15] add linux gpu g4 node to all tests using g3 in unix-gpu
 pipeline

---
 ci/Jenkinsfile_utils.groovy     |  1 +
 ci/jenkins/Jenkins_steps.groovy | 41 ++++++++++++++++++++++++---------
 ci/jenkins/Jenkinsfile_unix_gpu |  2 +-
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index 4954963ff9c6..ee254b0dbee9 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -257,6 +257,7 @@ def assign_node_labels(args) {
   //    knowing about the limitations.
   NODE_LINUX_CPU = args.linux_cpu
   NODE_LINUX_GPU = args.linux_gpu
+  NODE_LINUX_GPU_G4 = args.linux_gpu_g4
   NODE_LINUX_GPU_P3 = args.linux_gpu_p3
   NODE_WINDOWS_CPU = args.windows_cpu
   NODE_WINDOWS_GPU = args.windows_gpu
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 9485d4803658..43113e5622ef 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -800,7 +800,7 @@ def test_unix_python3_mkl_cpu() {
 
 def test_unix_python3_gpu() {
     return ['Python3: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
@@ -814,6 +814,25 @@ def test_unix_python3_gpu() {
     }]
 }
 
+<<<<<<< HEAD
+=======
+def test_unix_python3_gpu_no_tvm_op() {
+    return ['Python3: GPU TVM_OP OFF': {
+      node(NODE_LINUX_GPU_G4) {
+        ws('workspace/ut-python3-gpu-no-tvm-op') {
+          try {
+            utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
+            python3_gpu_ut_cython('ubuntu_build_cuda')
+            utils.publish_test_coverage()
+          } finally {
+            utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
+          }
+        }
+      }
+    }]
+}
+
+>>>>>>> add linux gpu g4 node to all tests using g3 in unix-gpu pipeline
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
       node(NODE_LINUX_GPU_P3) {
@@ -900,7 +919,7 @@ def test_unix_python3_mkldnn_mkl_cpu() {
 
 def test_unix_python3_mkldnn_gpu() {
     return ['Python3: MKLDNN-GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
@@ -916,7 +935,7 @@ def test_unix_python3_mkldnn_gpu() {
 
 def test_unix_python3_mkldnn_nocudnn_gpu() {
     return ['Python3: MKLDNN-GPU-NOCUDNN': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
@@ -950,7 +969,7 @@ def test_unix_python3_tensorrt_gpu() {
 
 def test_unix_python3_integration_gpu() {
     return ['Python Integration GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -964,7 +983,7 @@ def test_unix_python3_integration_gpu() {
 
 def test_unix_cpp_package_gpu() {
     return ['cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
@@ -978,7 +997,7 @@ def test_unix_cpp_package_gpu() {
 
 def test_unix_capi_cpp_package() {
     return ['capi-cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
@@ -1020,7 +1039,7 @@ def test_unix_scala_mkldnn_cpu(){
 
 def test_unix_scala_gpu() {
     return ['Scala: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
@@ -1103,7 +1122,7 @@ def test_unix_perl_cpu() {
 
 def test_unix_cpp_gpu() {
     return ['Cpp: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
@@ -1131,7 +1150,7 @@ def test_unix_cpp_cpu() {
 
 def test_unix_perl_gpu() {
     return ['Perl: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
@@ -1145,7 +1164,7 @@ def test_unix_perl_gpu() {
 
 def test_unix_r_gpu() {
     return ['R: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -1213,7 +1232,7 @@ def test_unix_distributed_kvstore_cpu() {
 
 def test_unix_distributed_kvstore_gpu() {
     return ['dist-kvstore tests GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index 0e2310fc9220..f971463a7b1c 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -29,7 +29,7 @@ node('utility') {
   utils = load('ci/Jenkinsfile_utils.groovy')
   custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
 }
-utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3')
+utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3',linux_gpu_g4: 'mxnetlinux-gpu-g4')
 
 utils.main_wrapper(
 core_logic: {

From dffcde0b4d33c5b32f8b7fea6445832364c36b17 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 2 May 2020 22:55:26 -0700
Subject: [PATCH 07/15] add linux gpu g4 node to all tests using g3 in unix-gpu
 pipeline

---
 ci/Jenkinsfile_utils.groovy     |  1 +
 ci/jenkins/Jenkins_steps.groovy | 22 +++++++++++-----------
 ci/jenkins/Jenkinsfile_unix_gpu |  2 +-
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index 4954963ff9c6..ee254b0dbee9 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -257,6 +257,7 @@ def assign_node_labels(args) {
   //    knowing about the limitations.
   NODE_LINUX_CPU = args.linux_cpu
   NODE_LINUX_GPU = args.linux_gpu
+  NODE_LINUX_GPU_G4 = args.linux_gpu_g4
   NODE_LINUX_GPU_P3 = args.linux_gpu_p3
   NODE_WINDOWS_CPU = args.windows_cpu
   NODE_WINDOWS_GPU = args.windows_gpu
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 9485d4803658..ca68d5fda71d 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -800,7 +800,7 @@ def test_unix_python3_mkl_cpu() {
 
 def test_unix_python3_gpu() {
     return ['Python3: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
@@ -900,7 +900,7 @@ def test_unix_python3_mkldnn_mkl_cpu() {
 
 def test_unix_python3_mkldnn_gpu() {
     return ['Python3: MKLDNN-GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
@@ -916,7 +916,7 @@ def test_unix_python3_mkldnn_gpu() {
 
 def test_unix_python3_mkldnn_nocudnn_gpu() {
     return ['Python3: MKLDNN-GPU-NOCUDNN': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
@@ -950,7 +950,7 @@ def test_unix_python3_tensorrt_gpu() {
 
 def test_unix_python3_integration_gpu() {
     return ['Python Integration GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -964,7 +964,7 @@ def test_unix_python3_integration_gpu() {
 
 def test_unix_cpp_package_gpu() {
     return ['cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
@@ -978,7 +978,7 @@ def test_unix_cpp_package_gpu() {
 
 def test_unix_capi_cpp_package() {
     return ['capi-cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
@@ -1020,7 +1020,7 @@ def test_unix_scala_mkldnn_cpu(){
 
 def test_unix_scala_gpu() {
     return ['Scala: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
@@ -1103,7 +1103,7 @@ def test_unix_perl_cpu() {
 
 def test_unix_cpp_gpu() {
     return ['Cpp: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
@@ -1131,7 +1131,7 @@ def test_unix_cpp_cpu() {
 
 def test_unix_perl_gpu() {
     return ['Perl: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
@@ -1145,7 +1145,7 @@ def test_unix_perl_gpu() {
 
 def test_unix_r_gpu() {
     return ['R: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -1213,7 +1213,7 @@ def test_unix_distributed_kvstore_cpu() {
 
 def test_unix_distributed_kvstore_gpu() {
     return ['dist-kvstore tests GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index 0e2310fc9220..f971463a7b1c 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -29,7 +29,7 @@ node('utility') {
   utils = load('ci/Jenkinsfile_utils.groovy')
   custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
 }
-utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3')
+utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3',linux_gpu_g4: 'mxnetlinux-gpu-g4')
 
 utils.main_wrapper(
 core_logic: {

From 2ec03657c65271a9278c88b58bf3c4aab17b6ebb Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 2 May 2020 23:09:04 -0700
Subject: [PATCH 08/15] resolve remnant code due to simultaneous pushes from
 cron job and me

---
 ci/jenkins/Jenkins_steps.groovy | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 43113e5622ef..ca68d5fda71d 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -814,25 +814,6 @@ def test_unix_python3_gpu() {
     }]
 }
 
-<<<<<<< HEAD
-=======
-def test_unix_python3_gpu_no_tvm_op() {
-    return ['Python3: GPU TVM_OP OFF': {
-      node(NODE_LINUX_GPU_G4) {
-        ws('workspace/ut-python3-gpu-no-tvm-op') {
-          try {
-            utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_build_cuda')
-            utils.publish_test_coverage()
-          } finally {
-            utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
-          }
-        }
-      }
-    }]
-}
-
->>>>>>> add linux gpu g4 node to all tests using g3 in unix-gpu pipeline
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
       node(NODE_LINUX_GPU_P3) {

From 359e03f9e17d8c6e99b89953d7e858361f4efbd1 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 3 May 2020 01:27:08 -0700
Subject: [PATCH 09/15] compile int64gpu on g4

---
 ci/jenkins/Jenkins_steps.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index ca68d5fda71d..0fa742ce063c 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -151,7 +151,7 @@ def compile_unix_int64_cpu() {
 
 def compile_unix_int64_gpu() {
     return ['GPU: USE_INT64_TENSOR_SIZE': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()

From 471c3aee9dd11a631eabf12c56c8a44c5af1e2ed Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 3 May 2020 12:24:44 -0700
Subject: [PATCH 10/15] update p3 tests to run on g4

---
 ci/jenkins/Jenkins_steps.groovy | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 0fa742ce063c..e859a342dafe 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -816,7 +816,7 @@ def test_unix_python3_gpu() {
 
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
-      node(NODE_LINUX_GPU_P3) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-quantize-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
@@ -932,7 +932,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
 
 def test_unix_python3_tensorrt_gpu() {
     return ['Python3: TensorRT GPU': {
-      node(NODE_LINUX_GPU_P3) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/build-tensorrt') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {

From 578c784bd440231063016be57386b48b9dac999f Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Thu, 7 May 2020 04:13:35 +0000
Subject: [PATCH 11/15] Revert "update p3 tests to run on g4"

This reverts commit 471c3aee9dd11a631eabf12c56c8a44c5af1e2ed.
---
 ci/jenkins/Jenkins_steps.groovy | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 10012fce7832..d9e49c1b4d67 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -832,7 +832,7 @@ def test_unix_python3_gpu(lib_name) {
 
 def test_unix_python3_quantize_gpu(lib_name) {
     return ['Python3: Quantize GPU': {
-      node(NODE_LINUX_GPU_G4) {
+      node(NODE_LINUX_GPU_P3) {
         ws('workspace/ut-python3-quantize-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
@@ -948,7 +948,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) {
 
 def test_unix_python3_tensorrt_gpu(lib_name) {
     return ['Python3: TensorRT GPU': {
-      node(NODE_LINUX_GPU_G4) {
+      node(NODE_LINUX_GPU_P3) {
         ws('workspace/build-tensorrt') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {

From 896b7ed733ac8b216053328600dc0df4da1bbdff Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Thu, 7 May 2020 09:57:48 -0700
Subject: [PATCH 12/15] Revert "skip flaky tests"

This reverts commit 9ad16b7fae5f7559ceb96a0eb8b1e4e2dfac59e0.
---
 tests/python/unittest/test_numpy_interoperability.py | 2 --
 tests/python/unittest/test_numpy_ndarray.py          | 1 -
 tests/python/unittest/test_numpy_op.py               | 4 ----
 3 files changed, 7 deletions(-)

diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 7124d3129231..395a1e124284 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -3252,7 +3252,6 @@ def test_np_memory_array_function():
         assert op(data_mx, np.ones((5, 0))) == op(data_np, _np.ones((5, 0)))
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_function_protocol
@@ -3261,7 +3260,6 @@ def test_np_array_function_protocol():
     check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_ufunc_protocol
diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py
index 8ecbf6a15b1d..b4de9f7ba5d3 100644
--- a/tests/python/unittest/test_numpy_ndarray.py
+++ b/tests/python/unittest/test_numpy_ndarray.py
@@ -261,7 +261,6 @@ def check_identity_array_creation(shape, dtype):
                 assert type(y[1]) == np.ndarray
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @pytest.mark.serial
 def test_np_ndarray_binary_element_wise_ops():
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 3200c4f64cec..5401e61c8f86 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -621,7 +621,6 @@ def hybrid_forward(self, F, a, b):
     assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @pytest.mark.parametrize('shape', [rand_shape_nd(4, dim=4), (4, 0, 4, 0)])
@@ -965,7 +964,6 @@ def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None):
         assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_mean():
@@ -2209,7 +2207,6 @@ def hybrid_forward(self, F, x):
                 assert same(ret_mx.asnumpy(), ret_np)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_unary_funcs():
@@ -2421,7 +2418,6 @@ def hybrid_forward(self, F, a, *args, **kwargs):
         check_unary_func(func, shape, low, high)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_binary_funcs():

From 152d2f5732b88e7ffa9c2f52638ab4cd9d8b070c Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 9 May 2020 15:28:49 -0700
Subject: [PATCH 13/15] remove gpuwithcompatenv

---
 ci/docker/Dockerfile.build.ubuntu | 5 -----
 ci/docker/docker-compose.yml      | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index 9d9edb0e5f8c..d5933c3bb628 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -164,8 +164,3 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     apt install -y  --no-install-recommends \
         cuda-10-1 && \
     rm -rf /var/lib/apt/lists/*
-
-
-FROM gpu as gpuwithcompatenv
-# TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
-# This should be fixed and deleted.
\ No newline at end of file
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index 5ac660df796b..9aeb762f335e 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -103,7 +103,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcompatenv
+      target: gpu
       args:
         BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
       cache_from:

From e068021f4ba2f13b401af5f0cd6db5c137f35a9d Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 9 May 2020 15:35:15 -0700
Subject: [PATCH 14/15] Revert "replace cu101 with cuda since compat is no
 longer to be used" with the replacement of gpuwithcompatenv with base gpu in
 ubuntu_gpu_cu101 all the unix-gpu builds which previously were switched to
 build_cuda can now be replaced back to gpu_cu101

This reverts commit 715078a3d8dc47e34dc6dbdbeb0499af75b8f8fb.
---
 ci/jenkins/Jenkins_steps.groovy | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index d9e49c1b4d67..ff24908b52c0 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu(lib_name) {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_build_cu101', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib(lib_name, mx_cmake_lib)
           }
         }
@@ -310,7 +310,7 @@ def compile_unix_cmake_gpu(lib_name) {
         ws('workspace/build-cmake-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
             utils.pack_lib(lib_name, mx_cmake_lib_cython)
           }
         }
@@ -324,7 +324,7 @@ def compile_unix_cmake_gpu_no_rtc(lib_name) {
             ws('workspace/build-cmake-gpu-no-rtc') {
                 timeout(time: max_time, unit: 'MINUTES') {
                   utils.init_git()
-                  utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                  utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
                   utils.pack_lib(lib_name, mx_cmake_lib)
           }
         }
@@ -597,7 +597,7 @@ def compile_unix_clang10_cuda_werror(lib_name) {
         ws('workspace/build-cpu-clang10') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
             utils.pack_lib(lib_name, mx_lib)
           }
         }
@@ -820,7 +820,7 @@ def test_unix_python3_gpu(lib_name) {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init(lib_name, mx_lib_cython)
-            python3_gpu_ut_cython('ubuntu_build_cuda')
+            python3_gpu_ut_cython('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -837,7 +837,7 @@ def test_unix_python3_quantize_gpu(lib_name) {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
               utils.unpack_and_init(lib_name, mx_lib)
-              utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true)
+              utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
               utils.publish_test_coverage()
             } finally {
               utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
@@ -920,7 +920,7 @@ def test_unix_python3_mkldnn_gpu(lib_name) {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init(lib_name, mx_mkldnn_lib)
-            python3_gpu_ut('ubuntu_build_cuda')
+            python3_gpu_ut('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -936,7 +936,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init(lib_name, mx_mkldnn_lib)
-            python3_gpu_ut_nocudnn('ubuntu_build_cuda')
+            python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -970,7 +970,7 @@ def test_unix_python3_integration_gpu(lib_name) {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true)
             utils.publish_test_coverage()
           }
         }
@@ -984,7 +984,7 @@ def test_unix_cpp_package_gpu(lib_name) {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_cpp_examples_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -998,7 +998,7 @@ def test_unix_capi_cpp_package(lib_name) {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_cpp_capi_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1040,7 +1040,7 @@ def test_unix_scala_gpu(lib_name) {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true)
             utils.publish_test_coverage()
           }
         }
@@ -1123,7 +1123,7 @@ def test_unix_cpp_gpu(lib_name) {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_cmake_lib)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true)
             utils.publish_test_coverage()
           }
         }
@@ -1151,7 +1151,7 @@ def test_unix_perl_gpu(lib_name) {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_make)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
             utils.publish_test_coverage()
           }
         }
@@ -1165,7 +1165,7 @@ def test_unix_r_gpu(lib_name) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
-	    utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true)
+	    utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }
@@ -1233,7 +1233,7 @@ def test_unix_distributed_kvstore_gpu(lib_name) {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
             utils.publish_test_coverage()
           }
         }

From 25207d639a08a18c60d9e7a572031552e1c91dbd Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 9 May 2020 15:40:24 -0700
Subject: [PATCH 15/15] fix indent and incorrect docker target

---
 ci/jenkins/Jenkins_steps.groovy | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index ff24908b52c0..27f4d2a11ee6 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu(lib_name) {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cu101', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib(lib_name, mx_cmake_lib)
           }
         }
@@ -320,12 +320,12 @@ def compile_unix_cmake_gpu(lib_name) {
 
 def compile_unix_cmake_gpu_no_rtc(lib_name) {
     return ['GPU: CMake CUDA RTC OFF': {
-        node(NODE_LINUX_CPU) {
-            ws('workspace/build-cmake-gpu-no-rtc') {
-                timeout(time: max_time, unit: 'MINUTES') {
-                  utils.init_git()
-                  utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
-                  utils.pack_lib(lib_name, mx_cmake_lib)
+      node(NODE_LINUX_CPU) {
+        ws('workspace/build-cmake-gpu-no-rtc') {
+          timeout(time: max_time, unit: 'MINUTES') {
+            utils.init_git()
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
+            utils.pack_lib(lib_name, mx_cmake_lib)
           }
         }
       }
@@ -1165,7 +1165,7 @@ def test_unix_r_gpu(lib_name) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
-	    utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
+	          utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }