This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Merge branch 'master' into unify-kvb
eric-haibin-lin authored Dec 12, 2019
2 parents ce9c168 + 634f95e commit 034d868
Showing 143 changed files with 6,210 additions and 1,812 deletions.
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -15,7 +15,7 @@ Please feel free to remove inapplicable items for your PR.
- For new C++ functions in header files, their functionalities and arguments are documented.
- For new examples, README.md is added to explain the what the example does, the source of the dataset, expected performance on test set and reference to the original paper if applicable
- Check the API doc at https://mxnet-ci-doc.s3-accelerate.dualstack.amazonaws.com/PR-$PR_ID/$BUILD_ID/index.html
- [ ] To the my best knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change
- [ ] To the best of my knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change

### Changes ###
- [ ] Feature1, tests, (and when applicable, API doc)
9 changes: 3 additions & 6 deletions 3rdparty/mshadow/mshadow/dot_engine-inl.h
@@ -312,12 +312,9 @@ struct BLASEngine<cpu, float> {
CBLAS_TRANSPOSE p_transa[GROUP_SIZE] = {cblas_a_trans};
CBLAS_TRANSPOSE p_transb[GROUP_SIZE] = {cblas_b_trans};

std::vector<const float*> pp_A;
std::vector<const float*> pp_B;
std::vector<float*> pp_C;
pp_A.reserve(batch_count);
pp_B.reserve(batch_count);
pp_C.reserve(batch_count);
std::vector<const float*> pp_A(batch_count, nullptr);
std::vector<const float*> pp_B(batch_count, nullptr);
std::vector<float*> pp_C(batch_count, nullptr);

auto m_k = m * k;
auto k_n = k * n;
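For context on the pp_A/pp_B/pp_C change above: std::vector::reserve() only allocates capacity and leaves the vector empty, whereas the fill constructor used in the new code creates batch_count elements up front. A minimal standalone sketch of the difference (illustrative only, not mshadow code):

    #include <cstddef>
    #include <vector>

    int main() {
      const std::size_t batch_count = 4;

      std::vector<const float*> reserved;
      reserved.reserve(batch_count);          // capacity 4, but size still 0
      // reserved[0] = nullptr;               // would be undefined behaviour

      std::vector<const float*> sized(batch_count, nullptr);  // size 4, all nullptr
      for (std::size_t i = 0; i < batch_count; ++i) {
        sized[i] = nullptr;                   // well defined: element i exists
      }
      return 0;
    }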
2 changes: 1 addition & 1 deletion 3rdparty/openmp
Submodule openmp updated 578 files
100 changes: 58 additions & 42 deletions CMakeLists.txt
@@ -18,39 +18,48 @@ endif()

include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Utils.cmake)

include(CMakeDependentOption)
#Some things have order. This must be put in front alone
mxnet_option(USE_CUDA "Build with CUDA support" ON)
mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF)
mxnet_option(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
mxnet_option(USE_OPENMP "Build with Openmp support" ON)
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM)
mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
mxnet_option(USE_LAPACK "Build with lapack support" ON)
mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
mxnet_option(USE_MKLDNN "Build with MKL-DNN support" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING))
mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON IF NOT MSVC)
mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support" OFF)
mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON)
mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
mxnet_option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
mxnet_option(USE_CPP_PACKAGE "Build C++ Package" OFF)
mxnet_option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
mxnet_option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
mxnet_option(USE_CXX14_IF_AVAILABLE "Build with C++14 if the compiler supports it" OFF)
mxnet_option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
mxnet_option(USE_TVM_OP "Enable use of TVM operator build system." OFF)
mxnet_option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON)
mxnet_option(BUILD_CPP_EXAMPLES "Build cpp examples" ON)
mxnet_option(INSTALL_EXAMPLES "Install the example source files." OFF)
mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON)
mxnet_option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF)
mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
mxnet_option(BUILD_CYTHON_MODULES "Build cython modules." OFF)
option(USE_CUDA "Build with CUDA support" ON)
option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF)
option(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
option(USE_OPENCV "Build with OpenCV support" ON)
option(USE_OPENMP "Build with Openmp support" ON)
cmake_dependent_option(USE_CUDNN "Build with cudnn support" ON "USE_CUDA" OFF) # one could set CUDNN_ROOT for search path
cmake_dependent_option(USE_SSE "Build with x86 SSE instruction support" ON "NOT ARM" OFF)
option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
option(USE_LAPACK "Build with lapack support" ON)
option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
if(USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING))
option(USE_MKLDNN "Build with MKL-DNN support" ON)
else()
option(USE_MKLDNN "Build with MKL-DNN support" OFF)
endif()
if(NOT MSVC)
option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON)
else()
option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" OFF)
endif()
option(USE_GPERFTOOLS "Build with GPerfTools support" OFF)
option(USE_JEMALLOC "Build with Jemalloc support" ON)
option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
option(USE_CPP_PACKAGE "Build C++ Package" OFF)
option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
option(USE_CXX14_IF_AVAILABLE "Build with C++14 if the compiler supports it" OFF)
option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
option(USE_TVM_OP "Enable use of TVM operator build system." OFF)
option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON)
option(BUILD_CPP_EXAMPLES "Build cpp examples" ON)
option(INSTALL_EXAMPLES "Install the example source files." OFF)
option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON)
option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF)
option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
option(BUILD_CYTHON_MODULES "Build cython modules." OFF)

message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
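The block above replaces the project-specific mxnet_option() macro with CMake's built-in option() and, for flags that only make sense when another flag is on (USE_CUDNN, USE_SSE), cmake_dependent_option() from the CMakeDependentOption module. A generic, standalone sketch of the dependent form's semantics (names are illustrative, not taken from this file):

    cmake_minimum_required(VERSION 3.5)
    project(dependent_option_demo NONE)

    include(CMakeDependentOption)

    option(USE_FOO "Enable FOO" ON)

    # USE_BAR is offered to the user (default ON) only while USE_FOO is ON;
    # otherwise it is not presented and is forced to the fallback value OFF.
    cmake_dependent_option(USE_BAR "Enable BAR (requires FOO)" ON "USE_FOO" OFF)

    message(STATUS "USE_FOO=${USE_FOO} USE_BAR=${USE_BAR}")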
@@ -240,7 +249,7 @@ if(USE_TENSORRT)
endif()

# please note that when you enable this, you might run into an linker not being able to work properly due to large code injection.
# you can find more information here https://github.com/apache/incubator-mxnet/issues/15971
# you can find more information here https://github.com/apache/incubator-mxnet/issues/15971
if(ENABLE_TESTCOVERAGE)
message(STATUS "Compiling with test coverage support enabled. This will result in additional files being written to your source directory!")
find_program( GCOV_PATH gcov )
@@ -436,18 +445,23 @@ endif()

# ---[ OpenMP
if(USE_OPENMP)

function(load_omp)
# Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp
set(OPENMP_STANDALONE_BUILD TRUE)
set(LIBOMP_ENABLE_SHARED TRUE)
set(CMAKE_BUILD_TYPE Release)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp)
endfunction()

find_package(OpenMP REQUIRED)
# This should build on Windows, but there's some problem and I don't have a Windows box, so
# could a Windows user please fix?
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
AND NOT MSVC
AND NOT CMAKE_CROSSCOMPILING)

# Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp
set(OPENMP_STANDALONE_BUILD TRUE)
set(LIBOMP_ENABLE_SHARED TRUE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp)
load_omp()
list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5)
list(APPEND mxnet_LINKER_LIBS omp)
if(UNIX)
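Moving the bundled LLVM OpenMP setup into load_omp() relies on CMake functions having their own variable scope: set(CMAKE_BUILD_TYPE Release) affects the 3rdparty/openmp subdirectory added from inside the function but does not leak into the rest of the build. A standalone sketch of that scoping behaviour (illustrative only):

    cmake_minimum_required(VERSION 3.5)
    project(scope_demo NONE)

    set(CMAKE_BUILD_TYPE Debug)

    function(load_thirdparty)
      # A function body is a new variable scope: this assignment is visible
      # here (and to any add_subdirectory() called from here), but it does
      # not propagate back to the caller without PARENT_SCOPE.
      set(CMAKE_BUILD_TYPE Release)
      message(STATUS "inside load_thirdparty: ${CMAKE_BUILD_TYPE}")  # Release
    endfunction()

    load_thirdparty()
    message(STATUS "after load_thirdparty: ${CMAKE_BUILD_TYPE}")     # still Debug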
@@ -502,13 +516,15 @@ add_subdirectory(${GTEST_ROOT})
find_package(GTest REQUIRED)

# cudnn detection
if(USE_CUDNN AND USE_CUDA)
detect_cuDNN()
if(HAVE_CUDNN)
if(USE_CUDNN)
find_package(CUDNN)
if(CUDNN_FOUND)
add_definitions(-DUSE_CUDNN)
include_directories(SYSTEM ${CUDNN_INCLUDE})
list(APPEND mxnet_LINKER_LIBS ${CUDNN_LIBRARY})
add_definitions(-DMSHADOW_USE_CUDNN=1)
add_definitions(-DMSHADOW_USE_CUDNN=1)
else()
set(USE_CUDNN OFF)
endif()
endif()

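The cuDNN check above now goes through find_package(CUDNN) instead of the detect_cuDNN() macro that this commit deletes from cmake/FirstClassLangCuda.cmake further down; the find module it relies on is presumably among the 143 changed files but is not shown on this page. For orientation, a hypothetical minimal FindCUDNN.cmake exposing the CUDNN_FOUND/CUDNN_INCLUDE/CUDNN_LIBRARY variables consumed above might look roughly like this (a sketch, not the module MXNet actually ships):

    # Hypothetical minimal find module -- illustrative only.
    find_path(CUDNN_INCLUDE cudnn.h
      HINTS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
      PATH_SUFFIXES include
      DOC "Path to the cuDNN include directory.")

    find_library(CUDNN_LIBRARY NAMES cudnn
      HINTS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
      PATH_SUFFIXES lib lib64 lib/x64
      DOC "Path to the cuDNN library.")

    include(FindPackageHandleStandardArgs)
    # Sets CUDNN_FOUND when both the header and the library were located.
    find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY CUDNN_INCLUDE)
    mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY)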
2 changes: 2 additions & 0 deletions cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy
@@ -22,6 +22,7 @@

// NOTE: ci_utils is loaded by the originating Jenkins job, e.g. jenkins/Jenkinsfile_release_job

// NOTE: the following variables are referenced in the mxnet_lib_pipeline jenkins file imported below
// libmxnet location
libmxnet = 'lib/libmxnet.so'

@@ -30,6 +31,7 @@ licenses = 'licenses/*'

// libmxnet dependencies
mx_deps = ''
mx_mkldnn_deps = ''

// library type
// either static or dynamic - depending on how it links to its dependencies
2 changes: 2 additions & 0 deletions cd/mxnet_lib/static/Jenkins_pipeline.groovy
@@ -23,6 +23,8 @@
// To avoid confusion, please note:
// ci_utils is loaded by the originating Jenkins job, e.g. jenkins/Jenkinsfile_release_job

// NOTE: the following variables are referenced in the mxnet_lib_pipeline jenkins file imported below

// libmxnet location
libmxnet = 'lib/libmxnet.so'

4 changes: 0 additions & 4 deletions ci/docker/Dockerfile.build.centos7_gpu
@@ -33,10 +33,6 @@ ENV CUDNN_VERSION=7.6.0.64
COPY install/centos7_cudnn.sh /work/
RUN /work/centos7_cudnn.sh

# hotfix nvidia-docker image come with wrong version of libcublas
COPY install/centos7_cublas.sh /work/
RUN /work/centos7_cublas.sh

ARG USER_ID=0
COPY install/centos7_adduser.sh /work/
RUN /work/centos7_adduser.sh
4 changes: 0 additions & 4 deletions ci/docker/Dockerfile.build.ubuntu_base_gpu
@@ -29,10 +29,6 @@ ENV CUDNN_VERSION=7.6.0.64
COPY install/ubuntu_cudnn.sh /work/
RUN /work/ubuntu_cudnn.sh

# hotfix nvidia-docker image come with wrong version of libcublas
COPY install/ubuntu_cublas.sh /work/
RUN /work/ubuntu_cublas.sh

ARG USER_ID=0
ARG GROUP_ID=0
COPY install/ubuntu_adduser.sh /work/
4 changes: 0 additions & 4 deletions ci/docker/Dockerfile.build.ubuntu_build_cuda
@@ -54,10 +54,6 @@ RUN /work/ubuntu_cudnn.sh
COPY install/ubuntu_nvidia.sh /work/
RUN /work/ubuntu_nvidia.sh

# hotfix nvidia-docker image come with wrong version of libcublas
COPY install/ubuntu_cublas.sh /work/
RUN /work/ubuntu_cublas.sh

# Keep this at the end since this command is not cachable
ARG USER_ID=0
ARG GROUP_ID=0
6 changes: 1 addition & 5 deletions ci/docker/Dockerfile.build.ubuntu_gpu_cu101
@@ -65,14 +65,10 @@ RUN /work/ubuntu_docs.sh
COPY install/ubuntu_tutorials.sh /work/
RUN /work/ubuntu_tutorials.sh

ENV CUDNN_VERSION=7.5.1.10
ENV CUDNN_VERSION=7.6.0.64
COPY install/ubuntu_cudnn.sh /work/
RUN /work/ubuntu_cudnn.sh

# hotfix nvidia-docker image come with wrong version of libcublas
COPY install/ubuntu_cublas.sh /work/
RUN /work/ubuntu_cublas.sh

# Always last
ARG USER_ID=0
ARG GROUP_ID=0
4 changes: 0 additions & 4 deletions ci/docker/Dockerfile.build.ubuntu_nightly_gpu
@@ -72,10 +72,6 @@ ENV CUDNN_VERSION=7.6.0.64
COPY install/ubuntu_cudnn.sh /work/
RUN /work/ubuntu_cudnn.sh

# hotfix nvidia-docker image come with wrong version of libcublas
COPY install/ubuntu_cublas.sh /work/
RUN /work/ubuntu_cublas.sh

ARG USER_ID=0
ARG GROUP_ID=0
COPY install/ubuntu_adduser.sh /work/
26 changes: 0 additions & 26 deletions ci/docker/install/ubuntu_cublas.sh

This file was deleted.

4 changes: 2 additions & 2 deletions ci/docker/runtime_functions.sh
@@ -1002,7 +1002,7 @@ sanity_check() {
cd_unittest_ubuntu() {
set -ex
export PYTHONPATH=./python/
export MXNET_MKLDNN_DEBUG=1 # Ignored if not present
export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
export MXNET_SUBGRAPH_VERBOSE=0
export MXNET_ENABLE_CYTHON=0
@@ -1978,7 +1978,7 @@ build_static_libmxnet() {
set -ex
pushd .
local mxnet_variant=${1:?"This function requires a python command as the first argument"}
source tools/staticbuild/build.sh ${mxnet_variant} pip
source tools/staticbuild/build.sh ${mxnet_variant}
popd
}

2 changes: 1 addition & 1 deletion ci/publish/Jenkinsfile
@@ -81,7 +81,7 @@ for (x in labels) {
def toDeploy = [:]
for (x in labels) {
def label = x // Required due to language
toDeploy["Scala Deploy ${label}"] = wrapStep(nodeMap['cpu'], "deploy-scala-${label}") {
toDeploy["Scala Deploy ${label}"] = wrapStep(nodeMap[label], "deploy-scala-${label}") {
withEnv(["MAVEN_PUBLISH_OS_TYPE=${scalaOSMap[label]}", "mxnet_variant=${scalaVariantMap[label]}"]) {
utils.unpack_and_init("scala_${label}", mx_scala_pub, false)
utils.docker_run("publish.ubuntu1604_${label}", 'publish_scala_deploy', label == 'gpu' ? true : false, '500m', 'MAVEN_PUBLISH_OS_TYPE MAVEN_PUBLISH_SECRET_ENDPOINT_URL MAVEN_PUBLISH_SECRET_NAME_CREDENTIALS MAVEN_PUBLISH_SECRET_NAME_GPG DOCKERHUB_SECRET_ENDPOINT_REGION mxnet_variant')
2 changes: 1 addition & 1 deletion ci/publish/python/build.sh
@@ -18,7 +18,7 @@

set -ex

source tools/staticbuild/build.sh $mxnet_variant pip
source tools/staticbuild/build.sh $mxnet_variant

set -ex

2 changes: 1 addition & 1 deletion ci/publish/scala/build.sh
@@ -22,7 +22,7 @@ set -ex
# MAVEN_PUBLISH_OS_TYPE: linux-x86_64-cpu|linux-x86_64-gpu|osx-x86_64-cpu
# export MAVEN_PUBLISH_OS_TYPE=linux-x86_64-cpu

source tools/staticbuild/build.sh $mxnet_variant maven
source tools/staticbuild/build.sh $mxnet_variant

set -ex

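The same change appears in all three build scripts above: the second positional argument to tools/staticbuild/build.sh ("pip" in the CI and Python scripts, "maven" in the Scala script) is no longer passed, leaving only the variant. A hedged sketch of the resulting call pattern (assuming the script now needs only the variant; mxnet_variant is the existing CI variable):

    #!/usr/bin/env bash
    set -ex
    # mxnet_variant is e.g. cpu or cu101 -- provided by the CI job.
    mxnet_variant=${mxnet_variant:?"mxnet_variant must be set"}
    source tools/staticbuild/build.sh "${mxnet_variant}"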
12 changes: 0 additions & 12 deletions cmake/BuildTVM.cmake
@@ -98,18 +98,6 @@ set(USE_RANDOM OFF)
# Whether use NNPack
set(USE_NNPACK OFF)

# Whether use CuDNN
if(USE_CUDNN AND USE_CUDA)
detect_cuDNN()
if(HAVE_CUDNN)
set(USE_CUDNN ON)
else()
set(USE_CUDNN OFF)
endif()
else()
set(USE_CUDNN OFF)
endif()

# Whether use cuBLAS
set(USE_CUBLAS OFF)

28 changes: 0 additions & 28 deletions cmake/FirstClassLangCuda.cmake
@@ -23,34 +23,6 @@ if(USE_CXX14_IF_AVAILABLE)
check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
endif()

################################################################################################
# Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution.
# That's why not FindcuDNN.cmake file, but just the macro
# Usage:
# detect_cuDNN()
function(detect_cuDNN)
set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder")

find_path(CUDNN_INCLUDE cudnn.h
PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT}
DOC "Path to cuDNN include directory." )


find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a
PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE}
PATH_SUFFIXES lib lib/x64
DOC "Path to cuDNN library.")

if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
set(HAVE_CUDNN TRUE PARENT_SCOPE)
set(CUDNN_FOUND TRUE PARENT_SCOPE)

mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")
endif()
endfunction()



################################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)