
Fixing issue #17840 (#18526)

Open · wants to merge 27 commits into master
46 changes: 46 additions & 0 deletions ci/docker/runtime_functions.sh
@@ -719,6 +719,7 @@ build_ubuntu_gpu_mkldnn() {
CC=gcc-7 CXX=g++-7 cmake \
-DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_TVM_OP=ON \
-DUSE_CUDA=ON \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DUSE_CPP_PACKAGE=ON \
@@ -732,6 +733,7 @@ build_ubuntu_gpu_mkldnn_nocudnn() {
CC=gcc-7 CXX=g++-7 cmake \
-DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_TVM_OP=ON \
-DUSE_CUDA=ON \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DUSE_CUDNN=OFF \
@@ -746,6 +748,7 @@ build_ubuntu_gpu_cuda101_cudnn7() {
CC=gcc-7 CXX=g++-7 cmake \
-DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_TVM_OP=ON \
-DUSE_CUDA=ON \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DUSE_CUDNN=ON \
@@ -785,6 +788,7 @@ build_ubuntu_gpu_cuda101_cudnn7_make() {
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=1 \
USE_TVM_OP=1 \
USE_CPP_PACKAGE=1 \
USE_DIST_KVSTORE=1 \
CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
@@ -804,6 +808,7 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() {
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=1 \
USE_TVM_OP=0 \
USE_CPP_PACKAGE=1 \
USE_DIST_KVSTORE=1 \
CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
@@ -813,6 +818,23 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() {
make cython PYTHON=python3
}

build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op() {
set -ex
cd /work/build
CC=gcc-7 CXX=g++-7 cmake \
-DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_TVM_OP=OFF \
-DUSE_CUDA=ON \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DUSE_CUDNN=ON \
-DUSE_MKLDNN=OFF \
-DBUILD_CYTHON_MODULES=ON \
-DUSE_DIST_KVSTORE=ON \
-G Ninja /work/mxnet
ninja
}

build_ubuntu_amalgamation() {
set -ex
# Amalgamation cannot be run with -j nproc
@@ -843,6 +865,7 @@ build_ubuntu_gpu_cmake() {
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_TVM_OP=ON \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
-DUSE_MKLDNN=OFF \
@@ -863,6 +886,7 @@ build_ubuntu_gpu_cmake_no_rtc() {
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_TVM_OP=ON \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
-DUSE_MKLDNN=ON \
@@ -877,6 +901,27 @@ build_ubuntu_gpu_cmake_no_rtc() {
ninja
}

build_ubuntu_gpu_cmake_no_tvm_op() {
set -ex
cd /work/build
CC=gcc-7 CXX=g++-7 cmake \
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_TVM_OP=OFF \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
-DUSE_MKLDNN=OFF \
-DUSE_DIST_KVSTORE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DBUILD_CYTHON_MODULES=1 \
-G Ninja \
/work/mxnet

ninja
}

build_ubuntu_cpu_large_tensor() {
set -ex
cd /work/build
@@ -899,6 +944,7 @@ build_ubuntu_gpu_large_tensor() {
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_TVM_OP=ON \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
-DUSE_MKLDNN=OFF \
44 changes: 44 additions & 0 deletions ci/jenkins/Jenkins_steps.groovy
@@ -310,6 +310,20 @@ def compile_unix_full_gpu_mkldnn_cpp_test(lib_name) {
}]
}

def compile_unix_full_gpu_no_tvm_op(lib_name) {
return ['GPU: CUDA10.1+cuDNN7 TVM_OP OFF': {
node(NODE_LINUX_CPU) {
ws('workspace/build-gpu-no-tvm-op') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
utils.pack_lib(lib_name, mx_lib_cpp_examples_no_tvm_op)
}
}
}
}]
}

def compile_unix_cmake_gpu(lib_name) {
return ['GPU: CMake': {
node(NODE_LINUX_CPU) {
@@ -324,6 +338,20 @@ def compile_unix_cmake_gpu(lib_name) {
}]
}

def compile_unix_cmake_gpu_no_tvm_op(lib_name) {
return ['GPU: CMake TVM_OP OFF': {
node(NODE_LINUX_CPU) {
ws('workspace/build-cmake-gpu-no-tvm-op') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
utils.pack_lib(lib_name, mx_cmake_lib_no_tvm_op)
}
}
}
}]
}

def compile_unix_cmake_gpu_no_rtc(lib_name) {
return ['GPU: CMake CUDA RTC OFF': {
node(NODE_LINUX_CPU) {
@@ -338,6 +366,22 @@ def compile_unix_cmake_gpu_no_rtc(lib_name) {
}]
}

def test_unix_python3_gpu_no_tvm_op() {
return ['Python3: GPU TVM_OP OFF': {
node(NODE_LINUX_GPU) {
ws('workspace/ut-python3-gpu-no-tvm-op') {
try {
utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
python3_gpu_ut_cython('ubuntu_gpu_cu101')
utils.publish_test_coverage()
} finally {
utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
}
}
}
}]
}

def compile_unix_tensorrt_gpu(lib_name) {
return ['TensorRT': {
node(NODE_LINUX_CPU) {
4 changes: 4 additions & 0 deletions ci/jenkins/Jenkinsfile_unix_gpu
@@ -42,6 +42,8 @@ core_logic: {
custom_steps.compile_unix_cmake_gpu('cmake_gpu'),
custom_steps.compile_unix_tensorrt_gpu('tensorrt'),
custom_steps.compile_unix_int64_gpu('gpu_int64'),
custom_steps.compile_unix_full_gpu_no_tvm_op('gpu_no_tvm_op'),
custom_steps.compile_unix_cmake_gpu_no_tvm_op('cmake_gpu_no_tvm_op'),
custom_steps.compile_unix_cmake_gpu_no_rtc('gpu_no_rtc'),
custom_steps.compile_unix_full_gpu_mkldnn_cpp_test('gpu_mkldnn_cpp_test_make')
])
@@ -60,6 +62,8 @@ core_logic: {
// TODO(szha): fix and reenable the hanging issue. tracked in #18098
// custom_steps.test_unix_distributed_kvstore_gpu('gpu'),
custom_steps.test_unix_byteps_gpu('gpu'),
custom_steps.test_unix_python3_gpu_no_tvm_op(),
custom_steps.test_unix_capi_cpp_package('gpu_mkldnn_cpp_test_make'),
])
}
,
2 changes: 2 additions & 0 deletions contrib/tvmop/compile.py
@@ -152,6 +152,8 @@ def get_cuda_arch(arch):
# we create libtvmop.o first, which gives us a chance to link tvm_runtime together with libtvmop
# to allow mxnet to find external helper functions in libtvm_runtime
func_binary.save(arguments.target_path + "/libtvmop.o")
if len(func_binary.imported_modules):
func_binary.imported_modules[0].save(arguments.target_path + "/libtvmop.cubin")
ld_path = arguments.target_path if arguments.ld_path is None else arguments.ld_path
create_shared(arguments.target_path + "/libtvmop.so",
arguments.target_path + "/libtvmop.o",
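
For context, a minimal sketch of the host/device split this change persists. It assumes the TVM runtime `Module` API behaves as in the PR's TVM version (`save` writes a module to disk; for CUDA builds, compiled device code lives in `imported_modules`); the helper name is illustrative, not from the PR:

```python
# Sketch: persist both halves of a module built by tvm.build (as in
# compile.py). `func_binary` is the built module, `target_path` the
# output directory. Hypothetical helper, for illustration only.
def save_tvmop_artifacts(func_binary, target_path):
    # Host-side object file: wrappers that set up and launch the kernels.
    func_binary.save(target_path + "/libtvmop.o")
    # For CUDA builds the compiled device code is an imported module;
    # saving it as libtvmop.cubin lets MXLoadTVMOp re-import it at runtime.
    if len(func_binary.imported_modules):
        func_binary.imported_modules[0].save(target_path + "/libtvmop.cubin")
```

The load side (MXLoadTVMOp in c_api.cc below) then stitches the two files back together.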
10 changes: 9 additions & 1 deletion src/c_api/c_api.cc
@@ -1363,7 +1363,15 @@ int MXGetVersion(int *out) {
#if MXNET_USE_TVM_OP
int MXLoadTVMOp(const char *libpath) {
API_BEGIN();
tvm::runtime::TVMOpModule::Get()->Load(libpath);
tvm::runtime::TVMOpModule *global_module = tvm::runtime::TVMOpModule::Get();
global_module->Load(libpath);
#if MXNET_USE_CUDA
std::string libpathstr(libpath);
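// "libtvmop.so" is 11 characters; stripping it from the end of libpath
// leaves the directory prefix (see the review thread below).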
std::string cubinpath = libpathstr.substr(0, libpathstr.size() - 11) + "libtvmop.cubin";
Member:
would be better to pass libpath as dir, and do libpath + "libtvmop.so" as well to keep consistency.

@jinboci (Contributor, Author) on Jun 12, 2020:
I rewrote it in a more elegant way :)
Yes, but MXLoadTVMOp is called at
https://github.com/apache/incubator-mxnet/blob/1bf881f381f91b157a26d9beddcaa8f4960cc038/python/mxnet/tvmop.py#L31-L32
where _LIB_TVM_OP is returned from
https://github.com/apache/incubator-mxnet/blob/1bf881f381f91b157a26d9beddcaa8f4960cc038/python/mxnet/libinfo.py#L25
and _LIB_TVM_OP[0] is the path of libtvmop.so. We may need to modify find_lib_path or write a new function to get the directory where libtvmop.so is located.
tvm::runtime::TVMOpModule cubin_module;
cubin_module.Load(cubinpath);
global_module->Import(cubin_module);
#endif
API_END();
}
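
The alternative the thread converges on, sketched on the Python side: compute the directory once from the known libtvmop.so path instead of trimming a fixed-length suffix in C++. The list argument mirrors _LIB_TVM_OP from the linked tvmop.py; treat the helper and its signature as hypothetical:

```python
import os

# Hypothetical helper: given _LIB_TVM_OP (whose first entry is the full
# path of libtvmop.so, per the linked libinfo.py), return the paths of
# both artifacts that MXLoadTVMOp needs.
def tvmop_artifact_paths(lib_tvm_op):
    lib_dir = os.path.dirname(lib_tvm_op[0])
    return (os.path.join(lib_dir, "libtvmop.so"),
            os.path.join(lib_dir, "libtvmop.cubin"))
```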

6 changes: 6 additions & 0 deletions src/operator/tvmop/op_module.cc
@@ -46,6 +46,12 @@ void TVMOpModule::Load(const std::string &filepath) {
*module_ptr_ = module;
}

void TVMOpModule::Import(const TVMOpModule& module) {
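// Importing registers the device-code module (e.g. the CUDA cubin) under
// the host module, so packed functions can resolve their kernels at call time.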
CHECK(module_ptr_ != nullptr) << "module_ptr_ is not initialized.";
std::lock_guard<std::mutex> lock(mutex_);
module_ptr_->Import(*(module.module_ptr_));
}

PackedFunc GetFunction(const std::shared_ptr<Module> &module,
const std::string &op_name,
const std::vector<mxnet::TBlob> &args) {
2 changes: 2 additions & 0 deletions src/operator/tvmop/op_module.h
@@ -44,6 +44,8 @@ class TVMOpModule {
// Load TVM operators binary
void Load(const std::string& filepath);

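// Import another module's contents (e.g. device code) into this module.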
void Import(const TVMOpModule& module);

void Call(const std::string& func_name,
const mxnet::OpContext& ctx,
const std::vector<mxnet::TBlob>& args) const;