diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy index e7aeae935d06..8ecc7e193b97 100644 --- a/ci/Jenkinsfile_utils.groovy +++ b/ci/Jenkinsfile_utils.groovy @@ -255,6 +255,7 @@ def assign_node_labels(args) { // knowing about the limitations. NODE_LINUX_CPU = args.linux_cpu NODE_LINUX_GPU = args.linux_gpu + NODE_LINUX_GPU_G4 = args.linux_gpu_g4 NODE_LINUX_GPU_P3 = args.linux_gpu_p3 NODE_WINDOWS_CPU = args.windows_cpu NODE_WINDOWS_GPU = args.windows_gpu diff --git a/ci/build.py b/ci/build.py index cbc41218f042..18ad57fbd87c 100755 --- a/ci/build.py +++ b/ci/build.py @@ -196,8 +196,9 @@ def container_run(docker_client: SafeDockerClient, # Equivalent command docker_cmd_list = [ - get_docker_binary(nvidia_runtime), + "docker", 'run', + "--gpus all" if nvidia_runtime else "", "--cap-add", "SYS_PTRACE", # Required by ASAN '--rm', diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu new file mode 100644 index 000000000000..d5933c3bb628 --- /dev/null +++ b/ci/docker/Dockerfile.build.ubuntu @@ -0,0 +1,166 @@ +# -*- mode: dockerfile -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Dockerfile for Ubuntu based builds. 
+# +# See docker-compose.yml for supported BASE_IMAGE ARGs and targets. + +#################################################################################################### +# The Dockerfile uses a dynamic BASE_IMAGE (for example ubuntu:18.04 +# nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 etc). +# On top of BASE_IMAGE we install all dependencies shared by all MXNet build +# environments into a "base" target. At the end of this file, we can specialize +# "base" for specific usecases. The target built by docker can be selected via +# "--target" option or docker-compose.yml +#################################################################################################### +ARG BASE_IMAGE +FROM $BASE_IMAGE AS base + +WORKDIR /work/deps + +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ + apt-get install -y wget software-properties-common && \ + wget -qO - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | apt-key add - && \ + apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" && \ + apt-add-repository "deb https://apt.repos.intel.com/mkl all main" && \ + apt-get update && \ + apt-get install -y \ + ## Utilities + curl \ + unzip \ + pandoc \ + ## Development tools + build-essential \ + ninja-build \ + git \ + protobuf-compiler \ + libprotobuf-dev \ + clang-6.0 \ + clang-tidy-6.0 \ + python-yaml \ + clang-10 \ + g++ \ + g++-8 \ + intel-mkl-2020.0-088 \ + ## Dependencies + libgomp1 \ + libturbojpeg0-dev \ + libopenblas-dev \ + libcurl4-openssl-dev \ + libatlas-base-dev \ + libzmq3-dev \ + liblapack-dev \ + libopencv-dev \ + # Caffe + caffe-cpu \ + libcaffe-cpu-dev \ + ## Frontend languages + # Python + python3 \ + python3-pip \ + python3-nose \ + python3-nose-timer \ + # Scala + openjdk-8-jdk \ + openjdk-8-jre \ + maven \ + scala \ + # Clojure + clojure \ + leiningen \ + # R + r-base-core \
r-cran-devtools \ + libcairo2-dev \ + libxml2-dev \ + ## Documentation + doxygen \ + pandoc \ + ## Build-dependencies for ccache 3.7.9 + gperf \ + libb2-dev \ + libzstd-dev && \ + rm -rf /var/lib/apt/lists/* + +# ccache 3.7.9 has fixes for caching nvcc outputs +RUN cd /usr/local/src && \ + git clone --recursive https://github.com/ccache/ccache.git && \ + cd ccache && \ + git checkout v3.7.9 && \ + ./autogen.sh && \ + ./configure --disable-man && \ + make -j$(nproc) && \ + make install && \ + cd /usr/local/src && \ + rm -rf ccache + +# Python & cmake +COPY install/requirements /work/ +RUN python3 -m pip install cmake==3.16.6 && \ + python3 -m pip install -r /work/requirements + +# Only OpenJDK 8 supported at this time.. +RUN update-java-alternatives -s java-1.8.0-openjdk-amd64 + +# julia not available on 18.04 +COPY install/ubuntu_julia.sh /work/ +RUN /work/ubuntu_julia.sh + +# PDL::CCS missing on 18.04 +COPY install/ubuntu_perl.sh /work/ +RUN /work/ubuntu_perl.sh + +# MXNetJS nightly needs emscripten for wasm +COPY install/ubuntu_emscripten.sh /work/ +RUN /work/ubuntu_emscripten.sh + +ARG USER_ID=0 +COPY install/docker_filepermissions.sh /work/ +RUN /work/docker_filepermissions.sh + +ENV PYTHONPATH=./python/ +WORKDIR /work/mxnet + +COPY runtime_functions.sh /work/ + +#################################################################################################### +# Specialize base image to install more gpu specific dependencies. +# The target built by docker can be selected via "--target" option or docker-compose.yml +#################################################################################################### +FROM base as gpu +# Install Thrust 1.9.8 to be shipped with Cuda 11.
+# Fixes https://github.com/thrust/thrust/issues/1072 for Clang 10 +# This file can be deleted when using Cuda 11 on CI +RUN cd /usr/local && \ + git clone https://github.com/thrust/thrust.git && \ + cd thrust && \ + git checkout 1.9.8 + + +FROM gpu as gpuwithcudaruntimelibs +# Special case because the CPP-Package requires the CUDA runtime libs +# and not only stubs (which are provided by the base image) +# This prevents usage of this image for actual GPU tests with Docker. +# This is a bug in CPP-Package and should be fixed. +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cuda-10-1 && \ + rm -rf /var/lib/apt/lists/* diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml new file mode 100644 index 000000000000..ca00f9ff86bf --- /dev/null +++ b/ci/docker/docker-compose.yml @@ -0,0 +1,208 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# We use the cache_from feature introduced in file form version 3.4 (released 2017-11-01) +version: "3.4" + +# For simplicity, only the centos7_cpu is commented. But the comments apply to +# all other services as well.
+services: + ################################################################################################### + # Dockerfile.build.centos7 based images used for building on CentOS7. On + # CentOS7, we respectively test the oldest supported toolchain and dependency + # versions + ################################################################################################### + centos7_cpu: + # The resulting image will be named build.centos7_cpu:latest and will be + # pushed to the dockerhub user specified in the environment variable + # ${DOCKER_CACHE_REGISTRY} (typically "mxnetci") under this name + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + # Use "base" target declared in Dockerfile.build.centos7 as "build.centos7_cpu:latest" + target: base + args: + # BASE_IMAGE is used to dynamically specify the FROM image in Dockerfile.build.centos7 + BASE_IMAGE: centos:7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest + centos7_gpu_cu92: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest + centos7_gpu_cu100: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.0-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest + centos7_gpu_cu101: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest + build: + context: .
+ dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest + centos7_gpu_cu102: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.2-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest + ################################################################################################### + # Dockerfile.build.ubuntu based images. On Ubuntu we test more recent + # toolchain and dependency versions compared to CentOS7. We attempt to update + # the Ubuntu base image every 6 months, following the Ubuntu release cycle, + # and testing the dependencies in their version provided by the respective + # Ubuntu release. + ################################################################################################### + ubuntu_cpu: + image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest + build: + context: . + dockerfile: Dockerfile.build.ubuntu + target: base + args: + BASE_IMAGE: ubuntu:18.04 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest + ubuntu_gpu_cu101: + image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest + build: + context: . + dockerfile: Dockerfile.build.ubuntu + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest + ubuntu_build_cuda: + image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest + build: + context: . 
+ dockerfile: Dockerfile.build.ubuntu + target: gpuwithcudaruntimelibs + args: + BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest + ################################################################################################### + # Dockerfile.build.arm based images used for testing cross-compilation for plain ARM + ################################################################################################### + armv6: + image: ${DOCKER_CACHE_REGISTRY}/build.armv6:latest + build: + context: . + dockerfile: Dockerfile.build.arm + target: armv6 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.armv6:latest + armv7: + image: ${DOCKER_CACHE_REGISTRY}/build.armv7:latest + build: + context: . + dockerfile: Dockerfile.build.arm + target: armv7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.armv7:latest + armv8: + image: ${DOCKER_CACHE_REGISTRY}/build.armv8:latest + build: + context: . + dockerfile: Dockerfile.build.arm + target: armv8 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.armv8:latest + ################################################################################################### + # Dockerfile.test.arm based images for testing ARM artefacts via QEMU + ################################################################################################### + test.armv7: + image: ${DOCKER_CACHE_REGISTRY}/test.armv7:latest + build: + context: . + dockerfile: Dockerfile.test.arm + args: + BASE_IMAGE: arm32v7/ubuntu:20.04 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/test.armv7:latest + test.armv8: + image: ${DOCKER_CACHE_REGISTRY}/test.armv8:latest + build: + context: .
+ dockerfile: Dockerfile.test.arm + args: + BASE_IMAGE: arm64v8/ubuntu:20.04 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/test.armv8:latest + ################################################################################################### + # Dockerfile.build.android based images used for testing cross-compilation for Android + ################################################################################################### + android_armv7: + image: ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest + build: + context: . + dockerfile: Dockerfile.build.android + target: armv7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest + android_armv8: + image: ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest + build: + context: . + dockerfile: Dockerfile.build.android + target: armv8 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest + ################################################################################################### + # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems. + ################################################################################################### + publish.test.centos7_cpu: + image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest + build: + context: . + dockerfile: Dockerfile.publish.test.centos7 + args: + BASE_IMAGE: centos:7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest + publish.test.centos7_gpu: + image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest + build: + context: . 
+ dockerfile: Dockerfile.publish.test.centos7 + args: + BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 98c774b284ec..c34223bef288 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -143,7 +143,7 @@ def compile_unix_int64_cpu() { def compile_unix_int64_gpu() { return ['GPU: USE_INT64_TENSOR_SIZE': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/build-gpu-int64') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() @@ -750,7 +750,7 @@ def test_unix_python3_mkl_cpu() { def test_unix_python3_gpu() { return ['Python3: GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-python3-gpu') { try { utils.unpack_and_init('gpu', mx_lib_cython) @@ -866,7 +866,7 @@ def test_unix_python3_mkldnn_mkl_cpu() { def test_unix_python3_mkldnn_gpu() { return ['Python3: MKLDNN-GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-python3-mkldnn-gpu') { try { utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib) @@ -882,7 +882,7 @@ def test_unix_python3_mkldnn_gpu() { def test_unix_python3_mkldnn_nocudnn_gpu() { return ['Python3: MKLDNN-GPU-NOCUDNN': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-python3-mkldnn-gpu-nocudnn') { try { utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib) @@ -916,7 +916,7 @@ def test_unix_python3_tensorrt_gpu() { def test_unix_python3_integration_gpu() { return ['Python Integration GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) @@ -928,24 +928,9 @@ def test_unix_python3_integration_gpu() { }] } -def test_unix_caffe_gpu() { - return ['Caffe GPU': { - node(NODE_LINUX_GPU) { - ws('workspace/it-caffe') { - timeout(time: max_time, unit: 
'MINUTES') { - utils.init_git() - utils.unpack_lib('gpu', mx_lib) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_caffe', true) - utils.publish_test_coverage() - } - } - } - }] -} - def test_unix_cpp_package_gpu() { - return ['cpp-package GPU': { - node(NODE_LINUX_GPU) { + return ['cpp-package GPU Makefile': { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib_cpp_examples) @@ -958,8 +943,8 @@ def test_unix_cpp_package_gpu() { } def test_unix_capi_cpp_package() { - return ['capi-cpp-package GPU': { - node(NODE_LINUX_GPU) { + return ['capi-cpp-package GPU Makefile': { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-capi-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_mkldnn_cpp_test', mx_lib_cpp_capi) @@ -1000,8 +985,8 @@ def test_unix_scala_mkldnn_cpu(){ } def test_unix_scala_gpu() { - return ['Scala: GPU': { - node(NODE_LINUX_GPU) { + return ['Scala: GPU Makefile': { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-scala-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) @@ -1084,7 +1069,7 @@ def test_unix_perl_cpu() { def test_unix_cpp_gpu() { return ['Cpp: GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-cpp-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('cmake_gpu', mx_cmake_lib) @@ -1125,8 +1110,8 @@ def test_unix_cpp_cpu() { } def test_unix_perl_gpu() { - return ['Perl: GPU': { - node(NODE_LINUX_GPU) { + return ['Perl: GPU Makefile': { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-perl-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) @@ -1140,7 +1125,7 @@ def test_unix_perl_gpu() { def test_unix_r_gpu() { return ['R: GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-r-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) @@ -1208,7 
+1193,7 @@ def test_unix_distributed_kvstore_cpu() { def test_unix_distributed_kvstore_gpu() { return ['dist-kvstore tests GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-dist-kvstore') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index f8c28d5d1994..a9feae158311 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -29,7 +29,7 @@ node('utility') { utils = load('ci/Jenkinsfile_utils.groovy') custom_steps = load('ci/jenkins/Jenkins_steps.groovy') } -utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3') +utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4') utils.main_wrapper( core_logic: {