From bcfa364ca773f0919033d0a49a5f5b6c336d5ab5 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Wed, 8 Apr 2020 21:59:50 +0000 Subject: [PATCH 01/14] Update edge toolchain --- CMakeLists.txt | 2 +- ci/build.py | 11 +- ci/docker/Dockerfile.build.android_armv7 | 94 +++----- ci/docker/Dockerfile.build.android_armv8 | 92 +++----- ci/docker/Dockerfile.build.armv6 | 45 ++-- ci/docker/Dockerfile.build.armv7 | 54 +++-- ci/docker/Dockerfile.build.armv8 | 56 +++-- ci/docker/Dockerfile.build.jetson | 96 ++++---- ci/docker/install/android_armv7_openblas.sh | 31 --- ci/docker/install/android_ndk.sh | 38 --- ci/docker/install/arm64_openblas.sh | 35 --- ci/docker/install/ubuntu_arm.sh | 28 --- ci/docker/runtime_functions.sh | 86 ++++--- .../aarch64-linux-gnu-toolchain.cmake} | 22 +- .../arm-linux-gnueabihf-toolchain.cmake} | 23 +- cmake/upstream/FindCUDAToolkit.cmake | 205 ++++++++++++----- make/crosscompile.jetson.mk | 216 ------------------ src/operator/random/shuffle_op.cc | 6 +- 18 files changed, 435 insertions(+), 705 deletions(-) delete mode 100755 ci/docker/install/android_armv7_openblas.sh delete mode 100755 ci/docker/install/android_ndk.sh delete mode 100755 ci/docker/install/arm64_openblas.sh delete mode 100755 ci/docker/install/ubuntu_arm.sh rename ci/docker/{install/arm_openblas.sh => toolchains/aarch64-linux-gnu-toolchain.cmake} (64%) mode change 100755 => 100644 rename ci/docker/{install/android_arm64_openblas.sh => toolchains/arm-linux-gnueabihf-toolchain.cmake} (65%) mode change 100755 => 100644 delete mode 100644 make/crosscompile.jetson.mk diff --git a/CMakeLists.txt b/CMakeLists.txt index 437d01668246..e630730115a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,7 +202,7 @@ endif(MSVC) if(NOT mxnet_LINKER_LIBS) set(mxnet_LINKER_LIBS "") -endif(NOT mxnet_LINKER_LIBS) +endif() if(USE_GPROF) message(STATUS "Using GPROF") diff --git a/ci/build.py b/ci/build.py index a21ec44942a8..cbc41218f042 100755 --- a/ci/build.py +++ b/ci/build.py @@ -70,7 +70,8 @@ 
def get_docker_binary(use_nvidia_docker: bool) -> str: return "nvidia-docker" if use_nvidia_docker else "docker" -def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, no_cache: bool) -> str: +def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, no_cache: bool, + cache_intermediate: bool) -> str: """ Build a container for the given platform :param platform: Platform @@ -104,6 +105,8 @@ def build_docker(platform: str, docker_binary: str, registry: str, num_retries: "--build-arg", "GROUP_ID={}".format(os.getgid())] if no_cache: cmd.append("--no-cache") + if cache_intermediate: + cmd.append("--rm=false") elif registry: cmd.extend(["--cache-from", tag]) cmd.extend(["-t", tag, get_dockerfiles_path()]) @@ -330,6 +333,9 @@ def main() -> int: parser.add_argument("--no-cache", action="store_true", help="passes --no-cache to docker build") + parser.add_argument("--cache-intermediate", action="store_true", + help="passes --rm=false to docker build") + parser.add_argument("-e", "--environment", nargs="*", default=[], help="Environment variables for the docker container. 
" "Specify with a list containing either names or name=value") @@ -361,7 +367,8 @@ def main() -> int: load_docker_cache(tag=tag, docker_registry=args.docker_registry) if not args.run_only: build_docker(platform=platform, docker_binary=docker_binary, registry=args.docker_registry, - num_retries=args.docker_build_retries, no_cache=args.no_cache) + num_retries=args.docker_build_retries, no_cache=args.no_cache, + cache_intermediate=args.cache_intermediate) else: logging.info("Skipping docker build step.") diff --git a/ci/docker/Dockerfile.build.android_armv7 b/ci/docker/Dockerfile.build.android_armv7 index 2c923a015b63..96ca04e9f5e6 100644 --- a/ci/docker/Dockerfile.build.android_armv7 +++ b/ci/docker/Dockerfile.build.android_armv7 @@ -18,62 +18,41 @@ # # Dockerfile to build MXNet for Android ARMv7 -FROM dockcross/base -MAINTAINER Pedro Larroy "pllarroy@amazon.com" - -# The cross-compiling emulator -RUN apt-get update && apt-get install -y \ - unzip - -ENV CROSS_TRIPLE=arm-linux-androideabi -ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} -ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ - AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ - CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ - CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ - CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ - LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld - -ENV ANDROID_NDK_REVISION 17b -ENV ANDROID_NDK_API 27 -ENV ANDROID_NDK_ARCH arm -WORKDIR /work/deps -COPY install/android_ndk.sh /work/deps -RUN /work/deps/android_ndk.sh - -ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm - -# Build-time metadata as defined at http://label-schema.org -ARG BUILD_DATE -ARG IMAGE -ARG VCS_REF -ARG VCS_URL -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.name=$IMAGE \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL \ - org.label-schema.schema-version="1.0" - - -ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang -ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ - -WORKDIR /work/deps - -COPY 
install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh -WORKDIR /work -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -COPY install/android_armv7_openblas.sh /work/deps -RUN /work/deps/android_armv7_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -WORKDIR /work +FROM ubuntu:20.04 + +ENV ARCH=armv7l \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV7 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -o android-ndk-r19-linux-x86_64.zip -L https://dl.google.com/android/repository/android-ndk-r19-linux-x86_64.zip && \ + unzip android-ndk-r19-linux-x86_64.zip && \ + rm android-ndk-r19-linux-x86_64.zip +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/android-ndk-r19/build/cmake/android.toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + mkdir /usr/local/openblas-android && \ + cd /usr/local/OpenBLAS && \ + export TOOLCHAIN=/usr/local/android-ndk-r19/toolchains/llvm/prebuilt/linux-x86_64 && \ + make NOFORTRAN=1 ARM_SOFTFP_ABI=1 \ + LDFLAGS="-L/usr/local/android-ndk-r19/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/lib/gcc/arm-linux-androideabi/4.9.x -lm" \ + CC=$TOOLCHAIN/bin/armv7a-linux-androideabi16-clang AR=$TOOLCHAIN/bin/arm-linux-androideabi-ar && \ + make PREFIX=/usr/local/openblas-android install && \ + cd /usr/local && \ + rm -rf OpenBLAS +ENV OpenBLAS_HOME=/usr/local/openblas-android ARG USER_ID=0 ARG GROUP_ID=0 @@ -81,5 +60,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ -WORKDIR /work/mxnet - +WORKDIR /work/build diff --git a/ci/docker/Dockerfile.build.android_armv8 b/ci/docker/Dockerfile.build.android_armv8 index ca62288129bb..81adc80edf14 100644 --- 
a/ci/docker/Dockerfile.build.android_armv8 +++ b/ci/docker/Dockerfile.build.android_armv8 @@ -18,62 +18,41 @@ # # Dockerfile to build MXNet for Android ARM64/ARMv8 -FROM dockcross/base -MAINTAINER Pedro Larroy "pllarroy@amazon.com" - -RUN apt-get update && apt-get install -y \ - unzip - -WORKDIR /work/deps - -# Build x86 dependencies. -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh - -# Setup Android cross-compilation environment. -ENV CROSS_TRIPLE=aarch64-linux-android -ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} -ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ - AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ - CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ - CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ - CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ - LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld - - -ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm - -# Build-time metadata as defined at http://label-schema.org -ARG BUILD_DATE -ARG IMAGE -ARG VCS_REF -ARG VCS_URL -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.name=$IMAGE \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL \ - org.label-schema.schema-version="1.0" - -ENV ARCH aarch64 -ENV ANDROID_NDK_REVISION 17b -ENV ANDROID_NDK_API 27 -ENV ANDROID_NDK_ARCH arm64 -WORKDIR /work/deps -COPY install/android_ndk.sh /work/deps -RUN /work/deps/android_ndk.sh - - -WORKDIR /work/deps -COPY install/android_ndk.sh /work/ -RUN /work/android_ndk.sh - -ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang -ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ - -# Build ARM dependencies. 
-COPY install/android_arm64_openblas.sh /work/ -RUN /work/android_arm64_openblas.sh -ENV CPLUS_INCLUDE_PATH /work/deps/OpenBLAS +FROM ubuntu:20.04 + +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV8 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -o android-ndk-r19-linux-x86_64.zip -L https://dl.google.com/android/repository/android-ndk-r19-linux-x86_64.zip && \ + unzip android-ndk-r19-linux-x86_64.zip && \ + rm android-ndk-r19-linux-x86_64.zip +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/android-ndk-r19/build/cmake/android.toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + mkdir /usr/local/openblas-android && \ + cd /usr/local/OpenBLAS && \ + export TOOLCHAIN=/usr/local/android-ndk-r19/toolchains/llvm/prebuilt/linux-x86_64 && \ + make NOFORTRAN=1 \ + LDFLAGS="-L/usr/local/android-ndk-r19/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x -lm" \ + CC=$TOOLCHAIN/bin/aarch64-linux-android21-clang AR=$TOOLCHAIN/bin/aarch64-linux-android-ar && \ + make PREFIX=/usr/local/openblas-android install && \ + cd /usr/local && \ + rm -rf OpenBLAS +ENV OpenBLAS_HOME=/usr/local/openblas-android ARG USER_ID=0 ARG GROUP_ID=0 @@ -81,5 +60,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ - WORKDIR /work/build diff --git a/ci/docker/Dockerfile.build.armv6 b/ci/docker/Dockerfile.build.armv6 index e6a7ffe758b9..02e16da11616 100644 --- a/ci/docker/Dockerfile.build.armv6 +++ b/ci/docker/Dockerfile.build.armv6 @@ -18,25 +18,42 @@ # # Dockerfile to build MXNet for ARMv6 -FROM dockcross/linux-armv6 +FROM ubuntu:20.04 -ENV ARCH armv6l -ENV HOSTCC gcc -ENV TARGET ARMV6 +ENV ARCH=armv6l \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV6 -WORKDIR /work/deps 
+WORKDIR /usr/local -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + zip \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh +# We use a toolchain from toolchains.bootlin.com instead of Debian / Ubuntu +# crossbuild-essential-armel toolchain, as the latter targets ARM architecture +# versions 4T, 5T, and 6, whereas we only wish to target ARMV6 and like to use +# ARMV6 specific features. https://wiki.debian.org/ArmEabiPort +RUN curl -o armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 -L https://toolchains.bootlin.com/downloads/releases/toolchains/armv6-eabihf/tarballs/armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 && \ + tar xf armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 && \ + rm armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/share/buildroot/toolchainfile.cmake -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/bin/arm-linux-gcc && \ + make PREFIX=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/arm-buildroot-linux-gnueabihf/sysroot install && \ + cd /usr/local && \ + rm -rf OpenBLAS ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/Dockerfile.build.armv7 b/ci/docker/Dockerfile.build.armv7 index bad9ab214050..a9cc6d1e83a4 100644 --- a/ci/docker/Dockerfile.build.armv7 +++ b/ci/docker/Dockerfile.build.armv7 @@ -16,27 +16,39 @@ # specific language governing permissions and limitations # under the License. 
# -# Dockerfile to build MXNet for Android ARMv7 - -FROM dockcross/linux-armv7 - -ENV ARCH armv7l -ENV HOSTCC gcc -ENV TARGET ARMV7 - -WORKDIR /work/deps - -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh +# Dockerfile to build MXNet for ARMv7 + +FROM ubuntu:20.04 + +ENV ARCH=armv7l \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV7 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + zip \ + python3 \ + python3-pip \ + crossbuild-essential-armhf \ + && rm -rf /var/lib/apt/lists/* + +COPY toolchains/arm-linux-gnueabihf-toolchain.cmake /usr/local +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/arm-linux-gnueabihf-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=arm-linux-gnueabihf-gcc && \ + make PREFIX=/usr/local/arm-linux-gnueabihf install && \ + cd /usr/local && \ + rm -rf OpenBLAS ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/Dockerfile.build.armv8 b/ci/docker/Dockerfile.build.armv8 index bd2373180f0b..adf6873fb40c 100644 --- a/ci/docker/Dockerfile.build.armv8 +++ b/ci/docker/Dockerfile.build.armv8 @@ -18,29 +18,37 @@ # # Dockerfile to build MXNet for ARM64/ARMv8 -FROM dockcross/linux-arm64 - -ENV ARCH aarch64 -ENV HOSTCC gcc -ENV TARGET ARMV8 - -WORKDIR /work/deps - -# gh issue #11567 https://github.com/apache/incubator-mxnet/issues/11567 -#RUN sed -i '\#deb http://cdn-fastly.deb.debian.org/debian-security jessie/updates main#d' /etc/apt/sources.list -#RUN sed -i 's/cdn-fastly.//' /etc/apt/sources.list - -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh 
- -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh +FROM ubuntu:20.04 + +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV8 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + zip \ + python3 \ + python3-pip \ + crossbuild-essential-arm64 \ + && rm -rf /var/lib/apt/lists/* + +COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr +ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \ + make PREFIX=/usr/aarch64-linux-gnu install && \ + cd /usr/local && \ + rm -rf OpenBLAS ARG USER_ID=0 ARG GROUP_ID=0 @@ -48,4 +56,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ -WORKDIR /work/build +WORKDIR /work/mxnet diff --git a/ci/docker/Dockerfile.build.jetson b/ci/docker/Dockerfile.build.jetson index e31ee43a93d8..93fe5e0a5b0d 100644 --- a/ci/docker/Dockerfile.build.jetson +++ b/ci/docker/Dockerfile.build.jetson @@ -20,68 +20,58 @@ # This script assumes /work/mxnet exists and contains the mxnet code you wish to compile and # that /work/build exists and is the target for your output. 
-FROM nvidia/cuda:9.0-cudnn7-devel as cudabuilder +FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -FROM dockcross/linux-arm64 +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + TARGET=ARMV8 -ENV ARCH aarch64 -ENV HOSTCC gcc -ENV TARGET ARMV8 +WORKDIR /usr/local -# gh issue #11567 https://github.com/apache/incubator-mxnet/issues/11567 -#RUN sed -i '\#deb http://cdn-fastly.deb.debian.org/debian-security jessie/updates main#d' /etc/apt/sources.list -#RUN sed -i 's/cdn-fastly.//' /etc/apt/sources.list +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + git \ + curl \ + zip \ + unzip \ + python3 \ + python3-pip \ + awscli \ + crossbuild-essential-arm64 \ + && rm -rf /var/lib/apt/lists/* +# cmake on Ubuntu 18.04 is too old +RUN python3 -m pip install cmake -WORKDIR /work/deps - -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - +# ccache on Ubuntu 18.04 is too old to support Cuda correctly COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh -# Setup CUDA build env (including configuring and copying nvcc) -COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda -ENV TARGET_ARCH aarch64 -ENV TARGET_OS linux +COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr +ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \ + make PREFIX=/usr/aarch64-linux-gnu install && \ + cd /usr/local && \ + rm -rf OpenBLAS -# Install ARM depedencies based on Jetpack 3.3 -RUN JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.3/lw.xd42/JetPackL4T_33_b39 && \ - CUDA_REPO_PREFIX=/var/cuda-repo-9-0-local && \ - 
ARM_CUDA_INSTALLER_PACKAGE=cuda-repo-l4t-9-0-local_9.0.252-1_arm64.deb && \ - ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.1.5.14-1+cuda9.0_arm64.deb && \ - ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.1.5.14-1+cuda9.0_arm64.deb && \ - ARM_LICENSE_INSTALLER=cuda-license-9-0_9.0.252-1_arm64.deb && \ - ARM_CUBLAS_INSTALLER=cuda-cublas-9-0_9.0.252-1_arm64.deb && \ - ARM_NVINFER_INSTALLER_PACKAGE=libnvinfer4_4.1.3-1+cuda9.0_arm64.deb && \ - ARM_NVINFER_DEV_INSTALLER_PACKAGE=libnvinfer-dev_4.1.3-1+cuda9.0_arm64.deb && \ - dpkg --add-architecture arm64 && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDA_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_DEV_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_DEV_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_CUDA_INSTALLER_PACKAGE && \ - apt-key add $CUDA_REPO_PREFIX/7fa2af80.pub && \ - dpkg -i --force-architecture $ARM_CUDNN_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_CUDNN_DEV_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_LICENSE_INSTALLER && \ - dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_CUBLAS_INSTALLER && \ - dpkg -i --force-architecture $ARM_NVINFER_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \ - apt update -y || true && apt install -y cuda-libraries-dev-9-0 libcudnn7-dev libnvinfer-dev -RUN ln -s /usr/include/aarch64-linux-gnu/cudnn_v7.h /usr/include/aarch64-linux-gnu/cudnn.h -ENV PATH $PATH:/usr/local/cuda/bin -ENV NVCCFLAGS "-m64" -ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62" -ENV NVCC /usr/local/cuda/bin/nvcc +# Install aarch64 cross dependencies based on Jetpack 4.3 +# Manually downloaded using SDK Manager tool and placed in a private S3 bucket. 
+# We're not allowed to redistribute these files and there is no public version. +RUN aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb . && \ + dpkg -i cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \ + rm cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \ + apt-key add /var/cuda-repo-10-0-local-10.0.326-410.108/7fa2af80.pub && \ + aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb . && \ + dpkg -i cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \ + rm cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y -f && \ + apt-get install -y cuda-cross-aarch64 cuda-cross-aarch64-10-0 && \ + rm -rf /var/lib/apt/lists/* ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/install/android_armv7_openblas.sh b/ci/docker/install/android_armv7_openblas.sh deleted file mode 100755 index 55c098909654..000000000000 --- a/ci/docker/install/android_armv7_openblas.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make TARGET=ARMV7 HOSTCC=gcc NOFORTRAN=1 ARM_SOFTFP_ABI=1 -j$(nproc) libs -#make PREFIX=${CROSS_ROOT} TARGET=ARMV7 HOSTCC=gcc NOFORTRAN=1 ARM_SOFTFP_ABI=1 install -cp *.h ${CROSS_ROOT}/include -cp libopenblas*.a ${CROSS_ROOT}/lib -popd diff --git a/ci/docker/install/android_ndk.sh b/ci/docker/install/android_ndk.sh deleted file mode 100755 index cb83aa65639a..000000000000 --- a/ci/docker/install/android_ndk.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . 
-# This environment variable comes from the docker file -echo "Downloading android SDK rev ${ANDROID_NDK_REVISION}" -curl -O https://dl.google.com/android/repository/android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ -unzip ./android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ -cd android-ndk-r${ANDROID_NDK_REVISION} && \ -./build/tools/make_standalone_toolchain.py \ - --stl=libc++ \ - --arch ${ANDROID_NDK_ARCH}\ - --api ${ANDROID_NDK_API}\ - --install-dir=${CROSS_ROOT} && \ - -find ${CROSS_ROOT} -exec chmod a+r '{}' \; && \ -find ${CROSS_ROOT} -executable -exec chmod a+x '{}' \; -popd diff --git a/ci/docker/install/arm64_openblas.sh b/ci/docker/install/arm64_openblas.sh deleted file mode 100755 index 88f2e98cd65b..000000000000 --- a/ci/docker/install/arm64_openblas.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . 
-wget -nv https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master -O openblas_version.json -echo "Using openblas:" -cat openblas_version.json -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make -j$(nproc) TARGET=ARMV8 -make install -ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/libopenblas.so -ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/libopenblas.a -ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a -popd diff --git a/ci/docker/install/ubuntu_arm.sh b/ci/docker/install/ubuntu_arm.sh deleted file mode 100755 index 608d0362f138..000000000000 --- a/ci/docker/install/ubuntu_arm.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -apt update || true -apt install -y \ - unzip \ - python3 \ - python3-pip - -pip3 install setuptools diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 0c7630f24015..ae55c12fb5ac 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -248,15 +248,22 @@ build_dynamic_libmxnet() { build_jetson() { set -ex - pushd . 
- - #build_ccache_wrappers - - cp make/crosscompile.jetson.mk ./config.mk - make -j$(nproc) - - build_wheel /work/mxnet/python /work/mxnet/lib - popd + cd /work/build + cmake \ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DUSE_CUDA=ON \ + -DMXNET_CUDA_ARCH="5.2" \ + -DENABLE_CUDA_RTC=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=ON \ + -DUSE_LAPACK=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_MKL_IF_AVAILABLE=OFF \ + -G Ninja /work/mxnet + ninja + build_wheel } # @@ -286,7 +293,7 @@ build_armv6() { -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_LAPACK=OFF \ -DBUILD_CPP_EXAMPLES=OFF \ - -Dmxnet_LINKER_LIBS=-lgfortran \ + -Dmxnet_LINKER_LIBS=-latomic \ -G Ninja /work/mxnet ninja @@ -316,7 +323,6 @@ build_armv7() { -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_LAPACK=OFF \ -DBUILD_CPP_EXAMPLES=OFF \ - -Dmxnet_LINKER_LIBS=-lgfortran \ -G Ninja /work/mxnet ninja @@ -327,14 +333,15 @@ build_armv7() { build_armv8() { build_ccache_wrappers cmake \ - -DUSE_CUDA=OFF\ - -DSUPPORT_F16C=OFF\ - -DUSE_OPENCV=OFF\ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DUSE_CUDA=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_OPENCV=OFF \ -DUSE_OPENMP=ON \ - -DUSE_LAPACK=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DCMAKE_BUILD_TYPE=Release\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DUSE_LAPACK=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja build_wheel @@ -350,16 +357,18 @@ build_android_armv7() { cd /work/build build_ccache_wrappers cmake \ - -DANDROID=ON\ - -DUSE_CUDA=OFF\ - -DUSE_SSE=OFF\ - -DSUPPORT_F16C=OFF\ - -DUSE_LAPACK=OFF\ - -DUSE_OPENCV=OFF\ - -DUSE_OPENMP=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DCMAKE_BUILD_TYPE=RelWithDebInfo\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DANDROID_ABI="armeabi-v7a" \ + -DANDROID_STL="c++_shared" \ + -DANDROID=ON \ + -DUSE_CUDA=OFF \ + -DUSE_SSE=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_LAPACK=OFF \ + -DUSE_OPENCV=OFF \ + 
-DUSE_OPENMP=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja } @@ -367,17 +376,18 @@ build_android_armv7() { build_android_armv8() { set -ex cd /work/build - build_ccache_wrappers - cmake\ + cmake \ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DANDROID_ABI="arm64-v8a" \ + -DANDROID_STL="c++_shared" \ -DANDROID=ON \ - -DUSE_CUDA=OFF\ - -DUSE_SSE=OFF\ - -DUSE_LAPACK=OFF\ - -DUSE_OPENCV=OFF\ - -DUSE_OPENMP=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DCMAKE_BUILD_TYPE=RelWithDebInfo\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DUSE_CUDA=OFF \ + -DUSE_SSE=OFF \ + -DUSE_LAPACK=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja } diff --git a/ci/docker/install/arm_openblas.sh b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake old mode 100755 new mode 100644 similarity index 64% rename from ci/docker/install/arm_openblas.sh rename to ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake index fa2e5cae9cba..3780415c4b15 --- a/ci/docker/install/arm_openblas.sh +++ b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,14 +15,14 @@ # specific language governing permissions and limitations # under the License. -set -ex - -git clone --recursive -b v0.2.20 https://github.com/xianyi/OpenBLAS.git - -cd OpenBLAS -make -j$(nproc) -PREFIX=${CROSS_ROOT} make install - -cd .. 
+set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR "aarch64") +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) +set(CMAKE_CUDA_HOST_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_FIND_ROOT_PATH "/usr/aarch64-linux-gnu") -rm -rf OpenBLAS +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/ci/docker/install/android_arm64_openblas.sh b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake old mode 100755 new mode 100644 similarity index 65% rename from ci/docker/install/android_arm64_openblas.sh rename to ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake index 1c3014f6cca9..62038ecee16a --- a/ci/docker/install/android_arm64_openblas.sh +++ b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,16 +15,13 @@ # specific language governing permissions and limitations # under the License. -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR "armv7l") +set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc) +set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++) +set(CMAKE_FIND_ROOT_PATH "/usr/arm-linux-gnueabihf" "/usr/local/arm-linux-gnueabihf") -set -ex -pushd . 
-git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make -j$(nproc) TARGET=ARMV8 ARM_SOFTFP_ABI=1 HOSTCC=gcc NOFORTRAN=1 libs -# Can't be run (utility not compiled for the target platform) -#make install -cp *.h /usr/include -cp libopenblas.a /usr/local/lib -popd +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/cmake/upstream/FindCUDAToolkit.cmake b/cmake/upstream/FindCUDAToolkit.cmake index d37c44d9c782..fee4f3f4f698 100644 --- a/cmake/upstream/FindCUDAToolkit.cmake +++ b/cmake/upstream/FindCUDAToolkit.cmake @@ -132,6 +132,7 @@ of the following libraries that are part of the CUDAToolkit: - :ref:`cuRAND` - :ref:`cuSOLVER` - :ref:`cuSPARSE` +- :ref:`cuPTI` - :ref:`NPP` - :ref:`nvBLAS` - :ref:`nvGRAPH` @@ -149,7 +150,6 @@ CUDA Runtime Library The CUDA Runtime library (cudart) are what most applications will typically need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. -They are an explicit dependency of almost every library. Targets Created: @@ -230,6 +230,18 @@ Targets Created: - ``CUDA::cusparse`` - ``CUDA::cusparse_static`` +.. _`cuda_toolkit_cupti`: + +cupti +""""" + +The `NVIDIA CUDA Profiling Tools Interface `_. + +Targets Created: + +- ``CUDA::cupti`` +- ``CUDA::cupti_static`` + .. _`cuda_toolkit_NPP`: NPP @@ -361,8 +373,6 @@ Targets Created: - ``CUDA::nvml`` -.. _`cuda_toolkit_opencl`: - .. _`cuda_toolkit_nvToolsExt`: nvToolsExt @@ -375,6 +385,8 @@ Targets Created: - ``CUDA::nvToolsExt`` +.. _`cuda_toolkit_opencl`: + OpenCL """""" @@ -436,6 +448,11 @@ Result variables The path to the CUDA Toolkit library directory that contains the CUDA Runtime library ``cudart``. +``CUDAToolkit_TARGET_DIR`` + The path to the CUDA Toolkit directory including the target architecture + when cross-compiling. When not cross-compiling this will be equivalant to + ``CUDAToolkit_ROOT_DIR``. 
+ ``CUDAToolkit_NVCC_EXECUTABLE`` The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may **not** be the same as @@ -487,6 +504,7 @@ if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR) get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY) # use the already detected cuda compiler set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "") + mark_as_advanced(CUDAToolkit_BIN_DIR) unset(cuda_dir) endif() @@ -641,6 +659,7 @@ endif() if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE) get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE) + mark_as_advanced(CUDAToolkit_BIN_DIR) unset(cuda_dir) endif() @@ -669,8 +688,47 @@ endif() get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) -# Now that we have the real ROOT_DIR, find components inside it. -list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) +# Handle cross compilation +if(CMAKE_CROSSCOMPILING) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + # Support for arm cross compilation + set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + # Support for aarch64 cross compilation + if (ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi") + else() + set(CUDAToolkit_TARGET_NAME "aarch64-linux") + endif (ANDROID_ARCH_NAME STREQUAL "arm64") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CUDAToolkit_TARGET_NAME "x86_64-linux") + endif() + + if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + # add known CUDA target root path to the set of directories we search for programs, libraries and headers + list(PREPEND CMAKE_FIND_ROOT_PATH 
"${CUDAToolkit_TARGET_DIR}") + + # Mark that we need to pop the root search path changes after we have + # found all cuda libraries so that searches for our cross-compilation + # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or + # PATh + set(_CUDAToolkit_Pop_ROOT_PATH True) + endif() +else() + # Not cross compiling + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") + # Now that we have the real ROOT_DIR, find components inside it. + list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) + + # Mark that we need to pop the prefix path changes after we have + # found the cudart library. + set(_CUDAToolkit_Pop_Prefix True) +endif() + # Find the include/ directory find_path(CUDAToolkit_INCLUDE_DIR @@ -680,14 +738,17 @@ find_path(CUDAToolkit_INCLUDE_DIR # And find the CUDA Runtime Library libcudart find_library(CUDA_CUDART NAMES cudart - PATH_SUFFIXES lib64 lib/x64 + PATH_SUFFIXES lib64 lib64/stubs lib/x64 ) if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) message(STATUS "Unable to find cudart library.") endif() unset(CUDAToolkit_ROOT_DIR) -list(REMOVE_AT CMAKE_PREFIX_PATH -1) +if(_CUDAToolkit_Pop_Prefix) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + unset(_CUDAToolkit_Pop_Prefix) +endif() #----------------------------------------------------------------------------- # Perform version comparison and validate all required variables are set. 
@@ -702,6 +763,10 @@ find_package_handle_standard_args(CUDAToolkit VERSION_VAR CUDAToolkit_VERSION ) +mark_as_advanced(CUDA_CUDART + CUDAToolkit_INCLUDE_DIR + CUDAToolkit_NVCC_EXECUTABLE + ) #----------------------------------------------------------------------------- # Construct result variables @@ -714,78 +779,103 @@ endif() # Construct import targets if(CUDAToolkit_FOUND) - function(find_and_add_cuda_import_lib lib_name) + function(_CUDAToolkit_find_and_add_import_lib lib_name) + cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN}) - if(ARGC GREATER 1) - set(search_names ${ARGN}) - else() - set(search_names ${lib_name}) - endif() + set(search_names ${lib_name} ${arg_ALT}) find_library(CUDA_${lib_name}_LIBRARY NAMES ${search_names} - PATHS ${CUDAToolkit_LIBRARY_DIR} + HINTS ${CUDAToolkit_LIBRARY_DIR} ENV CUDA_PATH - PATH_SUFFIXES nvidia/current lib64 lib/x64 lib + PATH_SUFFIXES nvidia/current lib64 lib64/stubs lib/x64 lib lib/stubs stubs + ${arg_EXTRA_PATH_SUFFIXES} ) + mark_as_advanced(CUDA_${lib_name}_LIBRARY) - if (NOT CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) + if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) add_library(CUDA::${lib_name} IMPORTED INTERFACE) target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}") + foreach(dep ${arg_DEPS}) + if(TARGET CUDA::${dep}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) + endif() + endforeach() endif() endfunction() - function(add_cuda_link_dependency lib_name) - foreach(dependency IN LISTS ${ARGN}) - target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency}) - endforeach() - endfunction() + if(NOT TARGET CUDA::toolkit) + add_library(CUDA::toolkit IMPORTED INTERFACE) + target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_directories(CUDA::toolkit INTERFACE 
"${CUDAToolkit_LIBRARY_DIR}") + endif() - add_library(CUDA::toolkit IMPORTED INTERFACE) - target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") - target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda) + _CUDAToolkit_find_and_add_import_lib(cudart) + _CUDAToolkit_find_and_add_import_lib(cudart_static) - find_and_add_cuda_import_lib(cuda_driver cuda) + # setup dependencies that are required for cudart_static when building + # on linux. These are generally only required when using the CUDA toolkit + # when CUDA language is disabled + if(NOT TARGET CUDA::cudart_static_deps + AND TARGET CUDA::cudart_static) - find_and_add_cuda_import_lib(cudart) - find_and_add_cuda_import_lib(cudart_static) + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) + target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps) - foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph nvjpeg) - find_and_add_cuda_import_lib(${cuda_lib}) - add_cuda_link_dependency(${cuda_lib} cudart) + if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER)) + find_package(Threads REQUIRED) + target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + endif() - find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib}_static cudart_static) + if(UNIX AND NOT APPLE) + # On Linux, you must link against librt when using the static cuda runtime. 
+ find_library(CUDAToolkit_rt_LIBRARY rt) + mark_as_advanced(CUDAToolkit_rt_LIBRARY) + if(NOT CUDAToolkit_rt_LIBRARY) + message(WARNING "Could not find librt library, needed by CUDA::cudart_static") + else() + target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library + foreach (cuda_lib cublas cufft curand cusparse nppc nvjpeg) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos) endforeach() + # cuFFTW depends on cuFFT + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft) + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft_static) + # cuSOLVER depends on cuBLAS, and cuSPARSE - add_cuda_link_dependency(cusolver cublas cusparse) - add_cuda_link_dependency(cusolver_static cublas_static cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static cusparse_static culibos) # nvGRAPH depends on cuRAND, and cuSOLVER. - add_cuda_link_dependency(nvgraph curand cusolver) - add_cuda_link_dependency(nvgraph_static curand_static cusolver_static) - - find_and_add_cuda_import_lib(nppc) - find_and_add_cuda_import_lib(nppc_static) - - add_cuda_link_dependency(nppc cudart) - add_cuda_link_dependency(nppc_static cudart_static culibos) + _CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver) + _CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static) # Process the majority of the NPP libraries. 
foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) - find_and_add_cuda_import_lib(${cuda_lib}) - find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib} nppc) - add_cuda_link_dependency(${cuda_lib}_static nppc_static) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static) endforeach() - find_and_add_cuda_import_lib(nvrtc) - add_cuda_link_dependency(nvrtc cuda_driver) + _CUDAToolkit_find_and_add_import_lib(cupti + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/) + _CUDAToolkit_find_and_add_import_lib(cupti_static + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/) + + _CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver) - find_and_add_cuda_import_lib(nvml nvidia-ml nvml) + _CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml) if(WIN32) # nvtools can be installed outside the CUDA toolkit directory @@ -798,17 +888,12 @@ if(CUDAToolkit_FOUND) PATH_SUFFIXES lib/x64 lib ) endif() - find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64) + _CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64) - add_cuda_link_dependency(nvToolsExt cudart) - - find_and_add_cuda_import_lib(OpenCL) - - find_and_add_cuda_import_lib(culibos) - if(TARGET CUDA::culibos) - foreach (cuda_lib cublas cufft cusparse curand nvjpeg) - add_cuda_link_dependency(${cuda_lib}_static culibos) - endforeach() - endif() + _CUDAToolkit_find_and_add_import_lib(OpenCL) +endif() +if(_CUDAToolkit_Pop_ROOT_PATH) + list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0) + unset(_CUDAToolkit_Pop_ROOT_PATH) endif() diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk deleted file mode 100644 index 880e2cf5b466..000000000000 --- a/make/crosscompile.jetson.mk +++ /dev/null @@ -1,216 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#------------------------------------------------------------------------------- -# Template configuration for compiling mxnet -# -# If you want to change the configuration, please use the following -# steps. Assume you are on the root directory of mxnet. First copy the this -# file so that any local changes will be ignored by git -# -# $ cp make/config.mk . -# -# Next modify the according entries, and then compile by -# -# $ make -# -# or build in parallel with 8 threads -# -# $ make -j8 -#------------------------------------------------------------------------------- - -#--------------------- -# For cross compilation we only explictily set a compiler when one is not already present. 
-#-------------------- - -ifndef CC -export CC = gcc -endif -ifndef CXX -export CXX = g++ -endif -ifndef NVCC -export NVCC = nvcc -endif - -# whether compile with options for MXNet developer -DEV = 0 - -# whether compile with debug -DEBUG = 0 - -# whether to turn on segfault signal handler to log the stack trace -USE_SIGNAL_HANDLER = 1 - -# the additional link flags you want to add -ADD_LDFLAGS = -L${CROSS_ROOT}/lib -L/usr/lib/aarch64-linux-gnu/ - -# the additional compile flags you want to add -ADD_CFLAGS = -I${CROSS_ROOT}/include -I/usr/include/aarch64-linux-gnu/ - -#--------------------------------------------- -# matrix computation libraries for CPU/GPU -#--------------------------------------------- - -# whether use CUDA during compile -USE_CUDA = 1 - -# add the path to CUDA library to link and compile flag -# if you have already add them to environment variable, leave it as NONE -# USE_CUDA_PATH = /usr/local/cuda -USE_CUDA_PATH = /usr/local/cuda-9.0/targets/aarch64-linux - -# whether to enable CUDA runtime compilation -ENABLE_CUDA_RTC = 0 - -# whether use CuDNN R3 library -USE_CUDNN = 1 - -#whether to use NCCL library -USE_NCCL = 0 -#add the path to NCCL library -USE_NCCL_PATH = NONE - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 0 -# Add OpenCV include path, in which the directory `opencv2` exists -USE_OPENCV_INC_PATH = NONE -# Add OpenCV shared library path, in which the shared library exists -USE_OPENCV_LIB_PATH = NONE - -#whether use libjpeg-turbo for image decode without OpenCV wrapper -USE_LIBJPEG_TURBO = 0 -#add the path to libjpeg-turbo library -USE_LIBJPEG_TURBO_PATH = NONE - -# use openmp for parallelization -USE_OPENMP = 1 - -# whether use MKL-DNN library -USE_MKLDNN = 0 - -# whether use NNPACK library -USE_NNPACK = 0 - -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -UNAME_S 
:= $(shell uname -s) -USE_BLAS = openblas - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = - -# add path to intel library, you may need it for MKL, if you did not add the path -# to environment variable -USE_INTEL_PATH = NONE - -# If use MKL only for BLAS, choose static link automatically to allow python wrapper -ifeq ($(USE_BLAS), mkl) -USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE -endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -USE_SSE=0 - -# Turn off F16C instruction set support -USE_F16C=0 - -#---------------------------- -# distributed computing -#---------------------------- - -# whether or not to enable multi-machine supporting -USE_DIST_KVSTORE = 0 - -# whether or not allow to read and write HDFS directly. If yes, then hadoop is -# required -USE_HDFS = 0 - -# path to libjvm.so. required if USE_HDFS=1 -LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server - -# whether or not allow to read and write AWS S3 directly. 
If yes, then -# libcurl4-openssl-dev is required, it can be installed on Ubuntu by -# sudo apt-get install -y libcurl4-openssl-dev -USE_S3 = 0 - -#---------------------------- -# performance settings -#---------------------------- -# Use operator tuning -USE_OPERATOR_TUNING = 1 - -# Use gperftools if found -# Disable because of #8968 -USE_GPERFTOOLS = 0 - -# path to gperftools (tcmalloc) library in case of a non-standard installation -USE_GPERFTOOLS_PATH = - -# Use JEMalloc if found, and not using gperftools -USE_JEMALLOC = 1 - -# path to jemalloc library in case of a non-standard installation -USE_JEMALLOC_PATH = - -#---------------------------- -# additional operators -#---------------------------- - -# path to folders containing projects specific operators that you don't want to put in src/operators -EXTRA_OPERATORS = - -#---------------------------- -# other features -#---------------------------- - -# Create C++ interface package -USE_CPP_PACKAGE = 0 - -# Use int64_t type to represent the total number of elements in the tensor -# This will cause performance degradation reported in issue #14496 -# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647 -# Note: the size of each dimension is still bounded by INT32_MAX -USE_INT64_TENSOR_SIZE = 0 - -#---------------------------- -# plugins -#---------------------------- - -# whether to use caffe integration. This requires installing caffe. -# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH -# CAFFE_PATH = $(HOME)/caffe -# MXNET_PLUGINS += plugin/caffe/caffe.mk - -# WARPCTC_PATH = $(HOME)/warp-ctc -# MXNET_PLUGINS += plugin/warpctc/warpctc.mk - -# whether to use sframe integration. 
This requires build sframe -# git@github.com:dato-code/SFrame.git -# SFRAME_PATH = $(HOME)/SFrame -# MXNET_PLUGINS += plugin/sframe/plugin.mk diff --git a/src/operator/random/shuffle_op.cc b/src/operator/random/shuffle_op.cc index 0f64fbc51449..fed3215f965d 100644 --- a/src/operator/random/shuffle_op.cc +++ b/src/operator/random/shuffle_op.cc @@ -22,9 +22,9 @@ * \file shuffle_op.cc * \brief Operator to shuffle elements of an NDArray */ -#if !defined (__ANDROID__) && ((__GNUC__ > 4 &&\ - !defined(__clang__major__)) || (__clang_major__ > 4 && __linux__)) - #define USE_GNU_PARALLEL_SHUFFLE +#if ((__GNUC__ > 4 && !defined(__clang__major__)) || (__clang_major__ > 4 && __linux__)) && \ + defined(_OPENMP) && !defined(__ANDROID__) +#define USE_GNU_PARALLEL_SHUFFLE #endif #include From bd7f8cf191620bdda551aba9e80118c57b860d38 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Fri, 10 Apr 2020 19:01:06 +0000 Subject: [PATCH 02/14] Support platforms without rand_r --- .../multi_threaded_inference.cc | 5 +- src/operator/contrib/dgl_graph.cc | 73 +++++++++++++------ src/operator/nn/mkldnn/mkldnn_rnn.cc | 4 +- src/operator/rnn-inl.h | 20 +++-- src/operator/rnn.cc | 1 + src/operator/rnn_impl.h | 43 +++++------ tests/cpp/engine/threaded_engine_test.cc | 14 ++-- tests/cpp/thread_safety/thread_safety_test.cc | 16 ++-- 8 files changed, 113 insertions(+), 63 deletions(-) diff --git a/example/multi_threaded_inference/multi_threaded_inference.cc b/example/multi_threaded_inference/multi_threaded_inference.cc index e90d55307e53..8b1864feea93 100644 --- a/example/multi_threaded_inference/multi_threaded_inference.cc +++ b/example/multi_threaded_inference/multi_threaded_inference.cc @@ -34,6 +34,7 @@ #include #include #include "mxnet-cpp/MxNetCpp.h" +#include const float DEFAULT_MEAN = 117.0; @@ -248,7 +249,9 @@ void run_inference(const std::string& model_name, const std::vector distribution(0, 5); + int sleep_time = distribution(generator); 
std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); } int num_output = 0; diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 428899791a5d..89bee8abf655 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -24,6 +24,9 @@ #include #include #include +#include +#include + #include "../elemwise_op_common.h" #include "../../imperative/imperative_utils.h" #include "../subgraph_op_common.h" @@ -41,7 +44,9 @@ typedef int64_t dgl_id_t; */ class ArrayHeap { public: - explicit ArrayHeap(const std::vector& prob) { + explicit ArrayHeap(const std::vector& prob, unsigned int seed) { + generator_ = std::mt19937(seed); + distribution_ = std::uniform_real_distribution(0.0, 1.0); vec_size_ = prob.size(); bit_len_ = ceil(log2(vec_size_)); limit_ = 1 << bit_len_; @@ -86,8 +91,8 @@ class ArrayHeap { /* * Sample from arrayHeap */ - size_t Sample(unsigned int* seed) { - float xi = heap_[1] * (rand_r(seed)%100/101.0); + size_t Sample() { + float xi = heap_[1] * distribution_(generator_); int i = 1; while (i < limit_) { i = i << 1; @@ -102,10 +107,10 @@ class ArrayHeap { /* * Sample a vector by given the size n */ - void SampleWithoutReplacement(size_t n, std::vector* samples, unsigned int* seed) { + void SampleWithoutReplacement(size_t n, std::vector* samples) { // sample n elements for (size_t i = 0; i < n; ++i) { - samples->at(i) = this->Sample(seed); + samples->at(i) = this->Sample(); this->Delete(samples->at(i)); } } @@ -115,6 +120,8 @@ class ArrayHeap { int bit_len_; // bit size int limit_; std::vector heap_; + std::mt19937 generator_; + std::uniform_real_distribution distribution_; }; struct NeighborSampleParam : public dmlc::Parameter { @@ -402,10 +409,12 @@ static bool CSRNeighborNonUniformSampleType(const nnvm::NodeAttrs& attrs, static void RandomSample(size_t set_size, size_t num, std::vector* out, - unsigned int* seed) { + unsigned int seed) { + std::mt19937 generator(seed); std::unordered_set 
sampled_idxs; + std::uniform_int_distribution distribution(0, set_size - 1); while (sampled_idxs.size() < num) { - sampled_idxs.insert(rand_r(seed) % set_size); + sampled_idxs.insert(distribution(generator)); } out->clear(); for (auto it = sampled_idxs.begin(); it != sampled_idxs.end(); it++) { @@ -441,7 +450,7 @@ static void GetUniformSample(const dgl_id_t* val_list, const size_t max_num_neighbor, std::vector* out_ver, std::vector* out_edge, - unsigned int* seed) { + unsigned int seed) { // Copy ver_list to output if (ver_len <= max_num_neighbor) { for (size_t i = 0; i < ver_len; ++i) { @@ -485,7 +494,7 @@ static void GetNonUniformSample(const float* probability, const size_t max_num_neighbor, std::vector* out_ver, std::vector* out_edge, - unsigned int* seed) { + unsigned int seed) { // Copy ver_list to output if (ver_len <= max_num_neighbor) { for (size_t i = 0; i < ver_len; ++i) { @@ -500,8 +509,8 @@ static void GetNonUniformSample(const float* probability, for (size_t i = 0; i < ver_len; ++i) { sp_prob[i] = probability[col_list[i]]; } - ArrayHeap arrayHeap(sp_prob); - arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed); + ArrayHeap arrayHeap(sp_prob, seed); + arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index); out_ver->resize(max_num_neighbor); out_edge->resize(max_num_neighbor); for (size_t i = 0; i < max_num_neighbor; ++i) { @@ -536,8 +545,8 @@ static void SampleSubgraph(const NDArray &csr, const float* probability, int num_hops, size_t num_neighbor, - size_t max_num_vertices) { - unsigned int time_seed = time(nullptr); + size_t max_num_vertices, + unsigned int random_seed) { size_t num_seeds = seed_arr.shape().Size(); CHECK_GE(max_num_vertices, num_seeds); @@ -594,7 +603,7 @@ static void SampleSubgraph(const NDArray &csr, num_neighbor, &tmp_sampled_src_list, &tmp_sampled_edge_list, - &time_seed); + random_seed); } else { // non-uniform-sample GetNonUniformSample(probability, val_list + *(indptr + dst_id), @@ -603,7 +612,7 @@ 
static void SampleSubgraph(const NDArray &csr, num_neighbor, &tmp_sampled_src_list, &tmp_sampled_edge_list, - &time_seed); + random_seed); } CHECK_EQ(tmp_sampled_src_list.size(), tmp_sampled_edge_list.size()); size_t pos = neighbor_list.size(); @@ -720,12 +729,15 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { - const NeighborSampleParam& params = - nnvm::get(attrs.parsed); + const NeighborSampleParam& params = nnvm::get(attrs.parsed); int num_subgraphs = inputs.size() - 1; CHECK_EQ(outputs.size(), 3 * num_subgraphs); + mshadow::Stream *s = ctx.get_stream(); + mshadow::Random *prnd = ctx.requested[0].get_random(s); + unsigned int seed = prnd->GetRandInt(); + #pragma omp parallel for for (int i = 0; i < num_subgraphs; i++) { SampleSubgraph(inputs[0], // graph_csr @@ -737,7 +749,12 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs, nullptr, // probability params.num_hops, params.num_neighbor, - params.max_num_vertices); + params.max_num_vertices, +#if defined(_OPENMP) + seed + omp_get_thread_num()); +#else + seed); +#endif } } @@ -798,6 +815,9 @@ of max_num_vertices, and the valid number of vertices is the same as the ones in .set_attr("FInferShape", CSRNeighborUniformSampleShape) .set_attr("FInferType", CSRNeighborUniformSampleType) .set_attr("FComputeEx", CSRNeighborUniformSampleComputeExCPU) +.set_attr("FResourceRequest", [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kRandom}; +}) .add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix") .add_argument("seed_arrays", "NDArray-or-Symbol[]", "seed vertices") .set_attr("key_var_num_args", "num_args") @@ -811,14 +831,17 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs const std::vector& inputs, const std::vector& req, const std::vector& outputs) { - const NeighborSampleParam& params = - nnvm::get(attrs.parsed); + 
const NeighborSampleParam& params = nnvm::get(attrs.parsed); int num_subgraphs = inputs.size() - 2; CHECK_EQ(outputs.size(), 4 * num_subgraphs); const float* probability = inputs[1].data().dptr(); + mshadow::Stream *s = ctx.get_stream(); + mshadow::Random *prnd = ctx.requested[0].get_random(s); + unsigned int seed = prnd->GetRandInt(); + #pragma omp parallel for for (int i = 0; i < num_subgraphs; i++) { float* sub_prob = outputs[i+2*num_subgraphs].data().dptr(); @@ -831,7 +854,12 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs probability, params.num_hops, params.num_neighbor, - params.max_num_vertices); + params.max_num_vertices, +#if defined(_OPENMP) + seed + omp_get_thread_num()); +#else + seed); +#endif } } @@ -897,6 +925,9 @@ of max_num_vertices, and the valid number of vertices is the same as the ones in .set_attr("FInferShape", CSRNeighborNonUniformSampleShape) .set_attr("FInferType", CSRNeighborNonUniformSampleType) .set_attr("FComputeEx", CSRNeighborNonUniformSampleComputeExCPU) +.set_attr("FResourceRequest", [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kRandom}; +}) .add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix") .add_argument("probability", "NDArray-or-Symbol", "probability vector") .add_argument("seed_arrays", "NDArray-or-Symbol[]", "seed vertices") diff --git a/src/operator/nn/mkldnn/mkldnn_rnn.cc b/src/operator/nn/mkldnn/mkldnn_rnn.cc index 5d3857e1c578..c8f1d45814f5 100644 --- a/src/operator/nn/mkldnn/mkldnn_rnn.cc +++ b/src/operator/nn/mkldnn/mkldnn_rnn.cc @@ -953,7 +953,7 @@ void MKLDNNRnnOp::Forward(const OpContext &ctx, const std::vector &inputs, const std::vector &req, const std::vector &outputs) { - TmpMemMgr::Get()->Init(ctx.requested[0]); + TmpMemMgr::Get()->Init(ctx.requested[1]); // In the `autograd.record()` context, RNNOp is required to run into // forward_training mode. 
const bool is_training = (ctx.is_train || ctx.need_grad); @@ -1076,7 +1076,7 @@ void MKLDNNRnnOp::Backward(const OpContext& ctx, const std::vector& req, const std::vector& outputs) { using tag = mkldnn::memory::format_tag; - TmpMemMgr::Get()->Init(ctx.requested[0]); + TmpMemMgr::Get()->Init(ctx.requested[1]); const RNNParam& default_param = full_param_.default_param; const int data_dtype = inputs[rnn_enum::kData].dtype(); const int w_dtype = inputs[rnn_enum::kParams].dtype(); diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index 557c1117739a..1bd351fe0a9c 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -292,23 +293,24 @@ void RNNForwardTraining(DType* ws, DType* hy_ptr, DType* cy_ptr, const float dropout, - int mode) { + int mode, + std::mt19937 &rnd_engine) { // NOLINT(runtime/references) switch (mode) { case rnn_enum::kLstm: LstmForwardTraining(ws, rs, state_outputs, num_layers, direction, seq_length, batch_size, input_size, state_size, x_ptr, hx_ptr, cx_ptr, - w_ptr, b_ptr, y_ptr, hy_ptr, cy_ptr, dropout); + w_ptr, b_ptr, y_ptr, hy_ptr, cy_ptr, dropout, rnd_engine); break; case rnn_enum::kGru: GruForwardTraining(ws, rs, state_outputs, num_layers, direction, seq_length, batch_size, input_size, state_size, x_ptr, hx_ptr, - w_ptr, y_ptr, hy_ptr, dropout); + w_ptr, y_ptr, hy_ptr, dropout, rnd_engine); break; case rnn_enum::kRnnTanh: case rnn_enum::kRnnRelu: VanillaRNNForwardTraining(ws, rs, state_outputs, num_layers, direction, seq_length, batch_size, input_size, state_size, x_ptr, hx_ptr, - w_ptr, y_ptr, hy_ptr, dropout, mode); + w_ptr, y_ptr, hy_ptr, dropout, mode, rnd_engine); break; default: LOG(FATAL) << "unknown RNN mode " << mode; @@ -841,7 +843,8 @@ class RNNOp { } #endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__) - if (ctx_.dev_type == kCPU) { +#if !defined(__CUDACC__) // cuda doesn't support C++17 + if constexpr (std::is_same::value) { int 
projection_size = 0; if (param_.projection_size.has_value()) { projection_size = param_.projection_size.value(); @@ -859,6 +862,9 @@ class RNNOp { DType* work_cpu_space = static_cast(temp_cpu_space_.data().dptr_); if (ctx.is_train || ctx.need_grad) { + mshadow::Random *prnd = ctx.requested[0].get_random(s); + std::mt19937 &rnd_engine = prnd->GetRndEngine(); + // allocate reserve space if (param_.projection_size.has_value()) { LOG(FATAL) << "No training support for LSTM with projection on CPU currently."; @@ -893,7 +899,8 @@ class RNNOp { hy_ptr, cy_ptr, param_.p, - param_.mode); + param_.mode, + rnd_engine); } else { RNNForwardInference(work_cpu_space, param_.state_outputs, @@ -915,6 +922,7 @@ class RNNOp { param_.mode); } } +#endif } void Backward(const OpContext &ctx, diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index ac5e17d49133..efebc915a0e7 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -183,6 +183,7 @@ static std::vector RNNResourceEx(const NodeAttrs& attrs, const } #endif } else { + request.emplace_back(ResourceRequest::kRandom); #if MXNET_USE_MKLDNN == 1 request.emplace_back(ResourceRequest::kTempSpace); #endif diff --git a/src/operator/rnn_impl.h b/src/operator/rnn_impl.h index 008ba7d315c6..08d069801079 100644 --- a/src/operator/rnn_impl.h +++ b/src/operator/rnn_impl.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -139,17 +140,17 @@ void LstmForwardTraining(DType* ws, DType* y_ptr, DType* hy_ptr, DType* cy_ptr, - const float dropout) { + const float dropout, + std::mt19937 &rnd_engine) { // NOLINT(runtime/references) DType* dropout_random = rs; DType* rs2 = dropout_random + (L - 1) * D * T * N * H; const int total_layers = D * L; Tensor hx(hx_ptr, Shape3(total_layers, N, H)); Tensor cx(cx_ptr, Shape3(total_layers, N, H)); - const int b_size = 2 * H * 4; - const int r_size = D * T * N * H * 6; - const int y_offset = T * N * H * 5; - const int cell_size = N * H; - unsigned int seed_ = 17 + 
rand() % 4096; // NOLINT(runtime/threadsafe_fn) + const index_t b_size = 2 * H * 4; + const index_t r_size = D * T * N * H * 6; + const index_t y_offset = T * N * H * 5; + const index_t cell_size = N * H; int idx = 0; // state & cell state's idx; const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); for (int i = 0; i < L; ++i) { @@ -174,10 +175,9 @@ void LstmForwardTraining(DType* ws, w_ptr += w_size; b_ptr += b_size; if (dropout > 0.0f) { - #pragma omp parallel for num_threads(omp_threads) - for (int j = 0; j < T * N * H * D; j++) { - int rand_data = rand_r(&seed_); - if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { + std::uniform_real_distribution distribution(0, 1); + for (index_t j = 0; j < T * N * H * D; j++) { + if (distribution(rnd_engine) < dropout) { dropout_random[i * T * N * H * D + j] = 0; y.dptr_[j] = 0; } else { @@ -995,7 +995,8 @@ void GruForwardTraining(DType* ws, DType* w_ptr, DType* y_ptr, DType* hy_ptr, - const float dropout) { + const float dropout, + std::mt19937 &rnd_engine) { // NOLINT(runtime/references) DType* wx = w_ptr; DType* wh = wx + I * H * 3; DType* bx = wh + H * H * 3 + (D - 1) * (H * H * 3 + I * H * 3) @@ -1016,18 +1017,15 @@ void GruForwardTraining(DType* ws, DType* bx_l = bx; DType* bh_l = bh; DType* y_tmp = x_ptr; - unsigned int seed_ = 17 + rand() % 4096; // NOLINT(runtime/threadsafe_fn) for (int l = 0; l < L; l++) { if (l != 0) { y_tmp = y_l; y_l = y_l + T * N * H * D; } if (dropout > 0.0f && l > 0) { - const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); - #pragma omp parallel for num_threads(omp_threads) - for (int i = 0; i < T * N * I; i++) { - int rand_data = rand_r(&seed_); - if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { + std::uniform_real_distribution distribution(0, 1); + for (index_t i = 0; i < T * N * I; i++) { + if (distribution(rnd_engine) < dropout) { dropout_random[(l - 1) * T * N * I + i] = 0; y_tmp[i] = 0; } 
else { @@ -1884,7 +1882,8 @@ void VanillaRNNForwardTraining(DType* ws, DType* y_ptr, DType* hy_ptr, const float dropout, - int mode) { + int mode, + std::mt19937 &rnd_engine) { // NOLINT(runtime/references) DType* wx = w_ptr; DType* wh = wx + I * H; DType* bx = wh + H * H + (D - 1) * (H * H + I * H) @@ -1903,17 +1902,15 @@ void VanillaRNNForwardTraining(DType* ws, DType* bh_l = bh; DType* y_tmp = x_ptr; const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); - unsigned int seed_ = 17 + rand() % 4096; // NOLINT(runtime/threadsafe_fn) for (int l = 0; l < L; l++) { if (l != 0) { y_tmp = y_l; y_l = y_l + T * N * H * D; } if (dropout > 0.0f && l > 0) { - #pragma omp parallel for num_threads(omp_threads) - for (int i = 0; i < T * N * I; i++) { - int rand_data = rand_r(&seed_); - if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { + std::uniform_real_distribution distribution(0, 1); + for (index_t i = 0; i < T * N * I; i++) { + if (distribution(rnd_engine) < dropout) { dropout_random[(l - 1) * T * N * I + i] = 0; y_tmp[i] = 0; } else { diff --git a/tests/cpp/engine/threaded_engine_test.cc b/tests/cpp/engine/threaded_engine_test.cc index cea92a01e799..e1e3a53e656c 100644 --- a/tests/cpp/engine/threaded_engine_test.cc +++ b/tests/cpp/engine/threaded_engine_test.cc @@ -35,6 +35,7 @@ #include #include #include +#include #include "../src/engine/engine_impl.h" #include "../include/test_util.h" @@ -62,15 +63,18 @@ void GenerateWorkload(int num_workloads, int num_var, std::vector* workloads) { workloads->clear(); workloads->resize(num_workloads); + static thread_local std::mt19937 generator; + std::uniform_int_distribution distribution_var(0, num_var - 1); + std::uniform_int_distribution distribution_time(min_time, max_time - 1); + std::uniform_int_distribution distribution_read(min_read, max_read - 1); for (int i = 0; i < num_workloads; ++i) { auto& wl = workloads->at(i); - wl.write = rand_r(&seed_) % num_var; - int r = 
rand_r(&seed_); - int num_read = min_read + (r % (max_read - min_read)); + wl.write = distribution_var(generator); + int num_read = distribution_read(generator); for (int j = 0; j < num_read; ++j) { - wl.reads.push_back(rand_r(&seed_) % num_var); + wl.reads.push_back(distribution_var(generator)); } - wl.time = min_time + rand_r(&seed_) % (max_time - min_time); + wl.time = distribution_time(generator); } } diff --git a/tests/cpp/thread_safety/thread_safety_test.cc b/tests/cpp/thread_safety/thread_safety_test.cc index 1f811d8c3fd7..9566adfd9d13 100644 --- a/tests/cpp/thread_safety/thread_safety_test.cc +++ b/tests/cpp/thread_safety/thread_safety_test.cc @@ -25,15 +25,17 @@ #if MXNET_USE_CPP_PACKAGE == 1 #include #include -#include #include -#include +#include #include #include +#include +#include #include "../src/engine/engine_impl.h" #include "../src/imperative/imperative_utils.h" #include "../include/test_util.h" #include "mxnet-cpp/MxNetCpp.h" + /* * Prepares input data for the ops/models used in this file */ @@ -298,8 +300,10 @@ void run_inference(const std::string& model, unsigned next = num; for (size_t i = 0; i < num_inf_per_thread; ++i) { if (random_sleep) { - int sleep_time = rand_r(&next) % 5; - std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); + static thread_local std::mt19937 generator; + std::uniform_int_distribution distribution(0, 5); + int sleep_time = distribution(generator); + std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); } int num_output = 0; const int *stypes; @@ -479,7 +483,9 @@ void run_inference_unsupported(const std::string& model, unsigned next = num; for (size_t i = 0; i < num_inf_per_thread; ++i) { if (random_sleep) { - int sleep_time = rand_r(&next) % 5; + static thread_local std::mt19937 generator; + std::uniform_int_distribution distribution(0, 5); + int sleep_time = distribution(generator); std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); } int num_output = 0; From 
dcdbe3b23e4c64f5d22dccec497a423d96dc0b01 Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Thu, 21 May 2020 01:07:55 +0300 Subject: [PATCH 03/14] Fix the URL to the IUS repository --- ci/docker/install/centos7_python.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/install/centos7_python.sh b/ci/docker/install/centos7_python.sh index 06c53bea48c1..796387e1b2ee 100755 --- a/ci/docker/install/centos7_python.sh +++ b/ci/docker/install/centos7_python.sh @@ -23,7 +23,7 @@ set -ex # Python 2.7 is installed by default, install 3.6 on top -yum -y install https://centos7.iuscommunity.org/ius-release.rpm +yum -y install https://repo.ius.io/ius-release-el7.rpm yum -y install python36u # Install PIP From ef0f14366d459db322ed73c1853ef37f9d93b6b8 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 7 Apr 2020 01:36:27 +0000 Subject: [PATCH 04/14] compiler warnings --- 3rdparty/mshadow/mshadow/logging.h | 5 +++++ 3rdparty/mshadow/mshadow/packet-inl.h | 4 ++++ Makefile | 4 +++- ci/docker/runtime_functions.sh | 2 -- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/3rdparty/mshadow/mshadow/logging.h b/3rdparty/mshadow/mshadow/logging.h index 666ca587b3bc..b639308f1c72 100644 --- a/3rdparty/mshadow/mshadow/logging.h +++ b/3rdparty/mshadow/mshadow/logging.h @@ -223,7 +223,12 @@ class LogMessageFatal { ~LogMessageFatal() MSHADOW_THROW_EXCEPTION { // throwing out of destructor is evil // hopefully we can do it here +#pragma GCC diagnostic push +#if __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wterminate" +#endif throw Error(log_stream_.str()); +#pragma GCC diagnostic pop } private: diff --git a/3rdparty/mshadow/mshadow/packet-inl.h b/3rdparty/mshadow/mshadow/packet-inl.h index 1b3d11a34114..e517c8facf6e 100644 --- a/3rdparty/mshadow/mshadow/packet-inl.h +++ b/3rdparty/mshadow/mshadow/packet-inl.h @@ -93,7 +93,11 @@ inline void* AlignedMallocPitch(size_t *out_pitch, if (res == NULL) { LOG(FATAL) << "AlignedMallocPitch failed"; } +#if 
__GNUC__ >= 6 +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif return res; +#pragma GCC diagnostic pop } /*! diff --git a/Makefile b/Makefile index c050dae5e45a..ad17675ce583 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,9 @@ CFLAGS += -DDMLC_LOG_STACK_TRACE_SIZE=0 CFLAGS += -DDMLC_LOG_FATAL_THROW=1 ifeq ($(DEV), 1) - CFLAGS += -g -Werror + # Excluded from Werror: + # 1) variables used in '#pragma omp parallel' are considered unused + CFLAGS += -g -Werror -Wno-error=unused-variable -Wno-error=maybe-uninitialized -Wno-error=unused-function NVCCFLAGS += -Werror cross-execution-space-call endif diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index ae55c12fb5ac..587a4088fd7d 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -782,7 +782,6 @@ build_ubuntu_gpu_cuda101_cudnn7() { set -ex build_ccache_wrappers make \ - DEV=1 \ USE_BLAS=openblas \ USE_MKLDNN=0 \ USE_CUDA=1 \ @@ -801,7 +800,6 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() { set -ex build_ccache_wrappers make \ - DEV=1 \ USE_BLAS=openblas \ USE_MKLDNN=1 \ USE_CUDA=1 \ From 62158acd106af6c7c1ebe8cac53ee182b0636aee Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Thu, 21 May 2020 02:25:35 +0300 Subject: [PATCH 05/14] Use a pre-c++17 way of distinguishing between device types --- src/operator/rnn-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index 1bd351fe0a9c..180e945000e4 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -844,7 +844,7 @@ class RNNOp { #endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__) #if !defined(__CUDACC__) // cuda doesn't support C++17 - if constexpr (std::is_same::value) { + if (ctx_.dev_type == kCPU) { int projection_size = 0; if (param_.projection_size.has_value()) { projection_size = param_.projection_size.value(); From b9bb0195a7870f1926ec9ad12c5ab89d8a6acecd Mon Sep 17 00:00:00 2001 From: Leonard Lausen 
Date: Sat, 11 Apr 2020 22:59:51 +0000 Subject: [PATCH 06/14] Greatly simplify qemu setup --- ci/README.md | 95 +---- ci/dev_menu.py | 2 +- ci/docker/Dockerfile.build.android_armv7 | 4 +- ci/docker/Dockerfile.build.android_armv8 | 4 +- ci/docker/Dockerfile.build.armv6 | 4 +- ci/docker/Dockerfile.build.armv7 | 4 +- ci/docker/Dockerfile.build.armv8 | 4 +- ...t.arm_qemu => Dockerfile.build.test.armv7} | 32 +- ...rm_qemu.sh => Dockerfile.build.test.armv8} | 38 +- ci/docker/install/ubuntu_arm_qemu_bin.sh | 40 -- ci/docker/qemu/README.md | 18 - ci/docker/qemu/runtime_functions.py | 134 ------- ci/docker/qemu/vmcontrol.py | 360 ------------------ ci/docker/runtime_functions.sh | 12 + ci/jenkins/Jenkins_steps.groovy | 40 +- ci/jenkins/Jenkinsfile_edge | 7 +- ci/qemu/README.md | 92 ----- ci/qemu/copy.sh | 23 -- ci/qemu/init.sh | 23 -- ci/qemu/initrd_modif/inittab | 38 -- ci/qemu/install.sh | 32 -- ci/qemu/mxnet_requirements.txt | 7 - ci/qemu/preseed.cfg | 68 ---- ci/qemu/preseed.sh | 29 -- ci/qemu/run.sh | 33 -- ci/qemu/test_requirements.txt | 3 - 26 files changed, 108 insertions(+), 1038 deletions(-) rename ci/docker/{Dockerfile.build.test.arm_qemu => Dockerfile.build.test.armv7} (67%) rename ci/docker/{install/ubuntu_arm_qemu.sh => Dockerfile.build.test.armv8} (60%) mode change 100755 => 100644 delete mode 100755 ci/docker/install/ubuntu_arm_qemu_bin.sh delete mode 100644 ci/docker/qemu/README.md delete mode 100755 ci/docker/qemu/runtime_functions.py delete mode 100644 ci/docker/qemu/vmcontrol.py delete mode 100644 ci/qemu/README.md delete mode 100755 ci/qemu/copy.sh delete mode 100755 ci/qemu/init.sh delete mode 100644 ci/qemu/initrd_modif/inittab delete mode 100755 ci/qemu/install.sh delete mode 100644 ci/qemu/mxnet_requirements.txt delete mode 100644 ci/qemu/preseed.cfg delete mode 100755 ci/qemu/preseed.sh delete mode 100755 ci/qemu/run.sh delete mode 100644 ci/qemu/test_requirements.txt diff --git a/ci/README.md b/ci/README.md index 155a0104a125..7172bd955491 100644 
--- a/ci/README.md +++ b/ci/README.md @@ -111,90 +111,37 @@ significantly. You can set this directory explicitly by setting CCACHE_DIR envir variable. All ccache instances are currently set to be 10 Gigabytes max in size. -## Testing with QEMU -To run the unit tests under qemu: -``` -./build.py -p armv7 && ./build.py -p test.arm_qemu ./runtime_functions.py run_ut_py3_qemu -``` - -To get a shell on the container and debug issues with the emulator itself, we build the container -and then execute it interactively. We can afterwards use port 2222 on the host to connect with SSH. - - -``` -ci/build.py -p test.arm_qemu -b && docker run -p2222:2222 -ti mxnetci/build.test.arm_qemu -``` +## Testing with ARM / Edge devices with QEMU -Then from another terminal: +We build on [QEMU](https://www.qemu.org/) and Linux [Kernel Support for +miscellaneous Binary +Formats](https://www.kernel.org/doc/html/v5.6/admin-guide/binfmt-misc.html) for +testing MXNet on edge devices. Test can be invoked with the same syntax as for +non-virtualized platforms: ``` -ssh -o StrictHostKeyChecking=no -p 2222 qemu@localhost +./build.py -p armv7 +./build.py -p test.armv7 /work/runtime_functions.sh unittest_ubuntu_python3_armv7 ``` -There are two pre-configured users: `root` and `qemu` both without passwords. - - -### Example of reproducing a test result with QEMU on ARM - - -You might want to enable a debug build first: - -``` -$ git diff -diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh -index 39631f9..666ceea 100755 ---- a/ci/docker/runtime_functions.sh -+++ b/ci/docker/runtime_functions.sh -@@ -172,6 +172,7 @@ build_armv7() { - -DUSE_LAPACK=OFF \ - -DBUILD_CPP_EXAMPLES=OFF \ - -Dmxnet_LINKER_LIBS=-lgfortran \ -+ -DCMAKE_BUILD_TYPE=Debug \ - -G Ninja /work/mxnet - - ninja -v +For the test step to succeed, you must run Linux kernel 4.8 or later and have qemu installed. 
+On Debian and Ubuntu systems, run the following command to install the dependencies: ``` +sudo apt install binfmt-support qemu-user-static -Then we build the project for armv7, the test container and start QEMU inside docker: - -``` -ci/build.py -p armv7 -ci/build.py -p test.arm_qemu -b && docker run -p2222:2222 -ti mxnetci/build.test.arm_qemu +# Use qemu-binfmt-conf.sh to register all binary types with the kernel +wget https://raw.githubusercontent.com/qemu/qemu/stable-4.1/scripts/qemu-binfmt-conf.sh +chmod +x qemu-binfmt-conf.sh +sudo ./qemu-binfmt-conf.sh --persistent yes --qemu-suffix "-static" --qemu-path "/usr/bin" --systemd ALL ``` - - -At this point we copy artifacts and sources to the VM, in another terminal (host) do the following: +If you run into segmentation faults at the beginning of the emulated tests, you +probably have a ancient version of Qemu on your system (or found a bug in +upstream Qemu). In that situation, you can rely on the +`multiarch/qemu-user-static` Docker project to register a set of up-to-date Qemu +binaries from their Docker image with your kernel: ``` -# Copy mxnet sources to the VM -rsync --delete -e 'ssh -p2222' --exclude='.git/' -zvaP ./ qemu@localhost:mxnet - - -# Ssh into the vm -ssh -p2222 qemu@localhost - -cd mxnet - -# Execute a single failing C++ test -build/tests/mxnet_unit_tests --gtest_filter="ACTIVATION_PERF.ExecuteBidirectional" - -# To install MXNet: -sudo pip3 install --upgrade --force-reinstall build/mxnet-1.3.1-py2.py3-none-any.whl - -# Execute a single python test: - -nosetests-3.4 -v -s tests/python/unittest/test_ndarray.py - - -# Debug with cgdb -sudo apt install -y libstdc++6-6-dbg -cgdb build/tests/mxnet_unit_tests - -(gdb) !pwd -/home/qemu/mxnet -(gdb) set substitute-path /work /home/qemu -(gdb) set substitute-path /build/gcc-6-6mK9AW/gcc-6-6.3.0/build/arm-linux-gnueabihf/libstdc++-v3/include/ /usr/include/c++/6/ -(gdb) r --gtest_filter="ACTIVATION_PERF.ExecuteBidirectional" +docker run --rm --privileged 
multiarch/qemu-user-static --reset -p yes ``` diff --git a/ci/dev_menu.py b/ci/dev_menu.py index e9f031e1b171..962e4ecfe03f 100755 --- a/ci/dev_menu.py +++ b/ci/dev_menu.py @@ -167,7 +167,7 @@ def provision_virtualenv(venv_path=DEFAULT_PYENV): ('[Docker] Python3 ARMv7 unittests (QEMU)', [ "ci/build.py -p armv7", - "ci/build.py -p test.arm_qemu ./runtime_functions.py run_ut_py3_qemu" + "ci/build.py -p test.armv7 /work/runtime_functions.sh unittest_ubuntu_python3_armv7" ]), ('Clean (RESET HARD) repository (Warning! erases local changes / DATA LOSS)', Confirm("ci/docker/runtime_functions.sh clean_repo")) diff --git a/ci/docker/Dockerfile.build.android_armv7 b/ci/docker/Dockerfile.build.android_armv7 index 96ca04e9f5e6..8d9fb6481e2e 100644 --- a/ci/docker/Dockerfile.build.android_armv7 +++ b/ci/docker/Dockerfile.build.android_armv7 @@ -46,10 +46,10 @@ RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ mkdir /usr/local/openblas-android && \ cd /usr/local/OpenBLAS && \ export TOOLCHAIN=/usr/local/android-ndk-r19/toolchains/llvm/prebuilt/linux-x86_64 && \ - make NOFORTRAN=1 ARM_SOFTFP_ABI=1 \ + make NOFORTRAN=1 ARM_SOFTFP_ABI=1 NO_SHARED=1 \ LDFLAGS="-L/usr/local/android-ndk-r19/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/lib/gcc/arm-linux-androideabi/4.9.x -lm" \ CC=$TOOLCHAIN/bin/armv7a-linux-androideabi16-clang AR=$TOOLCHAIN/bin/arm-linux-androideabi-ar && \ - make PREFIX=/usr/local/openblas-android install && \ + make PREFIX=/usr/local/openblas-android NO_SHARED=1 install && \ cd /usr/local && \ rm -rf OpenBLAS ENV OpenBLAS_HOME=/usr/local/openblas-android diff --git a/ci/docker/Dockerfile.build.android_armv8 b/ci/docker/Dockerfile.build.android_armv8 index 81adc80edf14..a78113a33bae 100644 --- a/ci/docker/Dockerfile.build.android_armv8 +++ b/ci/docker/Dockerfile.build.android_armv8 @@ -46,10 +46,10 @@ RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ mkdir /usr/local/openblas-android && \ 
cd /usr/local/OpenBLAS && \ export TOOLCHAIN=/usr/local/android-ndk-r19/toolchains/llvm/prebuilt/linux-x86_64 && \ - make NOFORTRAN=1 \ + make NOFORTRAN=1 NO_SHARED=1 \ LDFLAGS="-L/usr/local/android-ndk-r21/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x -lm" \ CC=$TOOLCHAIN/bin/aarch64-linux-android21-clang AR=$TOOLCHAIN/bin/aarch64-linux-android-ar && \ - make PREFIX=/usr/local/openblas-android install && \ + make PREFIX=/usr/local/openblas-android NO_SHARED=1 install && \ cd /usr/local && \ rm -rf OpenBLAS ENV OpenBLAS_HOME=/usr/local/openblas-android diff --git a/ci/docker/Dockerfile.build.armv6 b/ci/docker/Dockerfile.build.armv6 index 02e16da11616..83186369d829 100644 --- a/ci/docker/Dockerfile.build.armv6 +++ b/ci/docker/Dockerfile.build.armv6 @@ -50,8 +50,8 @@ ENV CMAKE_TOOLCHAIN_FILE=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/share/ RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ cd /usr/local/OpenBLAS && \ - make NOFORTRAN=1 CC=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/bin/arm-linux-gcc && \ - make PREFIX=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/arm-buildroot-linux-gnueabihf/sysroot install && \ + make NOFORTRAN=1 NO_SHARED=1 CC=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/bin/arm-linux-gcc && \ + make PREFIX=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/arm-buildroot-linux-gnueabihf/sysroot NO_SHARED=1 install && \ cd /usr/local && \ rm -rf OpenBLAS diff --git a/ci/docker/Dockerfile.build.armv7 b/ci/docker/Dockerfile.build.armv7 index a9cc6d1e83a4..d207d79485ae 100644 --- a/ci/docker/Dockerfile.build.armv7 +++ b/ci/docker/Dockerfile.build.armv7 @@ -45,8 +45,8 @@ ENV CMAKE_TOOLCHAIN_FILE=/usr/local/arm-linux-gnueabihf-toolchain.cmake RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ cd /usr/local/OpenBLAS && \ - make NOFORTRAN=1 CC=arm-linux-gnueabihf-gcc && \ - make PREFIX=/usr/local/arm-linux-gnueabihf install && 
\ + make NOFORTRAN=1 NO_SHARED=1 CC=arm-linux-gnueabihf-gcc && \ + make PREFIX=/usr/local/arm-linux-gnueabihf NO_SHARED=1 install && \ cd /usr/local && \ rm -rf OpenBLAS diff --git a/ci/docker/Dockerfile.build.armv8 b/ci/docker/Dockerfile.build.armv8 index adf6873fb40c..d318cc2f02d4 100644 --- a/ci/docker/Dockerfile.build.armv8 +++ b/ci/docker/Dockerfile.build.armv8 @@ -45,8 +45,8 @@ ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ cd /usr/local/OpenBLAS && \ - make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \ - make PREFIX=/usr/aarch64-linux-gnu install && \ + make NOFORTRAN=1 NO_SHARED=1 CC=aarch64-linux-gnu-gcc && \ + make PREFIX=/usr/aarch64-linux-gnu NO_SHARED=1 install && \ cd /usr/local && \ rm -rf OpenBLAS diff --git a/ci/docker/Dockerfile.build.test.arm_qemu b/ci/docker/Dockerfile.build.test.armv7 similarity index 67% rename from ci/docker/Dockerfile.build.test.arm_qemu rename to ci/docker/Dockerfile.build.test.armv7 index 5dc610a524b0..d49e7a5582c1 100644 --- a/ci/docker/Dockerfile.build.test.arm_qemu +++ b/ci/docker/Dockerfile.build.test.armv7 @@ -16,22 +16,21 @@ # specific language governing permissions and limitations # under the License. 
# -# Dockerfile to build and run MXNet on Ubuntu 16.04 for CPU +# Dockerfile to test MXNet on Ubuntu 20.04 ARMv7 CPU -FROM ubuntu:16.04 +FROM arm32v7/ubuntu:20.04 -WORKDIR /work +WORKDIR /usr/local -RUN apt-get update -COPY install/ubuntu_python.sh /work/ -COPY install/requirements /work/ -RUN /work/ubuntu_python.sh - -COPY install/ubuntu_arm_qemu.sh /work -RUN /work/ubuntu_arm_qemu.sh - -COPY install/ubuntu_arm_qemu_bin.sh /work -RUN /work/ubuntu_arm_qemu_bin.sh +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + python3 \ + python3-pip \ + python3-numpy \ + python3-scipy \ + python3-nose \ + python3-nose-timer \ + python3-requests \ + && rm -rf /var/lib/apt/lists/* ARG USER_ID=0 ARG GROUP_ID=0 @@ -39,9 +38,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ -COPY qemu/* /work/ - -# SSH to the Qemu VM -EXPOSE 2222/tcp - -CMD ["./runtime_functions.py","run_qemu_interactive"] +WORKDIR /work/mxnet \ No newline at end of file diff --git a/ci/docker/install/ubuntu_arm_qemu.sh b/ci/docker/Dockerfile.build.test.armv8 old mode 100755 new mode 100644 similarity index 60% rename from ci/docker/install/ubuntu_arm_qemu.sh rename to ci/docker/Dockerfile.build.test.armv8 index 79ab67bfdbe6..bee4d85c6a97 --- a/ci/docker/install/ubuntu_arm_qemu.sh +++ b/ci/docker/Dockerfile.build.test.armv8 @@ -1,5 +1,4 @@ -#!/usr/bin/env bash - +# -*- mode: dockerfile -*- # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,22 +15,27 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+# +# Dockerfile to test MXNet on Ubuntu 20.04 ARMv8 CPU + +FROM arm64v8/ubuntu:20.04 -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image +WORKDIR /usr/local -set -exuo pipefail +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + python3 \ + python3-pip \ + python3-numpy \ + python3-scipy \ + python3-nose \ + python3-nose-timer \ + python3-requests \ + && rm -rf /var/lib/apt/lists/* -apt-get install -y \ - cmake \ - curl \ - wget \ - git \ - qemu \ - qemu-system-arm \ - unzip \ - bzip2 \ - vim-nox \ - toilet +ARG USER_ID=0 +ARG GROUP_ID=0 +COPY install/ubuntu_adduser.sh /work/ +RUN /work/ubuntu_adduser.sh -pip3 install ipython +COPY runtime_functions.sh /work/ +WORKDIR /work/mxnet \ No newline at end of file diff --git a/ci/docker/install/ubuntu_arm_qemu_bin.sh b/ci/docker/install/ubuntu_arm_qemu_bin.sh deleted file mode 100755 index d4f81185c169..000000000000 --- a/ci/docker/install/ubuntu_arm_qemu_bin.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -exuo pipefail - -# -# This disk image and kernels for virtual testing with QEMU is generated with some manual OS -# installation steps with the scripts and documentation found in the ci/qemu/ folder. -# -# The image has a base Debian OS and MXNet runtime dependencies installed. -# The root password is empty and there's a "qemu" user without password. SSH access is enabled as -# well. -# -# See also: ci/qemu/README.md -# - -REMOTE="https://s3-us-west-2.amazonaws.com/mxnet-ci-prod-slave-data" -curl -f ${REMOTE}/vda_debian_stretch.qcow2.bz2 | bunzip2 > vda.qcow2 -curl -f ${REMOTE}/vmlinuz -o vmlinuz -curl -f ${REMOTE}/initrd.img -o initrd.img - diff --git a/ci/docker/qemu/README.md b/ci/docker/qemu/README.md deleted file mode 100644 index c06b34562b57..000000000000 --- a/ci/docker/qemu/README.md +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - -These are files used in the docker container that runs QEMU diff --git a/ci/docker/qemu/runtime_functions.py b/ci/docker/qemu/runtime_functions.py deleted file mode 100755 index 5a57cb8dae6a..000000000000 --- a/ci/docker/qemu/runtime_functions.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -# -*- coding: utf-8 -*- -"""Runtime functions to use in docker / testing""" - -__author__ = 'Pedro Larroy' -__version__ = '0.1' - -import os -import sys -import subprocess -import argparse -import logging -from subprocess import call, check_call, Popen, DEVNULL, PIPE -import time -import sys -import types -import glob -import vmcontrol -from vmcontrol import qemu_ssh, qemu_provision, qemu_rsync_to_host, VM - -def activate_this(base): - import site - import os - import sys - if sys.platform == 'win32': - site_packages = os.path.join(base, 'Lib', 'site-packages') - else: - site_packages = os.path.join(base, 'lib', 'python%s' % sys.version[:3], 'site-packages') - prev_sys_path = list(sys.path) - sys.real_prefix = sys.prefix - sys.prefix = base - # Move the added items to the front of the path: - new_sys_path = [] - for item in list(sys.path): - if item not in prev_sys_path: - new_sys_path.append(item) - sys.path.remove(item) - sys.path[:0] = new_sys_path - - - - -def run_ut_py3_qemu(): - """Run unit tests in the emulator and copy the results back to the host through the mounted - volume in /mxnet""" - from vmcontrol import VM - with VM() as vm: - qemu_provision(vm.ssh_port) - logging.info("execute tests") - qemu_ssh(vm.ssh_port, "./runtime_functions.py", "run_ut_python3_qemu_internal") - qemu_rsync_to_host(vm.ssh_port, "*.xml", "mxnet") - logging.info("copied to host") - logging.info("tests finished, vm shutdown.") - vm.shutdown() - -def run_ut_python3_qemu_internal(): - """this runs inside the vm""" - pkg = glob.glob('mxnet_dist/*.whl')[0] - logging.info("=== NOW Running inside QEMU ===") - logging.info("PIP Installing %s", pkg) - check_call(['sudo', 'pip3', 'install', pkg]) - logging.info("PIP Installing mxnet/test_requirements.txt") - check_call(['sudo', 'pip3', 'install', '-r', 'mxnet/test_requirements.txt']) - logging.info("Running tests in 
mxnet/tests/python/unittest/") - check_call(['nosetests', '--with-timer', '--with-xunit', '--xunit-file', 'nosetests_unittest.xml', '--verbose', 'mxnet/tests/python/unittest/test_engine.py']) - # Example to run a single unit test: - # check_call(['nosetests', '--with-timer', '--with-xunit', '--xunit-file', 'nosetests_unittest.xml', '--verbose', 'mxnet/tests/python/unittest/test_ndarray.py:test_ndarray_fluent']) - - - -def run_qemu_interactive(): - vm = VM(interactive=True) - vm.detach() - vm.start() - vm.wait() - logging.info("QEMU finished") - -################################ - -def parsed_args(): - parser = argparse.ArgumentParser(description="""python runtime functions""", epilog="") - parser.add_argument('command',nargs='*', - help="Name of the function to run with arguments") - args = parser.parse_args() - return (args, parser) - -def script_name() -> str: - return os.path.split(sys.argv[0])[1] - -def chdir_to_script_directory(): - # We need to be in the same directory than the script so the commands in the dockerfiles work as - # expected. 
But the script can be invoked from a different path - base = os.path.split(os.path.realpath(__file__))[0] - os.chdir(base) - -def main(): - logging.getLogger().setLevel(logging.INFO) - logging.basicConfig(format='{}: %(asctime)-15s %(message)s'.format(script_name())) - chdir_to_script_directory() - - # Run function with name passed as argument - (args, parser) = parsed_args() - logging.info("%s", args.command) - if args.command: - fargs = args.command[1:] - globals()[args.command[0]](*fargs) - return 0 - else: - parser.print_help() - fnames = [x for x in globals() if type(globals()[x]) is types.FunctionType] - print('\nAvailable functions: {}'.format(' '.join(fnames))) - return 1 - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/ci/docker/qemu/vmcontrol.py b/ci/docker/qemu/vmcontrol.py deleted file mode 100644 index 31ef4d2550c3..000000000000 --- a/ci/docker/qemu/vmcontrol.py +++ /dev/null @@ -1,360 +0,0 @@ -#!/usr/bin/env python3 - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# -*- coding: utf-8 -*- -"""Utilities to control a guest VM, used for virtual testing with QEMU""" - -__author__ = 'Pedro Larroy' -__version__ = '0.1' - -import os -import sys -import subprocess -import argparse -import logging -from subprocess import call, check_call, Popen, DEVNULL, PIPE -import time -import sys -import multiprocessing -import shlex - -################################################### -# -# Virtual testing with QEMU -# -# We start QEMU instances that have a local port in the host redirected to the ssh port. -# -# The VMs are provisioned after boot, tests are run and then they are stopped -# -QEMU_SSH_PORT=2222 -QEMU_RAM=4096 - -QEMU_RUN=""" -qemu-system-arm -M virt -m {ram} \ - -kernel vmlinuz \ - -initrd initrd.img \ - -append 'root=/dev/vda1' \ - -drive if=none,file=vda.qcow2,format=qcow2,id=hd \ - -device virtio-blk-device,drive=hd \ - -netdev user,id=mynet,hostfwd=tcp::{ssh_port}-:22 \ - -device virtio-net-device,netdev=mynet \ - -display none -nographic -""" - -QEMU_RUN_INTERACTIVE=""" -qemu-system-arm -M virt -m {ram} \ - -kernel vmlinuz \ - -initrd initrd.img \ - -append 'root=/dev/vda1' \ - -drive if=none,file=vda.qcow2,format=qcow2,id=hd \ - -device virtio-blk-device,drive=hd \ - -netdev user,id=mynet,hostfwd=tcp::{ssh_port}-:22 \ - -device virtio-net-device,netdev=mynet \ - -nographic -""" - -def retry(target_exception, tries=4, delay_s=1, backoff=2): - """Retry calling the decorated function using an exponential backoff. - - http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ - original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - - :param target_exception: the exception to check. may be a tuple of - exceptions to check - :type target_exception: Exception or tuple - :param tries: number of times to try (not retry) before giving up - :type tries: int - :param delay_s: initial delay between retries in seconds - :type delay_s: int - :param backoff: backoff multiplier e.g. 
value of 2 will double the delay - each retry - :type backoff: int - """ - import time - from functools import wraps - - def decorated_retry(f): - @wraps(f) - def f_retry(*args, **kwargs): - mtries, mdelay = tries, delay_s - while mtries > 1: - try: - return f(*args, **kwargs) - except target_exception as e: - logging.warning("Exception: %s, Retrying in %d seconds...", str(e), mdelay) - time.sleep(mdelay) - mtries -= 1 - mdelay *= backoff - return f(*args, **kwargs) - - return f_retry # true decorator - - return decorated_retry - - - - -class VMError(RuntimeError): - pass - -class VM: - """Control of the virtual machine""" - def __init__(self, ssh_port=QEMU_SSH_PORT, ram=QEMU_RAM, interactive=False): - self.log = logging.getLogger(VM.__name__) - self.ssh_port = ssh_port - self.timeout_s = 300 - self.qemu_process = None - self._detach = False - self._interactive = interactive - self.ram = ram - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_value, traceback): - if not self._detach: - self.shutdown() - self.terminate() - - def start(self): - sys.stderr.flush() - call(['toilet', '-f', 'smbraille', 'Starting QEMU']) - sys.stdout.flush() - self.log.info("Starting VM, ssh port redirected to localhost:%s (inside docker, not exposed by default)", self.ssh_port) - if self.is_running(): - raise VMError("VM is running, shutdown first") - if self._interactive: - self.qemu_process = Popen(shlex.split(QEMU_RUN_INTERACTIVE.format(ssh_port=self.ssh_port, ram=self.ram))) - return - else: - self.log.info("Starting in non-interactive mode. 
Terminal output is disabled.") - self.qemu_process = Popen(shlex.split(QEMU_RUN.format(ssh_port=self.ssh_port, ram=self.ram)), stdout=DEVNULL, stdin=DEVNULL, stderr=PIPE) - def keep_waiting(): - return self.is_running() - - logging.info("waiting for ssh to be open in the VM (timeout {}s)".format(self.timeout_s)) - ssh_working = wait_ssh_open('127.0.0.1', self.ssh_port, keep_waiting, self.timeout_s) - - if not self.is_running(): - (_, stderr) = self.qemu_process.communicate() - raise VMError("VM failed to start, retcode: {}, stderr: {}".format( self.retcode(), stderr.decode())) - - if not ssh_working: - if self.is_running(): - self.log.error("VM running but SSH is not working") - self.terminate() - raise VMError("SSH is not working after {} seconds".format(self.timeout_s)) - self.log.info("VM is online and SSH is up") - - def is_running(self): - return self.qemu_process and self.qemu_process.poll() is None - - def retcode(self): - if self.qemu_process: - return self.qemu_process.poll() - else: - raise RuntimeError('qemu process was not started') - - def terminate(self): - if self.qemu_process: - logging.info("send term signal") - self.qemu_process.terminate() - time.sleep(3) - logging.info("send kill signal") - self.qemu_process.kill() - self.qemu_process.wait() - self.qemu_process = None - else: - logging.warn("VM.terminate: QEMU process not running") - - def detach(self): - self._detach = True - - def shutdown(self): - if self.qemu_process: - logging.info("Shutdown via ssh") - # ssh connection will be closed with an error - call(["ssh", "-o", "StrictHostKeyChecking=no", "-p", str(self.ssh_port), "qemu@localhost", - "sudo", "poweroff"]) - ret = self.qemu_process.wait(timeout=90) - self.log.info("VM on port %s has shutdown (exit code %d)", self.ssh_port, ret) - self.qemu_process = None - - def wait(self): - if self.qemu_process: - self.qemu_process.wait() - - def __del__(self): - if self.is_running and not self._detach: - logging.info("VM destructor hit") - 
self.terminate() - - -def qemu_ssh(ssh_port=QEMU_SSH_PORT, *args): - check_call(["ssh", "-o", "ServerAliveInterval=5", "-o", "StrictHostKeyChecking=no", "-p{}".format(ssh_port), "qemu@localhost", *args]) - - -def qemu_rsync(ssh_port, local_path, remote_path): - check_call(['rsync', '-e', 'ssh -o StrictHostKeyChecking=no -p{}'.format(ssh_port), '-a', local_path, 'qemu@localhost:{}'.format(remote_path)]) - -def qemu_rsync_to_host(ssh_port, remote_path, local_path): - check_call(['rsync', '-e', 'ssh -o StrictHostKeyChecking=no -p{}'.format(ssh_port), '-va', 'qemu@localhost:{}'.format(remote_path), local_path]) - - -@retry(subprocess.CalledProcessError) -def qemu_provision(ssh_port=QEMU_SSH_PORT): - import glob - logging.info("Provisioning the VM with artifacts and sources") - - artifact = glob.glob('/work/mxnet/build/*.whl') - for x in artifact: - qemu_rsync(ssh_port, x, 'mxnet_dist/') - qemu_rsync(ssh_port, '/work/runtime_functions.py','') - qemu_rsync(ssh_port, '/work/vmcontrol.py','') - qemu_rsync(ssh_port, 'mxnet/tests', 'mxnet') - qemu_rsync(ssh_port, 'mxnet/ci/qemu/test_requirements.txt', 'mxnet/test_requirements.txt') - logging.info("Provisioning completed successfully.") - - -def wait_ssh_open(server, port, keep_waiting=None, timeout=None): - """ Wait for network service to appear - @param server: host to connect to (str) - @param port: port (int) - @param timeout: in seconds, if None or 0 wait forever - @return: True of False, if timeout is None may return only True or - throw unhandled network exception - """ - import socket - import errno - import time - log = logging.getLogger('wait_ssh_open') - sleep_s = 1 - if timeout: - from time import time as now - # time module is needed to calc timeout shared between two exceptions - end = now() + timeout - - while True: - log.debug("Sleeping for %s second(s)", sleep_s) - time.sleep(sleep_s) - s = socket.socket() - try: - if keep_waiting and not keep_waiting(): - log.debug("keep_waiting() is set and evaluates to 
False") - return False - - if timeout: - next_timeout = end - now() - if next_timeout < 0: - log.debug("connect time out") - return False - else: - log.debug("connect timeout %d s", next_timeout) - s.settimeout(next_timeout) - - log.debug("connect %s:%d", server, port) - s.connect((server, port)) - ret = s.recv(1024).decode() - if ret and ret.startswith('SSH'): - s.close() - log.info("wait_ssh_open: port %s:%s is open and ssh is ready", server, port) - return True - else: - log.debug("Didn't get the SSH banner") - s.close() - - except ConnectionError as err: - log.debug("ConnectionError %s", err) - if sleep_s == 0: - sleep_s = 1 - else: - sleep_s *= 2 - - except socket.gaierror as err: - log.debug("gaierror %s",err) - return False - - except socket.timeout as err: - # this exception occurs only if timeout is set - if timeout: - return False - - except TimeoutError as err: - # catch timeout exception from underlying network library - # this one is different from socket.timeout - raise - - -def wait_port_open(server, port, timeout=None): - """ Wait for network service to appear - @param server: host to connect to (str) - @param port: port (int) - @param timeout: in seconds, if None or 0 wait forever - @return: True of False, if timeout is None may return only True or - throw unhandled network exception - """ - import socket - import errno - import time - sleep_s = 0 - if timeout: - from time import time as now - # time module is needed to calc timeout shared between two exceptions - end = now() + timeout - - while True: - logging.debug("Sleeping for %s second(s)", sleep_s) - time.sleep(sleep_s) - s = socket.socket() - try: - if timeout: - next_timeout = end - now() - if next_timeout < 0: - return False - else: - s.settimeout(next_timeout) - - logging.info("connect %s %d", server, port) - s.connect((server, port)) - - except ConnectionError as err: - logging.debug("ConnectionError %s", err) - if sleep_s == 0: - sleep_s = 1 - - except socket.gaierror as err: - 
logging.debug("gaierror %s",err) - return False - - except socket.timeout as err: - # this exception occurs only if timeout is set - if timeout: - return False - - except TimeoutError as err: - # catch timeout exception from underlying network library - # this one is different from socket.timeout - raise - - else: - s.close() - logging.info("wait_port_open: port %s:%s is open", server, port) - return True - diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 587a4088fd7d..9141a21b2457 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1402,6 +1402,18 @@ test_ubuntu_cpu_python3() { popd } +# QEMU based ARM tests +unittest_ubuntu_python3_arm() { + set -ex + export PYTHONPATH=./python/ + export MXNET_MKLDNN_DEBUG=0 # Ignored if not present + export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 + export MXNET_SUBGRAPH_VERBOSE=0 + export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 + python3 -m nose --verbose tests/python/unittest/test_engine.py +} + # Functions that run the nightly Tests: #Runs Apache RAT Check on MXNet Source for License Headers diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 3f5fb2503b56..378ae8177ec5 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -26,9 +26,6 @@ utils = load('ci/Jenkinsfile_utils.groovy') mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libtvm_runtime.so, lib/libtvmop.so, lib/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a' mx_lib_cython = 'lib/libmxnet.so, lib/libmxnet.a, lib/libtvm_runtime.so, lib/libtvmop.so, lib/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, python/mxnet/_cy3/*.so, python/mxnet/_ffi/_cy3/*.so' -// Python wheels -mx_pip = 'build/*.whl' - // mxnet cmake 
libraries, in cmake builds we do not produce a libnvvm static library by default. mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so' mx_cmake_lib_no_tvm_op = 'build/libmxnet.so, build/libmxnet.a, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so' @@ -460,27 +457,28 @@ def compile_armv8_jetson_gpu() { }] } -def compile_armv7_cpu() { - return ['ARMv7':{ +def compile_armv6_cpu() { + return ['ARMv6':{ node(NODE_LINUX_CPU) { - ws('workspace/build-ARMv7') { + ws('workspace/build-ARMv6') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('armv7', 'build_armv7', false) - utils.pack_lib('armv7', mx_pip) + utils.docker_run('armv6', 'build_armv6', false) + utils.pack_lib('armv6', mx_lib) } } } }] } -def compile_armv6_cpu() { - return ['ARMv6':{ +def compile_armv7_cpu() { + return ['ARMv7':{ node(NODE_LINUX_CPU) { - ws('workspace/build-ARMv6') { + ws('workspace/build-ARMv7') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('armv6', 'build_armv6', false) + utils.docker_run('armv7', 'build_armv7', false) + utils.pack_lib('armv7', mx_lib) } } } @@ -494,6 +492,7 @@ def compile_armv8_cpu() { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() utils.docker_run('armv8', 'build_armv8', false) + utils.pack_lib('armv8', mx_lib) } } } @@ -1361,8 +1360,21 @@ def test_qemu_armv7_cpu() { node(NODE_LINUX_CPU) { ws('workspace/ut-armv7-qemu') { timeout(time: max_time, unit: 'MINUTES') { - utils.unpack_and_init('armv7', mx_pip) - sh "ci/build.py --docker-registry ${env.DOCKER_CACHE_REGISTRY} -p test.arm_qemu ./runtime_functions.py run_ut_py3_qemu" + utils.unpack_and_init('armv7', mx_lib) + 
utils.docker_run('test.armv7', 'unittest_ubuntu_python3_arm', false) + } + } + } + }] +} + +def test_qemu_armv8_cpu() { + return ['ARMv8 QEMU': { + node(NODE_LINUX_CPU) { + ws('workspace/ut-armv8-qemu') { + timeout(time: max_time, unit: 'MINUTES') { + utils.unpack_and_init('armv8', mx_lib) + utils.docker_run('test.armv8', 'unittest_ubuntu_python3_arm', false) } } } diff --git a/ci/jenkins/Jenkinsfile_edge b/ci/jenkins/Jenkinsfile_edge index 9d8e01399d7c..9e2abf558dd2 100644 --- a/ci/jenkins/Jenkinsfile_edge +++ b/ci/jenkins/Jenkinsfile_edge @@ -40,11 +40,12 @@ core_logic: { custom_steps.compile_armv8_cpu(), custom_steps.compile_armv8_android_cpu(), custom_steps.compile_armv7_android_cpu() - ]) + ]) utils.parallel_stage('Tests', [ - custom_steps.test_qemu_armv7_cpu() - ]) + custom_steps.test_qemu_armv7_cpu(), + custom_steps.test_qemu_armv8_cpu() + ]) } , failure_handler: { diff --git a/ci/qemu/README.md b/ci/qemu/README.md deleted file mode 100644 index 4beca4a03690..000000000000 --- a/ci/qemu/README.md +++ /dev/null @@ -1,92 +0,0 @@ - - - - - - - - - - - - - - - - - -# QEMU base image creation - -This folder contains scripts and configuration to create a QEMU virtual drive with a debian system. - -The order of execution is: -- `init.sh` to download the installation kernel and ramdisk -- `preseed.sh` to preseed the debian installer so it doesn't ask questions -- `copy.sh` to extract the kernel and ramdisk from the installed system -- `run.sh` to boot the system and fine tune the image - -# Description of the process: - -# Preparing the base image - -First, an installation is made using installer kernel and initrd by using the scripts above. - -# After installation, we extract initrd and kernel from the installation drive - -The commands look like this: - -`virt-copy-out -a hda.qcow2 /boot/initrd.img-4.15.0-30-generic-lpae .` - -In the same way for the kernel. 
- -Then we install packages and dependencies on the qemu image: - -apt install -y sudo python3-dev virtualenv wget libgfortran3 libopenblas-base rsync build-essential -libopenblas-dev libomp5 - -We enable sudo and passwordless logins: - -Add file `/etc/sudoers.d/01-qemu` -With content: -``` -qemu ALL=(ALL) NOPASSWD: ALL -``` - -Edit: `/etc/ssh/sshd_config` - -And set the following options: -``` -PermitEmptyPasswords yes -PasswordAuthentication yes -PermitRootLogin yes -``` - -Disable root and user passwords with `passwd -d` - -Edit ` /etc/pam.d/common-auth` - -Replace `auth [success=1 default=ignore] pam_unix.so nullok_secure` by -``` -auth [success=1 default=ignore] pam_unix.so nullok -``` - -As root to install system wide: - -``` -wget -nv https://bootstrap.pypa.io/get-pip.py -python3 get-pip.py -apt-get clean -``` - -Afterwards install mxnet python3 deps: - -``` -pip3 install -r mxnet_requirements.txt -``` - - -To access qemu control console from tmux: `ctrl-a a c` - -# CI and Testing - -Formally, [runtime_functions.py](https://github.com/apache/incubator-mxnet/blob/master/ci/docker/qemu/runtime_functions.py) would [run](https://github.com/apache/incubator-mxnet/blob/8beea18e3d9835f90b59d3f9de8f9945ac819423/ci/docker/qemu/runtime_functions.py#L81) *pip install -r [mxnet/tests/requirements.txt](https://github.com/apache/incubator-mxnet/blob/master/tests/requirements.txt)*. If the requirements change, there can be an unfortunate side-effect that there are no wheel files for Raspberry Pi for the new requirement. This would trigger a build from source on the emulator, which can take a long time and cause job timeouts. Therefore, we no longer install the `tests/requirements.txt` requirements, but rather rely on [test_requirements.txt](https://github.com/apache/incubator-mxnet/blob/master/ci/qemu/test_requirements.txt) to maintain the requirements for the qemu tests. 
Should any requirements changes lead to a job time out, it is incumbent on the submitter to update the image to include the requirement and unblock ci. diff --git a/ci/qemu/copy.sh b/ci/qemu/copy.sh deleted file mode 100755 index f39a9d083509..000000000000 --- a/ci/qemu/copy.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -exuo pipefail - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Extract kernel from image - -set -ex -virt-copy-out -a vda.qcow2 /boot/vmlinuz-3.16.0-6-armmp-lpae /boot/initrd.img-3.16.0-6-armmp-lpae . diff --git a/ci/qemu/init.sh b/ci/qemu/init.sh deleted file mode 100755 index 1698cb10f272..000000000000 --- a/ci/qemu/init.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -exuo pipefail - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Download the installer and ramdisk for intallation -set -ex -wget -O installer-vmlinuz http://http.us.debian.org/debian/dists/jessie/main/installer-armhf/current/images/netboot/vmlinuz -wget -O installer-initrd.gz http://http.us.debian.org/debian/dists/jessie/main/installer-armhf/current/images/netboot/initrd.gz diff --git a/ci/qemu/initrd_modif/inittab b/ci/qemu/initrd_modif/inittab deleted file mode 100644 index 064512595fbc..000000000000 --- a/ci/qemu/initrd_modif/inittab +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# /etc/inittab -# busybox init configuration for debian-installer - -# main rc script -::sysinit:/sbin/reopen-console /sbin/debian-installer-startup - -# main setup program -::respawn:/sbin/reopen-console /sbin/debian-installer - -# convenience shells -tty2::askfirst:-/bin/sh -tty3::askfirst:-/bin/sh - -# logging -#tty4::respawn:/usr/bin/tail -f /var/log/syslog - -# Stuff to do before rebooting -::ctrlaltdel:/sbin/shutdown > /dev/null 2>&1 - -# re-exec init on receipt of SIGHUP/SIGUSR1 -::restart:/sbin/init diff --git a/ci/qemu/install.sh b/ci/qemu/install.sh deleted file mode 100755 index 8531b033d074..000000000000 --- a/ci/qemu/install.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex -rm -f vda.qcow2 -sudo ./preseed.sh -qemu-img create -f qcow2 vda.qcow2 10G -qemu-system-arm -M virt -m 1024 \ - -kernel installer-vmlinuz \ - -append BOOT_DEBUG=2,DEBIAN_FRONTEND=noninteractive \ - -initrd installer-initrd_automated.gz \ - -drive if=none,file=vda.qcow2,format=qcow2,id=hd \ - -device virtio-blk-device,drive=hd \ - -netdev user,id=mynet \ - -device virtio-net-device,netdev=mynet \ - -nographic -no-reboot diff --git a/ci/qemu/mxnet_requirements.txt b/ci/qemu/mxnet_requirements.txt deleted file mode 100644 index 2ab0fd9612e5..000000000000 --- a/ci/qemu/mxnet_requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -urllib3<1.23,>=1.21.1 -requests<2.19.0,>=2.18.4 -graphviz<0.9.0,>=0.8.1 -numpy>1.16.0,<2.0.0 -mock -nose -nose-timer diff --git a/ci/qemu/preseed.cfg b/ci/qemu/preseed.cfg deleted file mode 100644 index 23a8fc3baebf..000000000000 --- a/ci/qemu/preseed.cfg +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -d-i debian-installer/locale string en_US -d-i keyboard-configuration/xkb-keymap select us -d-i netcfg/get_hostname string debian-qemu -d-i netcfg/get_domain string lab -d-i passwd/root-login boolean true -d-i passwd/root-password password debian -d-i passwd/root-password-again password debian -d-i clock-setup/utc boolean true -d-i mirror/country string US -d-i mirror/https/proxy string -d-i mirror/http/proxy string -d-i mirror/ftp/proxy string -d-i mirror/http/countries select US -d-i mirror/http/hostname string ftp.us.debian.org -d-i mirror/http/mirror select ftp.us.debian.org -d-i localechooser/preferred-locale select en_US.UTF-8 -apt-mirror-setup apt-setup/use_mirror boolean false -apt-mirror-setup apt-setup/mirror/error select Retry -d-i passwd/username string qemu -d-i passwd/user-password password qemu -d-i passwd/user-password-again password qemu -user-setup-udeb passwd/username string qemu -user-setup-udeb passwd/user-fullname string qemu -d-i time/zone string GMT -d-i partman-auto/choose_recipe select atomic -#partman-auto partman-auto/select_disk select /var/lib/partman/devices/=dev=vda -#partman-auto partman-auto/automatically_partition select -#partman-target partman-target/no_root error -#partman-auto partman-auto/init_automatically_partition select 50some_device__________regular -#partman-auto partman-auto/disk string vda -#partman-auto partman-auto/expert_recipe string \ -# boot-root :: \ -# 100 10000 1000000000 ext4 \ -# $primary{ } \ -# lv_name{ root } \ -# method{ format } \ -# format{ } \ -# use_filesystem{ } \ -# filesystem{ ext4 } \ -# mountpoint{ / } . 
-# -#d-i partman-partitioning/confirm_write_new_label boolean true -#d-i partman/choose_partition select finish -#d-i partman/confirm boolean true -#d-i partman/confirm_nooverwrite boolean true -#partman-base partman/choose_partition select 90finish__________finish -#partman-basicfilesystems partman-basicfilesystems/swap_check_failed boolean -d-i popularity-contest/participate boolean false -d-i tasksel/first multiselect SSH server, standard system utilities -d-i debian-installer/main-menu select Finish the installation -d-i debian-installer/exit/poweroff boolean true diff --git a/ci/qemu/preseed.sh b/ci/qemu/preseed.sh deleted file mode 100755 index ad005548fbbe..000000000000 --- a/ci/qemu/preseed.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -exuo pipefail - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex -rm -rf initrd -mkdir -p initrd -cd initrd -gunzip -c ../installer-initrd.gz | cpio -i -cp ../preseed.cfg . -cp ../initrd_modif/inittab etc/inittab -cp ../initrd_modif/S10syslog lib/debian-installer-startup.d/S10syslog -find . | cpio --create --format 'newc' | gzip -c > ../installer-initrd_automated.gz -echo "Done!" 
diff --git a/ci/qemu/run.sh b/ci/qemu/run.sh deleted file mode 100755 index eeff4e1fdccb..000000000000 --- a/ci/qemu/run.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -exuo pipefail - - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex -disk=${1:-vda.qcow2} -qemu-system-arm -M virt -m 1024 \ - -kernel vmlinuz-3.16.0-6-armmp-lpae \ - -initrd initrd.img-3.16.0-6-armmp-lpae \ - -smp 4 \ - -append 'root=/dev/vda1' \ - -drive if=none,file=$disk,format=qcow2,id=hd \ - -device virtio-blk-device,drive=hd \ - -netdev user,id=mynet,hostfwd=tcp::2222-:22 \ - -device virtio-net-device,netdev=mynet \ - -nographic -# -display none diff --git a/ci/qemu/test_requirements.txt b/ci/qemu/test_requirements.txt deleted file mode 100644 index 77037d89c673..000000000000 --- a/ci/qemu/test_requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -mock -nose -nose-timer \ No newline at end of file From ea2b8d51574ab599bf219a91a4059df53fc6381a Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Fri, 22 May 2020 17:44:59 +0300 Subject: [PATCH 07/14] Request the C++ standard library and extensions --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e630730115a2..92f93e7bb15e 100644 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,9 @@ if(CMAKE_CROSSCOMPILING) endif() project(mxnet C CXX) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS ON) if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/config.cmake) # Load config.cmake only if mxnet is not compiled as a dependency of another project From 0b1355ffb746b93a8171c10fcca99f56bc24434e Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Fri, 22 May 2020 19:32:53 +0300 Subject: [PATCH 08/14] Upgrade dmlc-core to resolve build errors --- 3rdparty/dmlc-core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index b3a4c715bfc3..5df8305fe699 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit b3a4c715bfc37a08f245844a800933f10e47c1ea +Subproject commit 5df8305fe699d3b503d10c60a231ab0223142407 From 567518bc145f7b76c454f3adf2cacee317511be9 Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Fri, 22 May 2020 19:53:47 +0300 Subject: [PATCH 09/14] Remove leftovers from C++17 dev type check --- src/operator/rnn-inl.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index 180e945000e4..ede1d5f4717f 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -843,7 +843,6 @@ class RNNOp { } #endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__) -#if !defined(__CUDACC__) // cuda doesn't support C++17 if (ctx_.dev_type == kCPU) { int projection_size = 0; if (param_.projection_size.has_value()) { @@ -922,7 +921,6 @@ class RNNOp { param_.mode); } } -#endif } void Backward(const OpContext &ctx, From 0a921a4432ac657b47be44af86ab498fee66f964 Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Fri, 22 May 2020 23:28:31 +0300 Subject: [PATCH 10/14] Fix CPU-only RRNOp Forward --- src/operator/rnn-inl.h | 111 ++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 56 deletions(-) 
diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index ede1d5f4717f..fdce937e50d1 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -842,46 +842,65 @@ class RNNOp { #endif // MXNET_USE_CUDNN_GE_7200 } #endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__) +#if !defined(__CUDACC__) + int projection_size = 0; + if (param_.projection_size.has_value()) { + projection_size = param_.projection_size.value(); + } - if (ctx_.dev_type == kCPU) { - int projection_size = 0; + // allocate temp space + const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_, + param_.state_size, projection_size, direction, param_.mode); + if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) { + temp_cpu_space_size_ = work_cpu_space_size; + temp_cpu_space_ = NDArray(TShape({static_cast(temp_cpu_space_size_)}), ctx_, + false, in_data[rnn_enum::kData].type_flag_); + temp_init_space_ = true; + } + DType* work_cpu_space = static_cast(temp_cpu_space_.data().dptr_); + + if (ctx.is_train || ctx.need_grad) { + mshadow::Random *prnd = ctx.requested[0].get_random(s); + std::mt19937 &rnd_engine = prnd->GetRndEngine(); + + // allocate reserve space if (param_.projection_size.has_value()) { - projection_size = param_.projection_size.value(); + LOG(FATAL) << "No training support for LSTM with projection on CPU currently."; } - // allocate temp space - const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_, - param_.state_size, projection_size, direction, param_.mode); - if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) { - temp_cpu_space_size_ = work_cpu_space_size; - temp_cpu_space_ = NDArray(TShape({static_cast(temp_cpu_space_size_)}), ctx_, + const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction, + param_.seq_length_, param_.batch_size_, + param_.state_size, param_.mode); + if (!init_space_ || reserve_cpu_space_size_ < r_size) { + 
reserve_cpu_space_size_ = r_size; + reserve_cpu_space_ = NDArray(TShape({static_cast(reserve_cpu_space_size_)}), ctx_, false, in_data[rnn_enum::kData].type_flag_); - temp_init_space_ = true; + init_space_ = true; } - DType* work_cpu_space = static_cast(temp_cpu_space_.data().dptr_); - - if (ctx.is_train || ctx.need_grad) { - mshadow::Random *prnd = ctx.requested[0].get_random(s); - std::mt19937 &rnd_engine = prnd->GetRndEngine(); - - // allocate reserve space - if (param_.projection_size.has_value()) { - LOG(FATAL) << "No training support for LSTM with projection on CPU currently."; - } - - const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction, - param_.seq_length_, param_.batch_size_, - param_.state_size, param_.mode); - if (!init_space_ || reserve_cpu_space_size_ < r_size) { - reserve_cpu_space_size_ = r_size; - reserve_cpu_space_ = NDArray(TShape({static_cast(reserve_cpu_space_size_)}), ctx_, - false, in_data[rnn_enum::kData].type_flag_); - init_space_ = true; - } - DType* reserve_space_ptr = static_cast(reserve_cpu_space_.data().dptr_); + DType* reserve_space_ptr = static_cast(reserve_cpu_space_.data().dptr_); - RNNForwardTraining(work_cpu_space, - reserve_space_ptr, + RNNForwardTraining(work_cpu_space, + reserve_space_ptr, + param_.state_outputs, + param_.num_layers, + direction, + param_.seq_length_, + param_.batch_size_, + param_.input_size_, + param_.state_size, + x.dptr_, + hx.dptr_, + cx_ptr, + w.dptr_, + b_ptr, + y.dptr_, + hy_ptr, + cy_ptr, + param_.p, + param_.mode, + rnd_engine); + } else { + RNNForwardInference(work_cpu_space, param_.state_outputs, param_.num_layers, direction, @@ -889,6 +908,7 @@ class RNNOp { param_.batch_size_, param_.input_size_, param_.state_size, + projection_size, x.dptr_, hx.dptr_, cx_ptr, @@ -897,30 +917,9 @@ class RNNOp { y.dptr_, hy_ptr, cy_ptr, - param_.p, - param_.mode, - rnd_engine); - } else { - RNNForwardInference(work_cpu_space, - param_.state_outputs, - param_.num_layers, - direction, - 
param_.seq_length_, - param_.batch_size_, - param_.input_size_, - param_.state_size, - projection_size, - x.dptr_, - hx.dptr_, - cx_ptr, - w.dptr_, - b_ptr, - y.dptr_, - hy_ptr, - cy_ptr, - param_.mode); - } + param_.mode); } +#endif // !defined(__CUDACC__) } void Backward(const OpContext &ctx, From a5faf8cf02703c873993c3dc77e8f609477105a5 Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Sat, 23 May 2020 02:15:09 +0300 Subject: [PATCH 11/14] Change the ARM8 build to work like the ARM7 build --- ci/docker/runtime_functions.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 9141a21b2457..4b544e4b63ef 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -331,20 +331,32 @@ build_armv7() { } build_armv8() { + set -ex + pushd . + cd /work/build + + # Lapack functionality will be included and statically linked to openblas. + # But USE_LAPACK needs to be set to OFF, otherwise the main CMakeLists.txt + # file tries to add -llapack. Lapack functionality though, requires -lgfortran + # to be linked additionally. + build_ccache_wrappers cmake \ -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DCMAKE_CROSSCOMPILING=ON \ -DUSE_CUDA=OFF \ - -DSUPPORT_F16C=OFF \ -DUSE_OPENCV=OFF \ -DUSE_OPENMP=ON \ - -DUSE_LAPACK=OFF \ -DUSE_SIGNAL_HANDLER=ON \ -DCMAKE_BUILD_TYPE=Release \ -DUSE_MKL_IF_AVAILABLE=OFF \ + -DUSE_LAPACK=OFF \ + -DBUILD_CPP_EXAMPLES=OFF \ -G Ninja /work/mxnet + ninja build_wheel + popd } From 36c78b60132c530dff82a0d89c8f746dcfa95846 Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Sat, 23 May 2020 19:28:05 +0300 Subject: [PATCH 12/14] Revert "Fix CPU-only RRNOp Forward" This reverts commit 0a921a4432ac657b47be44af86ab498fee66f964. 
--- src/operator/rnn-inl.h | 111 +++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index fdce937e50d1..ede1d5f4717f 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -842,65 +842,46 @@ class RNNOp { #endif // MXNET_USE_CUDNN_GE_7200 } #endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__) -#if !defined(__CUDACC__) - int projection_size = 0; - if (param_.projection_size.has_value()) { - projection_size = param_.projection_size.value(); - } - - // allocate temp space - const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_, - param_.state_size, projection_size, direction, param_.mode); - if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) { - temp_cpu_space_size_ = work_cpu_space_size; - temp_cpu_space_ = NDArray(TShape({static_cast(temp_cpu_space_size_)}), ctx_, - false, in_data[rnn_enum::kData].type_flag_); - temp_init_space_ = true; - } - DType* work_cpu_space = static_cast(temp_cpu_space_.data().dptr_); - if (ctx.is_train || ctx.need_grad) { - mshadow::Random *prnd = ctx.requested[0].get_random(s); - std::mt19937 &rnd_engine = prnd->GetRndEngine(); - - // allocate reserve space + if (ctx_.dev_type == kCPU) { + int projection_size = 0; if (param_.projection_size.has_value()) { - LOG(FATAL) << "No training support for LSTM with projection on CPU currently."; + projection_size = param_.projection_size.value(); } - const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction, - param_.seq_length_, param_.batch_size_, - param_.state_size, param_.mode); - if (!init_space_ || reserve_cpu_space_size_ < r_size) { - reserve_cpu_space_size_ = r_size; - reserve_cpu_space_ = NDArray(TShape({static_cast(reserve_cpu_space_size_)}), ctx_, + // allocate temp space + const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_, + param_.state_size, 
projection_size, direction, param_.mode); + if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) { + temp_cpu_space_size_ = work_cpu_space_size; + temp_cpu_space_ = NDArray(TShape({static_cast(temp_cpu_space_size_)}), ctx_, false, in_data[rnn_enum::kData].type_flag_); - init_space_ = true; + temp_init_space_ = true; } - DType* reserve_space_ptr = static_cast(reserve_cpu_space_.data().dptr_); + DType* work_cpu_space = static_cast(temp_cpu_space_.data().dptr_); - RNNForwardTraining(work_cpu_space, - reserve_space_ptr, - param_.state_outputs, - param_.num_layers, - direction, - param_.seq_length_, - param_.batch_size_, - param_.input_size_, - param_.state_size, - x.dptr_, - hx.dptr_, - cx_ptr, - w.dptr_, - b_ptr, - y.dptr_, - hy_ptr, - cy_ptr, - param_.p, - param_.mode, - rnd_engine); - } else { - RNNForwardInference(work_cpu_space, + if (ctx.is_train || ctx.need_grad) { + mshadow::Random *prnd = ctx.requested[0].get_random(s); + std::mt19937 &rnd_engine = prnd->GetRndEngine(); + + // allocate reserve space + if (param_.projection_size.has_value()) { + LOG(FATAL) << "No training support for LSTM with projection on CPU currently."; + } + + const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction, + param_.seq_length_, param_.batch_size_, + param_.state_size, param_.mode); + if (!init_space_ || reserve_cpu_space_size_ < r_size) { + reserve_cpu_space_size_ = r_size; + reserve_cpu_space_ = NDArray(TShape({static_cast(reserve_cpu_space_size_)}), ctx_, + false, in_data[rnn_enum::kData].type_flag_); + init_space_ = true; + } + DType* reserve_space_ptr = static_cast(reserve_cpu_space_.data().dptr_); + + RNNForwardTraining(work_cpu_space, + reserve_space_ptr, param_.state_outputs, param_.num_layers, direction, @@ -908,7 +889,6 @@ class RNNOp { param_.batch_size_, param_.input_size_, param_.state_size, - projection_size, x.dptr_, hx.dptr_, cx_ptr, @@ -917,9 +897,30 @@ class RNNOp { y.dptr_, hy_ptr, cy_ptr, - param_.mode); + param_.p, + 
param_.mode, + rnd_engine); + } else { + RNNForwardInference(work_cpu_space, + param_.state_outputs, + param_.num_layers, + direction, + param_.seq_length_, + param_.batch_size_, + param_.input_size_, + param_.state_size, + projection_size, + x.dptr_, + hx.dptr_, + cx_ptr, + w.dptr_, + b_ptr, + y.dptr_, + hy_ptr, + cy_ptr, + param_.mode); + } } -#endif // !defined(__CUDACC__) } void Backward(const OpContext &ctx, From 8ae6298bd89d3f0ca7e09d009fe38705e06f3ad8 Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Sat, 23 May 2020 20:20:33 +0300 Subject: [PATCH 13/14] Hack around the lack of constexpr if --- src/operator/rnn-inl.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index ede1d5f4717f..5eca5c6c2ecd 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -861,9 +861,13 @@ class RNNOp { DType* work_cpu_space = static_cast(temp_cpu_space_.data().dptr_); if (ctx.is_train || ctx.need_grad) { - mshadow::Random *prnd = ctx.requested[0].get_random(s); - std::mt19937 &rnd_engine = prnd->GetRndEngine(); - + mshadow::Random *prnd = ctx.requested[0].get_random(s); + // Hack: the surrounding if condition would be a constexpr if in C++17. + // Since this branch can only be reached if the xpu == cpu, the cast is valid. + // Using macros with defined(__CUDACC__) instead of the if statement results in errors + // related to unused variables which are declared above. 
+ auto cpu_prnd = reinterpret_cast *>(prnd); + std::mt19937 &rnd_engine = cpu_prnd->GetRndEngine(); // allocate reserve space if (param_.projection_size.has_value()) { LOG(FATAL) << "No training support for LSTM with projection on CPU currently."; From 53147e0a3493cb235269de32809afdbab1df561f Mon Sep 17 00:00:00 2001 From: Nick Guletskii Date: Sat, 23 May 2020 22:07:04 +0300 Subject: [PATCH 14/14] Adjust the list of files to be packed in ARM jobs --- ci/jenkins/Jenkins_steps.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 378ae8177ec5..98c774b284ec 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -464,7 +464,7 @@ def compile_armv6_cpu() { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() utils.docker_run('armv6', 'build_armv6', false) - utils.pack_lib('armv6', mx_lib) + utils.pack_lib('armv6', mx_cmake_lib) } } } @@ -478,7 +478,7 @@ def compile_armv7_cpu() { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() utils.docker_run('armv7', 'build_armv7', false) - utils.pack_lib('armv7', mx_lib) + utils.pack_lib('armv7', mx_cmake_lib) } } } @@ -492,7 +492,7 @@ def compile_armv8_cpu() { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() utils.docker_run('armv8', 'build_armv8', false) - utils.pack_lib('armv8', mx_lib) + utils.pack_lib('armv8', mx_cmake_lib) } } } @@ -1360,7 +1360,7 @@ def test_qemu_armv7_cpu() { node(NODE_LINUX_CPU) { ws('workspace/ut-armv7-qemu') { timeout(time: max_time, unit: 'MINUTES') { - utils.unpack_and_init('armv7', mx_lib) + utils.unpack_and_init('armv7', mx_cmake_lib) utils.docker_run('test.armv7', 'unittest_ubuntu_python3_arm', false) } } @@ -1373,7 +1373,7 @@ def test_qemu_armv8_cpu() { node(NODE_LINUX_CPU) { ws('workspace/ut-armv8-qemu') { timeout(time: max_time, unit: 'MINUTES') { - utils.unpack_and_init('armv8', mx_lib) + utils.unpack_and_init('armv8', mx_cmake_lib) 
utils.docker_run('test.armv8', 'unittest_ubuntu_python3_arm', false) } }