Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

One-step ROCm manywheel/libtorch docker build #1418

Merged
merged 9 commits into from
Jun 8, 2023
2 changes: 1 addition & 1 deletion .github/workflows/build-libtorch-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
run: |
libtorch/build_docker.sh
build-docker-rocm:
-runs-on: ubuntu-22.04
+runs-on: linux.12xlarge
strategy:
matrix:
rocm_version: ["5.4.2", "5.5"]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-manywheel-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
run: |
manywheel/build_docker.sh
build-docker-rocm:
-runs-on: ubuntu-22.04
+runs-on: linux.12xlarge
strategy:
matrix:
rocm_version: ["5.4.2", "5.5"]
Expand Down
25 changes: 4 additions & 21 deletions common/install_miopen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ if [[ $ROCM_INT -lt 40001 ]]; then
exit 0
fi

-# CHANGED: Do not uninstall. To avoid out of disk space issues, we will copy lib over existing.
# Uninstall existing package, to avoid errors during later yum install indicating packages did not change.
-#yum remove -y miopen-hip
+yum remove -y miopen-hip

# Function to retry functions that sometimes timeout or have flaky failures
retry () {
Expand Down Expand Up @@ -77,24 +75,14 @@ elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
-elif [[ $ROCM_INT -ge 40500 ]] && [[ $ROCM_INT -lt 50000 ]]; then
-MIOPEN_CMAKE_COMMON_FLAGS="${MIOPEN_CMAKE_COMMON_FLAGS} -DMIOPEN_USE_HIP_KERNELS=Off -DMIOPEN_DEFAULT_FIND_MODE=Normal"
-MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
-MIOPEN_BRANCH="release/rocm-rel-4.5-staging"
-elif [[ $ROCM_INT -ge 40300 ]] && [[ $ROCM_INT -lt 40500 ]]; then
-MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878;gfx1030_36"
-MIOPEN_BRANCH="release/rocm-rel-4.3-staging"
-elif [[ $ROCM_INT -ge 40200 ]] && [[ $ROCM_INT -lt 40300 ]]; then
-MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx803_36;gfx803_64;gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878"
-MIOPEN_BRANCH="rocm-4.2.x-staging"
else
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
exit 1
fi

git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH}
pushd MIOpen
-# remove .git to save disk space ince CI runner was running out
+# remove .git to save disk space since CI runner was running out
rm -rf .git
# Don't build MLIR to save docker build time
# since we are disabling MLIR backend for MIOpen anyway
Expand Down Expand Up @@ -122,18 +110,13 @@ PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
make MIOpen -j $(nproc)

-# CHANGED: Do not build package.
# Build MIOpen package
-#make -j $(nproc) package
+make -j $(nproc) package

# clean up since CI runner was running out of disk space
rm -rf /usr/local/cget

-# CHANGED: Do not install package, just copy lib over existing.
-#yum install -y miopen-*.rpm
-dest=$(ls ${ROCM_INSTALL_PATH}/lib/libMIOpen.so.1.0.*)
-rm -f ${dest}
-cp lib/libMIOpen.so.1.0 ${dest}
+yum install -y miopen-*.rpm

popd
rm -rf MIOpen
2 changes: 1 addition & 1 deletion common/install_rocm_magma.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ else
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
-echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
+echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
Expand Down
14 changes: 4 additions & 10 deletions libtorch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,18 @@ FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
ENV MKLROOT /opt/intel
-ADD ./common/install_rocm.sh install_rocm.sh
+# No need to install ROCm as base docker image should have full ROCm install
+#ADD ./common/install_rocm.sh install_rocm.sh
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
-#ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
+ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
# gfortran and python needed for building magma from source for ROCm
RUN apt-get update -y && \
apt-get install gfortran -y && \
apt-get install python -y && \
apt-get clean

-FROM rocm as rocm5.4.2
-RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
-RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
-#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
-
-FROM rocm as rocm5.5
-RUN ROCM_VERSION=5.5 bash ./install_rocm.sh && rm install_rocm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
-#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
+RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh

FROM ${BASE_TARGET} as final
# Install LLVM
Expand Down
4 changes: 2 additions & 2 deletions libtorch/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ case ${GPU_ARCH_TYPE} in
DOCKER_GPU_BUILD_ARG=""
;;
rocm)
-BASE_TARGET=rocm${GPU_ARCH_VERSION}
+BASE_TARGET=rocm
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
-GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-magma
+GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
Expand Down
15 changes: 7 additions & 8 deletions manywheel/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,14 @@ FROM cpu_final as rocm_final
ARG ROCM_VERSION=3.7
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-# Install ROCm
-ADD ./common/install_rocm.sh install_rocm.sh
-RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
+# No need to install ROCm as base docker image should have full ROCm install
+#ADD ./common/install_rocm.sh install_rocm.sh
+#RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
# cmake3 is needed for the MIOpen build
RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
-### The following is now performed beforehand in a new GPU_IMAGE with magma and miopen preinstalled
-#ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
-#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
-#ADD ./common/install_miopen.sh install_miopen.sh
-#RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
+ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
+RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
+ADD ./common/install_miopen.sh install_miopen.sh
+RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
2 changes: 1 addition & 1 deletion manywheel/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ case ${GPU_ARCH_TYPE} in
TARGET=rocm_final
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION}
-GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-magma-miopen-staging
+GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
Expand Down