Skip to content

Commit

Permalink
ARM docker build with 24.12 pytorch fw image (#581)
Browse files Browse the repository at this point in the history
### Description
Updates ARM Dockerfile to work with 24.12 pytorch FW image

### Type of changes

- [ ]  Bug fix (non-breaking change which fixes an issue)
- [ ]  New feature (non-breaking change which adds functionality)
- [X]  Refactor
- [ ]  Documentation update
- [ ]  Other (please describe):

### CI Pipeline Configuration

ARM build is not covered by pre-merge CI, is covered nightly. Having
said that, this change might not work with CI at all because the blossom
runners have a kernel/CUDA mismatch on their Grace systems.... more info
TBD

### Usage
```
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -it build-image-name:tag /bin/bash
```
### Pre-submit Checklist
<!--- Ensure all items are completed before submitting -->

 - [X] I have tested these changes locally
 - [N/A] I have updated the documentation accordingly
 - [N/A] I have added/updated tests as needed
 - [X] All existing tests pass successfully

---------

Signed-off-by: Timur Rvachov <[email protected]>
  • Loading branch information
trvachov authored Jan 10, 2025
1 parent 43d2ca3 commit d36c18e
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 34 deletions.
100 changes: 67 additions & 33 deletions Dockerfile.arm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base image with apex and transformer engine, but without NeMo or Megatron-LM.
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3

FROM rust:1.82.0 as rust-env

Expand Down Expand Up @@ -55,24 +55,27 @@ RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip --disable-pip-version-check --no-cache-di
git+https://github.com/Dao-AILab/[email protected]

# Build LLVM and triton
# It's important to select a specific version of LLVM as per triton's README instructions, and
# also important to constrain the build targets to the systems we care about or else there will
# be many strange unlinked symbol issues. Here we assume this dockerfile is build on an aarch64
# target (host), and build for NVIDIA GPUS (NVPTX). Unclear why, but we also need to build for
# AMDGPUs to get triton to properly build or else there are linker issues.
RUN git clone https://github.com/llvm/llvm-project.git && \
pip install ninja && \
cd llvm-project && \
git fetch origin 5e5a22caf88ac1ccfa8dc5720295fdeba0ad9372 && \
git checkout 5e5a22caf88ac1ccfa8dc5720295fdeba0ad9372 && \
git fetch origin 10dc3a8e916d73291269e5e2b82dd22681489aa1 && \
git checkout 10dc3a8e916d73291269e5e2b82dd22681489aa1 && \
mkdir build && cd build && \
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir;llvm" && \
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_ENABLE_PROJECTS="mlir;llvm" -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" ../llvm && \
ninja && \
export LLVM_BUILD_DIR=${WORKDIR}/llvm-project/build && \

cd ${WORKDIR} && \
git clone https://github.com/triton-lang/triton.git && \
pip install cmake wheel pybind11 && \
cd triton && \
git fetch origin 79c6c9b209a5692b9a895398f4f3a033f8f80415 && \
git checkout 79c6c9b209a5692b9a895398f4f3a033f8f80415 && \
LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install python/ && \

git fetch origin release/3.1.x && \
git checkout release/3.1.x && \
LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib LLVM_SYSPATH=$LLVM_BUILD_DIR pip install --verbose python/ && \
cd ${WORKDIR} && \
rm -rf llvm-project && \
rm -rf triton
Expand All @@ -93,25 +96,20 @@ RUN rm -rf /build

# Addressing Security Scan Vulnerabilities
RUN rm -rf /opt/pytorch/pytorch/third_party/onnx
RUN apt-get update && \
apt-get install -y openssh-client=1:8.9p1-3ubuntu0.10 && \
rm -rf /var/lib/apt/lists/*
RUN apt purge -y libslurm37 libpmi2-0 && \
RUN apt purge -y libpmi2-0 && \
apt autoremove -y
RUN source /usr/local/nvm/nvm.sh && \
NODE_VER=$(nvm current) && \
nvm deactivate && \
nvm uninstall $NODE_VER && \
sed -i "/NVM/d" /root/.bashrc && \
sed -i "/nvm.sh/d" /etc/bash.bashrc

# Use UV to install python packages from the workspace. This just installs packages into the system's python
# environment, and does not use the current uv.lock file.
# environment, and does not use the current uv.lock file. Note that with python 3.12, we now need to set
# UV_BREAK_SYSTEM_PACKAGES, since the pytorch base image has made the decision not to use a virtual environment and UV
# does not respect the PIP_BREAK_SYSTEM_PACKAGES environment variable set in the base dockerfile.
COPY --from=ghcr.io/astral-sh/uv:0.4.25 /uv /usr/local/bin/uv
ENV UV_LINK_MODE=copy \
UV_COMPILE_BYTECODE=1 \
UV_PYTHON_DOWNLOADS=never \
UV_SYSTEM_PYTHON=true
UV_SYSTEM_PYTHON=true \
UV_NO_CACHE=1 \
UV_BREAK_SYSTEM_PACKAGES=1

# Install the bionemo-geomtric requirements ahead of copying over the rest of the repo, so that we can cache their
# installation. These involve building some torch extensions, so they can take a while to install.
Expand All @@ -133,12 +131,35 @@ COPY --from=rust-env /usr/local/rustup /usr/local/rustup
ENV PATH="/usr/local/cargo/bin:/usr/local/rustup/bin:${PATH}"
ENV RUSTUP_HOME="/usr/local/rustup"

# Build decord
# # Build decord
# This needs a specific version of ffmpeg:
# root@e1fc53d00844:/workspace/bionemo2# ffmpeg -version
# ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
# built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
# configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
# libavutil 56. 70.100 / 56. 70.100
# libavcodec 58.134.100 / 58.134.100
# libavformat 58. 76.100 / 58. 76.100
# libavdevice 58. 13.100 / 58. 13.100
# libavfilter 7.110.100 / 7.110.100
# libswscale 5. 9.100 / 5. 9.100
# libswresample 3. 9.100 / 3. 9.100
# libpostproc 55. 9.100 / 55. 9.100
#
# Issue link: https://github.com/dmlc/decord/issues/257
# Diff to make it all work https://github.com/dmlc/decord/issues/186#issuecomment-1171882325

# Consider this:
# sudo apt install libnvidia-decode-550
# cp /usr/lib/aarch64-linux-gnu/libnvcuvid* /usr/local/cuda/
# cmake .. -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release

RUN apt-get update && \
apt-get install -y build-essential python3-dev python3-setuptools make cmake && \
apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev && \
apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev
RUN --mount=type=bind,source=./arm_build/decord_ffmpeg6_fix.patch,target=/decord_ffmpeg6_fix.patch \
git clone --recursive https://github.com/dmlc/decord && \
cd decord && \
cd decord && git apply /decord_ffmpeg6_fix.patch && \
mkdir build && cd build && \
cmake .. -DUSE_CUDA=0 -DCMAKE_BUILD_TYPE=Release && \
make && \
Expand Down Expand Up @@ -173,20 +194,28 @@ RUN git clone --single-branch --branch 1.15.0rc4 https://github.com/single-cell-

WORKDIR /workspace/bionemo2
# Note, we need to mount the .git folder here so that setuptools-scm is able to fetch git tag for version.
# For some reason, we do not need to do the tensorstore verson package hack on arm64, while we do need this for x86 build.
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
<<EOF
set -eo pipefail
uv pip install maturin --no-build-isolation && uv pip install --no-build-isolation \
uv pip install maturin --no-build-isolation --break-system-packages
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
pip install --use-deprecated=legacy-resolver --no-build-isolation \
tensorstore==0.1.45

RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
# Comment out mamba install in NeMo as this causes issues.
sed -i "/mamba-ssm/d" ./3rdparty/NeMo/requirements/requirements_nlp.txt && \
uv pip install --no-build-isolation \
./3rdparty/* \
./sub-packages/bionemo-* \
-r /requirements-cve.txt \
-r /requirements-test.txt
rm -rf ./3rdparty
rm -rf /tmp/*
rm -rf ./sub-packages/bionemo-noodles/target
EOF
-r /requirements-test.txt && rm -rf ./3rdparty && rm -rf /tmp/* && rm -rf ./sub-packages/bionemo-noodles/target \
&& rm -rf /root/.cache/*

# In the devcontainer image, we just copy over the finished `dist-packages` folder from the build image back into the
# base pytorch container. We can then set up a non-root user and uninstall the bionemo and 3rd-party packages, so that
Expand Down Expand Up @@ -286,6 +315,12 @@ for sub in ./3rdparty/* ./sub-packages/bionemo-*; do
uv pip install --no-deps --no-build-isolation --editable $sub
done
EOF
# This is needed because faiss is not compatible with ARM at all.
# Bionemo doesn't use faiss, but megatron core does.
# We do not use this codepath at all, therefore we just make is_sve_supported return False
# to circumvent python import issues
RUN sed -i '42i\ # Bionemo hack to fix ARM issues with faiss\n return False' /usr/local/lib/python3.12/dist-packages/faiss/loader.py

# Since the entire repo is owned by root, swithcing username for development breaks things.
ARG USERNAME=bionemo
RUN chown $USERNAME:$USERNAME -R /workspace/bionemo2/
Expand All @@ -312,7 +347,6 @@ COPY --from=rust-env /usr/local/rustup /usr/local/rustup


# RUN rm -rf /usr/local/cargo /usr/local/rustup
RUN rm -rf /root/.cache/bazel
RUN chmod 777 -R /workspace/bionemo2/

# Transformer engine attention defaults
Expand Down
73 changes: 73 additions & 0 deletions arm_build/decord_ffmpeg6_fix.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This is a patch file for decord https://github.com/dmlc/decord
# needed to build decord against ffmpeg6, taken from
# https://github.com/dmlc/decord/issues/186#issuecomment-1171882325
# This needs to be removed once decord natively supports latest ffmpeg versions.
diff --git a/src/video/ffmpeg/ffmpeg_common.h b/src/video/ffmpeg/ffmpeg_common.h
index b0b973f..f0f7316 100644
--- a/src/video/ffmpeg/ffmpeg_common.h
+++ b/src/video/ffmpeg/ffmpeg_common.h
@@ -21,6 +21,7 @@
extern "C" {
#endif
#include <libavcodec/avcodec.h>
+#include <libavcodec/bsf.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavfilter/avfilter.h>
diff --git a/src/video/nvcodec/cuda_threaded_decoder.cc b/src/video/nvcodec/cuda_threaded_decoder.cc
index 62bc7ee..957a90d 100644
--- a/src/video/nvcodec/cuda_threaded_decoder.cc
+++ b/src/video/nvcodec/cuda_threaded_decoder.cc
@@ -17,7 +17,7 @@ namespace decord {
namespace cuda {
using namespace runtime;

-CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, AVInputFormat *iformat)
+CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, const AVInputFormat *iformat)
: device_id_(device_id), stream_({device_id, false}), device_{}, ctx_{}, parser_{}, decoder_{},
pkt_queue_{}, frame_queue_{},
run_(false), frame_count_(0), draining_(false),
@@ -70,7 +70,7 @@ CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar,
}
}

-void CUThreadedDecoder::InitBitStreamFilter(AVCodecParameters *codecpar, AVInputFormat *iformat) {
+void CUThreadedDecoder::InitBitStreamFilter(AVCodecParameters *codecpar, const AVInputFormat *iformat) {
const char* bsf_name = nullptr;
if (AV_CODEC_ID_H264 == codecpar->codec_id) {
// H.264
diff --git a/src/video/nvcodec/cuda_threaded_decoder.h b/src/video/nvcodec/cuda_threaded_decoder.h
index d7e6fcd..61958a1 100644
--- a/src/video/nvcodec/cuda_threaded_decoder.h
+++ b/src/video/nvcodec/cuda_threaded_decoder.h
@@ -46,7 +46,7 @@ class CUThreadedDecoder final : public ThreadedDecoderInterface {
using FrameOrderQueuePtr = std::unique_ptr<FrameOrderQueue>;

public:
- CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, AVInputFormat *iformat);
+ CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, const AVInputFormat *iformat);
void SetCodecContext(AVCodecContext *dec_ctx, int width = -1, int height = -1, int rotation = 0);
bool Initialized() const;
void Start();
@@ -70,7 +70,7 @@ class CUThreadedDecoder final : public ThreadedDecoderInterface {
void LaunchThreadImpl();
void RecordInternalError(std::string message);
void CheckErrorStatus();
- void InitBitStreamFilter(AVCodecParameters *codecpar, AVInputFormat *iformat);
+ void InitBitStreamFilter(AVCodecParameters *codecpar, const AVInputFormat *iformat);

int device_id_;
CUStream stream_;
diff --git a/src/video/video_reader.cc b/src/video/video_reader.cc
index af4858d..99c9635 100644
--- a/src/video/video_reader.cc
+++ b/src/video/video_reader.cc
@@ -145,7 +145,7 @@ VideoReader::~VideoReader(){

void VideoReader::SetVideoStream(int stream_nb) {
if (!fmt_ctx_) return;
- AVCodec *dec;
+ const AVCodec *dec;
int st_nb = av_find_best_stream(fmt_ctx_.get(), AVMEDIA_TYPE_VIDEO, stream_nb, -1, &dec, 0);
// LOG(INFO) << "find best stream: " << st_nb;
CHECK_GE(st_nb, 0) << "ERROR cannot find video stream with wanted index: " << stream_nb;
5 changes: 5 additions & 0 deletions ci/scripts/run_pytest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ source "$SCRIPT_DIR/utils.sh" || { echo "Failed to source utils.sh" >&2; exit 1;
# Set up BioNeMo home directory
set_bionemo_home || exit 1

# Echo some useful information
lscpu
nvidia-smi
uname -a

# Set up pytest options
PYTEST_OPTIONS=(
-v
Expand Down

0 comments on commit d36c18e

Please sign in to comment.