Skip to content

Commit

Permalink
Pick what we want to keep, drop what we don't want to bring across
Browse files Browse the repository at this point in the history
  • Loading branch information
danpetry committed Nov 4, 2024
1 parent 611bc61 commit c4b292a
Show file tree
Hide file tree
Showing 10 changed files with 481 additions and 241 deletions.
1 change: 1 addition & 0 deletions .gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions abs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# macOS 12.3 or above is required for running the GPU variant (MPS support). No way to specify this for only the GPU
# variant, so it's specified for both.
extra_labels_for_os:
osx-arm64: [ventura]
44 changes: 42 additions & 2 deletions recipe/bld.bat
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
@echo On

:: The PyTorch test suite includes some symlinks, which aren't resolved on Windows, leading to packaging errors.
:: ATTN! These change and have to be updated manually, often with each release.
:: (no current symlinks being packaged. Leaving this information here as it took some months to find the issue. Look out
:: for a failure with error message: "conda_package_handling.exceptions.ArchiveCreationError: <somefile> Cannot stat
:: while writing file")

set TH_BINARY_BUILD=1
set PYTORCH_BUILD_VERSION=%PKG_VERSION%
set PYTORCH_BUILD_NUMBER=%PKG_BUILDNUM%

:: uncomment to debug cmake build
:: set CMAKE_VERBOSE_MAKEFILE=1

if "%pytorch_variant%" == "gpu" (
set build_with_cuda=1
set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1%
Expand All @@ -12,6 +21,7 @@ if "%pytorch_variant%" == "gpu" (
set USE_CUDA=0
)

:: =============================== CUDA FLAGS> ======================================
if "%build_with_cuda%" == "" goto cuda_flags_end

set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
Expand All @@ -23,12 +33,25 @@ if "%desired_cuda%" == "10.0" set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all

:cuda_flags_end
:: =============================== CUDA FLAGS< ======================================

set USE_MKLDNN=1

:: Tensorpipe cannot be used on windows
set USE_TENSORPIPE=0
set DISTUTILS_USE_SDK=1
set BUILD_TEST=0
:: Don't increase MAX_JOBS to NUMBER_OF_PROCESSORS, as it will run out of heap
set CPU_COUNT=1
set MAX_JOBS=%CPU_COUNT%
:: Use our Pybind11, Eigen
set USE_SYSTEM_PYBIND11=1
set USE_SYSTEM_EIGEN_INSTALL=1

set CMAKE_INCLUDE_PATH=%LIBRARY_PREFIX%\include
set LIB=%LIBRARY_PREFIX%\lib;%LIB%

:: =============================== CUDA> ======================================
IF "%build_with_cuda%" == "" goto cuda_end

set MAGMA_HOME=%LIBRARY_PREFIX%
Expand All @@ -38,13 +61,30 @@ set "PATH=%CUDA_BIN_PATH%;%PATH%"
set CUDNN_INCLUDE_DIR=%LIBRARY_PREFIX%\include

:cuda_end
:: =============================== CUDA< ======================================

set CMAKE_GENERATOR=Ninja
set "CMAKE_GENERATOR_PLATFORM="
set "CMAKE_PREFIX_PATH=%LIBRARY_PREFIX%"
set CMAKE_BUILD_TYPE=Release
:: This is so that CMake finds the environment's Python, not another one
set Python_EXECUTABLE=%PYTHON%
set Python3_EXECUTABLE=%PYTHON%

:: This is the default, but just in case it changes, one day.
set BUILD_DOCS=OFF

:: Force usage of MKL. If MKL can't be found, cmake will raise an error.
set BLAS=MKL

:: Tell Pytorch's embedded FindMKL where to find MKL.
set INTEL_MKL_DIR=%LIBRARY_PREFIX%

set "libuv_ROOT=%LIBRARY_PREFIX%"
set "USE_SYSTEM_SLEEF=OFF"
set "BUILD_CUSTOM_PROTOBUF=OFF"
:: Note that BUILD_CUSTOM_PROTOBUF=OFF (which would use our protobuf) doesn't work properly as of last testing, and results in
:: duplicate symbols at link time.
:: set "BUILD_CUSTOM_PROTOBUF=OFF"

%PYTHON% -m pip install . --no-deps -vv
%PYTHON% -m pip install . --no-deps --no-build-isolation -vv
if errorlevel 1 exit /b 1
114 changes: 93 additions & 21 deletions recipe/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

set -ex

echo "####################################################################"
echo "Building PyTorch using BLAS implementation: $blas_impl "
echo "####################################################################"

# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/243
# https://github.com/pytorch/pytorch/blob/v2.3.1/setup.py#L341
export PACKAGE_TYPE=conda
Expand All @@ -14,9 +18,21 @@ rm -rf pyproject.toml

export USE_NUMA=0
export USE_ITT=0

#################### ADJUST COMPILER AND LINKER FLAGS #####################
# PyTorch's build system doesn't like us setting the C++ standard and will
# issue a warning. In particular, if it's set to anything other than C++14,
# we'll get compiler errors. Let's just remove it like we're told.
export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-std=c++[0-9][0-9]//g')"
# The below three lines expose symbols that would otherwise be hidden or
# optimised away. They were here before, so removing them would potentially
# break users' programs
export CFLAGS="$(echo $CFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
export CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fvisibility-inlines-hidden//g')"
export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,--as-needed//g')"
# The default conda LDFLAGS include -Wl,-dead_strip_dylibs, which removes all the
# MKL sequential, core, etc. libraries, resulting in a "Symbol not found: _mkl_blas_caxpy"
# error on osx-64.
export LDFLAGS="$(echo $LDFLAGS | sed 's/-Wl,-dead_strip_dylibs//g')"
export LDFLAGS_LD="$(echo $LDFLAGS_LD | sed 's/-dead_strip_dylibs//g')"
if [[ "$c_compiler" == "clang" ]]; then
Expand Down Expand Up @@ -45,11 +61,37 @@ fi
# can be imported on system without a GPU
LDFLAGS="${LDFLAGS//-Wl,-z,now/-Wl,-z,lazy}"

################ CONFIGURE CMAKE FOR CONDA ENVIRONMENT ###################
if [[ "$OSTYPE" != "darwin"* ]]; then
export CMAKE_SYSROOT=$CONDA_BUILD_SYSROOT
else
export CMAKE_OSX_SYSROOT=$CONDA_BUILD_SYSROOT
fi
# Required to make the right SDK found on Anaconda's CI system. Ideally should be fixed in the CI or conda-build
if [[ "${build_platform}" = "osx-arm64" ]]; then
export DEVELOPER_DIR=/Library/Developer/CommandLineTools
fi
export CMAKE_GENERATOR=Ninja
export CMAKE_LIBRARY_PATH=$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH
export CMAKE_PREFIX_PATH=$PREFIX
export CMAKE_BUILD_TYPE=Release

# Apparently, the PATH that conda generates when stacking environments, does not
# have a logical order, potentially leading to CMake looking for (and finding)
# things in the wrong (e.g. parent) environment. In particular, we want to avoid
# finding the wrong Python interpreter.
# Additionally, we explicitly tell CMake where the correct Python interpreter is,
# because simply setting the PATH doesn't work completely.
export PATH=$PREFIX/bin:$PREFIX:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH
export Python3_ROOT_DIR=${PREFIX}
export Python3_EXECUTABLE="${PYTHON}"

# Uncomment to use ccache; development only
# export CMAKE_C_COMPILER_LAUNCHER=ccache
# export CMAKE_CXX_COMPILER_LAUNCHER=ccache
# export CMAKE_CUDA_COMPILER_LAUNCHER=ccache
# export CCACHE_BASEDIR=${PREFIX}/../

for ARG in $CMAKE_ARGS; do
if [[ "$ARG" == "-DCMAKE_"* ]]; then
cmake_arg=$(echo $ARG | cut -d= -f1)
Expand All @@ -61,6 +103,7 @@ for ARG in $CMAKE_ARGS; do
done
unset CMAKE_INSTALL_PREFIX
export TH_BINARY_BUILD=1
# Use our build version and number for inserting into binaries
export PYTORCH_BUILD_VERSION=$PKG_VERSION
export PYTORCH_BUILD_NUMBER=$PKG_BUILDNUM

Expand All @@ -70,6 +113,16 @@ export BUILD_TEST=0
export USE_SYSTEM_SLEEF=1
# use our protobuf
export BUILD_CUSTOM_PROTOBUF=OFF
export USE_SYSTEM_PYBIND11=1
export USE_SYSTEM_EIGEN_INSTALL=1
# TODO: Unvendor onnx. Requires our package to provide ONNXConfig.cmake etc. first
# Breakpad is missing a ppc64 and s390x port
case "$build_platform" in
linux-ppc64le|linux-s390x)
export USE_BREAKPAD=OFF
;;
esac

rm -rf $PREFIX/bin/protoc

if [[ "${target_platform}" != "${build_platform}" ]]; then
Expand All @@ -94,15 +147,20 @@ if [[ "${CI}" == "github_actions" ]]; then
# cirun-openstack-gpu-2xlarge, which has 32GB RAM, 8 CPUs
export MAX_JOBS=4
else
export MAX_JOBS=${CPU_COUNT}
# Leave a spare core for other tasks. This may need to be reduced further
# if we get out of memory errors. (Each job uses a certain amount of memory.)
export MAX_JOBS=$((CPU_COUNT > 1 ? CPU_COUNT - 1 : 1))
fi

if [[ "$blas_impl" == "generic" ]]; then
if [[ "$blas_impl" == "openblas" ]]; then
# Fake openblas
export BLAS=OpenBLAS
sed -i.bak "s#FIND_LIBRARY.*#set(OpenBLAS_LIB ${PREFIX}/lib/liblapack${SHLIB_EXT} ${PREFIX}/lib/libcblas${SHLIB_EXT} ${PREFIX}/lib/libblas${SHLIB_EXT})#g" cmake/Modules/FindOpenBLAS.cmake
else
#sed -i.bak "s#FIND_LIBRARY.*#set(OpenBLAS_LIB ${PREFIX}/lib/liblapack${SHLIB_EXT} ${PREFIX}/lib/libcblas${SHLIB_EXT} ${PREFIX}/lib/libblas${SHLIB_EXT})#g" cmake/Modules/FindOpenBLAS.cmake
elif [[ "$blas_impl" == "mkl" ]]; then
export BLAS=MKL
else
echo "[ERROR] Unsupported BLAS implementation '${blas_impl}'" >&2
exit 1
fi

if [[ "$PKG_NAME" == "pytorch" ]]; then
Expand Down Expand Up @@ -131,7 +189,14 @@ if [[ "$OSTYPE" == "darwin"* ]]; then
# was added. Revisit later
export USE_MKLDNN=0
fi
elif [[ ${cuda_compiler_version} != "None" ]]; then

if [[ ${gpu_variant} == "metal" ]]; then
export USE_MPS=1
else
export USE_MPS=0
fi

elif [[ ${gpu_variant} == "cuda"* ]]; then
if [[ "$target_platform" == "linux-aarch64" ]]; then
# https://github.com/pytorch/pytorch/pull/121975
# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/264
Expand All @@ -149,29 +214,39 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
if [[ "${target_platform}" != "${build_platform}" ]]; then
export CUDA_TOOLKIT_ROOT=${CUDA_HOME}
fi
# Warning from pytorch v1.12.1: In the future we will require one to
# explicitly pass TORCH_CUDA_ARCH_LIST to cmake instead of implicitly
# setting it as an env variable.
#
# +PTX should go to the oldest arch. There's a modest runtime performance
# hit for (unlisted) newer arches on doing this, but that must be set
# when wide compatibility is a concern.
#
# https://pytorch.org/docs/stable/cpp_extension.html (Compute capabilities)
# https://github.com/pytorch/builder/blob/c85da84005b44041b75e1eb3221ea7dcbd1b28aa/conda/pytorch-nightly/build.sh#L53-L89
if [[ ${cuda_compiler_version} == 9.0* ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;7.0+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;7.0"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 9.2* ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;6.1;7.0"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 10.* ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;6.1;7.0;7.5"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 11.0* ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;6.1;7.0;7.5;8.0"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 11.1 ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 11.2 ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 11.8 ]]; then
export TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9+PTX"
export TORCH_CUDA_ARCH_LIST="3.5+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9"
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
elif [[ ${cuda_compiler_version} == 12.0 ]]; then
export TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
export TORCH_CUDA_ARCH_LIST="5.0+PTX;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0"
# $CUDA_HOME not set in CUDA 12.0. Using $PREFIX
export CUDA_TOOLKIT_ROOT_DIR="${PREFIX}"
if [[ "${target_platform}" != "${build_platform}" ]]; then
Expand All @@ -185,7 +260,8 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
export CUDA_TOOLKIT_ROOT=${PREFIX}
fi
else
echo "unsupported cuda version. edit build_pytorch.sh"
echo "No CUDA architecture list exists for CUDA v${cuda_compiler_version}"
echo "in build.sh. Use https://en.wikipedia.org/wiki/CUDA#GPUs_supported to make one."
exit 1
fi
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
Expand All @@ -195,22 +271,18 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
export USE_STATIC_NCCL=0
export USE_STATIC_CUDNN=0
export MAGMA_HOME="${PREFIX}"
export CUDA_INC_PATH="${PREFIX}/targets/x86_64-linux/include/" # Point cmake to the header files
else
if [[ "$target_platform" != *-64 ]]; then
# Breakpad seems to not work on aarch64 or ppc64le
# https://github.com/pytorch/pytorch/issues/67083
export USE_BREAKPAD=0
fi
# MKLDNN is an Apache-2.0 licensed library for DNNs and is used
# for CPU builds. Not to be confused with MKL.
export USE_MKLDNN=1
export USE_CUDA=0
export CMAKE_TOOLCHAIN_FILE="${RECIPE_DIR}/cross-linux.cmake"
#export CMAKE_TOOLCHAIN_FILE="${RECIPE_DIR}/cross-linux.cmake"
fi

echo '${CXX}'=${CXX}
echo '${PREFIX}'=${PREFIX}
$PREFIX/bin/python -m pip $PIP_ACTION . --no-deps -vvv --no-clean \
$PREFIX/bin/python -m pip $PIP_ACTION . --no-deps --no-build-isolation -vvv --no-clean \
| sed "s,${CXX},\$\{CXX\},g" \
| sed "s,${PREFIX},\$\{PREFIX\},g"

Expand Down
Loading

0 comments on commit c4b292a

Please sign in to comment.