diff --git a/.azure_pipelines/job_templates/prepare-build-template.yml b/.azure_pipelines/job_templates/prepare-build-template.yml
index 200baeda96783..0755c07e2672c 100644
--- a/.azure_pipelines/job_templates/prepare-build-template.yml
+++ b/.azure_pipelines/job_templates/prepare-build-template.yml
@@ -46,7 +46,7 @@ steps:
       curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
       curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
       copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
-      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
+      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output .\tmp_bin\randomtemp.exe
   displayName: Install sccache and randomtemp
   condition: not(eq(variables.CUDA_VERSION, ''))

diff --git a/.azure_pipelines/job_templates/set-environment-variables.yml b/.azure_pipelines/job_templates/set-environment-variables.yml
index cc0d1d36a44ca..40d1cb384b2af 100644
--- a/.azure_pipelines/job_templates/set-environment-variables.yml
+++ b/.azure_pipelines/job_templates/set-environment-variables.yml
@@ -120,9 +120,7 @@ steps:
       Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
       Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
       Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
-      Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
-      Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
-      Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
+      Write-Host "##vso[task.setvariable variable=CMAKE_CUDA_COMPILER_LAUNCHER;]$(Build.SourcesDirectory)/tmp_bin/randomtemp.exe;$(Build.SourcesDirectory)/tmp_bin/sccache.exe"
   displayName: Set MKL, sccache and randomtemp environment variables

 # View current environment variables
diff --git a/.circleci/docker/ubuntu-cuda/Dockerfile b/.circleci/docker/ubuntu-cuda/Dockerfile
index 38be22a901938..9c9e40387066e 100644
--- a/.circleci/docker/ubuntu-cuda/Dockerfile
+++ b/.circleci/docker/ubuntu-cuda/Dockerfile
@@ -75,7 +75,7 @@ RUN rm install_cmake.sh
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
-ENV CUDA_NVCC_EXECUTABLE=/opt/cache/lib/nvcc
+ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache

 # Add jni.h for java host build
 ADD ./common/install_jni.sh install_jni.sh
@@ -94,6 +94,7 @@ ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
 ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
+ENV CUDA_PATH /usr/local/cuda

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
diff --git a/.github/templates/windows_ci_workflow.yml.j2 b/.github/templates/windows_ci_workflow.yml.j2
index bbb7ab9582f31..a6d1e3577d22c 100644
--- a/.github/templates/windows_ci_workflow.yml.j2
+++ b/.github/templates/windows_ci_workflow.yml.j2
@@ -55,8 +55,8 @@ env:
   CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
 {%- if cuda_version != "cpu" %}
   TORCH_CUDA_ARCH_LIST: "7.0"
-  USE_CUDA: 1
 {%- endif %}
+  USE_CUDA: !{{ 1 if cuda_version != "cpu" else 0 }}

 !{{ common.concurrency(build_environment) }}

diff --git a/.github/workflows/generated-win-vs2019-cpu-py3.yml b/.github/workflows/generated-win-vs2019-cpu-py3.yml
index 1b632599b41a0..a646defa060a9 100644
--- a/.github/workflows/generated-win-vs2019-cpu-py3.yml
+++ b/.github/workflows/generated-win-vs2019-cpu-py3.yml
@@ -31,6 +31,7 @@ env:
   AWS_DEFAULT_REGION: us-east-1
   CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
   CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+  USE_CUDA: 0

 concurrency:
   group: win-vs2019-cpu-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh
index 8862d4206a6f6..19ee5d8f6f44b 100755
--- a/.jenkins/caffe2/build.sh
+++ b/.jenkins/caffe2/build.sh
@@ -29,7 +29,8 @@ if [ -z "${SCCACHE}" ] && which ccache > /dev/null; then
   ln -sf "$(which ccache)" ./ccache/g++
   ln -sf "$(which ccache)" ./ccache/x86_64-linux-gnu-gcc
   if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then
-    ln -sf "$(which ccache)" ./ccache/nvcc
+    mkdir -p ./ccache/cuda
+    ln -sf "$(which ccache)" ./ccache/cuda/nvcc
   fi
   export CACHE_WRAPPER_DIR="$PWD/ccache"
   export PATH="$CACHE_WRAPPER_DIR:$PATH"
@@ -93,7 +94,8 @@ if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then

   # Explicitly set path to NVCC such that the symlink to ccache or sccache is used
   if [ -n "${CACHE_WRAPPER_DIR}" ]; then
-    build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
+    build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/cuda/nvcc")
+    build_args+=("CMAKE_CUDA_COMPILER_LAUNCHER=${CACHE_WRAPPER_DIR}/ccache")
   fi

   # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit.
diff --git a/.jenkins/pytorch/win-test-helpers/build_pytorch.bat b/.jenkins/pytorch/win-test-helpers/build_pytorch.bat
index 1cedecb3de121..8b9589f1b9787 100644
--- a/.jenkins/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.jenkins/pytorch/win-test-helpers/build_pytorch.bat
@@ -97,23 +97,20 @@ set CXX=sccache-cl
 set CMAKE_GENERATOR=Ninja

 if "%USE_CUDA%"=="1" (
-  copy %TMP_DIR_WIN%\bin\sccache.exe %TMP_DIR_WIN%\bin\nvcc.exe
-
   :: randomtemp is used to resolve the intermittent build error related to CUDA.
   :: code: https://github.com/peterjc123/randomtemp-rust
   :: issue: https://github.com/pytorch/pytorch/issues/25393
   ::
-  :: Previously, CMake uses CUDA_NVCC_EXECUTABLE for finding nvcc and then
-  :: the calls are redirected to sccache. sccache looks for the actual nvcc
-  :: in PATH, and then pass the arguments to it.
-  :: Currently, randomtemp is placed before sccache (%TMP_DIR_WIN%\bin\nvcc)
-  :: so we are actually pretending sccache instead of nvcc itself.
-  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
+  :: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
+  :: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
+  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
   if errorlevel 1 exit /b
   if not errorlevel 0 exit /b
-  set RANDOMTEMP_EXECUTABLE=%TMP_DIR_WIN%\bin\nvcc.exe
-  set CUDA_NVCC_EXECUTABLE=%TMP_DIR_WIN%\bin\randomtemp.exe
-  set RANDOMTEMP_BASEDIR=%TMP_DIR_WIN%\bin
+  echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
+  cat %TMP_DIR%/bin/nvcc.bat
+  set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
+  for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
+  set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
 )

 @echo off
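Aside (illustrative, not part of the patch): CMAKE_CUDA_COMPILER_LAUNCHER, used above, takes a
semicolon-separated list and prepends each entry, in order, to every CUDA compile rule, so the
effective command line becomes "randomtemp sccache nvcc <args...>". A minimal sketch, assuming a
toy project with a single kernel.cu; the project and file names here are hypothetical:

    cmake_minimum_required(VERSION 3.18)
    project(launcher_demo LANGUAGES CXX CUDA)

    # Equivalent to passing it on the command line:
    #   cmake -DCMAKE_CUDA_COMPILER_LAUNCHER="randomtemp;sccache" ...
    set(CMAKE_CUDA_COMPILER_LAUNCHER "randomtemp;sccache")

    # Every .cu compile for this target now runs through both wrappers.
    add_library(demo STATIC kernel.cu)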
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a0f1d0ec165e8..9fea1ac1ee1d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -517,16 +517,14 @@ if(MSVC)
   endforeach(flag_var)

   # Try harder
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "/w" "-w")
+  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w")
 endif(MSVC)

-list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")
-list(APPEND CUDA_NVCC_FLAGS_DEBUG "-Xfatbin" "-compress-all")
-list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-Xfatbin" "-compress-all")
+string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

 if(NOT MSVC)
-  list(APPEND CUDA_NVCC_FLAGS_DEBUG "-g" "-lineinfo" "--source-in-ptx")
-  list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-g" "-lineinfo" "--source-in-ptx")
+  string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -lineinfo --source-in-ptx")
+  string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -g -lineinfo --source-in-ptx")
 endif(NOT MSVC)

 # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
@@ -667,6 +665,16 @@ endif()

 include(cmake/Dependencies.cmake)

+if((CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10.2) AND (CMAKE_HOST_SYSTEM_NAME MATCHES "Windows"))
+  # CUDA < 10.2 doesn't support compiling and extracting header dependencies in
+  # one call, so instead CMake calls nvcc twice with && in between.
+  # However, on Windows cmd.exe has an 8191 character limit for commands, which we
+  # start hitting. This moves most arguments into a file to avoid going over the limit.
+
+  set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS ON)
+  set(CMAKE_NINJA_FORCE_RESPONSE_FILE ON CACHE INTERNAL "")
+endif()
+
 if(USE_FBGEMM)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
 endif()
diff --git a/aten/CMakeLists.txt b/aten/CMakeLists.txt
index 7ba92a6decee7..ca45d5a741eab 100644
--- a/aten/CMakeLists.txt
+++ b/aten/CMakeLists.txt
@@ -69,12 +69,6 @@ if(USE_CUDA AND USE_ROCM)
   message(FATAL_ERROR "Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF.")
 endif()

-if(MSVC)
-  # we want to respect the standard, and we are bored of those **** .
-  add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "/wd4819" "-Xcompiler" "/wd4503" "-Xcompiler" "/wd4190" "-Xcompiler" "/wd4244" "-Xcompiler" "/wd4251" "-Xcompiler" "/wd4275" "-Xcompiler" "/wd4522")
-endif(MSVC)
-
 if(USE_ROCM)
   # TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in
   # ATen proper)
diff --git a/c10/cuda/CMakeLists.txt b/c10/cuda/CMakeLists.txt
index 3803498b33523..a95bd278e2022 100644
--- a/c10/cuda/CMakeLists.txt
+++ b/c10/cuda/CMakeLists.txt
@@ -49,9 +49,7 @@ if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
 endif()

 # ---[ Dependency of c10_cuda
-target_link_libraries(c10_cuda PUBLIC c10)
-
-target_link_libraries(c10_cuda INTERFACE torch::cudart)
+target_link_libraries(c10_cuda PUBLIC c10 torch::cudart)

 target_include_directories(
     c10_cuda PUBLIC
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 09a1df310423c..a850ec66181db 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -895,19 +895,18 @@ elseif(USE_CUDA)
   set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
   if(CUDA_SEPARABLE_COMPILATION)
     # Separate compilation fails when kernels using `thrust::sort_by_key`
-    # are linked with the rest of CUDA code. Workaround by linking them separately
-    set(_generated_name "torch_cuda_w_sort_by_key_intermediate_link${CMAKE_C_OUTPUT_EXTENSION}")
-    set(torch_cuda_w_sort_by_key_link_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/torch_cuda.dir/${CMAKE_CFG_INTDIR}/${_generated_name}")
-    cuda_wrap_srcs(torch_cuda OBJ Caffe2_GPU_W_SORT_BY_KEY_OBJ ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
-    CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${torch_cuda_w_sort_by_key_link_file}" torch_cpu "${_options}" "${torch_cuda_SEPARABLE_COMPILATION_OBJECTS}")
-    set( torch_cuda_SEPARABLE_COMPILATION_OBJECTS )
-    # Pass compiled sort-by-key object + device-linked fatbin as extra dependencies of torch_cuda
-    cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS} ${torch_cuda_w_sort_by_key_link_file} ${Caffe2_GPU_W_SORT_BY_KEY_OBJ})
+    # are linked with the rest of CUDA code. Workaround by linking them separately.
+    add_library(torch_cuda ${Caffe2_GPU_SRCS})
+    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+
+    add_library(torch_cuda_w_sort_by_key OBJECT ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
+    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
+    target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
   elseif(BUILD_SPLIT_CUDA)
-    cuda_add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
-    cuda_add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
+    add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
+    add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
   else()
-    cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
+    add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
   endif()
   set(CUDA_LINK_LIBRARIES_KEYWORD)
   if(BUILD_SPLIT_CUDA)
@@ -1803,7 +1802,7 @@ if(BUILD_TEST)
   if(USE_CUDA)
     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
-      cuda_add_executable(${test_name} "${test_src}")
+      add_executable(${test_name} "${test_src}")
       target_link_libraries(${test_name} torch_library gtest_main)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ca560288a41ad..564fcebc0b08c 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -33,6 +33,50 @@ macro(enable_ubsan)
   endif()
 endmacro()

+# ---[ CUDA
+if(USE_CUDA)
+  # public/*.cmake uses CAFFE2_USE_*
+  set(CAFFE2_USE_CUDA ${USE_CUDA})
+  set(CAFFE2_USE_CUDNN ${USE_CUDNN})
+  set(CAFFE2_USE_NVRTC ${USE_NVRTC})
+  set(CAFFE2_USE_TENSORRT ${USE_TENSORRT})
+  include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
+  if(CAFFE2_USE_CUDA)
+    # A helper variable recording the list of Caffe2 dependent libraries
+    # torch::cudart is dealt with separately, due to CUDA_ADD_LIBRARY
+    # design reason (it adds CUDA_LIBRARIES itself).
+    set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
+      caffe2::cufft caffe2::curand caffe2::cublas)
+    if(CAFFE2_USE_NVRTC)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cuda caffe2::nvrtc)
+    else()
+      caffe2_update_option(USE_NVRTC OFF)
+    endif()
+    if(CAFFE2_USE_CUDNN)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
+    else()
+      caffe2_update_option(USE_CUDNN OFF)
+    endif()
+    if(CAFFE2_USE_TENSORRT)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt)
+    else()
+      caffe2_update_option(USE_TENSORRT OFF)
+    endif()
+  else()
+    message(WARNING
+      "Not compiling with CUDA. Suppress this warning with "
+      "-DUSE_CUDA=OFF.")
+    caffe2_update_option(USE_CUDA OFF)
+    caffe2_update_option(USE_CUDNN OFF)
+    caffe2_update_option(USE_NVRTC OFF)
+    caffe2_update_option(USE_TENSORRT OFF)
+    set(CAFFE2_USE_CUDA OFF)
+    set(CAFFE2_USE_CUDNN OFF)
+    set(CAFFE2_USE_NVRTC OFF)
+    set(CAFFE2_USE_TENSORRT OFF)
+  endif()
+endif()
+
 # ---[ Custom Protobuf
 if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND (NOT INTERN_BUILD_MOBILE OR BUILD_CAFFE2_MOBILE))
   disable_ubsan()
@@ -77,8 +121,8 @@ endif(MSVC)

 # ---[ Threads
 include(${CMAKE_CURRENT_LIST_DIR}/public/threads.cmake)
-if(TARGET Threads::Threads)
-  list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS Threads::Threads)
+if(TARGET caffe2::Threads)
+  list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS caffe2::Threads)
 else()
   message(FATAL_ERROR "Cannot find threading library. Caffe2 requires Threads to compile.")
@@ -661,7 +705,7 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
   # We need to replace googletest cmake scripts too.
   # Otherwise, it will sometimes break the build.
   # To make the git clean after the build, we make a backup first.
-  if(MSVC AND MSVC_Z7_OVERRIDE)
+  if((MSVC AND MSVC_Z7_OVERRIDE) OR USE_CUDA)
     execute_process(
       COMMAND ${CMAKE_COMMAND}
               "-DFILENAME=${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/cmake/internal_utils.cmake"
@@ -1181,50 +1225,6 @@ if(USE_LLVM)
   endif(LLVM_FOUND)
 endif(USE_LLVM)

-# ---[ CUDA
-if(USE_CUDA)
-  # public/*.cmake uses CAFFE2_USE_*
-  set(CAFFE2_USE_CUDA ${USE_CUDA})
-  set(CAFFE2_USE_CUDNN ${USE_CUDNN})
-  set(CAFFE2_USE_NVRTC ${USE_NVRTC})
-  set(CAFFE2_USE_TENSORRT ${USE_TENSORRT})
-  include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
-  if(CAFFE2_USE_CUDA)
-    # A helper variable recording the list of Caffe2 dependent libraries
-    # torch::cudart is dealt with separately, due to CUDA_ADD_LIBRARY
-    # design reason (it adds CUDA_LIBRARIES itself).
-    set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
-      caffe2::cufft caffe2::curand caffe2::cublas)
-    if(CAFFE2_USE_NVRTC)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cuda caffe2::nvrtc)
-    else()
-      caffe2_update_option(USE_NVRTC OFF)
-    endif()
-    if(CAFFE2_USE_CUDNN)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
-    else()
-      caffe2_update_option(USE_CUDNN OFF)
-    endif()
-    if(CAFFE2_USE_TENSORRT)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt)
-    else()
-      caffe2_update_option(USE_TENSORRT OFF)
-    endif()
-  else()
-    message(WARNING
-      "Not compiling with CUDA. Suppress this warning with "
-      "-DUSE_CUDA=OFF.")
-    caffe2_update_option(USE_CUDA OFF)
-    caffe2_update_option(USE_CUDNN OFF)
-    caffe2_update_option(USE_NVRTC OFF)
-    caffe2_update_option(USE_TENSORRT OFF)
-    set(CAFFE2_USE_CUDA OFF)
-    set(CAFFE2_USE_CUDNN OFF)
-    set(CAFFE2_USE_NVRTC OFF)
-    set(CAFFE2_USE_TENSORRT OFF)
-  endif()
-endif()
-
 # ---[ cuDNN
 if(USE_CUDNN)
   set(CUDNN_FRONTEND_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/../third_party/cudnn_frontend/include)
@@ -1371,6 +1371,8 @@ if(USE_GLOO)
       set(ENV{GLOO_ROCM_ARCH} "${PYTORCH_ROCM_ARCH}")
     endif()
     if(NOT USE_SYSTEM_GLOO)
+      # gloo uses cuda_add_library
+      torch_update_find_cuda_flags()
       add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)
     else()
       add_library(gloo SHARED IMPORTED)
@@ -1417,6 +1419,8 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
     set(TP_BUILD_LIBUV ON CACHE BOOL "" FORCE)
     set(TP_STATIC_OR_SHARED STATIC CACHE STRING "" FORCE)

+    # Tensorpipe uses cuda_add_library
+    torch_update_find_cuda_flags()
     add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)

     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
@@ -1560,7 +1564,6 @@ function(add_onnx_tensorrt_subdir)
 endfunction()
 if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
   if(USE_TENSORRT)
-    set(CMAKE_CUDA_COMPILER ${CUDA_NVCC_EXECUTABLE})
     add_onnx_tensorrt_subdir()
     include_directories("${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx-tensorrt")
     caffe2_interface_library(nvonnxparser_static onnx_trt_library)
@@ -1579,8 +1582,7 @@ endif()

 if(NOT INTERN_BUILD_MOBILE)
   set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
-  set(TORCH_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS})
-  separate_arguments(TORCH_NVCC_FLAGS)
+  string(APPEND CMAKE_CUDA_FLAGS " $ENV{TORCH_NVCC_FLAGS}")
   set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)

   # Top-level build config
@@ -1599,7 +1601,7 @@ if(NOT INTERN_BUILD_MOBILE)
   if(MSVC)
     # we want to respect the standard, and we are bored of those **** .
     add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
+    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
   endif()

   if(NOT MSVC)
@@ -1610,22 +1612,19 @@ if(NOT INTERN_BUILD_MOBILE)
     endif()
   endif()

-  list(APPEND CUDA_NVCC_FLAGS -Wno-deprecated-gpu-targets)
-  list(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda)
+  string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets --expt-extended-lambda")

   if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     set(CMAKE_CXX_STANDARD 14)
   endif()

-  list(APPEND CUDA_NVCC_FLAGS ${TORCH_NVCC_FLAGS})
-  if(CMAKE_POSITION_INDEPENDENT_CODE AND NOT MSVC)
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC")
-  endif()
-
   if(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
     message(STATUS "Found CUDA with FP16 support, compiling with torch.cuda.HalfTensor")
-    list(APPEND CUDA_NVCC_FLAGS "-DCUDA_HAS_FP16=1" "-D__CUDA_NO_HALF_OPERATORS__" "-D__CUDA_NO_HALF_CONVERSIONS__"
-      "-D__CUDA_NO_BFLOAT16_CONVERSIONS__" "-D__CUDA_NO_HALF2_OPERATORS__")
+    string(APPEND CMAKE_CUDA_FLAGS " -DCUDA_HAS_FP16=1"
+      " -D__CUDA_NO_HALF_OPERATORS__"
+      " -D__CUDA_NO_HALF_CONVERSIONS__"
+      " -D__CUDA_NO_HALF2_OPERATORS__"
+      " -D__CUDA_NO_BFLOAT16_CONVERSIONS__")
     add_compile_options(-DCUDA_HAS_FP16=1)
   else()
     message(STATUS "Could not find CUDA with FP16 support, compiling without torch.CudaHalfTensor")
diff --git a/cmake/GoogleTestPatch.cmake b/cmake/GoogleTestPatch.cmake
index 36018ace1d89a..c7fbb6ce9f02e 100644
--- a/cmake/GoogleTestPatch.cmake
+++ b/cmake/GoogleTestPatch.cmake
@@ -20,5 +20,6 @@ else(REVERT)
   file(READ ${FILENAME} content)
   file(WRITE ${BACKUP} "${content}")
   string(REGEX REPLACE "[-/]Z[iI]" "/Z7" content "${content}")
+  string(REGEX REPLACE "Threads::Threads" "caffe2::Threads" content "${content}")
   file(WRITE ${FILENAME} "${content}")
 endif(REVERT)
diff --git a/cmake/Modules/FindCUB.cmake b/cmake/Modules/FindCUB.cmake
index aff82aad4553f..e053964e6e479 100644
--- a/cmake/Modules/FindCUB.cmake
+++ b/cmake/Modules/FindCUB.cmake
@@ -3,6 +3,7 @@
 #  CUB_INCLUDE_DIRS - the CUB include directory

 find_path(CUB_INCLUDE_DIR
+  HINTS "${CUDA_TOOLKIT_INCLUDE}"
   NAMES cub/cub.cuh
   DOC "The directory where CUB includes reside"
 )
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index c905196b9ca3f..0066e8b49139b 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -103,9 +103,10 @@ function(caffe2_print_configuration_summary)
     message(STATUS "    nvrtc              : ${__tmp}")
     message(STATUS "    CUDA include path  : ${CUDA_INCLUDE_DIRS}")
     message(STATUS "    NVCC executable    : ${CUDA_NVCC_EXECUTABLE}")
-    message(STATUS "    NVCC flags         : ${CUDA_NVCC_FLAGS}")
-    message(STATUS "    CUDA host compiler : ${CUDA_HOST_COMPILER}")
-    message(STATUS "    NVCC --device-c    : ${CUDA_SEPARABLE_COMPILATION}")
+    message(STATUS "    CUDA compiler      : ${CMAKE_CUDA_COMPILER}")
+    message(STATUS "    CUDA flags         : ${CMAKE_CUDA_FLAGS}")
+    message(STATUS "    CUDA host compiler : ${CMAKE_CUDA_HOST_COMPILER}")
+    message(STATUS "    CUDA --device-c    : ${CUDA_SEPARABLE_COMPILATION}")
     message(STATUS "    USE_TENSORRT       : ${USE_TENSORRT}")
     if(${USE_TENSORRT})
       message(STATUS "      TensorRT runtime library: ${TENSORRT_LIBRARY}")
diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
index 7ba2bb6d4c76f..6c38b850ff59a 100644
--- a/cmake/public/cuda.cmake
+++ b/cmake/public/cuda.cmake
@@ -35,6 +35,13 @@ if(NOT CUDA_FOUND)
   set(CAFFE2_USE_CUDA OFF)
   return()
 endif()
+
+# Enable CUDA language support
+set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
+enable_language(CUDA)
+set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+
 message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
 message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
 message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
@@ -435,6 +442,8 @@ endif()

 # setting nvcc arch flags
 torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
+# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
+set(CMAKE_CUDA_ARCHITECTURES OFF)
 list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
 message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")

@@ -453,14 +462,10 @@ endforeach()
 string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
 list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})

-# Set C++14 support
 set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")
 if(MSVC)
   list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call")
   list(APPEND CUDA_NVCC_FLAGS "--no-host-device-move-forward")
-else()
-  list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC")
 endif()

 # OpenMP flags for NVCC with Clang-cl
@@ -477,9 +482,15 @@ endif()
 # Debug and Release symbol support
 if(MSVC)
   if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-MT$<$<CONFIG:Debug>:d>")
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT")
   else()
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-MD$<$<CONFIG:Debug>:d>")
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD")
   endif()
   if(CUDA_NVCC_FLAGS MATCHES "Zi")
     list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")
@@ -493,3 +504,11 @@ list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

 # Set expt-extended-lambda to support lambda on device
 list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda")
+
+foreach(FLAG ${CUDA_NVCC_FLAGS})
+  string(FIND "${FLAG}" " " flag_space_position)
+  if(NOT flag_space_position EQUAL -1)
+    message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${FLAG}'")
+  endif()
+  string(APPEND CMAKE_CUDA_FLAGS " ${FLAG}")
+endforeach()
diff --git a/cmake/public/threads.cmake b/cmake/public/threads.cmake
index f223f497c76f4..749619d64d99a 100644
--- a/cmake/public/threads.cmake
+++ b/cmake/public/threads.cmake
@@ -1,16 +1,29 @@
+if(TARGET caffe2::Threads)
+  return()
+endif()
+
 find_package(Threads REQUIRED)
-# For newer CMake, Threads::Threads is already defined. Otherwise, we will
-# provide a backward compatible wrapper for Threads::Threads.
-if(THREADS_FOUND AND NOT TARGET Threads::Threads)
-  add_library(Threads::Threads INTERFACE IMPORTED)
+
+# Threads::Threads doesn't work if the target has CUDA code
+if(THREADS_FOUND)
+  add_library(caffe2::Threads INTERFACE IMPORTED)
   if(THREADS_HAVE_PTHREAD_ARG)
-    set_property(TARGET Threads::Threads
-      PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
+    set(compile_options
+      $<$<COMPILE_LANGUAGE:CXX>:-pthread>
+      $<$<COMPILE_LANGUAGE:C>:-pthread>)
+    if(USE_CUDA)
+      list(APPEND compile_options
+        $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler -pthread>)
+    endif()
+
+    set_property(TARGET caffe2::Threads
+      PROPERTY INTERFACE_COMPILE_OPTIONS
+      ${compile_options})
   endif()
   if(CMAKE_THREAD_LIBS_INIT)
-    set_property(TARGET Threads::Threads
+    set_property(TARGET caffe2::Threads
       PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
   endif()
 endif()
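Aside (illustrative, not part of the patch): the reason for the caffe2::Threads wrapper is that
nvcc rejects the bare -pthread option that Threads::Threads publishes, so any CUDA target linking
it would fail to compile .cu files. Scoping the option with COMPILE_LANGUAGE generator
expressions, as above, keeps -pthread for C/C++ and routes it through the host compiler for CUDA.
A minimal sketch with a hypothetical target name:

    add_library(my_threads INTERFACE IMPORTED)
    set_property(TARGET my_threads PROPERTY INTERFACE_COMPILE_OPTIONS
      $<$<COMPILE_LANGUAGE:C>:-pthread>
      $<$<COMPILE_LANGUAGE:CXX>:-pthread>
      # nvcc only accepts host-compiler flags via -Xcompiler
      $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler -pthread>)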
diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
index 02ddba701776e..15f5afd31924a 100644
--- a/cmake/public/utils.cmake
+++ b/cmake/public/utils.cmake
@@ -348,7 +348,7 @@ macro(torch_cuda_based_add_library cuda_target)
   if(USE_ROCM)
     hip_add_library(${cuda_target} ${ARGN})
   elseif(USE_CUDA)
-    cuda_add_library(${cuda_target} ${ARGN})
+    add_library(${cuda_target} ${ARGN})
   else()
   endif()
 endmacro()
@@ -388,10 +388,11 @@ endmacro()
 # torch_compile_options(lib_name)
 function(torch_compile_options libname)
   set_property(TARGET ${libname} PROPERTY CXX_STANDARD 14)
+  set(private_compile_options "")

   # ---[ Check if warnings should be errors.
   if(WERROR)
-    target_compile_options(${libname} PRIVATE -Werror)
+    list(APPEND private_compile_options -Werror)
   endif()

   if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
@@ -405,38 +406,50 @@ function(torch_compile_options libname)
     endif()

     target_compile_options(${libname} PUBLIC
-      ${MSVC_RUNTIME_LIBRARY_OPTION}
-      $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
-      /EHsc
-      /DNOMINMAX
-      /wd4267
-      /wd4251
-      /wd4522
-      /wd4522
-      /wd4838
-      /wd4305
-      /wd4244
-      /wd4190
-      /wd4101
-      /wd4996
-      /wd4275
-      /bigobj
+      $<$<COMPILE_LANGUAGE:CXX>:
+        ${MSVC_RUNTIME_LIBRARY_OPTION}
+        $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
+        /EHsc
+        /DNOMINMAX
+        /wd4267
+        /wd4251
+        /wd4522
+        /wd4522
+        /wd4838
+        /wd4305
+        /wd4244
+        /wd4190
+        /wd4101
+        /wd4996
+        /wd4275
+        /bigobj>
       )
   else()
-    target_compile_options(${libname} PRIVATE
+    list(APPEND private_compile_options
       -Wall
       -Wextra
       -Wno-unused-parameter
+      -Wno-unused-variable
+      -Wno-unused-function
+      -Wno-unused-result
+      -Wno-unused-local-typedefs
       -Wno-missing-field-initializers
       -Wno-write-strings
       -Wno-unknown-pragmas
+      -Wno-type-limits
+      -Wno-array-bounds
+      -Wno-unknown-pragmas
+      -Wno-sign-compare
+      -Wno-strict-overflow
+      -Wno-strict-aliasing
+      -Wno-error=deprecated-declarations
       # Clang has an unfixed bug leading to spurious missing braces
       # warnings, see https://bugs.llvm.org/show_bug.cgi?id=21629
       -Wno-missing-braces
       )
     if(NOT APPLE)
-      target_compile_options(${libname} PRIVATE
+      list(APPEND private_compile_options
         # Considered to be flaky.  See the discussion at
         # https://github.com/pytorch/pytorch/pull/9608
         -Wno-maybe-uninitialized)
@@ -446,10 +459,23 @@ function(torch_compile_options libname)

     if(MSVC)
     elseif(WERROR)
-      target_compile_options(${libname} PRIVATE -Wno-strict-overflow)
+      list(APPEND private_compile_options -Wno-strict-overflow)
     endif()
   endif()

+  target_compile_options(${libname} PRIVATE
+      $<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
+  if(USE_CUDA)
+    string(FIND "${private_compile_options}" " " space_position)
+    if(NOT space_position EQUAL -1)
+      message(FATAL_ERROR "Found spaces in private_compile_options='${private_compile_options}'")
+    endif()
+    # Convert CMake list to comma-separated list
+    string(REPLACE ";" "," private_compile_options "${private_compile_options}")
+    target_compile_options(${libname} PRIVATE
+        $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${private_compile_options}>)
+  endif()
+
   if(NOT WIN32 AND NOT USE_ASAN)
     # Enable hidden visibility by default to make it easier to debug issues with
     # TORCH_API annotations. Hidden visibility with selective default visibility
@@ -458,11 +484,13 @@ function(torch_compile_options libname)
     # Unfortunately, hidden visibility messes up some ubsan warnings because
     # templated classes crossing library boundary get duplicated (but identical)
     # definitions. It's easier to just disable it.
-    target_compile_options(${libname} PRIVATE "-fvisibility=hidden")
+    target_compile_options(${libname} PRIVATE
+        $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
   endif()

   # Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression)
-  target_compile_options(${libname} PRIVATE "$<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>:-O2>")
+  target_compile_options(${libname} PRIVATE
+      $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>)
 endfunction()
@@ -484,3 +512,40 @@ function(torch_set_target_props libname)
     set_target_properties(${libname} PROPERTIES STATIC_LIBRARY_FLAGS_DEBUG "/NODEFAULTLIB:${VCOMP_LIB}d")
   endif()
 endfunction()
+
+
+##############################################################################
+# Set old-style FindCUDA.cmake compile flags from modern CMake CUDA flags.
+# Usage:
+#   torch_update_find_cuda_flags()
+function(torch_update_find_cuda_flags)
+  # Convert -O2 -Xcompiler="-O2 -Wall" to "-O2;-Xcompiler=-O2,-Wall"
+  if(USE_CUDA)
+    separate_arguments(FLAGS UNIX_COMMAND "${CMAKE_CUDA_FLAGS}")
+    string(REPLACE " " "," FLAGS "${FLAGS}")
+    set(CUDA_NVCC_FLAGS ${FLAGS} PARENT_SCOPE)
+
+    separate_arguments(FLAGS_DEBUG UNIX_COMMAND "${CMAKE_CUDA_FLAGS_DEBUG}")
+    string(REPLACE " " "," FLAGS_DEBUG "${FLAGS_DEBUG}")
+    set(CUDA_NVCC_FLAGS_DEBUG "${FLAGS_DEBUG}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_RELEASE UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELEASE}")
+    string(REPLACE " " "," FLAGS_RELEASE "${FLAGS_RELEASE}")
+    set(CUDA_NVCC_FLAGS_RELEASE "${FLAGS_RELEASE}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND "${CMAKE_CUDA_FLAGS_MINSIZEREL}")
+    string(REPLACE " " "," FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}")
+    set(CUDA_NVCC_FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
+    string(REPLACE " " "," FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}")
+    set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}" PARENT_SCOPE)
+
+    message(STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
+                   "    CUDA_NVCC_FLAGS                = ${FLAGS}\n"
+                   "    CUDA_NVCC_FLAGS_DEBUG          = ${FLAGS_DEBUG}\n"
+                   "    CUDA_NVCC_FLAGS_RELEASE       = ${FLAGS_RELEASE}\n"
+                   "    CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
+                   "    CUDA_NVCC_FLAGS_MINSIZEREL     = ${FLAGS_MINSIZEREL}")
+  endif()
+endfunction()
diff --git a/modules/detectron/CMakeLists.txt b/modules/detectron/CMakeLists.txt
index 8041e71d35f5a..bffc074e39a67 100644
--- a/modules/detectron/CMakeLists.txt
+++ b/modules/detectron/CMakeLists.txt
@@ -10,7 +10,7 @@ if(BUILD_CAFFE2_OPS)
   # Note(ilijar): Since Detectron ops currently have no
   # CPU implementation, we only build GPU ops for now.
   if(USE_CUDA)
-    CUDA_ADD_LIBRARY(
+    add_library(
         caffe2_detectron_ops_gpu SHARED
         ${Detectron_CPU_SRCS}
         ${Detectron_GPU_SRCS})
diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
index 2e48773a88863..c640c01803897 100644
--- a/test/cpp/c10d/CMakeLists.txt
+++ b/test/cpp/c10d/CMakeLists.txt
@@ -1,5 +1,5 @@
 if(USE_CUDA)
-  cuda_add_library(c10d_cuda_test CUDATest.cu)
+  add_library(c10d_cuda_test CUDATest.cu)
   target_include_directories(c10d_cuda_test PRIVATE $<INSTALL_INTERFACE:include>)
   target_link_libraries(c10d_cuda_test torch_cuda)
   add_dependencies(c10d_cuda_test torch_cuda)
diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py
index ed0df72e89847..9a42e733bebdd 100644
--- a/tools/setup_helpers/cmake.py
+++ b/tools/setup_helpers/cmake.py
@@ -243,7 +243,6 @@ def generate(
             var: var for var in
             ('BLAS',
              'BUILDING_WITH_TORCH_LIBS',
-             'CUDA_HOST_COMPILER',
              'CUDA_NVCC_EXECUTABLE',
              'CUDA_SEPARABLE_COMPILATION',
              'CUDNN_LIBRARY',
@@ -267,6 +267,15 @@ def generate(
              'OPENSSL_ROOT_DIR')
         })

+        # Aliases which are lower priority than their canonical option
+        low_priority_aliases = {
+            'CUDA_HOST_COMPILER': 'CMAKE_CUDA_HOST_COMPILER',
+            'CUDAHOSTCXX': 'CUDA_HOST_COMPILER',
+            'CMAKE_CUDA_HOST_COMPILER': 'CUDA_HOST_COMPILER',
+            'CMAKE_CUDA_COMPILER': 'CUDA_NVCC_EXECUTABLE',
+            'CUDACXX': 'CUDA_NVCC_EXECUTABLE'
+        }
+
         for var, val in my_env.items():
             # We currently pass over all environment variables that start with "BUILD_", "USE_", and "CMAKE_". This is
             # because we currently have no reliable way to get the list of all build options we have specified in
@@ -279,6 +288,11 @@ def generate(
             elif var.startswith(('BUILD_', 'USE_', 'CMAKE_')) or var.endswith(('EXITCODE', 'EXITCODE__TRYRUN_OUTPUT')):
                 build_options[var] = val

+            if var in low_priority_aliases:
+                key = low_priority_aliases[var]
+                if key not in build_options:
+                    build_options[key] = val
+
         # The default value cannot be easily obtained in CMakeLists.txt. We set it here.
         py_lib_path = sysconfig.get_path('purelib')
         cmake_prefix_path = build_options.get('CMAKE_PREFIX_PATH', None)
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 3524c2bdf5435..ad49c825613f5 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -111,6 +111,7 @@ else()
 endif()

 if(USE_CUDA)
+  include(${TORCH_ROOT}/cmake/public/cuda.cmake)
   append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
   list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})

@@ -119,16 +120,7 @@ if(USE_CUDA)
     list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
   endif()

-  if(MSVC)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib)
-    list(APPEND TORCH_PYTHON_INCLUDE_DIRECTORIES "${NVTOOLEXT_HOME}/include")
-  elseif(APPLE)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib)
-  else()
-    find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${LIBNVTOOLSEXT})
-  endif()
-
+  list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
 endif()

 if(USE_ROCM)
diff --git a/torch/lib/libshm/CMakeLists.txt b/torch/lib/libshm/CMakeLists.txt
index cb4305170cebc..1022ce84c339f 100644
--- a/torch/lib/libshm/CMakeLists.txt
+++ b/torch/lib/libshm/CMakeLists.txt
@@ -67,13 +67,13 @@ if(UNIX AND NOT APPLE)
     # site above though in case there was a reason we were testing
     # against clock_gettime.  In principle, the choice of symbol you
     # test for shouldn't matter.
-    set(CMAKE_REQUIRED_LIBRARIES Threads::Threads)
+    set(CMAKE_REQUIRED_LIBRARIES caffe2::Threads)
     check_library_exists(rt shm_open "sys/mman.h" NEED_RT_AND_PTHREAD)
     unset(CMAKE_REQUIRED_LIBRARIES)
     if(NEED_RT_AND_PTHREAD)
       message(STATUS "Needs it, linking against pthread and rt")
-      target_link_libraries(shm rt Threads::Threads)
-      target_link_libraries(torch_shm_manager rt Threads::Threads)
+      target_link_libraries(shm rt caffe2::Threads)
+      target_link_libraries(torch_shm_manager rt caffe2::Threads)
     endif()
   endif()
 endif()
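Aside (illustrative, not part of the patch): the overall shape of the migration is that
CMAKE_CUDA_FLAGS and first-class CUDA targets (plain add_library on .cu sources) become the
source of truth, while torch_update_find_cuda_flags() back-fills CUDA_NVCC_FLAGS just before
descending into third-party subprojects (gloo, tensorpipe) that still use FindCUDA's
cuda_add_library, as the Dependencies.cmake hunks above show. A minimal usage sketch, with a
hypothetical subproject path:

    # Mirror the native flags into FindCUDA's variables, then configure the
    # legacy subproject while the two flag sets are in sync.
    torch_update_find_cuda_flags()
    add_subdirectory(third_party/legacy_cuda_project)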