Skip to content

Commit

Permalink
CMake: default to -arch=native for CUDA build (llama/10320)
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesGaessler authored and ggerganov committed Nov 18, 2024
1 parent 351c728 commit 77d37f5
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions src/ggml-cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@ if (CUDAToolkit_FOUND)
message(STATUS "CUDA Toolkit found")

if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == FP16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
# native == GPUs available at build time
# 52 == Maxwell, lowest CUDA 12 standard
# 60 == P100, FP16 CUDA intrinsics
# 61 == Pascal, __dp4a instruction (per-byte integer dot product)
# 70 == V100, FP16 tensor cores
# 75 == Turing, int8 tensor cores
if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6")
set(CMAKE_CUDA_ARCHITECTURES "native")
elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
Expand Down

0 comments on commit 77d37f5

Please sign in to comment.