Skip to content

Commit

Permalink
add GGML_AVX_VNNI to enable avx-vnni, fix checks
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren committed Dec 1, 2024
1 parent 6d78e0f commit 854eff8
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .devops/llama-server.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN \
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
# Build llama-server
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
cmake --build build --target llama-server -j $(nproc) && \
Expand Down
4 changes: 0 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
set(GGML_LLAMAFILE_DEFAULT ON)
endif()

if (NOT DEFINED GGML_AMX)
set(GGML_AMX ON)
endif()

if (NOT DEFINED GGML_CUDA_GRAPHS)
set(GGML_CUDA_GRAPHS_DEFAULT ON)
endif()
Expand Down
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)

option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
option(GGML_AVX512 "ggml: enable AVX512" OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,12 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
elseif (GGML_AVX)
list(APPEND ARCH_FLAGS /arch:AVX)
endif()
if (GGML_AVX_VNNI)
list(APPEND ARCH_DEFINITIONS __AVXVNNI__)
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
list(APPEND ARCH_FLAGS -mavxvnni)
endif()
endif()
else()
if (GGML_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
Expand All @@ -233,6 +239,9 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
if (GGML_AVX2)
list(APPEND ARCH_FLAGS -mavx2)
endif()
if (GGML_AVX_VNNI)
list(APPEND ARCH_FLAGS -mavxvnni)
endif()
if (GGML_AVX512)
list(APPEND ARCH_FLAGS -mavx512f)
list(APPEND ARCH_FLAGS -mavx512dq)
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/cpu-feats-x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,8 @@ static int ggml_backend_cpu_x86_score() {
score += ggml_cpu_has_f16c () * 1<<1;
score += ggml_cpu_has_ssse3 () * 1<<2;
score += ggml_cpu_has_sse3 () * 1<<3;
score += ggml_cpu_has_avx () * 1<<5;
score += ggml_cpu_has_avx_vnni () * 1<<4;
score += ggml_cpu_has_avx () * 1<<5;
score += ggml_cpu_has_avx2 () * 1<<6;
score += ggml_cpu_has_avx512 () * 1<<7;
// score += ggml_cpu_has_avx512_vbmi() * 1<<8; // not used
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/ggml-cpu-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ static inline __m512i sum_i16_pairs_int_32x16(const __m512i x) {
}

static inline __m512i mul_sum_us8_pairs_int32x16(const __m512i ax, const __m512i sy) {
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
#if defined(__AVX512VNNI__)
const __m512i zero = _mm512_setzero_si512();
return _mm512_dpbusd_epi32(zero, ax, sy);
#else
Expand Down

0 comments on commit 854eff8

Please sign in to comment.