Skip to content

Commit 854eff8

Browse files
committed
add GGML_AVX_VNNI to enable avx-vnni, fix checks
1 parent 6d78e0f commit 854eff8

File tree

6 files changed

+13
-7
lines changed

6 files changed

+13
-7
lines changed

.devops/llama-server.Dockerfile

+1 -1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ RUN \
1515
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
1616
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
1717
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18-
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
18+
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
1919
# Build llama-server
2020
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
2121
cmake --build build --target llama-server -j $(nproc) && \

CMakeLists.txt

-4
Original file line number | Diff line number | Diff line change
@@ -96,10 +96,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
9696
set(GGML_LLAMAFILE_DEFAULT ON)
9797
endif()
9898

99-
if (NOT DEFINED GGML_AMX)
100-
set(GGML_AMX ON)
101-
endif()
102-
10399
if (NOT DEFINED GGML_CUDA_GRAPHS)
104100
set(GGML_CUDA_GRAPHS_DEFAULT ON)
105101
endif()

ggml/CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,7 @@ option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
9696
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
9797

9898
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
99+
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
99100
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
100101
option(GGML_AVX512 "ggml: enable AVX512" OFF)
101102
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)

ggml/src/ggml-cpu/CMakeLists.txt

+9
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,12 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
217217
elseif (GGML_AVX)
218218
list(APPEND ARCH_FLAGS /arch:AVX)
219219
endif()
220+
if (GGML_AVX_VNNI)
221+
list(APPEND ARCH_DEFINITIONS __AVXVNNI__)
222+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
223+
list(APPEND ARCH_FLAGS -mavxvnni)
224+
endif()
225+
endif()
220226
else()
221227
if (GGML_NATIVE)
222228
list(APPEND ARCH_FLAGS -march=native)
@@ -233,6 +239,9 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
233239
if (GGML_AVX2)
234240
list(APPEND ARCH_FLAGS -mavx2)
235241
endif()
242+
if (GGML_AVX_VNNI)
243+
list(APPEND ARCH_FLAGS -mavxvnni)
244+
endif()
236245
if (GGML_AVX512)
237246
list(APPEND ARCH_FLAGS -mavx512f)
238247
list(APPEND ARCH_FLAGS -mavx512dq)

ggml/src/ggml-cpu/cpu-feats-x86.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -281,8 +281,8 @@ static int ggml_backend_cpu_x86_score() {
281281
score += ggml_cpu_has_f16c () * 1<<1;
282282
score += ggml_cpu_has_ssse3 () * 1<<2;
283283
score += ggml_cpu_has_sse3 () * 1<<3;
284-
score += ggml_cpu_has_avx () * 1<<5;
285284
score += ggml_cpu_has_avx_vnni () * 1<<4;
285+
score += ggml_cpu_has_avx () * 1<<5;
286286
score += ggml_cpu_has_avx2 () * 1<<6;
287287
score += ggml_cpu_has_avx512 () * 1<<7;
288288
// score += ggml_cpu_has_avx512_vbmi() * 1<<8; // not used

ggml/src/ggml-cpu/ggml-cpu-aarch64.c

+1 -1
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ static inline __m512i sum_i16_pairs_int_32x16(const __m512i x) {
128128
}
129129

130130
static inline __m512i mul_sum_us8_pairs_int32x16(const __m512i ax, const __m512i sy) {
131-
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
131+
#if defined(__AVX512VNNI__)
132132
const __m512i zero = _mm512_setzero_si512();
133133
return _mm512_dpbusd_epi32(zero, ax, sy);
134134
#else

0 commit comments

Comments
 (0)