File tree 6 files changed +13
-7
lines changed
6 files changed +13
-7
lines changed Original file line number Diff line number Diff line change 15
15
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
16
16
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
17
17
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18
- scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
18
+ scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
19
19
# Build llama-server
20
20
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
21
21
cmake --build build --target llama-server -j $(nproc) && \
Original file line number Diff line number Diff line change @@ -96,10 +96,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
96
96
set (GGML_LLAMAFILE_DEFAULT ON )
97
97
endif ()
98
98
99
- if (NOT DEFINED GGML_AMX)
100
- set (GGML_AMX ON )
101
- endif ()
102
-
103
99
if (NOT DEFINED GGML_CUDA_GRAPHS)
104
100
set (GGML_CUDA_GRAPHS_DEFAULT ON )
105
101
endif ()
Original file line number Diff line number Diff line change @@ -96,6 +96,7 @@ option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
96
96
option (GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON )
97
97
98
98
option (GGML_AVX "ggml: enable AVX" ${INS_ENB} )
99
+ option (GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF )
99
100
option (GGML_AVX2 "ggml: enable AVX2" ${INS_ENB} )
100
101
option (GGML_AVX512 "ggml: enable AVX512" OFF )
101
102
option (GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF )
Original file line number Diff line number Diff line change @@ -217,6 +217,12 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
217
217
elseif (GGML_AVX)
218
218
list (APPEND ARCH_FLAGS /arch:AVX)
219
219
endif ()
220
+ if (GGML_AVX_VNNI)
221
+ list (APPEND ARCH_DEFINITIONS __AVXVNNI__)
222
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang" )
223
+ list (APPEND ARCH_FLAGS -mavxvnni)
224
+ endif ()
225
+ endif ()
220
226
else ()
221
227
if (GGML_NATIVE)
222
228
list (APPEND ARCH_FLAGS -march=native)
@@ -233,6 +239,9 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
233
239
if (GGML_AVX2)
234
240
list (APPEND ARCH_FLAGS -mavx2)
235
241
endif ()
242
+ if (GGML_AVX_VNNI)
243
+ list (APPEND ARCH_FLAGS -mavxvnni)
244
+ endif ()
236
245
if (GGML_AVX512)
237
246
list (APPEND ARCH_FLAGS -mavx512f)
238
247
list (APPEND ARCH_FLAGS -mavx512dq)
Original file line number Diff line number Diff line change @@ -281,8 +281,8 @@ static int ggml_backend_cpu_x86_score() {
281
281
score += ggml_cpu_has_f16c () * 1 <<1 ;
282
282
score += ggml_cpu_has_ssse3 () * 1 <<2 ;
283
283
score += ggml_cpu_has_sse3 () * 1 <<3 ;
284
- score += ggml_cpu_has_avx () * 1 <<5 ;
285
284
score += ggml_cpu_has_avx_vnni () * 1 <<4 ;
285
+ score += ggml_cpu_has_avx () * 1 <<5 ;
286
286
score += ggml_cpu_has_avx2 () * 1 <<6 ;
287
287
score += ggml_cpu_has_avx512 () * 1 <<7 ;
288
288
// score += ggml_cpu_has_avx512_vbmi() * 1<<8; // not used
Original file line number Diff line number Diff line change @@ -128,7 +128,7 @@ static inline __m512i sum_i16_pairs_int_32x16(const __m512i x) {
128
128
}
129
129
130
130
static inline __m512i mul_sum_us8_pairs_int32x16 (const __m512i ax , const __m512i sy ) {
131
- #if defined(__AVXVNNI__ ) || (defined( __AVX512VNNI__ ) && defined( __AVX512VL__ ) )
131
+ #if defined(__AVX512VNNI__ )
132
132
const __m512i zero = _mm512_setzero_si512 ();
133
133
return _mm512_dpbusd_epi32 (zero , ax , sy );
134
134
#else
You can’t perform that action at this time.
0 commit comments