
Commit 59c8895

slaren authored and arthw committed
ggml : automatic selection of best CPU backend (ggml-org#10606)
* ggml : automatic selection of best CPU backend
* amx : minor opt
* add GGML_AVX_VNNI to enable avx-vnni, fix checks
1 parent d7e94ac commit 59c8895

12 files changed: +599 -156 lines

.devops/llama-server.Dockerfile (+16 -4)

@@ -3,22 +3,34 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-ENV LLAMA_CURL=1
 
-RUN make -j$(nproc) llama-server
+RUN \
+    # Build multiple versions of the CPU backend
+    scripts/build-cpu.sh avx    -DGGML_AVX=ON -DGGML_AVX2=OFF && \
+    scripts/build-cpu.sh avx2   -DGGML_AVX=ON -DGGML_AVX2=ON && \
+    scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
+    scripts/build-cpu.sh amx    -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
+    # Build llama-server
+    cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build --target llama-server -j $(nproc) && \
+    # Copy the built libraries to /app/lib
+    mkdir -p /app/lib && \
+    mv libggml-cpu* /app/lib/ && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;
 
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/llama-server /llama-server
+COPY --from=build /app/build/bin/llama-server /llama-server
+COPY --from=build /app/lib/ /
 
 ENV LC_ALL=C.utf8
 # Must be set to 0.0.0.0 so it can listen to requests from host machine
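The image now ships several CPU backend builds (the libggml-cpu-* variants copied into /app/lib) next to a llama-server binary compiled with GGML_BACKEND_DL=ON, and the best variant is picked when the process starts. As a rough illustration only, not part of this commit, the C sketch below shows how a program linked against ggml could trigger that dynamic loading and list what got registered; it assumes the registry helpers ggml_backend_load_all(), ggml_backend_reg_count(), ggml_backend_reg_get() and ggml_backend_reg_name() declared in ggml-backend.h.

// Hypothetical usage sketch, not part of this commit.
// Assumes the dynamic-loading/registry API declared in ggml-backend.h.
#include <stdio.h>
#include "ggml-backend.h"

int main(void) {
    // Load any backend shared libraries found at runtime (such as the
    // libggml-cpu-* variants copied into the image above); with this commit
    // the best-scoring CPU variant is the one that ends up registered.
    ggml_backend_load_all();

    // List whatever was registered.
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        printf("registered backend: %s\n", ggml_backend_reg_name(reg));
    }
    return 0;
}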

CMakeLists.txt (-4)

@@ -96,10 +96,6 @@ if (NOT DEFINED GGML_LLAMAFILE)
     set(GGML_LLAMAFILE_DEFAULT ON)
 endif()
 
-if (NOT DEFINED GGML_AMX)
-    set(GGML_AMX ON)
-endif()
-
 if (NOT DEFINED GGML_CUDA_GRAPHS)
     set(GGML_CUDA_GRAPHS_DEFAULT ON)
 endif()

Package.swift (+1 -1)

@@ -88,5 +88,5 @@ let package = Package(
             linkerSettings: linkerSettings
         )
     ],
-    cxxLanguageStandard: .cxx11
+    cxxLanguageStandard: .cxx17
 )

ggml/CMakeLists.txt (+1)

@@ -96,6 +96,7 @@ option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
 option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
 
 option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
+option(GGML_AVX_VNNI    "ggml: enable AVX-VNNI"    OFF)
 option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
 option(GGML_AVX512      "ggml: enable AVX512"      OFF)
 option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)

ggml/src/ggml-backend-impl.h (+38 -20)

@@ -211,27 +211,45 @@ extern "C" {
     GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
 
     // Add backend dynamic loading support to the backend
-    typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
 
-#ifdef GGML_BACKEND_DL
-#ifdef __cplusplus
-#    define GGML_BACKEND_DL_IMPL(reg_fn) \
-        extern "C" { \
-            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
-        } \
-        ggml_backend_reg_t ggml_backend_init(void) { \
-            return reg_fn(); \
-        }
-#else
-#    define GGML_BACKEND_DL_IMPL(reg_fn) \
-        GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
-        ggml_backend_reg_t ggml_backend_init(void) { \
-            return reg_fn(); \
-        }
-#endif
-#else
-#    define GGML_BACKEND_DL_IMPL(reg_fn)
-#endif
+    // Initialize the backend
+    typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
+    // Optional: obtain a score for the backend based on the system configuration
+    // Higher scores are preferred, 0 means the backend is not supported in the current system
+    typedef int (*ggml_backend_score_t)(void);
+
+#ifdef GGML_BACKEND_DL
+#    ifdef __cplusplus
+#        define GGML_BACKEND_DL_IMPL(reg_fn) \
+            extern "C" { \
+                GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
+            } \
+            ggml_backend_reg_t ggml_backend_init(void) { \
+                return reg_fn(); \
+            }
+#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
+            extern "C" { \
+                GGML_BACKEND_API int ggml_backend_score(void); \
+            } \
+            int ggml_backend_score(void) { \
+                return score_fn(); \
+            }
+#    else
+#        define GGML_BACKEND_DL_IMPL(reg_fn) \
+            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
+            ggml_backend_reg_t ggml_backend_init(void) { \
+                return reg_fn(); \
+            }
+#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
+            GGML_BACKEND_API int ggml_backend_score(void); \
+            int ggml_backend_score(void) { \
+                return score_fn(); \
+            }
+#    endif
+#else
+#    define GGML_BACKEND_DL_IMPL(reg_fn)
+#    define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
+#endif
 
 #ifdef __cplusplus
 }