Skip to content

Commit dadab7c

Browse files
committed
ggml : automatic selection of best CPU backend
1 parent 7cc2d2c commit dadab7c

File tree

7 files changed

+584
-147
lines changed

7 files changed

+584
-147
lines changed

.devops/llama-server.Dockerfile

+16-4
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,34 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION AS build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential git libcurl4-openssl-dev
6+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
77

88
WORKDIR /app
99

1010
COPY . .
1111

12-
ENV LLAMA_CURL=1
1312

14-
RUN make -j$(nproc) llama-server
13+
RUN \
14+
# Build multiple versions of the CPU backend
15+
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
16+
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
17+
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18+
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
19+
# Build llama-server
20+
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
21+
cmake --build build --target llama-server -j $(nproc) && \
22+
# Copy the built libraries to /app/lib
23+
mkdir -p /app/lib && \
24+
mv libggml-cpu* /app/lib/ && \
25+
find build -name "*.so" -exec cp {} /app/lib/ \;
1526

1627
FROM ubuntu:$UBUNTU_VERSION AS runtime
1728

1829
RUN apt-get update && \
1930
apt-get install -y libcurl4-openssl-dev libgomp1 curl
2031

21-
COPY --from=build /app/llama-server /llama-server
32+
COPY --from=build /app/build/bin/llama-server /llama-server
33+
COPY --from=build /app/lib/ /
2234

2335
ENV LC_ALL=C.utf8
2436
# Must be set to 0.0.0.0 so it can listen to requests from the host machine

ggml/src/ggml-backend-impl.h

+38-20
Original file line numberDiff line numberDiff line change
@@ -211,27 +211,45 @@ extern "C" {
211211
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
212212

213213
// Add backend dynamic loading support to the backend
214-
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
215214

216-
#ifdef GGML_BACKEND_DL
217-
#ifdef __cplusplus
218-
# define GGML_BACKEND_DL_IMPL(reg_fn) \
219-
extern "C" { \
220-
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
221-
} \
222-
ggml_backend_reg_t ggml_backend_init(void) { \
223-
return reg_fn(); \
224-
}
225-
#else
226-
# define GGML_BACKEND_DL_IMPL(reg_fn) \
227-
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
228-
ggml_backend_reg_t ggml_backend_init(void) { \
229-
return reg_fn(); \
230-
}
231-
#endif
232-
#else
233-
# define GGML_BACKEND_DL_IMPL(reg_fn)
234-
#endif
215+
// Initialize the backend
216+
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
217+
// Optional: obtain a score for the backend based on the system configuration
218+
// Higher scores are preferred; 0 means the backend is not supported on the current system
219+
typedef int (*ggml_backend_score_t)(void);
220+
221+
#ifdef GGML_BACKEND_DL
222+
# ifdef __cplusplus
223+
# define GGML_BACKEND_DL_IMPL(reg_fn) \
224+
extern "C" { \
225+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
226+
} \
227+
ggml_backend_reg_t ggml_backend_init(void) { \
228+
return reg_fn(); \
229+
}
230+
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
231+
extern "C" { \
232+
GGML_BACKEND_API int ggml_backend_score(void); \
233+
} \
234+
int ggml_backend_score(void) { \
235+
return score_fn(); \
236+
}
237+
# else
238+
# define GGML_BACKEND_DL_IMPL(reg_fn) \
239+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
240+
ggml_backend_reg_t ggml_backend_init(void) { \
241+
return reg_fn(); \
242+
}
243+
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
244+
GGML_BACKEND_API int ggml_backend_score(void); \
245+
int ggml_backend_score(void) { \
246+
return score_fn(); \
247+
}
248+
# endif
249+
#else
250+
# define GGML_BACKEND_DL_IMPL(reg_fn)
251+
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
252+
#endif
235253

236254
#ifdef __cplusplus
237255
}

0 commit comments

Comments
 (0)