Commit 59f4db1

ggml : add predefined list of CPU backend variants to build (#10626)

* ggml : add predefined list of CPU backend variants to build
* update CPU dockerfiles

1 parent 2803540 commit 59f4db1

11 files changed: +483 −372
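The headline change: with GGML_BACKEND_DL enabled, the new GGML_CPU_ALL_VARIANTS option builds the CPU backend several times, once per predefined x86-64 microarchitecture (sandybridge, haswell, skylakex, icelake, plus alderlake and sapphirerapids on non-MSVC toolchains), each as a dynamically loadable library; the best-scoring variant is picked at runtime. A minimal configure sketch, lifted from the commands the updated Dockerfiles run (LLAMA_CURL is independent of the variant mechanism):

    cmake -S . -B build \
        -DGGML_BACKEND_DL=ON \
        -DGGML_NATIVE=OFF \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DLLAMA_CURL=ON \
        -DCMAKE_BUILD_TYPE=Release
    cmake --build build -j $(nproc)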

.devops/full.Dockerfile (+22 −9)

@@ -3,23 +3,36 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
 
-COPY requirements.txt requirements.txt
-COPY requirements requirements
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;
 
-RUN pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt
+FROM ubuntu:$UBUNTU_VERSION as runtime
 
 WORKDIR /app
 
-COPY . .
+RUN apt-get update && \
+    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
 
-ENV LLAMA_CURL=1
+COPY requirements.txt /app/requirements.txt
+COPY requirements /app/requirements
+COPY .devops/tools.sh /app/tools.sh
 
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install -r /app/requirements.txt
 
-RUN make -j$(nproc)
+COPY --from=build /app/build/bin/ /app/
+COPY --from=build /app/lib/ /app/
+COPY --from=build /app/convert_hf_to_gguf.py /app/
+COPY --from=build /app/gguf-py /app/gguf-py
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT ["/app/.devops/tools.sh"]
+ENTRYPOINT ["/app/tools.sh"]

.devops/llama-cli.Dockerfile (+11 −5)

@@ -3,21 +3,27 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential git
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-RUN make -j$(nproc) llama-cli
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;
 
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
+WORKDIR /app
+
 RUN apt-get update && \
-    apt-get install -y libgomp1
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/llama-cli /llama-cli
+COPY --from=build /app/build/bin/llama-cli /app/
+COPY --from=build /app/lib/ /app/
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT [ "/llama-cli" ]
+ENTRYPOINT [ "/app/llama-cli" ]

.devops/llama-server.Dockerfile (+7 −15)

@@ -9,33 +9,25 @@ WORKDIR /app
 
 COPY . .
 
-
-RUN \
-    # Build multiple versions of the CPU backend
-    scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
-    scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
-    scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
-    scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
-    # Build llama-server
-    cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
-    cmake --build build --target llama-server -j $(nproc) && \
-    # Copy the built libraries to /app/lib
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
     mkdir -p /app/lib && \
-    mv libggml-cpu* /app/lib/ && \
     find build -name "*.so" -exec cp {} /app/lib/ \;
 
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
+WORKDIR /app
+
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/build/bin/llama-server /llama-server
-COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-server /app/
+COPY --from=build /app/lib/ /app/
 
 ENV LC_ALL=C.utf8
 # Must be set to 0.0.0.0 so it can listen to requests from host machine
 ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
 
-ENTRYPOINT [ "/llama-server" ]
+ENTRYPOINT [ "/app/llama-server" ]

ggml/CMakeLists.txt (+24 −25)

@@ -92,30 +92,33 @@ else()
     set(INS_ENB ON)
 endif()
 
-option(GGML_CPU_HBM     "ggml: use memkind for CPU HBM" OFF)
-option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
-
-option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
-option(GGML_AVX_VNNI    "ggml: enable AVX-VNNI"    OFF)
-option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
-option(GGML_AVX512      "ggml: enable AVX512"      OFF)
-option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
-option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
-option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
-option(GGML_AMX_TILE    "ggml: enable AMX-TILE"    OFF)
-option(GGML_AMX_INT8    "ggml: enable AMX-INT8"    OFF)
-option(GGML_AMX_BF16    "ggml: enable AMX-BF16"    OFF)
-option(GGML_FMA         "ggml: enable FMA"         ${INS_ENB})
+option(GGML_CPU_HBM     "ggml: use memkind for CPU HBM" OFF)
+option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
+option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
+option(GGML_AVX_VNNI    "ggml: enable AVX-VNNI"    OFF)
+option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
+option(GGML_AVX512      "ggml: enable AVX512F"     OFF)
+option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
+option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
+option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
 if (NOT MSVC)
-    option(GGML_F16C    "ggml: enable F16C"        ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
+    # in MSVC F16C and FMA is implied with AVX2/AVX512
+    option(GGML_FMA      "ggml: enable FMA"      ${INS_ENB})
+    option(GGML_F16C     "ggml: enable F16C"     ${INS_ENB})
+    # MSVC does not seem to support AMX
+    option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
+    option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
+    option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
 endif()
-option(GGML_LASX        "ggml: enable lasx"        ON)
-option(GGML_LSX         "ggml: enable lsx"         ON)
-option(GGML_RVV         "ggml: enable rvv"         ON)
-option(GGML_SVE         "ggml: enable SVE"         OFF)
+option(GGML_LASX "ggml: enable lasx" ON)
+option(GGML_LSX  "ggml: enable lsx"  ON)
+option(GGML_RVV  "ggml: enable rvv"  ON)
+option(GGML_SVE  "ggml: enable SVE"  OFF)
+option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
+
 
 if (WIN32)
-    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
+    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
 endif()
 
 # ggml core
@@ -180,11 +183,7 @@ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED true)
 
-if (GGML_SYCL)
-    set(CMAKE_CXX_STANDARD 17)
-else()
-    set(CMAKE_CXX_STANDARD 11)
-endif()
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
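The per-ISA options remain the escape hatch when GGML_CPU_ALL_VARIANTS is off: a single-variant build can still pin an exact feature set by hand. A sketch using only flags declared above, roughly matching the haswell variant defined in ggml/src/CMakeLists.txt:

    cmake -S . -B build -DGGML_NATIVE=OFF \
        -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_FMA=ON -DGGML_F16C=ON \
        -DCMAKE_BUILD_TYPE=Release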

ggml/src/CMakeLists.txt (+35 −0)

@@ -269,7 +269,42 @@ function(ggml_add_backend backend)
     endif()
 endfunction()
 
+function(ggml_add_cpu_backend_variant tag_name)
+    set(GGML_CPU_TAG_NAME ${tag_name})
+    # other: OPENMP LLAMAFILE CPU_HBM
+    foreach (feat NATIVE
+                  AVX AVX2 AVX_VNNI FMA F16C
+                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
+                  AMX_TILE AMX_INT8 AMX_BF16)
+        set(GGML_${feat} OFF)
+    endforeach()
+
+    foreach (feat ${ARGN})
+        set(GGML_${feat} ON)
+    endforeach()
+
+    ggml_add_cpu_backend_variant_impl(${tag_name})
+endfunction()
+
 ggml_add_backend(CPU)
+
+if (GGML_CPU_ALL_VARIANTS)
+    if (NOT GGML_BACKEND_DL)
+        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+    endif()
+    ggml_add_cpu_backend_variant(sandybridge AVX)
+    ggml_add_cpu_backend_variant(haswell     AVX F16C AVX2 FMA)
+    ggml_add_cpu_backend_variant(skylakex    AVX F16C AVX2 FMA AVX512)
+    ggml_add_cpu_backend_variant(icelake     AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+    if (NOT MSVC)
+        # MSVC doesn't support AVX-VNNI or AMX
+        ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
+        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+    endif()
+else ()
+    ggml_add_cpu_backend_variant_impl("")
+endif()
+
 ggml_add_backend(BLAS)
 ggml_add_backend(CANN)
 ggml_add_backend(CUDA)
ggml/src/ggml-backend-reg.cpp (+21 −11)

@@ -483,6 +483,10 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent)
                         best_score = s;
                         best_path = entry.path().string();
                     }
+                } else {
+                    if (!silent) {
+                        GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
+                    }
                 }
             }
         }
@@ -505,15 +509,21 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent)
 }
 
 void ggml_backend_load_all() {
-    ggml_backend_load_best("blas", true);
-    ggml_backend_load_best("cann", true);
-    ggml_backend_load_best("cuda", true);
-    ggml_backend_load_best("hip", true);
-    ggml_backend_load_best("kompute", true);
-    ggml_backend_load_best("metal", true);
-    ggml_backend_load_best("rpc", true);
-    ggml_backend_load_best("sycl", true);
-    ggml_backend_load_best("vulkan", true);
-    ggml_backend_load_best("musa", true);
-    ggml_backend_load_best("cpu", true);
+#ifdef NDEBUG
+    bool silent = true;
+#else
+    bool silent = false;
+#endif
+
+    ggml_backend_load_best("blas", silent);
+    ggml_backend_load_best("cann", silent);
+    ggml_backend_load_best("cuda", silent);
+    ggml_backend_load_best("hip", silent);
+    ggml_backend_load_best("kompute", silent);
+    ggml_backend_load_best("metal", silent);
+    ggml_backend_load_best("rpc", silent);
+    ggml_backend_load_best("sycl", silent);
+    ggml_backend_load_best("vulkan", silent);
+    ggml_backend_load_best("musa", silent);
+    ggml_backend_load_best("cpu", silent);
 }