
Commit 188d40f

ggerganov authored and iThalay committed

sync : ggml (ggml-org#2001)

* sync : update scripts
* sync : ggml
* talk-llama : sync llama.cpp
* make : WHISPER_CUBLAS -> WHISPER_CUDA
* ci : try to fix sycl build
* talk-llama : fix make build

1 parent 260eeb8 commit 188d40f

89 files changed: +16433 -13207 lines

CMakeLists.txt (+12 -4)

```diff
@@ -74,7 +74,8 @@ else()
 option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
 option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
 option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
-option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
+option(WHISPER_CUDA "whisper: support for CUDA" OFF)
+option(WHISPER_CUBLAS "whisper: support for CUDA (deprecated)" OFF)
 option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
 option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
 option(WHISPER_SYCL "whisper: use SYCL" OFF)
@@ -240,6 +241,11 @@ if (WHISPER_BLAS)
 endif ()
 
 if (WHISPER_CUBLAS)
+    message(WARNING "WHISPER_CUBLAS is deprecated and will be removed in the future.\nUse WHISPER_CUDA instead")
+    set(WHISPER_CUDA ON)
+endif()
+
+if (WHISPER_CUDA)
     cmake_minimum_required(VERSION 3.17)
 
     find_package(CUDAToolkit)
@@ -249,9 +255,11 @@ if (WHISPER_CUBLAS)
 
     enable_language(CUDA)
 
-    set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
+    file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
+    list(APPEND GGML_SOURCES_CUDA ggml-cuda.h)
+    list(APPEND GGML_SOURCES_CUDA ggml-cuda.cu)
 
-    add_compile_definitions(GGML_USE_CUBLAS)
+    add_compile_definitions(GGML_USE_CUDA)
 
     if (WHISPER_STATIC)
         if (WIN32)
@@ -286,7 +294,7 @@ if (WHISPER_HIPBLAS)
 
     if (${hipblas_FOUND} AND ${hip_FOUND})
         message(STATUS "HIP and hipBLAS found")
-        add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
+        add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
         add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
         set_property(TARGET ggml-rocm PROPERTY POSITION_INDEPENDENT_CODE ON)
         set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
```
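Note the deprecation shim: the old `WHISPER_CUBLAS` option is kept but now only warns and forwards to `WHISPER_CUDA`, so existing configure scripts keep working. For reference, a minimal configure-and-build sketch with the new option, assuming a CUDA toolkit and CMake 3.17+ are installed (the `build` directory name is arbitrary):

```
cmake -B build -DWHISPER_CUDA=ON
cmake --build build -j
```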

Makefile (+28 -8)

```diff
@@ -216,35 +216,48 @@ ifdef WHISPER_OPENBLAS
 endif
 
 ifdef WHISPER_CUBLAS
+	# WHISPER_CUBLAS is deprecated and will be removed in the future
+	WHISPER_CUDA := 1
+endif
+
+ifdef WHISPER_CUDA
 	ifeq ($(shell expr $(NVCC_VERSION) \>= 11.6), 1)
 		CUDA_ARCH_FLAG ?= native
 	else
 		CUDA_ARCH_FLAG ?= all
 	endif
 
-	CFLAGS      += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
-	CXXFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
+	CFLAGS      += -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
+	CXXFLAGS    += -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
 	LDFLAGS     += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
 	WHISPER_OBJ += ggml-cuda.o
+	WHISPER_OBJ += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
 	NVCC        = nvcc
 	NVCCFLAGS   = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
 
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
+ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
+	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
+
+ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
 endif
 
 ifdef WHISPER_HIPBLAS
 	ROCM_PATH   ?= /opt/rocm
 	HIPCC       ?= $(ROCM_PATH)/bin/hipcc
 	GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
-	CFLAGS      += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
-	CXXFLAGS    += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
+	CFLAGS      += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
+	CXXFLAGS    += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
 	LDFLAGS     += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 	LDFLAGS     += -lhipblas -lamdhip64 -lrocblas
 	HIPFLAGS    += $(addprefix --offload-arch=,$(GPU_TARGETS))
 	WHISPER_OBJ += ggml-cuda.o
+	WHISPER_OBJ += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
+
+ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
+	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
+ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
 	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 endif
 
@@ -309,6 +322,13 @@ $(info I CC: $(CCV))
 $(info I CXX: $(CXXV))
 $(info )
 
+ifdef WHISPER_CUBLAS
+$(info !!!!)
+$(info WHISPER_CUBLAS is deprecated and will be removed in the future. Use WHISPER_CUDA instead.)
+$(info !!!!)
+$(info )
+endif
+
 #
 # Build library
 #
@@ -410,8 +430,8 @@ lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
 talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
 
-talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
+talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
 
 #
 # Audio samples
```
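The key change here is that the CUDA sources moved from a single `ggml-cuda.cu` into a `ggml-cuda/` directory, so the Makefile now derives the object list from a glob and compiles it with one pattern rule. A minimal sketch of that idiom, with hypothetical file names (the real ones are whatever `.cu` files live in `ggml-cuda/`):

```make
# If ggml-cuda/ contained add.cu and mul.cu (illustrative only), then
# $(wildcard ggml-cuda/*.cu) expands to "ggml-cuda/add.cu ggml-cuda/mul.cu"
# and patsubst maps each %.cu to %.o, giving "ggml-cuda/add.o ggml-cuda/mul.o".
OBJS := $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))

# One pattern rule then covers every such object: $< is the matched
# .cu source, $@ is the .o target being built.
ggml-cuda/%.o: ggml-cuda/%.cu
	nvcc -c $< -o $@
```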

README.md (+2 -2)

````diff
@@ -414,11 +414,11 @@ For more information about the Core ML implementation please refer to PR [#1037]
 With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
 First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
 
-Now build `whisper.cpp` with cuBLAS support:
+Now build `whisper.cpp` with CUDA support:
 
 ```
 make clean
 WHISPER_CUDA=1 make -j
 ```
 
 ## OpenCL GPU support via CLBlast
````

(The build command inside the fenced block changes from `WHISPER_CUBLAS=1 make -j` to `WHISPER_CUDA=1 make -j`.)
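Per the Makefile changes above, the old spelling is still accepted for now: it forwards to `WHISPER_CUDA` and prints the `$(info ...)` deprecation banner at build time.

```
make clean
WHISPER_CUBLAS=1 make -j   # deprecated alias; prefer WHISPER_CUDA=1
```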

examples/common-ggml.cpp (+4)

```diff
@@ -70,6 +70,7 @@ bool ggml_common_quantize_0(
         case GGML_FTYPE_MOSTLY_IQ1_S:
         case GGML_FTYPE_MOSTLY_IQ4_NL:
         case GGML_FTYPE_MOSTLY_IQ4_XS:
+        case GGML_FTYPE_MOSTLY_IQ1_M:
             {
                 fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
                 return false;
@@ -193,6 +194,8 @@ bool ggml_common_quantize_0(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_IQ2_XXS:
@@ -203,6 +206,7 @@ bool ggml_common_quantize_0(
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
         case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ1_M:
         case GGML_TYPE_COUNT:
             {
                 fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
```
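These hunks follow the usual pattern when the ggml sync introduces new enum values (`GGML_TYPE_IQ1_M`, `GGML_TYPE_I64`, `GGML_TYPE_F64`): every switch that enumerates the type values must gain a matching case, here routed to the error branch since the quantizer does not handle them. A minimal sketch of the pattern, using a made-up enum rather than the real ggml definitions:

```cpp
#include <cstdio>

// Illustrative enum only -- not the actual ggml_type definition.
enum class tensor_type { F32, Q8_0, IQ1_M /* newly added */, COUNT };

// A switch that lists every value must be extended alongside the enum;
// otherwise the new value would silently fall through to default handling.
// IQ1_M goes to the "unsupported" branch, mirroring ggml_common_quantize_0().
bool type_supported(tensor_type t) {
    switch (t) {
        case tensor_type::F32:
        case tensor_type::Q8_0:
            return true;
        case tensor_type::IQ1_M: // new case added with the enum value
        case tensor_type::COUNT:
            fprintf(stderr, "unsupported tensor type %d\n", (int) t);
            return false;
    }
    return false; // unreachable while the switch stays exhaustive
}
```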

examples/talk-llama/CMakeLists.txt (+1 -1)

```diff
@@ -1,7 +1,7 @@
 if (WHISPER_SDL2)
     # talk-llama
     set(TARGET talk-llama)
-    add_executable(${TARGET} talk-llama.cpp llama.cpp unicode.cpp)
+    add_executable(${TARGET} talk-llama.cpp llama.cpp unicode.cpp unicode-data.cpp)
     target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
 
     if (WHISPER_CLBLAST)
```
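This mirrors the `talk-llama` Makefile change above: the llama.cpp sync splits unicode data into a separate `unicode-data.cpp`, so both build systems must list the new translation unit for the symbols it defines to resolve at link time.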
