Skip to content

Commit bb16eff

Browse files
authored
headers fix; add kquants_iter for hipblas and add gfx803 (ggml-org#1)
* kquants_iter for hipblas and add gfx803
* Update CMakeLists.txt with hipblas kquants_iter and DMMV_F16
* remove dmmv_f16 for now
1 parent c8ae945 commit bb16eff

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -335,6 +335,7 @@ if (LLAMA_HIPBLAS)
335335
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
336336
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
337337
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
338+
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
338339

339340
if (LLAMA_STATIC)
340341
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")

Makefile

+8 -3
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@ ifndef UNAME_M
2121
UNAME_M := $(shell uname -m)
2222
endif
2323

24-
CCV = $(shell $(CC) --version | head -n 1)
25-
CXXV = $(shell $(CXX) --version | head -n 1)
24+
CCV := $(shell $(CC) --version | head -n 1)
25+
CXXV := $(shell $(CXX) --version | head -n 1)
2626

2727
# Mac OS + Arm can report x86_64
2828
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
@@ -207,13 +207,18 @@ ifdef LLAMA_HIPBLAS
207207
ROCM_PATH ?= /opt/rocm
208208
CC := $(ROCM_PATH)/llvm/bin/clang
209209
CXX := $(ROCM_PATH)/llvm/bin/clang++
210-
GPU_TARGETS = gfx900 gfx906 gfx908 gfx90a gfx1030
210+
GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030
211211
LLAMA_CUDA_DMMV_X ?= 32
212212
LLAMA_CUDA_DMMV_Y ?= 1
213213
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
214214
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
215215
LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
216216
OBJS += ggml-cuda.o
217+
ifdef LLAMA_CUDA_KQUANTS_ITER
218+
CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
219+
else
220+
CXXFLAGS += -DK_QUANTS_PER_ITERATION=2
221+
endif
217222
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
218223
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
219224
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)

ggml.c

+4 -2
Original file line number | Diff line number | Diff line change
@@ -230,9 +230,11 @@ inline static void* ggml_aligned_malloc(size_t size) {
230230
#endif
231231
#elif defined(GGML_USE_OPENBLAS)
232232
#include <cblas.h>
233-
#elif defined(GGML_USE_CUBLAS) | defined(GGML_USE_HIPBLAS)
233+
#endif
234+
#if defined(GGML_USE_CUBLAS)
234235
#include "ggml-cuda.h"
235-
#elif defined(GGML_USE_CLBLAST)
236+
#endif
237+
#if defined(GGML_USE_CLBLAST)
236238
#include "ggml-opencl.h"
237239
#endif
238240

0 commit comments

Comments
 (0)