Skip to content

Commit b7cb4cf

Browse files
committed
additional fixes
1 parent fadae72 commit b7cb4cf

File tree

2 files changed

+1
-21
lines changed

2 files changed

+1
-21
lines changed

Makefile

+1-20
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ ifdef LLAMA_HIPBLAS
198198
LLAMA_CUDA_KQUANTS_ITER ?= 1
199199
LLAMA_CUDA_FORCE_DMMV ?= true
200200
HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
201-
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
201+
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
202202
HIP_OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
203203
ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
204204
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
@@ -223,25 +223,6 @@ ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-l
223223
$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
224224
endif # LLAMA_HIPBLAS
225225

226-
ifdef LLAMA_HIPBLAS
227-
ROCM_PATH ?= /opt/rocm
228-
CC := $(ROCM_PATH)/llvm/bin/clang
229-
CXX := $(ROCM_PATH)/llvm/bin/clang++
230-
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100
231-
LLAMA_CUDA_DMMV_X ?= 32
232-
LLAMA_CUDA_MMV_Y ?= 1
233-
LLAMA_CUDA_KQUANTS_ITER ?= 2
234-
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
235-
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
236-
LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
237-
OBJS += ggml-cuda.o
238-
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
239-
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
240-
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
241-
ggml-cuda.o: CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
242-
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
243-
$(CXX) $(CXXFLAGS) -x hip -c -o $@ $<
244-
endif # LLAMA_HIPBLAS
245226

246227
ifdef LLAMA_METAL
247228
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG

ggml-cuda.cu

-1
Original file line number | Diff line number | Diff line change
@@ -4644,7 +4644,6 @@ struct cuda_buffer {
46444644

46454645
static cuda_buffer g_cuda_buffer_pool[GGML_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS];
46464646
static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
4647-
static bool g_mul_mat_q = false;
46484647

46494648
static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
46504649
scoped_spin_lock lock(g_cuda_pool_lock);

0 commit comments

Comments
 (0)