Skip to content

Commit b58c189

Browse files
committed
Add multi-GPU CuBLAS support to new GUI
1 parent 0c1c71b commit b58c189

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ ifdef LLAMA_HIPBLAS
178178
CC := $(ROCM_PATH)/llvm/bin/clang
179179
CXX := $(ROCM_PATH)/llvm/bin/clang++
180180
GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100
181-
LLAMA_CUDA_DMMV_X ?= 64
181+
LLAMA_CUDA_DMMV_X ?= 256
182182
LLAMA_CUDA_DMMV_Y ?= 2
183183

184184
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
@@ -207,7 +207,7 @@ endif
207207

208208
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
209209
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
210-
-DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y) \
210+
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) \
211211
-DGGML_CUDA_FORCE_DMMV
212212

213213

koboldcpp.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os
1010
import argparse
1111
import json, sys, http.server, time, asyncio, socket, threading
12+
import re
1213
from concurrent.futures import ThreadPoolExecutor
1314

1415
stop_token_max = 10
@@ -764,21 +765,27 @@ def getfilename(var, text):
764765
quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 4, 50)
765766
quick_gpu_selector_label = makelabel(quick_tab, "GPU ID:", 3)
766767
quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
768+
CUDA_quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","All"], width=60, variable=gpu_choice_var, state="readonly")
767769
quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM", lowvram_var, 5)
768770

769-
# hides gpu options when CLBlast is not chosen
770771
def changerunmode(a,b,c):
771772
index = runopts_var.get()
772773
if index == "Use CLBlast" or index == "Use CuBLAS":
773774
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
774-
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
775775
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
776-
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
776+
if index == "Use CLBlast":
777+
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
778+
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
779+
elif index == "Use CuBLAS":
780+
CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
781+
CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
777782
else:
778783
gpu_selector_label.grid_forget()
779784
gpu_selector_box.grid_forget()
785+
CUDA_gpu_selector_box.grid_forget()
780786
quick_gpu_selector_label.grid_forget()
781787
quick_gpu_selector_box.grid_forget()
788+
CUDA_quick_gpu_selector_box.grid_forget()
782789

783790
if index == "Use CuBLAS":
784791
lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
@@ -829,6 +836,7 @@ def changerunmode(a,b,c):
829836
gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 4, 50)
830837
gpu_selector_label = makelabel(hardware_tab, "GPU ID:", 3)
831838
gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
839+
CUDA_gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3", "All"], width=60, variable=gpu_choice_var, state="readonly")
832840
lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 5)
833841

834842
# presets selector
@@ -958,12 +966,18 @@ def switch_old_gui():
958966
args.stream = stream.get()==1
959967
args.smartcontext = smartcontext.get()==1
960968
args.unbantokens = unbantokens.get()==1
969+
gpu_choice_str = gpu_choice_var.get()
970+
if gpu_choice_str.isdigit():
971+
gpuchoiceidx = int(gpu_choice_var.get())-1
961972

962-
gpuchoiceidx = int(gpu_choice_var.get())-1
963973
if runopts_var.get() == runopts[1]:
964974
args.useclblast = [[0,0], [1,0], [0,1]][gpuchoiceidx]
965975
if runopts_var.get() == runopts[2]:
966-
args.usecublas = ["lowvram",str(gpuchoiceidx)] if lowvram_var.get() == 1 else ["normal",str(gpuchoiceidx)]
976+
if gpu_choice_str.lower() == "all":
977+
args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
978+
else:
979+
args.usecublas = ["lowvram",str(gpuchoiceidx)] if lowvram_var.get() == 1 else ["normal",str(gpuchoiceidx)]
980+
967981
if gpulayers_var.get():
968982
args.gpulayers = int(gpulayers_var.get())
969983
if runopts_var.get()==runopts[3]:
@@ -1329,7 +1343,7 @@ def main(args):
13291343
compatgroup = parser.add_mutually_exclusive_group()
13301344
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
13311345
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
1332-
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires Nvidia GPU. Select lowvram to not allocate VRAM scratch buffer. Enter a number after to select a different main GPU.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
1346+
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
13331347
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
13341348
args = parser.parse_args()
13351349
main(args)

0 commit comments

Comments
 (0)