
Commit b379f9d

Revert "amd multigpu full layer offload w/o vram scratch"

This reverts commit 9adfc8e.

1 parent: 9adfc8e

1 file changed: +0, −14 lines

llama.cpp (−14 lines)

@@ -1224,32 +1224,18 @@ static void llama_model_load_internal(
 
 #ifdef GGML_USE_CUBLAS
     const int max_backend_supported_layers = hparams.n_layer + 3;
-#if defined(GGML_USE_HIPBLAS)
-    const int max_offloadable_layers = low_vram ? hparams.n_layer + 3 : hparams.n_layer + 3;
-#else
     const int max_offloadable_layers = low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3;
-#endif
     if (n_gpu_layers > (int) hparams.n_layer + 1) {
         if (low_vram) {
-#if defined(GGML_USE_HIPBLAS)
-            fprintf(stderr, "%s: offloading v cache to GPU\n", __func__);
-            vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
-#else
             fprintf(stderr, "%s: cannot offload v cache to GPU due to low VRAM option\n", __func__);
-#endif
         } else {
             fprintf(stderr, "%s: offloading v cache to GPU\n", __func__);
             vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
         }
     }
     if (n_gpu_layers > (int) hparams.n_layer + 2) {
         if (low_vram) {
-#if defined(GGML_USE_HIPBLAS)
-            fprintf(stderr, "%s: offloading k cache to GPU\n", __func__);
-            vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
-#else
             fprintf(stderr, "%s: cannot offload k cache to GPU due to low VRAM option\n", __func__);
-#endif
         } else {
             fprintf(stderr, "%s: offloading k cache to GPU\n", __func__);
             vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
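
In practice, this revert makes HIP (ROCm) builds handle the low-VRAM path the same way CUDA builds do: when low_vram is set, neither half of the KV cache is counted toward (or placed in) GPU memory, regardless of how many layers were requested via n_gpu_layers. Below is a minimal, standalone sketch of that restored decision logic; it is not code from llama.cpp itself, and report_kv_offload and kv_self_size are hypothetical stand-ins for the real function context and for MEM_REQ_KV_SELF().at(model.type).

// Sketch only: mirrors the KV-cache offload decision restored by this revert.
// report_kv_offload and kv_self_size are illustrative names, not llama.cpp APIs.
#include <stdio.h>
#include <stddef.h>

static void report_kv_offload(int n_layer, int n_gpu_layers, int low_vram, size_t kv_self_size) {
    size_t vram_kv_cache = 0;

    // Requesting more than n_layer + 1 GPU layers -> consider offloading the V half of the KV cache.
    if (n_gpu_layers > n_layer + 1) {
        if (low_vram) {
            fprintf(stderr, "cannot offload v cache to GPU due to low VRAM option\n");
        } else {
            fprintf(stderr, "offloading v cache to GPU\n");
            vram_kv_cache += kv_self_size / 2;
        }
    }
    // Requesting more than n_layer + 2 GPU layers -> consider offloading the K half as well.
    if (n_gpu_layers > n_layer + 2) {
        if (low_vram) {
            fprintf(stderr, "cannot offload k cache to GPU due to low VRAM option\n");
        } else {
            fprintf(stderr, "offloading k cache to GPU\n");
            vram_kv_cache += kv_self_size / 2;
        }
    }
    fprintf(stderr, "VRAM reserved for KV cache: %zu bytes\n", vram_kv_cache);
}

int main(void) {
    // Example: 32-layer model, all layers plus output and KV cache requested on the GPU,
    // with the low-VRAM option enabled -> nothing is reserved for the KV cache.
    report_kv_offload(32, 35, 1, (size_t) 1024 * 1024 * 1024);
    return 0;
}

Calling the same sketch with low_vram = 0 reserves both halves of the KV cache, which corresponds to the else branches left untouched in the diff above.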

Comments (0)