
Commit 2d7bf11

llama : fix vram_scratch var
1 parent 2a4e41a commit 2d7bf11

File tree

1 file changed: +3 −1 lines changed


llama.cpp

+3 −1
@@ -1076,6 +1076,7 @@ static void llama_model_load_internal(
 
         // prepare memory for the weights
         size_t vram_weights = 0;
+        size_t vram_scratch = 0;
         {
             const uint32_t n_embd  = hparams.n_embd;
             const uint32_t n_layer = hparams.n_layer;
@@ -1152,8 +1153,9 @@ static void llama_model_load_internal(
         fprintf(stderr, "%s: mem required  = %7.2f MB (+ %7.2f MB per state)\n", __func__,
                 mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
 
+    (void) vram_scratch;
 #ifdef GGML_USE_CUBLAS
-    const size_t vram_scratch = n_batch * MB;
+    vram_scratch = n_batch * MB;
     ggml_cuda_set_scratch_size(vram_scratch);
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: allocating batch_size x 1 MB = %ld MB VRAM for the scratch buffer\n",
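The change moves the vram_scratch declaration out of the GGML_USE_CUBLAS-only code so the variable exists in every build, and adds a (void) cast so CPU-only builds do not emit an unused-variable warning for it. A minimal, self-contained sketch of that pattern follows; the function and values are hypothetical, not the llama.cpp source itself:

// Sketch of the declare-unconditionally + (void)-cast pattern from this commit
// (hypothetical names; not the actual llama.cpp code).
#include <cstddef>
#include <cstdio>

static const size_t MB = 1024 * 1024;

static void load_model(size_t n_batch, int n_gpu_layers) {
    // Declared unconditionally so the variable exists even when cuBLAS is disabled.
    size_t vram_scratch = 0;

    // No-op cast: silences -Wunused-variable when GGML_USE_CUBLAS is not defined
    // and the block below is compiled out.
    (void) vram_scratch;
#ifdef GGML_USE_CUBLAS
    vram_scratch = n_batch * MB;
    // In llama.cpp this value is passed to ggml_cuda_set_scratch_size().
    if (n_gpu_layers > 0) {
        fprintf(stderr, "allocating batch_size x 1 MB = %zu MB VRAM for the scratch buffer\n",
                vram_scratch / MB);
    }
#else
    (void) n_batch;       // the parameters are likewise unused in CPU-only builds
    (void) n_gpu_layers;
#endif
}

int main() {
    load_model(512, 0);   // hypothetical batch size; prints nothing without cuBLAS
    return 0;
}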
