1 file changed: +3 −1 lines changed

@@ -1076,6 +1076,7 @@ static void llama_model_load_internal(
 
     // prepare memory for the weights
     size_t vram_weights = 0;
+    size_t vram_scratch = 0;
     {
         const uint32_t n_embd  = hparams.n_embd;
         const uint32_t n_layer = hparams.n_layer;
@@ -1152,8 +1153,9 @@ static void llama_model_load_internal(
     fprintf(stderr, "%s: mem required  = %7.2f MB (+ %7.2f MB per state)\n", __func__,
             mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
 
+    (void) vram_scratch;
 #ifdef GGML_USE_CUBLAS
-    const size_t vram_scratch = n_batch * MB;
+    vram_scratch = n_batch * MB;
     ggml_cuda_set_scratch_size(vram_scratch);
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: allocating batch_size x 1 MB = %ld MB VRAM for the scratch buffer\n",
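The diff hoists vram_scratch out of the GGML_USE_CUBLAS block so every build sees the declaration, then marks it used with a (void) cast so CPU-only builds (where the #ifdef body compiles away) do not trigger an unused-variable warning. Below is a minimal, self-contained sketch of that idiom; MB, n_batch, GGML_USE_CUBLAS, and the variable names mirror the diff, while load_internal, main, and the example values are hypothetical scaffolding:

// Sketch of the warning-suppression idiom from the diff.
#include <cstdio>
#include <cstddef>

static const size_t MB = 1024 * 1024;

static void load_internal(int n_batch) {
    size_t vram_scratch = 0;

    // A (void) cast counts as a "use", so builds compiled without
    // GGML_USE_CUBLAS (where the assignment below disappears) emit no
    // -Wunused-variable / -Wunused-parameter warnings.
    (void) vram_scratch;
    (void) n_batch;
#ifdef GGML_USE_CUBLAS
    vram_scratch = n_batch * MB;
    fprintf(stderr, "allocating batch_size x 1 MB = %zu MB VRAM for the scratch buffer\n",
            vram_scratch / MB);
#endif
}

int main() {
    load_internal(512); // hypothetical batch size
    return 0;
}

Compared with wrapping the declaration itself in the #ifdef, this keeps a single declaration in all configurations and hides only the backend-specific assignment behind the guard.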