@@ -301,7 +301,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layers));
+#if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+#else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+#endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.ffn_up_proj->backend = GGML_BACKEND_GPU;
@@ -320,7 +324,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
             ggml_cuda_transform_tensor(layer.c_attn_out_proj_weight->data, layer.c_attn_out_proj_weight); vram_total += ggml_nbytes(layer.c_attn_out_proj_weight);
 #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+#if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+#else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+#endif
     }
 #endif
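
For reference, a minimal self-contained sketch of the compile-time backend selection the diff uses: building with GGML_USE_CLBLAST defined tags the log lines [opencl], otherwise (assumed here to be a CUDA build) they are tagged [CUDA]. The report_offload helper and the layer/VRAM values are hypothetical placeholders for the demo, not part of the actual sources.

// Standalone sketch, not part of the real model loader.
#include <cstdio>
#include <cstddef>

static void report_offload(int n_gpu, size_t vram_total) {
    // Same preprocessor pattern as the diff: pick the log tag at compile time.
#if defined(GGML_USE_CLBLAST)
    const char * backend = "opencl";
#else
    const char * backend = "CUDA";
#endif
    std::fprintf(stderr, "%s: [%s] offloading %d layers to GPU\n", __func__, backend, n_gpu);
    std::fprintf(stderr, "%s: [%s] total VRAM used: %zu MB\n", __func__, backend, vram_total / 1024 / 1024);
}

int main() {
    report_offload(4, 512ull * 1024 * 1024); // hypothetical: 4 layers, 512 MB
    return 0;
}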