
Commit cf65429

print cuda or opencl based on what's used
1 parent 72c16d2 commit cf65429

File tree

3 files changed: +27 −3 lines

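The change is the same in all three loaders: the offload and VRAM log lines, which previously always said [opencl], are now wrapped in a preprocessor check so that CLBlast builds keep the [opencl] tag and other GPU builds print [CUDA] instead. A minimal, self-contained sketch of that pattern is below; the backend_label() and report_offload() helpers are illustrative only and do not appear in the commit, which simply repeats the #if/#else around each fprintf call.

// Sketch of the backend-dependent logging pattern this commit applies.
// backend_label() and report_offload() are hypothetical helpers used here
// for illustration; the commit itself wraps each fprintf in #if/#else.
#include <cstddef>
#include <cstdio>

static const char * backend_label() {
#if defined(GGML_USE_CLBLAST)
    return "[opencl]";   // CLBlast (OpenCL) build
#else
    return "[CUDA]";     // otherwise assume the CUDA/cuBLAS build
#endif
}

static void report_offload(const char * func, int n_gpu, std::size_t vram_total) {
    fprintf(stderr, "%s: %s offloading %d layers to GPU\n", func, backend_label(), n_gpu);
    fprintf(stderr, "%s: %s total VRAM used: %zu MB\n", func, backend_label(), vram_total / 1024 / 1024);
}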

otherarch/gptj_v3.cpp

+9 −1

@@ -348,7 +348,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
+        #if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        #else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+        #endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.c_attn_q_proj_w->backend = GGML_BACKEND_GPU;
@@ -373,7 +377,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
             ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
             #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #endif
     }
     #endif

otherarch/mpt_v3.cpp

+9 −1

@@ -301,7 +301,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layers));
+        #if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        #else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+        #endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.ffn_up_proj->backend = GGML_BACKEND_GPU;
@@ -320,7 +324,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
             ggml_cuda_transform_tensor(layer.c_attn_out_proj_weight->data,layer.c_attn_out_proj_weight); vram_total += ggml_nbytes(layer.c_attn_out_proj_weight);
             #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #endif
     }
     #endif

otherarch/neox_v3.cpp

+9 −1

@@ -335,7 +335,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
+        #if defined(GGML_USE_CLBLAST)
         fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        #else
+        fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
+        #endif
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.c_attn_attn_w->backend = GGML_BACKEND_GPU;
@@ -354,7 +358,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
             ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
            #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #if defined(GGML_USE_CLBLAST)
+        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #else
+        fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        #endif
     }
     #endif
