
Commit 2a4e41a

llama : fix compile warnings
1 parent 17366df commit 2a4e41a

2 files changed: +19 -18 lines


ggml.c (+11 -11)
@@ -14720,12 +14720,12 @@ static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fou
     const int64_t * ne = tensor->ne;
     const size_t  * nb = tensor->nb;
 
-    fprintf(fout, "%-6s %-12s %8d %8jd %jd %jd %jd %16zu %16zu %16zu %16zu %16p %32s\n",
+    fprintf(fout, "%-6s %-12s %8d %8d %d %d %d %16zu %16zu %16zu %16zu %16p %32s\n",
             ggml_type_name(tensor->type),
             ggml_op_name (tensor->op),
             tensor->n_dims,
-            ne[0], ne[1], ne[2], ne[3],
-            nb[0], nb[1], nb[2], nb[3],
+            (int) ne[0], (int) ne[1], (int) ne[2], (int) ne[3],
+            nb[0], nb[1], nb[2], nb[3],
             tensor->data,
             tensor->name);
 }
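
The %8jd / %jd specifiers expect an intmax_t argument, so passing the int64_t values in ne[] can raise -Wformat warnings on toolchains where int64_t is not the same type as intmax_t (long long vs long). The hunk above switches to %d with explicit (int) casts. A minimal standalone sketch of the warning and the fix pattern (not code from the repo):

    /* Standalone sketch (not from the repo): the -Wformat warning and the fix applied here. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        int64_t ne0 = 4096;

        /* printf("%jd\n", ne0);          may warn: %jd expects intmax_t, ne0 is int64_t */
        printf("%d\n", (int) ne0);     /* the commit's approach: cast to int, print with %d */

        return 0;
    }
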
@@ -14734,13 +14734,13 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
     const int64_t * ne = tensor->ne;
     const size_t  * nb = tensor->nb;
 
-    fprintf(fout, "%-6s %-6s %-12s %8d %jd %jd %jd %jd %16zu %16zu %16zu %16zu %8d %16p %32s\n",
+    fprintf(fout, "%-6s %-6s %-12s %8d %d %d %d %d %16zu %16zu %16zu %16zu %8d %16p %32s\n",
             arg,
             ggml_type_name(tensor->type),
             ggml_op_name (tensor->op),
             tensor->n_dims,
-            ne[0], ne[1], ne[2], ne[3],
-            nb[0], nb[1], nb[2], nb[3],
+            (int) ne[0], (int) ne[1], (int) ne[2], (int) ne[3],
+            nb[0], nb[1], nb[2], nb[3],
             tensor->n_tasks,
             tensor->data,
             tensor->name);
@@ -14763,11 +14763,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
     FILE * fout = stdout;
 
     fprintf(fout, "\n");
-    fprintf(fout, "%-16s %8x\n", "magic",   GGML_FILE_MAGIC);
-    fprintf(fout, "%-16s %8d\n", "version", GGML_FILE_VERSION);
-    fprintf(fout, "%-16s %8d\n", "leafs",   cgraph->n_leafs);
-    fprintf(fout, "%-16s %8d\n", "nodes",   cgraph->n_nodes);
-    fprintf(fout, "%-16s %8ju\n", "eval",    size_eval);
+    fprintf(fout, "%-16s %8x\n", "magic",   GGML_FILE_MAGIC);
+    fprintf(fout, "%-16s %8d\n", "version", GGML_FILE_VERSION);
+    fprintf(fout, "%-16s %8d\n", "leafs",   cgraph->n_leafs);
+    fprintf(fout, "%-16s %8d\n", "nodes",   cgraph->n_nodes);
+    fprintf(fout, "%-16s %8d\n", "eval",    (int) size_eval);
 
     // header
     fprintf(fout, "\n");
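
Casting to int truncates anything above INT_MAX, which is acceptable for this debug dump. An alternative that keeps the full range, not used in this commit, is the PRId64 / PRIu64 macros from <inttypes.h>; a sketch with placeholder values:

    /* Alternative sketch, not what the commit does: <inttypes.h> macros expand to the
     * correct conversion specifier for exact-width integer types on every platform. */
    #include <inttypes.h>
    #include <stdio.h>

    int main(void) {
        int64_t  ne0  = 4096;        /* placeholder values, not taken from ggml */
        uint64_t eval = 1u << 20;

        printf("%8" PRId64 "\n", ne0);
        printf("%8" PRIu64 "\n", eval);
        return 0;
    }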

llama.cpp (+8 -7)
@@ -1059,23 +1059,23 @@ static void llama_model_load_internal(
         }
     }
 
+    (void) main_gpu;
 #if defined(GGML_USE_CUBLAS)
     fprintf(stderr, "%s: using CUDA for GPU acceleration\n", __func__);
     ggml_cuda_set_main_device(main_gpu);
-#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
+#define LLAMA_BACKEND_OFFLOAD       GGML_BACKEND_GPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT
 #elif defined(GGML_USE_CLBLAST)
     fprintf(stderr, "%s: using OpenCL for GPU acceleration\n", __func__);
-#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
+#define LLAMA_BACKEND_OFFLOAD       GGML_BACKEND_GPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU
 #else
-#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU
+#define LLAMA_BACKEND_OFFLOAD       GGML_BACKEND_CPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_CPU
 #endif
 
     // prepare memory for the weights
     size_t vram_weights = 0;
-    size_t vram_scratch = 0;
     {
         const uint32_t n_embd  = hparams.n_embd;
         const uint32_t n_layer = hparams.n_layer;
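
The added (void) main_gpu; is the usual idiom for silencing unused-parameter warnings when the only real use of a name sits behind conditional compilation, as main_gpu does here in the CUBLAS-only path. A small standalone sketch of the idiom (hypothetical function, not repo code):

    /* Standalone sketch of the (void)-cast idiom; the function name is made up. */
    #include <stdio.h>

    static void load_model(int main_gpu) {
        (void) main_gpu;                         /* discard: no warning in CPU-only builds */
    #if defined(GGML_USE_CUBLAS)
        printf("using GPU %d\n", main_gpu);      /* the only path that reads the parameter */
    #endif
    }

    int main(void) {
        load_model(0);
        return 0;
    }
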
@@ -1152,17 +1152,17 @@ static void llama_model_load_internal(
     fprintf(stderr, "%s: mem required = %7.2f MB (+ %7.2f MB per state)\n", __func__,
             mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
 
-    const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
-
 #ifdef GGML_USE_CUBLAS
-    vram_scratch = n_batch * MB;
+    const size_t vram_scratch = n_batch * MB;
     ggml_cuda_set_scratch_size(vram_scratch);
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: allocating batch_size x 1 MB = %ld MB VRAM for the scratch buffer\n",
                 __func__, vram_scratch / MB);
     }
 #endif // GGML_USE_CUBLAS
 #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
+    const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
+
     fprintf(stderr, "%s: offloading %d layers to GPU\n", __func__, n_gpu);
     if (n_gpu_layers > (int) hparams.n_layer) {
         fprintf(stderr, "%s: offloading output layer to GPU\n", __func__);
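
This hunk fixes an unused-variable warning by narrowing scope: vram_scratch is now declared inside the GGML_USE_CUBLAS block and n_gpu inside the CUBLAS/CLBLAST block, so CPU-only builds never define values they do not read. A sketch of the pattern (made-up names, not repo code):

    /* Sketch with made-up names: declare the value only inside the block that uses it. */
    #include <stdio.h>

    static void report_scratch(size_t n_batch) {
    #ifdef GGML_USE_CUBLAS
        const size_t vram_scratch = n_batch * 1024 * 1024;   /* only exists in CUDA builds */
        printf("scratch: %zu bytes\n", vram_scratch);
    #else
        (void) n_batch;                                       /* nothing to do in CPU-only builds */
    #endif
    }

    int main(void) {
        report_scratch(512);
        return 0;
    }
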
@@ -1331,6 +1331,7 @@ static bool llama_eval_internal(
     struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
 
     const int i_gpu_start = n_layer - n_gpu_layers;
+    (void) i_gpu_start;
 
     for (int il = 0; il < n_layer; ++il) {
         offload_func_t offload_func = llama_nop;
