From a7fb802dc65afcd57360ef7729598b7072079d44 Mon Sep 17 00:00:00 2001 From: Matt Clayton Date: Fri, 11 Apr 2025 16:15:50 -0400 Subject: [PATCH 1/3] llava: Fix cpu-only clip image encoding --- examples/llava/clip.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index a55b3f3835184..31641c36d70dd 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -337,6 +337,7 @@ struct clip_ctx { ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr) : nullptr); + ggml_backend_t backend_cpu_raw_ptr = backend_cpu.get(); if (backend) { LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend.get())); backend_ptrs.push_back(backend.get()); @@ -346,8 +347,8 @@ struct clip_ctx { LOG_INF("%s: CLIP using CPU backend\n", __func__); } - backend_ptrs.push_back(backend_cpu.get()); - backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu.get())); + backend_ptrs.push_back(backend_cpu_raw_ptr); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu_raw_ptr)); sched.reset( ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false) @@ -2610,7 +2611,9 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima } } - ggml_backend_cpu_set_n_threads(ctx->backend_cpu.get(), n_threads); + ggml_backend_t cpu_backend_raw_ptr = + ggml_backend_is_cpu(ctx->backend.get()) ? 
ctx->backend.get() : ctx->backend_cpu.get(); + ggml_backend_cpu_set_n_threads(cpu_backend_raw_ptr, n_threads); auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf); if (status != GGML_STATUS_SUCCESS) { From afb1eafe62750c9ec86366fa5f701eba3198845b Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 11 Apr 2025 22:51:07 +0200 Subject: [PATCH 2/3] clip : no smart ptr for ggml_backend_t --- examples/llava/clip.cpp | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 31641c36d70dd..b67cccf70b4d1 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -323,8 +323,8 @@ struct clip_ctx { std::vector<ggml_backend_t> backend_ptrs; std::vector<ggml_backend_buffer_type_t> backend_buft; - ggml_backend_ptr backend; - ggml_backend_ptr backend_cpu; + ggml_backend_t backend; + ggml_backend_t backend_cpu; ggml_backend_buffer_ptr buf; ggml_backend_sched_ptr sched; @@ -332,28 +332,34 @@ struct clip_ctx { clip_image_size load_image_size; clip_ctx(clip_context_params & ctx_params) { - backend_cpu = ggml_backend_ptr(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr)); - backend = ggml_backend_ptr(ctx_params.use_gpu + backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); + backend = ctx_params.use_gpu ? 
ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr) - : nullptr); + : nullptr; - ggml_backend_t backend_cpu_raw_ptr = backend_cpu.get(); if (backend) { - LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend.get())); - backend_ptrs.push_back(backend.get()); - backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get())); + LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend)); + backend_ptrs.push_back(backend); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend)); } else { - backend = std::move(backend_cpu); + backend = backend_cpu; LOG_INF("%s: CLIP using CPU backend\n", __func__); } - backend_ptrs.push_back(backend_cpu_raw_ptr); - backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu_raw_ptr)); + backend_ptrs.push_back(backend); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu)); sched.reset( ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false) ); } + + ~clip_ctx() { + ggml_backend_free(backend); + if (backend != backend_cpu) { + ggml_backend_free(backend_cpu); + } + } }; static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch & imgs) { @@ -1429,7 +1435,7 @@ struct clip_model_loader { } // alloc memory and offload data - ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend.get()); + ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend); ctx_clip.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(ctx_clip.ctx_data.get(), buft)); ggml_backend_buffer_set_usage(ctx_clip.buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS); for (auto & t : tensors_to_load) { @@ -2611,9 +2617,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima } } - ggml_backend_t cpu_backend_raw_ptr = - ggml_backend_is_cpu(ctx->backend.get()) ? 
ctx->backend.get() : ctx->backend_cpu.get(); - ggml_backend_cpu_set_n_threads(cpu_backend_raw_ptr, n_threads); + ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads); auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf); if (status != GGML_STATUS_SUCCESS) { From 676dc0c333a65f8beef425a4a65957698e24420b Mon Sep 17 00:00:00 2001 From: Matt Clayton Date: Fri, 11 Apr 2025 17:01:08 -0400 Subject: [PATCH 3/3] Fix for backend_ptr push_back --- examples/llava/clip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index b67cccf70b4d1..49c90b7506e73 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -346,7 +346,7 @@ struct clip_ctx { LOG_INF("%s: CLIP using CPU backend\n", __func__); } - backend_ptrs.push_back(backend); + backend_ptrs.push_back(backend_cpu); backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu)); sched.reset(