Skip to content

Commit 3aca763

Browse files
committed
Only cudaFree a pooled buffer when MMQ (g_mul_mat_q) is disabled
1 parent e69a8c9 commit 3aca763

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4321,7 +4321,7 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
43214321
b.size = 0;
43224322
return ptr;
43234323
}
4324-
if(worst_i!=-1) //no buffer that fits our needs, resize largest one to save memory
4324+
if(worst_i!=-1 && !g_mul_mat_q) //no buffer that fits our needs, resize largest one to save memory (non mmq only)
43254325
{
43264326
cuda_buffer& b = g_cuda_buffer_pool[id][worst_i];
43274327
b.size = 0;

0 commit comments

Comments
 (0)