Skip to content

Commit 9f16a4c

Browse files
committed
switch to upstream implementation of pool malloc
1 parent 6659652 commit 9f16a4c

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

ggml-cuda.cu

+36-28
Original file line numberDiff line numberDiff line change
@@ -4293,45 +4293,53 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
42934293
scoped_spin_lock lock(g_cuda_pool_lock);
42944294
int id;
42954295
CUDA_CHECK(cudaGetDevice(&id));
4296-
4297-
int best_i = -1;
4298-
size_t best_size = std::numeric_limits<size_t>::max(); //smallest unused buffer that fits our needs
4299-
int worst_i = -1;
4300-
size_t worst_size = 0; //largest unused buffer seen so far
4301-
4296+
#ifdef DEBUG_CUDA_MALLOC
4297+
int nnz = 0;
4298+
size_t max_size = 0, tot_size = 0;
4299+
#endif
4300+
size_t best_diff = 1ull << 36;
4301+
int ibest = -1;
43024302
for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
43034303
cuda_buffer& b = g_cuda_buffer_pool[id][i];
4304-
if (b.size > 0 && b.size >= size && b.size < best_size)
4305-
{
4306-
best_i = i;
4307-
best_size = b.size;
4308-
}
4309-
if (b.size > 0 && b.size > worst_size)
4310-
{
4311-
worst_i = i;
4312-
worst_size = b.size;
4304+
if (b.ptr != nullptr) {
4305+
#ifdef DEBUG_CUDA_MALLOC
4306+
++nnz;
4307+
tot_size += b.size;
4308+
if (b.size > max_size) max_size = b.size;
4309+
#endif
4310+
if (b.size >= size) {
4311+
size_t diff = b.size - size;
4312+
if (diff < best_diff) {
4313+
best_diff = diff;
4314+
ibest = i;
4315+
if (!best_diff) {
4316+
void * ptr = b.ptr;
4317+
*actual_size = b.size;
4318+
b.ptr = nullptr;
4319+
b.size = 0;
4320+
return ptr;
4321+
}
4322+
}
4323+
}
43134324
}
43144325
}
4315-
if(best_i!=-1) //found the smallest buffer that fits our needs
4316-
{
4317-
cuda_buffer& b = g_cuda_buffer_pool[id][best_i];
4326+
if (ibest >= 0) {
4327+
cuda_buffer& b = g_cuda_buffer_pool[id][ibest];
43184328
void * ptr = b.ptr;
43194329
*actual_size = b.size;
43204330
b.ptr = nullptr;
43214331
b.size = 0;
43224332
return ptr;
43234333
}
4324-
if(worst_i!=-1) //no buffer that fits our needs, resize largest one to save memory
4325-
{
4326-
cuda_buffer& b = g_cuda_buffer_pool[id][worst_i];
4327-
b.size = 0;
4328-
void * ptr = b.ptr;
4329-
cudaFree(ptr);
4330-
b.ptr = ptr = nullptr;
4331-
}
4334+
#ifdef DEBUG_CUDA_MALLOC
4335+
fprintf(stderr, "%s: %d buffers, max_size = %u MB, tot_size = %u MB, requested %u MB\n", __func__, nnz,
4336+
(uint32_t)(max_size/1024/1024), (uint32_t)(tot_size/1024/1024), (uint32_t)(size/1024/1024));
4337+
#endif
43324338
void * ptr;
4333-
CUDA_CHECK(cudaMalloc((void **) &ptr, size));
4334-
*actual_size = size;
4339+
size_t look_ahead_size = (size_t) (1.05 * size);
4340+
look_ahead_size = 256 * ((look_ahead_size + 255)/256);
4341+
CUDA_CHECK(cudaMalloc((void **) &ptr, look_ahead_size));
4342+
*actual_size = look_ahead_size;
43354343
return ptr;
43364344
}
43374345

0 commit comments

Comments
 (0)