Skip to content

Commit 1e6e363

Browse files
committed
test zero max buffer size
1 parent 8854044 commit 1e6e363

File tree

2 files changed

+19
-14
lines changed

2 files changed

+19
-14
lines changed

ggml-alloc.c

+13-13
Original file line numberDiff line numberDiff line change
@@ -988,19 +988,19 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
988988
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
989989
}
990990

991-
if (this_size > max_size) {
992-
fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
993-
__func__, t->name,
994-
ggml_backend_buft_name(buft),
995-
this_size, max_size);
996-
for (size_t i = 0; i < n_buffers; i++) {
997-
ggml_backend_buffer_free(buffers[i]);
998-
}
999-
free(buffers);
1000-
return NULL;
1001-
}
1002-
1003-
if ((cur_buf_size + this_size) > max_size) {
991+
//if (this_size > max_size) {
992+
// fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
993+
// __func__, t->name,
994+
// ggml_backend_buft_name(buft),
995+
// this_size, max_size);
996+
// for (size_t i = 0; i < n_buffers; i++) {
997+
// ggml_backend_buffer_free(buffers[i]);
998+
// }
999+
// free(buffers);
1000+
// return NULL;
1001+
//}
1002+
1003+
if (cur_buf_size != 0 && (cur_buf_size + this_size) > max_size) {
10041004
// allocate tensors in the current buffer
10051005
if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
10061006
return NULL;

ggml-cuda.cu

+6-1
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,11 @@ GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend
576576
GGML_UNUSED(buft);
577577
}
578578

579+
GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
580+
ggml_backend_cuda_buffer_type_context * buft_ctx = (ggml_backend_cuda_buffer_type_context *)buft->context;
581+
return ggml_cuda_info().devices[buft_ctx->device].vmm_granularity;
582+
}
583+
579584
GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
580585
size_t size = ggml_nbytes(tensor);
581586
int64_t ne0 = tensor->ne[0];
@@ -595,7 +600,7 @@ static ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
595600
/* .get_name = */ ggml_backend_cuda_buffer_type_name,
596601
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
597602
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
598-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
603+
/* .get_max_size = */ ggml_backend_cuda_buffer_type_get_max_size,
599604
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
600605
/* .is_host = */ NULL,
601606
};

0 commit comments

Comments (0)