Commit b5c5b4b

Avoid allocating a buffer for offloaded tensors when using no-mmap
1 parent 67509db commit b5c5b4b

File tree: 1 file changed, 19 additions and 0 deletions

llama.cpp (+19 lines)
@@ -668,13 +668,21 @@ struct llama_model_loader {
 
     struct ggml_tensor * get_tensor_for(llama_load_tensor & lt, ggml_backend backend) {
         struct ggml_tensor * tensor;
+
+        if (backend != GGML_BACKEND_CPU) {
+            ggml_set_no_alloc(ggml_ctx, true);
+        }
         if (lt.ne.size() == 2) {
             tensor = ggml_new_tensor_2d(ggml_ctx, lt.type, lt.ne.at(0), lt.ne.at(1));
         } else {
             LLAMA_ASSERT(lt.ne.size() == 1);
             tensor = ggml_new_tensor_1d(ggml_ctx, lt.type, lt.ne.at(0));
         }
         ggml_set_name(tensor, lt.name.c_str());
+
+        if (backend != GGML_BACKEND_CPU) {
+            ggml_set_no_alloc(ggml_ctx, use_mmap);
+        }
         LLAMA_ASSERT(lt.ggml_tensor == NULL); // if this fails, we called get_tensor twice on the same tensor
         tensor->backend = backend;
         lt.ggml_tensor = tensor;
@@ -713,6 +721,11 @@ struct llama_model_loader {
             }
             LLAMA_ASSERT(lt.ggml_tensor); // unused tensors should have been caught by load_data already
             lt.data = (uint8_t *) lt.ggml_tensor->data;
+
+            // allocate temp buffer if not using mmap
+            if (!use_mmap && lt.data == NULL) {
+                lt.data = (uint8_t*)malloc(ggml_nbytes(lt.ggml_tensor));
+            }
+
             load_data_for(lt);
             switch(lt.ggml_tensor->backend) {
                 case GGML_BACKEND_CPU:
@@ -726,11 +739,17 @@ struct llama_model_loader {
 #ifdef GGML_USE_CUBLAS
                 case GGML_BACKEND_CUDA:
                     ggml_cuda_load_data(lt.data, lt.ggml_tensor);
+                    if (!use_mmap) {
+                        free(lt.data);
+                    }
                     break;
 #endif
 #ifdef GGML_USE_CLBLAST
                 case GGML_BACKEND_CL:
                     ggml_cl_transform_tensor(lt.data, lt.ggml_tensor);
+                    if (!use_mmap) {
+                        free(lt.data);
+                    }
                     break;
 #endif
                 default:
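
For context, here is a minimal sketch (not part of the commit; names are illustrative) of the pattern the diff introduces: ggml_set_no_alloc is switched on around tensor creation so a GPU-offloaded tensor gets metadata but no CPU data buffer, then restored to the context's prior mode (use_mmap); when mmap is disabled, the weights are staged in a temporary malloc'd buffer only long enough to hand them to the backend, then freed. read_tensor_from_file and upload_to_backend below are hypothetical stand-ins for load_data_for and ggml_cuda_load_data / ggml_cl_transform_tensor.

#include "ggml.h"
#include <cstdint>
#include <cstdlib>

// Hypothetical stand-ins for load_data_for() and the CUDA/OpenCL upload calls.
static void read_tensor_from_file(uint8_t * dst, struct ggml_tensor * t) { /* read ggml_nbytes(t) bytes from disk into dst */ }
static void upload_to_backend(uint8_t * src, struct ggml_tensor * t)     { /* copy src into device memory for t */ }

// Create a tensor without allocating CPU memory when it will live on a GPU backend.
static struct ggml_tensor * create_tensor_no_cpu_alloc(
        struct ggml_context * ctx, enum ggml_type type,
        int64_t ne0, int64_t ne1, bool offloaded, bool use_mmap) {
    if (offloaded) {
        ggml_set_no_alloc(ctx, true);      // metadata only, no data buffer
    }
    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, type, ne0, ne1);
    if (offloaded) {
        ggml_set_no_alloc(ctx, use_mmap);  // restore the context's previous no_alloc mode
    }
    return t;
}

// Load an offloaded tensor when mmap is disabled: stage in a temporary buffer, then free it.
static void load_offloaded_tensor_no_mmap(struct ggml_tensor * t) {
    uint8_t * data = (uint8_t *) malloc(ggml_nbytes(t)); // t->data is NULL for this path
    read_tensor_from_file(data, t);
    upload_to_backend(data, t);
    free(data);                                          // the CPU copy is no longer needed
}

With mmap, by contrast, the tensor data points into the mapped file, so no temporary buffer is ever allocated; that is why both the malloc and the free in the diff are guarded by !use_mmap.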
