Commit 6e6152b

optional per-tensor loading

1 parent f531fe8 commit 6e6152b
1 file changed: +17 −6 lines

llama.cpp
Lines changed: 17 additions & 6 deletions
@@ -1374,6 +1374,7 @@ using llama_files = std::vector<std::unique_ptr<llama_file>>;
 struct llama_mmap {
     void * addr;
     size_t size;
+    bool prefetch;
 
     llama_mmap(const llama_mmap &) = delete;
 
@@ -1402,6 +1403,7 @@ struct llama_mmap {
 
     llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
+        this->prefetch = prefetch > 0;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
         // prefetch/readahead impairs performance on NUMA systems
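For context, the POSIX constructor takes a byte count (`(size_t) -1` meaning the whole file, `0` meaning none), and the new line simply records whether any prefetch was requested at all. Below is a minimal sketch of that idea, not the actual llama.cpp implementation; the `mmap_sketch` type and `set_prefetch` helper are invented for illustration.

// Minimal sketch (not the upstream code): a byte-count prefetch request is
// remembered as a boolean and, on POSIX, turned into a readahead hint.
#include <sys/mman.h>   // madvise, MADV_WILLNEED
#include <algorithm>    // std::min
#include <cstddef>      // size_t

struct mmap_sketch {
    void * addr     = nullptr;
    size_t size     = 0;
    bool   prefetch = false;   // remembered so the loader can pick a population strategy later

    void set_prefetch(size_t prefetch_bytes) {
        // any non-zero request means "populate up front";
        // zero means "defer to per-tensor loading"
        prefetch = prefetch_bytes > 0;
        if (prefetch && addr != nullptr) {
            // readahead is only a hint to the kernel; failures are non-fatal
            madvise(addr, std::min(size, prefetch_bytes), MADV_WILLNEED);
        }
    }
};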
@@ -1503,6 +1505,7 @@ struct llama_mmap {
         GGML_UNUSED(numa);
 
         size = file->size;
+        this->prefetch = prefetch > 0;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
 
@@ -1592,9 +1595,10 @@ struct llama_anonymous_mmap : llama_mmap {
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
 #endif
-    llama_anonymous_mmap(struct llama_file * file) {
+    llama_anonymous_mmap(struct llama_file * file, bool prefetch) {
         this->file = file;
         size = file->size;
+        this->prefetch = prefetch;
         addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
         if (addr == MAP_FAILED) { // NOLINT
             throw std::runtime_error(format("mmap(.., MAP_ANONYMOUS) failed: %s", strerror(errno)));
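The anonymous-mapping constructor above reserves memory that is not backed by the model file, so file contents have to be copied in later, which is what makes deferred, per-tensor population possible. Here is a self-contained sketch of that reservation step using only standard POSIX calls; the `reserve_anonymous` helper is invented for illustration and is not the PR's API.

// Standalone sketch of the anonymous-mapping pattern: reserve a writable
// region backed by no file, which the loader can fill per tensor later.
#include <sys/mman.h>
#include <cerrno>
#include <cstddef>
#include <cstring>
#include <stdexcept>
#include <string>

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON   // older BSD/macOS spelling
#endif

void * reserve_anonymous(size_t size) {
    void * addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if (addr == MAP_FAILED) {
        throw std::runtime_error(std::string("mmap(.., MAP_ANONYMOUS) failed: ") + strerror(errno));
    }
    return addr;
}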
@@ -1620,9 +1624,10 @@ struct llama_anonymous_mmap : llama_mmap {
         }
     }
 #elif defined(_WIN32)
-    llama_anonymous_mmap(struct llama_file * file) {
+    llama_anonymous_mmap(struct llama_file * file, bool prefetch) {
         this->file = file;
         size = file->size;
+        this->prefetch = prefetch;
 
         HANDLE hMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, size >> 32, size, NULL);
         if (hMapping == NULL) {
@@ -3699,7 +3704,7 @@ struct llama_model_loader {
         if (use_mmap) {
             mappings.reserve(files.size());
             for (const auto & file : files) {
-                std::unique_ptr<llama_mmap> mapping(anonymous ? new llama_anonymous_mmap(file.get()) : new llama_mmap(file.get(), prefetch ? -1 : 0, ggml_is_numa()));
+                std::unique_ptr<llama_mmap> mapping(anonymous ? new llama_anonymous_mmap(file.get(), prefetch) : new llama_mmap(file.get(), prefetch ? -1 : 0, ggml_is_numa()));
                 if (mlock_mmaps) {
                     std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
                     mlock_mmap->init(mapping->addr);
@@ -3799,8 +3804,11 @@ struct llama_model_loader {
 
         if (use_mmap) {
             for (uint32_t idx = 0; idx < files.size(); idx++) {
-                for (auto range : get_mapping_ranges(idx, ctx)) {
-                    mappings.at(idx)->populate(range.first, range.second);
+                const auto & mapping = mappings.at(idx);
+                if (mapping->prefetch) {
+                    for (auto range : get_mapping_ranges(idx, ctx)) {
+                        mapping->populate(range.first, range.second);
+                    }
                 }
             }
         }
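This hunk is where the flag takes effect: with prefetch enabled the loader still populates every mapped range up front, and with it disabled that pass is skipped entirely, leaving population to the per-tensor path in the hunks below. A hedged, self-contained sketch of that decision follows, with `mapping_stub` standing in for `llama_mmap`; the real `populate` brings the byte range into memory, while the stub only shows the control flow.

// Sketch only: eager, up-front population gated on the prefetch flag.
#include <cstddef>
#include <utility>
#include <vector>

struct mapping_stub {
    bool prefetch = true;
    // stand-in: the real populate() would read/fault the byte range in
    void populate(size_t /*first*/, size_t /*last*/) {}
};

void populate_up_front(mapping_stub & mapping,
                       const std::vector<std::pair<size_t, size_t>> & ranges) {
    if (!mapping.prefetch) {
        return; // deferred: each tensor will populate its own bytes on demand
    }
    for (const auto & range : ranges) {
        mapping.populate(range.first, range.second);
    }
}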
@@ -3838,6 +3846,9 @@ struct llama_model_loader {
                 }
 
                 GGML_ASSERT(buf_mmap || cur->data); // either we have a buffer to allocate the tensor in, or it is already allocated
+                if (!mapping->prefetch) {
+                    mapping->populate(weight->offs, weight->offs + n_size);
+                }
                 if (buf_mmap && cur->data == nullptr) {
                     ggml_backend_tensor_alloc(buf_mmap, cur, data);
                     if (lmlocks) {
@@ -3846,7 +3857,7 @@ struct llama_model_loader {
                     }
                 } else {
                     ggml_backend_tensor_set(cur, data, 0, n_size);
-                    mappings.at(weight->idx)->unmap_fragment(weight->offs, weight->offs + n_size);
+                    mapping->unmap_fragment(weight->offs, weight->offs + n_size);
                 }
             } else {
                 GGML_ASSERT(weight->idx < files.size());