Commit f096f5d

mmap: per-context prefetch
1 parent db932a5

File tree

1 file changed: +49 -45 lines

llama.cpp

Lines changed: 49 additions & 45 deletions
@@ -1388,46 +1388,31 @@ struct llama_mmap {
         *ptr = *ptr & ~(page_size - 1);
     }
 
-    virtual void populate(size_t first, size_t last) const {
-        GGML_UNUSED(first);
-        GGML_UNUSED(last);
-
-        // either already populated or populated dynamically
-    }
-
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
+    bool numa;
+
     // list of mapped fragments (first_offset, last_offset)
     std::vector<std::pair<size_t, size_t>> mapped_fragments;
 
     llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         this->prefetch = prefetch > 0;
+        this->numa = numa;
         int fd = fileno(file->fp);
-        int flags = MAP_SHARED;
-        // prefetch/readahead impairs performance on NUMA systems
-        if (numa) { prefetch = 0; }
 #ifdef __linux__
         // advise the kernel to read the file sequentially (increases readahead)
         if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
             LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
                     strerror(errno));
         }
-        if (prefetch) { flags |= MAP_POPULATE; }
 #endif
-        addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
+        addr = mmap(NULL, file->size, PROT_READ, MAP_SHARED, fd, 0);
         if (addr == MAP_FAILED) { // NOLINT
             throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
         }
 
-        if (prefetch > 0) {
-            // advise the kernel to preload the mapped memory
-            if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
-                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
-                        strerror(errno));
-            }
-        }
         if (numa) {
             // advise the kernel not to use readahead
             // (because the next page might not belong on the same node)
@@ -1441,6 +1426,25 @@ struct llama_mmap {
         mapped_fragments.emplace_back(0, file->size);
     }
 
+    virtual void populate(size_t first, size_t last) const {
+        // prefetch/readahead impairs performance on NUMA systems
+        if (!numa) {
+            int page_size = sysconf(_SC_PAGESIZE);
+            align_to_previous_page(&first, page_size);
+            align_to_next_page(&last, page_size);
+#ifdef __linux__
+            if (madvise((char *) addr + first, last - first, MADV_POPULATE_READ)) {
+                LLAMA_LOG_WARN("warning: madvise(.., MADV_POPULATE_READ) failed: %s\n", strerror(errno));
+            }
+#else
+            // advise the kernel to preload the mapped memory
+            if (posix_madvise((char *) addr + first, last - first, POSIX_MADV_WILLNEED)) {
+                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n", strerror(errno));
+            }
+#endif
+        }
+    }
+
     // partially unmap the file in the range [first, last)
     void unmap_fragment(size_t first, size_t last) {
         // note: this function must not be called multiple times with overlapping ranges
@@ -1523,30 +1527,30 @@ struct llama_mmap {
         if (addr == NULL) {
             throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
         }
+    }
 
-        if (prefetch > 0) {
+    virtual void populate(size_t first, size_t last) const {
 #if _WIN32_WINNT >= 0x602
-            // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
-            BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
-            HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
-
-            // may fail on pre-Windows 8 systems
-            pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
-
-            if (pPrefetchVirtualMemory) {
-                // advise the kernel to preload the mapped memory
-                WIN32_MEMORY_RANGE_ENTRY range;
-                range.VirtualAddress = addr;
-                range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
-                if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
-                    LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
-                            llama_format_win_err(GetLastError()).c_str());
-                }
+        // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
+        BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
+        HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
+
+        // may fail on pre-Windows 8 systems
+        pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
+
+        if (pPrefetchVirtualMemory) {
+            // advise the kernel to preload the mapped memory
+            WIN32_MEMORY_RANGE_ENTRY range;
+            range.VirtualAddress = (char *) addr + first;
+            range.NumberOfBytes = last - first;
+            if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
+                LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
+                        llama_format_win_err(GetLastError()).c_str());
             }
+        }
 #else
-            throw std::runtime_error("PrefetchVirtualMemory unavailable");
+        throw std::runtime_error("PrefetchVirtualMemory unavailable");
 #endif
-        }
     }
 
     virtual void unmap_fragment(size_t first, size_t last) {
@@ -1572,6 +1576,13 @@ struct llama_mmap {
         throw std::runtime_error("mmap not supported");
     }
 
+    void populate(size_t first, size_t last) {
+        GGML_UNUSED(first);
+        GGML_UNUSED(last);
+
+        throw std::runtime_error("mmap not supported");
+    }
+
     void unmap_fragment(size_t first, size_t last) {
         GGML_UNUSED(first);
         GGML_UNUSED(last);
@@ -1690,13 +1701,6 @@ struct llama_anonymous_mmap : llama_mmap {
 
         throw std::runtime_error("mmap not supported");
     }
-
-    void populate(size_t first, size_t last) const override {
-        GGML_UNUSED(first);
-        GGML_UNUSED(last);
-
-        throw std::runtime_error("mmap not supported");
-    }
 #endif
 };

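Note on usage: the commit removes the whole-file prefetch done at mmap() time (MAP_POPULATE / posix_madvise on the full mapping) and exposes a per-range populate(first, last) method instead, so a caller can prefetch only the byte ranges it actually needs. Below is a minimal sketch, not part of this commit, of how a per-context caller might drive that interface; prefetch_context_tensors and the mmap_like stand-in type are hypothetical names introduced only for illustration.

// C++ sketch assuming a populate(first, last) interface like the one added above
#include <cstddef>
#include <utility>
#include <vector>

struct mmap_like {
    // mirrors llama_mmap::populate(first, last): the implementation page-aligns
    // the range and asks the kernel to fault it in (madvise(MADV_POPULATE_READ)
    // on Linux, posix_madvise(POSIX_MADV_WILLNEED) elsewhere,
    // PrefetchVirtualMemory on Windows)
    virtual void populate(size_t first, size_t last) const = 0;
    virtual ~mmap_like() = default;
};

// hypothetical per-context prefetch: one populate() call per (offset, size)
// range used by this context, instead of a single whole-file prefetch
void prefetch_context_tensors(const mmap_like & mapping,
                              const std::vector<std::pair<size_t, size_t>> & tensor_ranges) {
    for (const auto & [offset, size] : tensor_ranges) {
        mapping.populate(offset, offset + size);
    }
}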