@@ -1388,46 +1388,31 @@ struct llama_mmap {
1388
1388
*ptr = *ptr & ~(page_size - 1);
1389
1389
}
1390
1390
1391
- virtual void populate(size_t first, size_t last) const {
1392
- GGML_UNUSED(first);
1393
- GGML_UNUSED(last);
1394
-
1395
- // either already populated or populated dynamically
1396
- }
1397
-
1398
1391
#ifdef _POSIX_MAPPED_FILES
1399
1392
static constexpr bool SUPPORTED = true;
1400
1393
1394
+ bool numa;
1395
+
1401
1396
// list of mapped fragments (first_offset, last_offset)
1402
1397
std::vector<std::pair<size_t, size_t>> mapped_fragments;
1403
1398
1404
1399
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
1405
1400
size = file->size;
1406
1401
this->prefetch = prefetch > 0;
1402
+ this->numa = numa;
1407
1403
int fd = fileno(file->fp);
1408
- int flags = MAP_SHARED;
1409
- // prefetch/readahead impairs performance on NUMA systems
1410
- if (numa) { prefetch = 0; }
1411
1404
#ifdef __linux__
1412
1405
// advise the kernel to read the file sequentially (increases readahead)
1413
1406
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
1414
1407
LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
1415
1408
strerror(errno));
1416
1409
}
1417
- if (prefetch) { flags |= MAP_POPULATE; }
1418
1410
#endif
1419
- addr = mmap(NULL, file->size, PROT_READ, flags , fd, 0);
1411
+ addr = mmap(NULL, file->size, PROT_READ, MAP_SHARED , fd, 0);
1420
1412
if (addr == MAP_FAILED) { // NOLINT
1421
1413
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
1422
1414
}
1423
1415
1424
- if (prefetch > 0) {
1425
- // advise the kernel to preload the mapped memory
1426
- if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
1427
- LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
1428
- strerror(errno));
1429
- }
1430
- }
1431
1416
if (numa) {
1432
1417
// advise the kernel not to use readahead
1433
1418
// (because the next page might not belong on the same node)
@@ -1441,6 +1426,25 @@ struct llama_mmap {
1441
1426
mapped_fragments.emplace_back(0, file->size);
1442
1427
}
1443
1428
1429
+ virtual void populate(size_t first, size_t last) const {
1430
+ // prefetch/readahead impairs performance on NUMA systems
1431
+ if (!numa) {
1432
+ int page_size = sysconf(_SC_PAGESIZE);
1433
+ align_to_previous_page(&first, page_size);
1434
+ align_to_next_page(&last, page_size);
1435
+ #ifdef __linux__
1436
+ if (madvise((char *) addr + first, last - first, MADV_POPULATE_READ)) {
1437
+ LLAMA_LOG_WARN("warning: madvise(.., MADV_POPULATE_READ) failed: %s\n", strerror(errno));
1438
+ }
1439
+ #else
1440
+ // advise the kernel to preload the mapped memory
1441
+ if (posix_madvise((char *) addr + first, last - first, POSIX_MADV_WILLNEED)) {
1442
+ LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n", strerror(errno));
1443
+ }
1444
+ #endif
1445
+ }
1446
+ }
1447
+
1444
1448
// partially unmap the file in the range [first, last)
1445
1449
void unmap_fragment(size_t first, size_t last) {
1446
1450
// note: this function must not be called multiple times with overlapping ranges
@@ -1523,30 +1527,30 @@ struct llama_mmap {
1523
1527
if (addr == NULL) {
1524
1528
throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
1525
1529
}
1530
+ }
1526
1531
1527
- if (prefetch > 0) {
1532
+ virtual void populate(size_t first, size_t last) const {
1528
1533
#if _WIN32_WINNT >= 0x602
1529
- // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
1530
- BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
1531
- HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
1532
-
1533
- // may fail on pre-Windows 8 systems
1534
- pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
1535
-
1536
- if (pPrefetchVirtualMemory) {
1537
- // advise the kernel to preload the mapped memory
1538
- WIN32_MEMORY_RANGE_ENTRY range;
1539
- range.VirtualAddress = addr;
1540
- range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
1541
- if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
1542
- LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
1543
- llama_format_win_err(GetLastError()).c_str());
1544
- }
1534
+ // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
1535
+ BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
1536
+ HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
1537
+
1538
+ // may fail on pre-Windows 8 systems
1539
+ pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
1540
+
1541
+ if (pPrefetchVirtualMemory) {
1542
+ // advise the kernel to preload the mapped memory
1543
+ WIN32_MEMORY_RANGE_ENTRY range;
1544
+ range.VirtualAddress = (char *) addr + first;
1545
+ range.NumberOfBytes = last - first;
1546
+ if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
1547
+ LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
1548
+ llama_format_win_err(GetLastError()).c_str());
1545
1549
}
1550
+ }
1546
1551
#else
1547
- throw std::runtime_error("PrefetchVirtualMemory unavailable");
1552
+ throw std::runtime_error("PrefetchVirtualMemory unavailable");
1548
1553
#endif
1549
- }
1550
1554
}
1551
1555
1552
1556
virtual void unmap_fragment(size_t first, size_t last) {
@@ -1572,6 +1576,13 @@ struct llama_mmap {
1572
1576
throw std::runtime_error("mmap not supported");
1573
1577
}
1574
1578
1579
+ void populate(size_t first, size_t last) {
1580
+ GGML_UNUSED(first);
1581
+ GGML_UNUSED(last);
1582
+
1583
+ throw std::runtime_error("mmap not supported");
1584
+ }
1585
+
1575
1586
void unmap_fragment(size_t first, size_t last) {
1576
1587
GGML_UNUSED(first);
1577
1588
GGML_UNUSED(last);
@@ -1690,13 +1701,6 @@ struct llama_anonymous_mmap : llama_mmap {
1690
1701
1691
1702
throw std::runtime_error("mmap not supported");
1692
1703
}
1693
-
1694
- void populate(size_t first, size_t last) const override {
1695
- GGML_UNUSED(first);
1696
- GGML_UNUSED(last);
1697
-
1698
- throw std::runtime_error("mmap not supported");
1699
- }
1700
1704
#endif
1701
1705
};
1702
1706
0 commit comments