Skip to content

Commit bd4c82c

Browse files
yhuang-intel authored and torvalds committed
mm, THP, swap: delay splitting THP after swapped out
In this patch, splitting transparent huge page (THP) during swapping out is delayed from after adding the THP into the swap cache to after swapping out finishes. After the patch, more operations for the anonymous THP reclaiming, such as writing the THP to the swap device, removing the THP from the swap cache could be batched. So that the performance of anonymous THP swapping out could be improved. This is the second step for the THP swap support. The plan is to delay splitting the THP step by step and avoid splitting the THP finally. With the patchset, the swap out throughput improves 42% (from about 5.81GB/s to about 8.25GB/s) in the vm-scalability swap-w-seq test case with 16 processes. At the same time, the IPI (reflect TLB flushing) reduced about 78.9%. The test is done on a Xeon E5 v3 system. The swap device used is a RAM simulated PMEM (persistent memory) device. To test the sequential swapping out, the test case creates 8 processes, which sequentially allocate and write to the anonymous pages until the RAM and part of the swap device is used up. Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: "Huang, Ying" <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: Minchan Kim <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Shaohua Li <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: "Kirill A . Shutemov" <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Dan Williams <[email protected]> Cc: Jens Axboe <[email protected]> Cc: Ross Zwisler <[email protected]> [for brd.c, zram_drv.c, pmem.c] Cc: Vishal L Verma <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent d6810d7 commit bd4c82c

File tree

1 file changed

+52
-43
lines changed

1 file changed

+52
-43
lines changed

mm/vmscan.c

Lines changed: 52 additions & 43 deletions
Original file line number · Diff line number · Diff line change
@@ -536,7 +536,9 @@ static inline int is_page_cache_freeable(struct page *page)
536536
* that isolated the page, the page cache radix tree and
537537
* optional buffer heads at page->private.
538538
*/
539-
return page_count(page) - page_has_private(page) == 2;
539+
int radix_pins = PageTransHuge(page) && PageSwapCache(page) ?
540+
HPAGE_PMD_NR : 1;
541+
return page_count(page) - page_has_private(page) == 1 + radix_pins;
540542
}
541543

542544
static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
@@ -666,6 +668,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
666668
bool reclaimed)
667669
{
668670
unsigned long flags;
671+
int refcount;
669672

670673
BUG_ON(!PageLocked(page));
671674
BUG_ON(mapping != page_mapping(page));
@@ -696,11 +699,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
696699
* Note that if SetPageDirty is always performed via set_page_dirty,
697700
* and thus under tree_lock, then this ordering is not required.
698701
*/
699-
if (!page_ref_freeze(page, 2))
702+
if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
703+
refcount = 1 + HPAGE_PMD_NR;
704+
else
705+
refcount = 2;
706+
if (!page_ref_freeze(page, refcount))
700707
goto cannot_free;
701708
/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
702709
if (unlikely(PageDirty(page))) {
703-
page_ref_unfreeze(page, 2);
710+
page_ref_unfreeze(page, refcount);
704711
goto cannot_free;
705712
}
706713

@@ -1122,58 +1129,56 @@ static unsigned long shrink_page_list(struct list_head *page_list,
11221129
* Try to allocate it some swap space here.
11231130
* Lazyfree page could be freed directly
11241131
*/
1125-
if (PageAnon(page) && PageSwapBacked(page) &&
1126-
!PageSwapCache(page)) {
1127-
if (!(sc->gfp_mask & __GFP_IO))
1128-
goto keep_locked;
1129-
if (PageTransHuge(page)) {
1130-
/* cannot split THP, skip it */
1131-
if (!can_split_huge_page(page, NULL))
1132-
goto activate_locked;
1133-
/*
1134-
* Split pages without a PMD map right
1135-
* away. Chances are some or all of the
1136-
* tail pages can be freed without IO.
1137-
*/
1138-
if (!compound_mapcount(page) &&
1139-
split_huge_page_to_list(page, page_list))
1140-
goto activate_locked;
1141-
}
1142-
if (!add_to_swap(page)) {
1143-
if (!PageTransHuge(page))
1144-
goto activate_locked;
1145-
/* Split THP and swap individual base pages */
1146-
if (split_huge_page_to_list(page, page_list))
1147-
goto activate_locked;
1148-
if (!add_to_swap(page))
1149-
goto activate_locked;
1150-
}
1151-
1152-
/* XXX: We don't support THP writes */
1153-
if (PageTransHuge(page) &&
1154-
split_huge_page_to_list(page, page_list)) {
1155-
delete_from_swap_cache(page);
1156-
goto activate_locked;
1157-
}
1132+
if (PageAnon(page) && PageSwapBacked(page)) {
1133+
if (!PageSwapCache(page)) {
1134+
if (!(sc->gfp_mask & __GFP_IO))
1135+
goto keep_locked;
1136+
if (PageTransHuge(page)) {
1137+
/* cannot split THP, skip it */
1138+
if (!can_split_huge_page(page, NULL))
1139+
goto activate_locked;
1140+
/*
1141+
* Split pages without a PMD map right
1142+
* away. Chances are some or all of the
1143+
* tail pages can be freed without IO.
1144+
*/
1145+
if (!compound_mapcount(page) &&
1146+
split_huge_page_to_list(page,
1147+
page_list))
1148+
goto activate_locked;
1149+
}
1150+
if (!add_to_swap(page)) {
1151+
if (!PageTransHuge(page))
1152+
goto activate_locked;
1153+
/* Fallback to swap normal pages */
1154+
if (split_huge_page_to_list(page,
1155+
page_list))
1156+
goto activate_locked;
1157+
if (!add_to_swap(page))
1158+
goto activate_locked;
1159+
}
11581160

1159-
may_enter_fs = 1;
1161+
may_enter_fs = 1;
11601162

1161-
/* Adding to swap updated mapping */
1162-
mapping = page_mapping(page);
1163+
/* Adding to swap updated mapping */
1164+
mapping = page_mapping(page);
1165+
}
11631166
} else if (unlikely(PageTransHuge(page))) {
11641167
/* Split file THP */
11651168
if (split_huge_page_to_list(page, page_list))
11661169
goto keep_locked;
11671170
}
11681171

1169-
VM_BUG_ON_PAGE(PageTransHuge(page), page);
1170-
11711172
/*
11721173
* The page is mapped into the page tables of one or more
11731174
* processes. Try to unmap it here.
11741175
*/
11751176
if (page_mapped(page)) {
1176-
if (!try_to_unmap(page, ttu_flags | TTU_BATCH_FLUSH)) {
1177+
enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;
1178+
1179+
if (unlikely(PageTransHuge(page)))
1180+
flags |= TTU_SPLIT_HUGE_PMD;
1181+
if (!try_to_unmap(page, flags)) {
11771182
nr_unmap_fail++;
11781183
goto activate_locked;
11791184
}
@@ -1313,7 +1318,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
13131318
* Is there need to periodically free_page_list? It would
13141319
* appear not as the counts should be low
13151320
*/
1316-
list_add(&page->lru, &free_pages);
1321+
if (unlikely(PageTransHuge(page))) {
1322+
mem_cgroup_uncharge(page);
1323+
(*get_compound_page_dtor(page))(page);
1324+
} else
1325+
list_add(&page->lru, &free_pages);
13171326
continue;
13181327

13191328
activate_locked:

0 commit comments

Comments (0)