Skip to content

Commit 616b837

Browse files
x-y-z authored and torvalds committed
mm: thp: enable thp migration in generic path
Add thp migration's core code, including conversions between a PMD entry and a swap entry, setting PMD migration entry, removing PMD migration entry, and waiting on PMD migration entries. This patch makes it possible to support thp migration. If you fail to allocate a destination page as a thp, you just split the source thp as we do now, and then enter the normal page migration. If you succeed to allocate destination thp, you enter thp migration. Subsequent patches actually enable thp migration for each caller of page migration by allowing its get_new_page() callback to allocate thps. [[email protected]: fix gcc-4.9.0 -Wmissing-braces warning] Link: http://lkml.kernel.org/r/[email protected] [[email protected]: fix x86_64 allnoconfig warning] Signed-off-by: Zi Yan <[email protected]> Acked-by: Kirill A. Shutemov <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: Anshuman Khandual <[email protected]> Cc: Dave Hansen <[email protected]> Cc: David Nellans <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Minchan Kim <[email protected]> Cc: Naoya Horiguchi <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Vlastimil Babka <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Michal Hocko <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 9c670ea commit 616b837

File tree

7 files changed

+212
-13
lines changed

7 files changed

+212
-13
lines changed

arch/x86/include/asm/pgtable_64.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
210210
((type) << (SWP_TYPE_FIRST_BIT)) \
211211
| ((offset) << SWP_OFFSET_FIRST_BIT) })
212212
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
213+
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) })
213214
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
215+
#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
214216

215217
extern int kern_addr_valid(unsigned long addr);
216218
extern void cleanup_highmap(void);

include/linux/swapops.h

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
103103
#ifdef CONFIG_MIGRATION
104104
static inline swp_entry_t make_migration_entry(struct page *page, int write)
105105
{
106-
BUG_ON(!PageLocked(page));
106+
BUG_ON(!PageLocked(compound_head(page)));
107+
107108
return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ,
108109
page_to_pfn(page));
109110
}
@@ -126,7 +127,7 @@ static inline struct page *migration_entry_to_page(swp_entry_t entry)
126127
* Any use of migration entries may only occur while the
127128
* corresponding page is locked
128129
*/
129-
BUG_ON(!PageLocked(p));
130+
BUG_ON(!PageLocked(compound_head(p)));
130131
return p;
131132
}
132133

@@ -148,7 +149,11 @@ static inline int is_migration_entry(swp_entry_t swp)
148149
{
149150
return 0;
150151
}
151-
#define migration_entry_to_page(swp) NULL
152+
static inline struct page *migration_entry_to_page(swp_entry_t entry)
153+
{
154+
return NULL;
155+
}
156+
152157
static inline void make_migration_entry_read(swp_entry_t *entryp) { }
153158
static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
154159
spinlock_t *ptl) { }
@@ -163,6 +168,68 @@ static inline int is_write_migration_entry(swp_entry_t entry)
163168

164169
#endif
165170

171+
struct page_vma_mapped_walk;
172+
173+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
174+
extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
175+
struct page *page);
176+
177+
extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw,
178+
struct page *new);
179+
180+
extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
181+
182+
static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
183+
{
184+
swp_entry_t arch_entry;
185+
186+
arch_entry = __pmd_to_swp_entry(pmd);
187+
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
188+
}
189+
190+
static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
191+
{
192+
swp_entry_t arch_entry;
193+
194+
arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
195+
return __swp_entry_to_pmd(arch_entry);
196+
}
197+
198+
static inline int is_pmd_migration_entry(pmd_t pmd)
199+
{
200+
return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
201+
}
202+
#else
203+
static inline void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
204+
struct page *page)
205+
{
206+
BUILD_BUG();
207+
}
208+
209+
static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw,
210+
struct page *new)
211+
{
212+
BUILD_BUG();
213+
}
214+
215+
static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
216+
217+
static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
218+
{
219+
return swp_entry(0, 0);
220+
}
221+
222+
static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
223+
{
224+
return __pmd(0);
225+
}
226+
227+
static inline int is_pmd_migration_entry(pmd_t pmd)
228+
{
229+
return 0;
230+
}
231+
#endif
232+
166233
#ifdef CONFIG_MEMORY_FAILURE
167234

168235
extern atomic_long_t num_poisoned_pages __read_mostly;

mm/huge_memory.c

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,10 +1684,24 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
16841684
spin_unlock(ptl);
16851685
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
16861686
} else {
1687-
struct page *page = pmd_page(orig_pmd);
1688-
page_remove_rmap(page, true);
1689-
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
1690-
VM_BUG_ON_PAGE(!PageHead(page), page);
1687+
struct page *page = NULL;
1688+
int flush_needed = 1;
1689+
1690+
if (pmd_present(orig_pmd)) {
1691+
page = pmd_page(orig_pmd);
1692+
page_remove_rmap(page, true);
1693+
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
1694+
VM_BUG_ON_PAGE(!PageHead(page), page);
1695+
} else if (thp_migration_supported()) {
1696+
swp_entry_t entry;
1697+
1698+
VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
1699+
entry = pmd_to_swp_entry(orig_pmd);
1700+
page = pfn_to_page(swp_offset(entry));
1701+
flush_needed = 0;
1702+
} else
1703+
WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
1704+
16911705
if (PageAnon(page)) {
16921706
zap_deposited_table(tlb->mm, pmd);
16931707
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
@@ -1696,8 +1710,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
16961710
zap_deposited_table(tlb->mm, pmd);
16971711
add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
16981712
}
1713+
16991714
spin_unlock(ptl);
1700-
tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
1715+
if (flush_needed)
1716+
tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
17011717
}
17021718
return 1;
17031719
}
@@ -2745,3 +2761,61 @@ static int __init split_huge_pages_debugfs(void)
27452761
}
27462762
late_initcall(split_huge_pages_debugfs);
27472763
#endif
2764+
2765+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/*
 * Replace the present huge PMD found by page_vma_mapped_walk() with a PMD
 * migration entry pointing at @page.
 *
 * Acts only when the walk stopped at PMD level (pvmw->pmd set and
 * pvmw->pte clear); otherwise it is a no-op.  Caller holds the page lock
 * and the PMD lock taken by the walk — NOTE(review): inferred from the
 * pvmw contract; confirm against the rmap caller.
 */
void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
		struct page *page)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address = pvmw->address;
	pmd_t pmdval;
	swp_entry_t entry;

	/* Only PMD-mapped THPs are handled here. */
	if (!(pvmw->pmd && !pvmw->pte))
		return;

	mmu_notifier_invalidate_range_start(mm, address,
			address + HPAGE_PMD_SIZE);

	flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
	pmdval = *pvmw->pmd;
	pmdp_invalidate(vma, address, pvmw->pmd);
	/* Carry the hardware dirty bit over to the struct page before the
	 * mapping goes away, so dirty data is not lost. */
	if (pmd_dirty(pmdval))
		set_page_dirty(page);
	entry = make_migration_entry(page, pmd_write(pmdval));
	pmdval = swp_entry_to_pmd(entry);
	set_pmd_at(mm, address, pvmw->pmd, pmdval);
	page_remove_rmap(page, true);
	put_page(page);

	mmu_notifier_invalidate_range_end(mm, address,
			address + HPAGE_PMD_SIZE);
}

/*
 * Restore a huge PMD mapping of @new at the address recorded in @pvmw,
 * replacing the PMD migration entry installed by set_pmd_migration_entry().
 * Write permission is re-granted only if the migration entry was a write
 * entry; the new PMD starts out old (not accessed).
 */
void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address = pvmw->address;
	unsigned long mmun_start = address & HPAGE_PMD_MASK;
	pmd_t pmde;
	swp_entry_t entry;

	/* Only PMD-mapped THPs are handled here. */
	if (!(pvmw->pmd && !pvmw->pte))
		return;

	entry = pmd_to_swp_entry(*pvmw->pmd);
	get_page(new);
	pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot));
	if (is_write_migration_entry(entry))
		pmde = maybe_pmd_mkwrite(pmde, vma);

	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
	page_add_anon_rmap(new, vma, mmun_start, true);
	set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
	if (vma->vm_flags & VM_LOCKED)
		mlock_vma_page(new);
	update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
#endif

mm/migrate.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
216216
new = page - pvmw.page->index +
217217
linear_page_index(vma, pvmw.address);
218218

219+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
220+
/* PMD-mapped THP migration entry */
221+
if (!pvmw.pte) {
222+
VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
223+
remove_migration_pmd(&pvmw, new);
224+
continue;
225+
}
226+
#endif
227+
219228
get_page(new);
220229
pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
221230
if (pte_swp_soft_dirty(*pvmw.pte))
@@ -330,6 +339,27 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
330339
__migration_entry_wait(mm, pte, ptl);
331340
}
332341

342+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/*
 * Sleep until the PMD migration entry at @pmd has been removed, i.e.
 * until the THP being migrated is unlocked.
 *
 * Takes the PMD lock to sample *pmd stably; if the entry was already
 * resolved (or the page's refcount has hit zero) it just unlocks and
 * returns.  Otherwise it pins the page, drops the lock, and waits on the
 * page lock, mirroring __migration_entry_wait() for PTEs.
 */
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
	if (!get_page_unless_zero(page))
		goto unlock;
	spin_unlock(ptl);
	wait_on_page_locked(page);
	put_page(page);
	return;
unlock:
	spin_unlock(ptl);
}
#endif
362+
333363
#ifdef CONFIG_BLOCK
334364
/* Returns true if all buffers are successfully locked */
335365
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
@@ -1088,7 +1118,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
10881118
goto out;
10891119
}
10901120

1091-
if (unlikely(PageTransHuge(page))) {
1121+
if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
10921122
lock_page(page);
10931123
rc = split_huge_page(page);
10941124
unlock_page(page);

mm/page_vma_mapped.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,16 +138,28 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
138138
if (!pud_present(*pud))
139139
return false;
140140
pvmw->pmd = pmd_offset(pud, pvmw->address);
141-
if (pmd_trans_huge(*pvmw->pmd)) {
141+
if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) {
142142
pvmw->ptl = pmd_lock(mm, pvmw->pmd);
143-
if (!pmd_present(*pvmw->pmd))
144-
return not_found(pvmw);
145143
if (likely(pmd_trans_huge(*pvmw->pmd))) {
146144
if (pvmw->flags & PVMW_MIGRATION)
147145
return not_found(pvmw);
148146
if (pmd_page(*pvmw->pmd) != page)
149147
return not_found(pvmw);
150148
return true;
149+
} else if (!pmd_present(*pvmw->pmd)) {
150+
if (thp_migration_supported()) {
151+
if (!(pvmw->flags & PVMW_MIGRATION))
152+
return not_found(pvmw);
153+
if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
154+
swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
155+
156+
if (migration_entry_to_page(entry) != page)
157+
return not_found(pvmw);
158+
return true;
159+
}
160+
} else
161+
WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
162+
return not_found(pvmw);
151163
} else {
152164
/* THP pmd was split under us: handle on pte level */
153165
spin_unlock(pvmw->ptl);

mm/pgtable-generic.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
124124
{
125125
pmd_t pmd;
126126
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
127-
VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
127+
VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
128+
!pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
128129
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
129130
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
130131
return pmd;

mm/rmap.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,19 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
13601360
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
13611361

13621362
while (page_vma_mapped_walk(&pvmw)) {
1363+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1364+
/* PMD-mapped THP migration entry */
1365+
if (!pvmw.pte && (flags & TTU_MIGRATION)) {
1366+
VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
1367+
1368+
if (!PageAnon(page))
1369+
continue;
1370+
1371+
set_pmd_migration_entry(&pvmw, page);
1372+
continue;
1373+
}
1374+
#endif
1375+
13631376
/*
13641377
* If the page is mlock()d, we cannot swap it out.
13651378
* If it's recently referenced (perhaps page_referenced

0 commit comments

Comments
 (0)