Commit 39dde65

Chen, Kenneth W authored and Linus Torvalds committed
[PATCH] shared page table for hugetlb page
Following up on the shared page table work done by Dave McCracken, this set of patches targets shared page tables for hugetlb memory only.

Shared page tables are particularly useful when a large number of independent processes share a large shared memory segment. In the normal page case, the amount of memory saved in the processes' page tables is quite significant. For hugetlb, saving page table memory is not the primary objective (hugetlb itself already cuts page table overhead significantly); instead, the purpose of sharing page tables for hugetlb is faster TLB refill and less cache pollution on a TLB miss. With PT sharing, pte entries are shared among hundreds of processes, so the total cache footprint of the page tables is smaller and, in return, the application gets a much higher cache hit ratio. A second effect is that the hardware page walker is more likely to find the pte in cache, which reduces TLB miss latency. These two effects combine to give higher application performance.

Signed-off-by: Ken Chen <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Cc: Dave McCracken <[email protected]>
Cc: William Lee Irwin III <[email protected]>
Cc: "Luck, Tony" <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: David Gibson <[email protected]>
Cc: Adam Litke <[email protected]>
Cc: Paul Mundt <[email protected]>
Cc: "David S. Miller" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent e1dbeda commit 39dde65
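
The scenario this commit targets is easy to picture from userspace. Below is a minimal sketch (not part of the commit) of many independent processes attaching the same hugetlb shared memory segment through SysV shm with SHM_HUGETLB. The segment size, process count, and fallback SHM_HUGETLB value are illustrative assumptions; huge pages must be reserved beforehand via /proc/sys/vm/nr_hugepages, and pmd sharing only kicks in where the mapping covers a fully aligned pud range. With the patch applied, all of these processes can end up walking the same pmd pages for the segment.

	#include <stdio.h>
	#include <string.h>
	#include <sys/ipc.h>
	#include <sys/shm.h>
	#include <sys/wait.h>
	#include <unistd.h>

	#ifndef SHM_HUGETLB
	#define SHM_HUGETLB	04000		/* back the segment with huge pages */
	#endif

	#define SEG_SIZE	(1UL << 30)	/* 1 GB: one full pud range (assumed) */
	#define NPROC		64		/* "hundreds of processes", scaled down */

	int main(void)
	{
		int i, shmid;

		shmid = shmget(IPC_PRIVATE, SEG_SIZE,
			       IPC_CREAT | SHM_HUGETLB | 0600);
		if (shmid < 0) {
			perror("shmget");	/* likely: no huge pages reserved */
			return 1;
		}

		for (i = 0; i < NPROC; i++) {
			if (fork() == 0) {
				/* every child attaches the same segment ... */
				char *p = shmat(shmid, NULL, 0);
				if (p == (char *)-1)
					_exit(1);
				/* ... and touches it, instantiating huge ptes */
				memset(p, 0, SEG_SIZE);
				shmdt(p);
				_exit(0);
			}
		}
		while (wait(NULL) > 0)
			;
		shmctl(shmid, IPC_RMID, NULL);
		return 0;
	}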

8 files changed: 144 additions, 1 deletion

arch/i386/mm/hugetlbpage.c

Lines changed: 111 additions & 1 deletion
@@ -17,6 +17,113 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vma->vm_flags != svma->vm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * search for a shareable pmd page for hugetlb.
+ */
+static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct prio_tree_iter iter;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+
+	if (!vma_shareable(vma, addr))
+		return;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	spin_unlock(&mapping->i_mmap_lock);
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count.  If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+
 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -25,8 +132,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
-	if (pud)
+	if (pud) {
+		if (pud_none(*pud))
+			huge_pmd_share(mm, addr, pud);
 		pte = (pte_t *) pmd_alloc(mm, pud, addr);
+	}
 	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
 	return pte;
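
The comment above huge_pmd_unshare() describes a plain reference-count protocol on the pmd page. The userspace model below (an illustration with made-up names, not kernel code) captures just that protocol: share() mirrors the get_page() in huge_pmd_share(), and unshare() reports 1 only when it dropped a shared reference, leaving the real teardown to the last user.

	#include <assert.h>
	#include <stdio.h>

	/* one reference per pud that points at a given pmd page */
	struct pmd_page {
		int refcount;
	};

	/* mirrors get_page() in huge_pmd_share(): another pud uses this page */
	static void share(struct pmd_page *p)
	{
		p->refcount++;
	}

	/*
	 * mirrors huge_pmd_unshare(): returns 1 if a shared reference was
	 * dropped (the ptes stay live for the other users), 0 if the caller
	 * is the last user and must unmap the ptes itself.
	 */
	static int unshare(struct pmd_page *p)
	{
		assert(p->refcount > 0);	/* BUG_ON(page_count(...) == 0) */
		if (p->refcount == 1)
			return 0;
		p->refcount--;			/* put_page() */
		return 1;
	}

	int main(void)
	{
		struct pmd_page p = { .refcount = 1 };	/* allocating process */

		share(&p);			/* a second process shares the page */
		assert(unshare(&p) == 1);	/* its unmap only drops the ref */
		assert(unshare(&p) == 0);	/* the last user must really unmap */
		printf("refcount model ok, %d user left\n", p.refcount);
		return 0;
	}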

arch/ia64/mm/hugetlbpage.c

Lines changed: 5 additions & 0 deletions
@@ -64,6 +64,11 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
 #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
 
 /*

arch/powerpc/mm/hugetlbpage.c

Lines changed: 5 additions & 0 deletions
@@ -146,6 +146,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	return hugepte_offset(hpdp, addr);
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
 static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);

arch/sh/mm/hugetlbpage.c

Lines changed: 5 additions & 0 deletions
@@ -63,6 +63,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
 struct page *follow_huge_addr(struct mm_struct *mm,
 			      unsigned long address, int write)
 {

arch/sh64/mm/hugetlbpage.c

Lines changed: 5 additions & 0 deletions
@@ -53,6 +53,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {

arch/sparc64/mm/hugetlbpage.c

Lines changed: 5 additions & 0 deletions
@@ -235,6 +235,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {

include/linux/hugetlb.h

Lines changed: 1 addition & 0 deletions
@@ -35,6 +35,7 @@ extern int sysctl_hugetlb_shm_group;
 
 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,

mm/hugetlb.c

Lines changed: 7 additions & 0 deletions
@@ -386,6 +386,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 		if (!ptep)
 			continue;
 
+		if (huge_pmd_unshare(mm, &address, ptep))
+			continue;
+
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		if (pte_none(pte))
 			continue;
@@ -658,11 +661,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
 
+	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
 	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += HPAGE_SIZE) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
+		if (huge_pmd_unshare(mm, &address, ptep))
+			continue;
 		if (!pte_none(*ptep)) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
@@ -671,6 +677,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 
 	flush_tlb_range(vma, start, end);
 }
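
Note how huge_pmd_unshare() cooperates with both loops above: on success it rewinds *addr so that the caller's address += HPAGE_SIZE step lands exactly on the next pud boundary, skipping the rest of the range covered by the shared pmd page. A small sketch of that arithmetic, not from the commit; the 2 MB huge page size and 512 pmd entries per page table page are assumptions matching i386 PAE:

	#include <stdio.h>

	#define HPAGE_SIZE	(2UL << 20)	/* 2 MB huge page (assumed) */
	#define PTRS_PER_PTE	512UL		/* entries per page table page (assumed) */
	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		/* somewhere inside the 1 GB pud range starting at 0x40000000 */
		unsigned long address = 0x40600000UL;

		/* what huge_pmd_unshare() does on success ... */
		address = ALIGN(address, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
		/* ... and what the caller's for-loop then does: */
		address += HPAGE_SIZE;

		/* prints 0x80000000: the next pud boundary */
		printf("loop resumes at %#lx\n", address);
		return 0;
	}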
