Skip to content

Commit 0034d39

Browse files
kvaneeshmpe
authored andcommitted
powerpc/mm/hash64: Map all the kernel regions in the same 0xc range
This patch maps vmalloc, IO and vmemmap regions in the 0xc address range instead of the current 0xd and 0xf range. This brings the mapping closer to radix translation mode. With hash 64K page size each of these regions is 512TB whereas with 4K config we are limited by the max page table range of 64TB and hence these regions are of 16TB size. The kernel mapping is now: On 4K hash kernel_region_map_size = 16TB kernel vmalloc start = 0xc000100000000000 kernel IO start = 0xc000200000000000 kernel vmemmap start = 0xc000300000000000 64K hash, 64K radix and 4k radix: kernel_region_map_size = 512TB kernel vmalloc start = 0xc008000000000000 kernel IO start = 0xc00a000000000000 kernel vmemmap start = 0xc00c000000000000 Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
1 parent a35a3c6 commit 0034d39

File tree

18 files changed

+172
-109
lines changed

18 files changed

+172
-109
lines changed

arch/powerpc/include/asm/book3s/64/hash-4k.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,19 @@
1313
*/
1414
#define MAX_EA_BITS_PER_CONTEXT 46
1515

16+
/*
17+
* Our page table limits us to 64TB. Hence for the kernel mapping,
18+
* each MAP area is limited to 16 TB.
19+
* The four map areas are: linear mapping, vmap, IO and vmemmap
20+
*/
21+
#define H_KERN_MAP_SIZE (ASM_CONST(1) << (MAX_EA_BITS_PER_CONTEXT - 2))
22+
23+
/*
24+
* Define the address range of the kernel non-linear virtual area
25+
* 16TB
26+
*/
27+
#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
28+
1629
#ifndef __ASSEMBLY__
1730
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
1831
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)

arch/powerpc/include/asm/book3s/64/hash-64k.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@
1414
*/
1515
#define MAX_EA_BITS_PER_CONTEXT 49
1616

17+
/*
18+
* We use one context for each MAP area.
19+
*/
20+
#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
21+
22+
/*
23+
* Define the address range of the kernel non-linear virtual area
24+
* 2PB
25+
*/
26+
#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000)
27+
1728
/*
1829
* 64k aligned address free up few of the lower bits of RPN for us
1930
* We steal that here. For more details look at pte_pfn/pfn_pte()

arch/powerpc/include/asm/book3s/64/hash.h

Lines changed: 61 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
3030
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
3131
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
32+
/*
33+
* Top 2 bits are ignored in page table walk.
34+
*/
35+
#define EA_MASK (~(0xcUL << 60))
3236

3337
/*
3438
* We store the slot details in the second half of page table.
@@ -42,53 +46,56 @@
4246
#endif
4347

4448
/*
45-
* Define the address range of the kernel non-linear virtual area. In contrast
46-
* to the linear mapping, this is managed using the kernel page tables and then
47-
* inserted into the hash page table to actually take effect, similarly to user
48-
* mappings.
49+
* +------------------------------+
50+
* | |
51+
* | |
52+
* | |
53+
* +------------------------------+ Kernel virtual map end (0xc00e000000000000)
54+
* | |
55+
* | |
56+
* | 512TB/16TB of vmemmap |
57+
* | |
58+
* | |
59+
* +------------------------------+ Kernel vmemmap start
60+
* | |
61+
* | 512TB/16TB of IO map |
62+
* | |
63+
* +------------------------------+ Kernel IO map start
64+
* | |
65+
* | 512TB/16TB of vmap |
66+
* | |
67+
* +------------------------------+ Kernel virt start (0xc008000000000000)
68+
* | |
69+
* | |
70+
* | |
71+
* +------------------------------+ Kernel linear (0xc.....)
4972
*/
50-
#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
5173

52-
/*
53-
* Allow virtual mapping of one context size.
54-
* 512TB for 64K page size
55-
* 64TB for 4K page size
56-
*/
57-
#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
74+
#define H_VMALLOC_START H_KERN_VIRT_START
75+
#define H_VMALLOC_SIZE H_KERN_MAP_SIZE
76+
#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
5877

59-
/*
60-
* 8TB IO mapping size
61-
*/
62-
#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */
63-
64-
/*
65-
* The vmalloc space starts at the beginning of the kernel non-linear virtual
66-
* region, and occupies 504T (64K) or 56T (4K)
67-
*/
68-
#define H_VMALLOC_START H_KERN_VIRT_START
69-
#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
70-
#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
78+
#define H_KERN_IO_START H_VMALLOC_END
79+
#define H_KERN_IO_SIZE H_KERN_MAP_SIZE
80+
#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE)
7181

72-
#define H_KERN_IO_START H_VMALLOC_END
73-
#define H_KERN_IO_END (H_KERN_VIRT_START + H_KERN_VIRT_SIZE)
82+
#define H_VMEMMAP_START H_KERN_IO_END
83+
#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE
84+
#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE)
7485

7586
/*
7687
* Region IDs
7788
*/
78-
#define REGION_SHIFT 60UL
79-
#define REGION_MASK (0xfUL << REGION_SHIFT)
80-
#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
81-
82-
#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START))
83-
#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
84-
#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
85-
#define USER_REGION_ID (0UL)
89+
#define USER_REGION_ID 1
90+
#define KERNEL_REGION_ID 2
91+
#define VMALLOC_REGION_ID 3
92+
#define IO_REGION_ID 4
93+
#define VMEMMAP_REGION_ID 5
8694

8795
/*
8896
* Defines the address of the vmemmap area, in its own region on
8997
* hash table CPUs.
9098
*/
91-
#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
9299

93100
#ifdef CONFIG_PPC_MM_SLICES
94101
#define HAVE_ARCH_UNMAPPED_AREA
@@ -104,6 +111,26 @@
104111
#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
105112

106113
#ifndef __ASSEMBLY__
114+
static inline int get_region_id(unsigned long ea)
115+
{
116+
int id = (ea >> 60UL);
117+
118+
if (id == 0)
119+
return USER_REGION_ID;
120+
121+
VM_BUG_ON(id != 0xc);
122+
VM_BUG_ON(ea >= H_VMEMMAP_END);
123+
124+
if (ea >= H_VMEMMAP_START)
125+
return VMEMMAP_REGION_ID;
126+
else if (ea >= H_KERN_IO_START)
127+
return IO_REGION_ID;
128+
else if (ea >= H_VMALLOC_START)
129+
return VMALLOC_REGION_ID;
130+
131+
return KERNEL_REGION_ID;
132+
}
133+
107134
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
108135
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
109136
static inline int hash__pgd_bad(pgd_t pgd)

arch/powerpc/include/asm/book3s/64/mmu-hash.h

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,8 @@ extern void slb_set_size(u16 size);
588588
#endif
589589

590590
#define MAX_VMALLOC_CTX_CNT 1
591-
#define MAX_MEMMAP_CTX_CNT 1
591+
#define MAX_IO_CTX_CNT 1
592+
#define MAX_VMEMMAP_CTX_CNT 1
592593

593594
/*
594595
* 256MB segment
@@ -601,13 +602,10 @@ extern void slb_set_size(u16 size);
601602
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
602603
* because of the modulo operation in vsid scramble.
603604
*
604-
* We add one extra context to MIN_USER_CONTEXT so that we can map kernel
605-
* context easily. The +1 is to map the unused 0xe region mapping.
606605
*/
607606
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
608607
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
609-
MAX_MEMMAP_CTX_CNT + 2)
610-
608+
MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
611609
/*
612610
* For platforms that support on 65bit VA we limit the context bits
613611
*/
@@ -776,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
776774
/*
777775
* Bad address. We return VSID 0 for that
778776
*/
779-
if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
777+
if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
780778
return 0;
781779

782780
if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
@@ -803,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
803801
* 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
804802
* 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
805803
* 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
806-
807-
* 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
808-
* 0x00006 - Not used - Can map 0xe000000000000000 range.
809-
* 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
810804
*
811-
* So we can compute the context from the region (top nibble) by
812-
* subtracting 11, or 0xc - 1.
805+
* vmap, IO, vmemmap
806+
*
807+
* 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
808+
* 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
809+
* 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
810+
*
813811
*/
814812
static inline unsigned long get_kernel_context(unsigned long ea)
815813
{
816-
unsigned long region_id = REGION_ID(ea);
814+
unsigned long region_id = get_region_id(ea);
817815
unsigned long ctx;
818816
/*
819-
* For linear mapping we do support multiple context
817+
* Depending on the kernel config, the kernel region can have one context
818+
* or more.
820819
*/
821820
if (region_id == KERNEL_REGION_ID) {
822821
/*
823822
* We already verified ea to be not beyond the addr limit.
824823
*/
825-
ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
824+
ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
826825
} else
827-
ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
826+
ctx = region_id + MAX_KERNEL_CTX_CNT - 2;
828827
return ctx;
829828
}
830829

arch/powerpc/include/asm/book3s/64/pgtable.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,6 @@ extern unsigned long __kernel_virt_size;
279279
extern unsigned long __kernel_io_start;
280280
extern unsigned long __kernel_io_end;
281281
#define KERN_VIRT_START __kernel_virt_start
282-
#define KERN_VIRT_SIZE __kernel_virt_size
283282
#define KERN_IO_START __kernel_io_start
284283
#define KERN_IO_END __kernel_io_end
285284

arch/powerpc/include/asm/book3s/64/radix.h

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -72,19 +72,17 @@
7272
* | |
7373
* | |
7474
* | |
75-
* +------------------------------+ Kernel IO map end (0xc010000000000000)
75+
* +------------------------------+ Kernel vmemmap end (0xc010000000000000)
7676
* | |
77+
* | 512TB |
7778
* | |
78-
* | 1/2 of virtual map |
79+
* +------------------------------+ Kernel IO map end/vmemmap start
7980
* | |
81+
* | 512TB |
8082
* | |
81-
* +------------------------------+ Kernel IO map start
83+
* +------------------------------+ Kernel vmap end/ IO map start
8284
* | |
83-
* | 1/4 of virtual map |
84-
* | |
85-
* +------------------------------+ Kernel vmemap start
86-
* | |
87-
* | 1/4 of virtual map |
85+
* | 512TB |
8886
* | |
8987
* +------------------------------+ Kernel virt start (0xc008000000000000)
9088
* | |
@@ -93,25 +91,24 @@
9391
* +------------------------------+ Kernel linear (0xc.....)
9492
*/
9593

96-
#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
97-
#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
98-
94+
#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
9995
/*
100-
* The vmalloc space starts at the beginning of that region, and
101-
* occupies a quarter of it on radix config.
102-
* (we keep a quarter for the virtual memmap)
96+
* 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
97+
* the same value as hash.
10398
*/
99+
#define RADIX_KERN_MAP_SIZE (1UL << 49)
100+
104101
#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
105-
#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2)
102+
#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
106103
#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
107-
/*
108-
* Defines the address of the vmemap area, in its own region on
109-
* hash table CPUs.
110-
*/
111-
#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
112104

113-
#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
114-
#define RADIX_KERN_IO_END (RADIX_KERN_VIRT_START + RADIX_KERN_VIRT_SIZE)
105+
#define RADIX_KERN_IO_START RADIX_VMALLOC_END
106+
#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
107+
#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
108+
109+
#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
110+
#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
111+
#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
115112

116113
#ifndef __ASSEMBLY__
117114
#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)

arch/powerpc/include/asm/page.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ static inline bool pfn_valid(unsigned long pfn)
139139
* return true for some vmalloc addresses, which is incorrect. So explicitly
140140
* check that the address is in the kernel region.
141141
*/
142-
#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
142+
/* maybe we can drop get_region_id */
143+
#define virt_addr_valid(kaddr) (get_region_id((unsigned long)kaddr) == KERNEL_REGION_ID && \
143144
pfn_valid(virt_to_pfn(kaddr)))
144145
#else
145146
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))

arch/powerpc/kvm/book3s_hv_rm_xics.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
822822
raddr = per_cpu_ptr(addr, cpu);
823823
l = (unsigned long)raddr;
824824

825-
if (REGION_ID(l) == VMALLOC_REGION_ID) {
825+
if (get_region_id(l) == VMALLOC_REGION_ID) {
826826
l = vmalloc_to_phys(raddr);
827827
raddr = (unsigned int *)l;
828828
}

arch/powerpc/mm/copro_fault.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
105105
u64 vsid, vsidkey;
106106
int psize, ssize;
107107

108-
switch (REGION_ID(ea)) {
108+
switch (get_region_id(ea)) {
109109
case USER_REGION_ID:
110110
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
111111
if (mm == NULL)
@@ -117,10 +117,14 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
117117
break;
118118
case VMALLOC_REGION_ID:
119119
pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
120-
if (ea < VMALLOC_END)
121-
psize = mmu_vmalloc_psize;
122-
else
123-
psize = mmu_io_psize;
120+
psize = mmu_vmalloc_psize;
121+
ssize = mmu_kernel_ssize;
122+
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
123+
vsidkey = SLB_VSID_KERNEL;
124+
break;
125+
case IO_REGION_ID:
126+
pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
127+
psize = mmu_io_psize;
124128
ssize = mmu_kernel_ssize;
125129
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
126130
vsidkey = SLB_VSID_KERNEL;

0 commit comments

Comments
 (0)