Skip to content

Commit bb1520d

Browse files
Alexander Gordeev
Alexander Gordeev
authored and committed
s390/mm: start kernel with DAT enabled
The setup of the kernel virtual address space is spread throughout the sources, boot stages and config options like this: 1. The available physical memory regions are queried and stored as mem_detect information for later use in the decompressor. 2. Based on the physical memory availability the virtual memory layout is established in the decompressor; 3. If CONFIG_KASAN is disabled the kernel paging setup code populates kernel pgtables and turns DAT mode on. It uses the information stored at step [1]. 4. If CONFIG_KASAN is enabled the kernel early boot kasan setup populates kernel pgtables and turns DAT mode on. It uses the information stored at step [1]. The kasan setup creates early_pg_dir directory and directly overwrites swapper_pg_dir entries to make shadow memory pages available. Move the kernel virtual memory setup to the decompressor and start the kernel with DAT turned on right from the very first istruction. That completely eliminates the boot phase when the kernel runs in DAT-off mode, simplies the overall design and consolidates pgtables setup. The identity mapping is created in the decompressor, while kasan shadow mappings are still created by the early boot kernel code. Share with decompressor the existing kasan memory allocator. It decreases the size of a newly requested memory block from pgalloc_pos and ensures that kernel image is not overwritten. pgalloc_low and pgalloc_pos pointers are made preserved boot variables for that. Use the bootdata infrastructure to setup swapper_pg_dir and invalid_pg_dir directories used by the kernel later. The interim early_pg_dir directory established by the kasan initialization code gets eliminated as result. As the kernel runs in DAT-on mode only the PSW_KERNEL_BITS define gets PSW_MASK_DAT bit by default. Additionally, the setup_lowcore_dat_off() and setup_lowcore_dat_on() routines get merged, since there is no DAT-off mode stage anymore. 
The memory mappings are created with RW+X protection that allows the early boot code setting up all necessary data and services for the kernel being booted. Just before the paging is enabled the memory protection is changed to RO+X for text, RO+NX for read-only data and RW+NX for kernel data and the identity mapping. Reviewed-by: Heiko Carstens <[email protected]> Signed-off-by: Alexander Gordeev <[email protected]> Signed-off-by: Heiko Carstens <[email protected]>
1 parent bd50b74 commit bb1520d

File tree

17 files changed

+448
-189
lines changed

17 files changed

+448
-189
lines changed

arch/s390/boot/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ endif
3535

3636
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
3737

38-
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
38+
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o
3939
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
4040
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
4141
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o

arch/s390/boot/boot.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ struct machine_info {
1616

1717
struct vmlinux_info {
1818
unsigned long default_lma;
19-
void (*entry)(void);
19+
unsigned long entry;
2020
unsigned long image_size; /* does not include .bss */
2121
unsigned long bss_size; /* uncompressed image .bss size */
2222
unsigned long bootdata_off;
@@ -27,6 +27,9 @@ struct vmlinux_info {
2727
unsigned long rela_dyn_start;
2828
unsigned long rela_dyn_end;
2929
unsigned long amode31_size;
30+
unsigned long init_mm_off;
31+
unsigned long swapper_pg_dir_off;
32+
unsigned long invalid_pg_dir_off;
3033
};
3134

3235
void startup_kernel(void);
@@ -41,6 +44,7 @@ void print_missing_facilities(void);
4144
void sclp_early_setup_buffer(void);
4245
void print_pgm_check_info(void);
4346
unsigned long get_random_base(unsigned long safe_addr);
47+
void setup_vmem(unsigned long online_end, unsigned long asce_limit);
4448
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
4549
void error(char *m);
4650

arch/s390/boot/startup.c

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <asm/diag.h>
1212
#include <asm/uv.h>
1313
#include <asm/abs_lowcore.h>
14+
#include <asm/mem_detect.h>
1415
#include "decompressor.h"
1516
#include "boot.h"
1617
#include "uv.h"
@@ -166,9 +167,10 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
166167
#endif
167168
}
168169

169-
static void setup_kernel_memory_layout(void)
170+
static unsigned long setup_kernel_memory_layout(void)
170171
{
171172
unsigned long vmemmap_start;
173+
unsigned long asce_limit;
172174
unsigned long rte_size;
173175
unsigned long pages;
174176
unsigned long vmax;
@@ -183,18 +185,18 @@ static void setup_kernel_memory_layout(void)
183185
vmalloc_size > _REGION2_SIZE ||
184186
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
185187
_REGION2_SIZE) {
186-
vmax = _REGION1_SIZE;
188+
asce_limit = _REGION1_SIZE;
187189
rte_size = _REGION2_SIZE;
188190
} else {
189-
vmax = _REGION2_SIZE;
191+
asce_limit = _REGION2_SIZE;
190192
rte_size = _REGION3_SIZE;
191193
}
192194
/*
193195
* forcing modules and vmalloc area under the ultravisor
194196
* secure storage limit, so that any vmalloc allocation
195197
* we do could be used to back secure guest storage.
196198
*/
197-
vmax = adjust_to_uv_max(vmax);
199+
vmax = adjust_to_uv_max(asce_limit);
198200
#ifdef CONFIG_KASAN
199201
/* force vmalloc and modules below kasan shadow */
200202
vmax = min(vmax, KASAN_SHADOW_START);
@@ -223,6 +225,8 @@ static void setup_kernel_memory_layout(void)
223225
/* make sure vmemmap doesn't overlay with vmalloc area */
224226
VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
225227
vmemmap = (struct page *)vmemmap_start;
228+
229+
return asce_limit;
226230
}
227231

228232
/*
@@ -256,6 +260,9 @@ static void offset_vmlinux_info(unsigned long offset)
256260
vmlinux.rela_dyn_start += offset;
257261
vmlinux.rela_dyn_end += offset;
258262
vmlinux.dynsym_start += offset;
263+
vmlinux.init_mm_off += offset;
264+
vmlinux.swapper_pg_dir_off += offset;
265+
vmlinux.invalid_pg_dir_off += offset;
259266
}
260267

261268
static unsigned long reserve_amode31(unsigned long safe_addr)
@@ -268,7 +275,10 @@ void startup_kernel(void)
268275
{
269276
unsigned long random_lma;
270277
unsigned long safe_addr;
278+
unsigned long asce_limit;
279+
unsigned long online_end;
271280
void *img;
281+
psw_t psw;
272282

273283
detect_facilities();
274284

@@ -290,7 +300,8 @@ void startup_kernel(void)
290300
sanitize_prot_virt_host();
291301
setup_ident_map_size(detect_memory());
292302
setup_vmalloc_size();
293-
setup_kernel_memory_layout();
303+
asce_limit = setup_kernel_memory_layout();
304+
online_end = min(get_mem_detect_end(), ident_map_size);
294305

295306
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
296307
random_lma = get_random_base(safe_addr);
@@ -307,9 +318,23 @@ void startup_kernel(void)
307318
} else if (__kaslr_offset)
308319
memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
309320

321+
/*
322+
* The order of the following operations is important:
323+
*
324+
* - handle_relocs() must follow clear_bss_section() to establish static
325+
* memory references to data in .bss to be used by setup_vmem()
326+
* (i.e init_mm.pgd)
327+
*
328+
* - setup_vmem() must follow handle_relocs() to be able using
329+
* static memory references to data in .bss (i.e init_mm.pgd)
330+
*
331+
* - copy_bootdata() must follow setup_vmem() to propagate changes to
332+
* bootdata made by setup_vmem()
333+
*/
310334
clear_bss_section();
311-
copy_bootdata();
312335
handle_relocs(__kaslr_offset);
336+
setup_vmem(online_end, asce_limit);
337+
copy_bootdata();
313338

314339
if (__kaslr_offset) {
315340
/*
@@ -321,5 +346,11 @@ void startup_kernel(void)
321346
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
322347
memset(img, 0, vmlinux.image_size);
323348
}
324-
vmlinux.entry();
349+
350+
/*
351+
* Jump to the decompressed kernel entry point and switch DAT mode on.
352+
*/
353+
psw.addr = vmlinux.entry;
354+
psw.mask = PSW_KERNEL_BITS;
355+
__load_psw(psw);
325356
}

0 commit comments

Comments
 (0)