Skip to content

Commit 1493baa

Browse files
deepak0414gregkh
authored andcommitted
riscv: VMAP_STACK overflow detection thread-safe
[ Upstream commit be97d0d ] commit 31da94c ("riscv: add VMAP_STACK overflow detection") added support for CONFIG_VMAP_STACK. If overflow is detected, CPU switches to `shadow_stack` temporarily before switching finally to per-cpu `overflow_stack`. If two CPUs/harts are racing and end up in over flowing kernel stack, one or both will end up corrupting each other state because `shadow_stack` is not per-cpu. This patch optimizes per-cpu overflow stack switch by directly picking per-cpu `overflow_stack` and gets rid of `shadow_stack`. Following are the changes in this patch - Defines an asm macro to obtain per-cpu symbols in destination register. - In entry.S, when overflow is detected, per-cpu overflow stack is located using per-cpu asm macro. Computing per-cpu symbol requires a temporary register. x31 is saved away into CSR_SCRATCH (CSR_SCRATCH is anyways zero since we're in kernel). Please see Links for additional relevant disccussion and alternative solution. Tested by `echo EXHAUST_STACK > /sys/kernel/debug/provoke-crash/DIRECT` Kernel crash log below Insufficient stack space to handle exception!/debug/provoke-crash/DIRECT Task stack: [0xff20000010a98000..0xff20000010a9c000] Overflow stack: [0xff600001f7d98370..0xff600001f7d99370] CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34 Hardware name: riscv-virtio,qemu (DT) epc : __memset+0x60/0xfc ra : recursive_loop+0x48/0xc6 [lkdtm] epc : ffffffff808de0e4 ra : ffffffff0163a752 sp : ff20000010a97e80 gp : ffffffff815c0330 tp : ff600000820ea280 t0 : ff20000010a97e88 t1 : 000000000000002e t2 : 3233206874706564 s0 : ff20000010a982b0 s1 : 0000000000000012 a0 : ff20000010a97e88 a1 : 0000000000000000 a2 : 0000000000000400 a3 : ff20000010a98288 a4 : 0000000000000000 a5 : 0000000000000000 a6 : fffffffffffe43f0 a7 : 00007fffffffffff s2 : ff20000010a97e88 s3 : ffffffff01644680 s4 : ff20000010a9be90 s5 : ff600000842ba6c0 s6 : 00aaaaaac29e42b0 s7 : 00fffffff0aa3684 s8 : 00aaaaaac2978040 s9 : 0000000000000065 s10: 00ffffff8a7cad10 s11: 00ffffff8a76a4e0 t3 : ffffffff815dbaf4 t4 : ffffffff815dbaf4 t5 : ffffffff815dbab8 t6 : ff20000010a9bb48 status: 0000000200000120 badaddr: ff20000010a97e88 cause: 000000000000000f Kernel panic - not syncing: Kernel stack overflow CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34 Hardware name: riscv-virtio,qemu (DT) Call Trace: [<ffffffff80006754>] dump_backtrace+0x30/0x38 [<ffffffff808de798>] show_stack+0x40/0x4c [<ffffffff808ea2a8>] dump_stack_lvl+0x44/0x5c [<ffffffff808ea2d8>] dump_stack+0x18/0x20 [<ffffffff808dec06>] panic+0x126/0x2fe [<ffffffff800065ea>] walk_stackframe+0x0/0xf0 [<ffffffff0163a752>] recursive_loop+0x48/0xc6 [lkdtm] SMP: stopping secondary CPUs ---[ end Kernel panic - not syncing: Kernel stack overflow ]--- Cc: Guo Ren <[email protected]> Cc: Jisheng Zhang <[email protected]> Link: https://lore.kernel.org/linux-riscv/Y347B0x4VUNOd6V7@xhacker/T/#t Link: https://lore.kernel.org/lkml/[email protected]/ Signed-off-by: Deepak Gupta <[email protected]> Co-developed-by: Sami Tolvanen <[email protected]> Signed-off-by: Sami Tolvanen <[email protected]> Acked-by: Guo Ren <[email protected]> Tested-by: Nathan Chancellor <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent c99fff6 commit 1493baa

File tree

6 files changed

+34
-99
lines changed

6 files changed

+34
-99
lines changed

arch/riscv/include/asm/asm-prototypes.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
2525
DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
2626
DECLARE_DO_ERROR_INFO(do_trap_break);
2727

28-
asmlinkage unsigned long get_overflow_stack(void);
2928
asmlinkage void handle_bad_stack(struct pt_regs *regs);
3029
asmlinkage void do_page_fault(struct pt_regs *regs);
3130
asmlinkage void do_irq(struct pt_regs *regs);

arch/riscv/include/asm/asm.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,28 @@
8282
.endr
8383
.endm
8484

85+
#ifdef CONFIG_SMP
86+
#ifdef CONFIG_32BIT
87+
#define PER_CPU_OFFSET_SHIFT 2
88+
#else
89+
#define PER_CPU_OFFSET_SHIFT 3
90+
#endif
91+
92+
.macro asm_per_cpu dst sym tmp
93+
REG_L \tmp, TASK_TI_CPU_NUM(tp)
94+
slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT
95+
la \dst, __per_cpu_offset
96+
add \dst, \dst, \tmp
97+
REG_L \tmp, 0(\dst)
98+
la \dst, \sym
99+
add \dst, \dst, \tmp
100+
.endm
101+
#else /* CONFIG_SMP */
102+
.macro asm_per_cpu dst sym tmp
103+
la \dst, \sym
104+
.endm
105+
#endif /* CONFIG_SMP */
106+
85107
/* save all GPs except x1 ~ x5 */
86108
.macro save_from_x6_to_x31
87109
REG_S x6, PT_T1(sp)

arch/riscv/include/asm/thread_info.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@
3434

3535
#ifndef __ASSEMBLY__
3636

37-
extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
38-
extern unsigned long spin_shadow_stack;
39-
4037
#include <asm/processor.h>
4138
#include <asm/csr.h>
4239

arch/riscv/kernel/asm-offsets.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ void asm_offsets(void)
3939
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
4040
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
4141

42+
OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
4243
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
4344
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
4445
OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]);

arch/riscv/kernel/entry.S

Lines changed: 10 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
#include <asm/asm.h>
1111
#include <asm/csr.h>
1212
#include <asm/unistd.h>
13+
#include <asm/page.h>
1314
#include <asm/thread_info.h>
1415
#include <asm/asm-offsets.h>
1516
#include <asm/errata_list.h>
17+
#include <linux/sizes.h>
1618

1719
SYM_CODE_START(handle_exception)
1820
/*
@@ -170,67 +172,15 @@ SYM_CODE_END(ret_from_exception)
170172

171173
#ifdef CONFIG_VMAP_STACK
172174
SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
173-
/*
174-
* Takes the psuedo-spinlock for the shadow stack, in case multiple
175-
* harts are concurrently overflowing their kernel stacks. We could
176-
* store any value here, but since we're overflowing the kernel stack
177-
* already we only have SP to use as a scratch register. So we just
178-
* swap in the address of the spinlock, as that's definately non-zero.
179-
*
180-
* Pairs with a store_release in handle_bad_stack().
181-
*/
182-
1: la sp, spin_shadow_stack
183-
REG_AMOSWAP_AQ sp, sp, (sp)
184-
bnez sp, 1b
185-
186-
la sp, shadow_stack
187-
addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
188-
189-
//save caller register to shadow stack
190-
addi sp, sp, -(PT_SIZE_ON_STACK)
191-
REG_S x1, PT_RA(sp)
192-
REG_S x5, PT_T0(sp)
193-
REG_S x6, PT_T1(sp)
194-
REG_S x7, PT_T2(sp)
195-
REG_S x10, PT_A0(sp)
196-
REG_S x11, PT_A1(sp)
197-
REG_S x12, PT_A2(sp)
198-
REG_S x13, PT_A3(sp)
199-
REG_S x14, PT_A4(sp)
200-
REG_S x15, PT_A5(sp)
201-
REG_S x16, PT_A6(sp)
202-
REG_S x17, PT_A7(sp)
203-
REG_S x28, PT_T3(sp)
204-
REG_S x29, PT_T4(sp)
205-
REG_S x30, PT_T5(sp)
206-
REG_S x31, PT_T6(sp)
207-
208-
la ra, restore_caller_reg
209-
tail get_overflow_stack
210-
211-
restore_caller_reg:
212-
//save per-cpu overflow stack
213-
REG_S a0, -8(sp)
214-
//restore caller register from shadow_stack
215-
REG_L x1, PT_RA(sp)
216-
REG_L x5, PT_T0(sp)
217-
REG_L x6, PT_T1(sp)
218-
REG_L x7, PT_T2(sp)
219-
REG_L x10, PT_A0(sp)
220-
REG_L x11, PT_A1(sp)
221-
REG_L x12, PT_A2(sp)
222-
REG_L x13, PT_A3(sp)
223-
REG_L x14, PT_A4(sp)
224-
REG_L x15, PT_A5(sp)
225-
REG_L x16, PT_A6(sp)
226-
REG_L x17, PT_A7(sp)
227-
REG_L x28, PT_T3(sp)
228-
REG_L x29, PT_T4(sp)
229-
REG_L x30, PT_T5(sp)
230-
REG_L x31, PT_T6(sp)
175+
/* we reach here from kernel context, sscratch must be 0 */
176+
csrrw x31, CSR_SCRATCH, x31
177+
asm_per_cpu sp, overflow_stack, x31
178+
li x31, OVERFLOW_STACK_SIZE
179+
add sp, sp, x31
180+
/* zero out x31 again and restore x31 */
181+
xor x31, x31, x31
182+
csrrw x31, CSR_SCRATCH, x31
231183

232-
//load per-cpu overflow stack
233-
REG_L sp, -8(sp)
234184
addi sp, sp, -(PT_SIZE_ON_STACK)
235185

236186
//save context to overflow stack

arch/riscv/kernel/traps.c

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -408,48 +408,14 @@ int is_valid_bugaddr(unsigned long pc)
408408
#endif /* CONFIG_GENERIC_BUG */
409409

410410
#ifdef CONFIG_VMAP_STACK
411-
/*
412-
* Extra stack space that allows us to provide panic messages when the kernel
413-
* has overflowed its stack.
414-
*/
415-
static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
411+
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
416412
overflow_stack)__aligned(16);
417-
/*
418-
* A temporary stack for use by handle_kernel_stack_overflow. This is used so
419-
* we can call into C code to get the per-hart overflow stack. Usage of this
420-
* stack must be protected by spin_shadow_stack.
421-
*/
422-
long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16);
423-
424-
/*
425-
* A pseudo spinlock to protect the shadow stack from being used by multiple
426-
* harts concurrently. This isn't a real spinlock because the lock side must
427-
* be taken without a valid stack and only a single register, it's only taken
428-
* while in the process of panicing anyway so the performance and error
429-
* checking a proper spinlock gives us doesn't matter.
430-
*/
431-
unsigned long spin_shadow_stack;
432-
433-
asmlinkage unsigned long get_overflow_stack(void)
434-
{
435-
return (unsigned long)this_cpu_ptr(overflow_stack) +
436-
OVERFLOW_STACK_SIZE;
437-
}
438413

439414
asmlinkage void handle_bad_stack(struct pt_regs *regs)
440415
{
441416
unsigned long tsk_stk = (unsigned long)current->stack;
442417
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
443418

444-
/*
445-
* We're done with the shadow stack by this point, as we're on the
446-
* overflow stack. Tell any other concurrent overflowing harts that
447-
* they can proceed with panicing by releasing the pseudo-spinlock.
448-
*
449-
* This pairs with an amoswap.aq in handle_kernel_stack_overflow.
450-
*/
451-
smp_store_release(&spin_shadow_stack, 0);
452-
453419
console_verbose();
454420

455421
pr_emerg("Insufficient stack space to handle exception!\n");

0 commit comments

Comments
 (0)