Skip to content

Commit 2352904

Browse files
Mark Rutland, willdeacon
Mark Rutland
authored and committed
arm64: entry: fix non-NMI user<->kernel transitions
When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE, the kernel will WARN() at boot time that interrupts are enabled when we call context_tracking_user_enter(), despite the DAIF flags indicating that IRQs are masked.

The problem is that we're not tracking IRQ flag changes accurately, and so lockdep believes interrupts are enabled when they are not (and vice-versa).

We can shuffle things so as to make this more accurate. For kernel->user transitions there are a number of constraints we need to consider:

1) When we call __context_tracking_user_enter() HW IRQs must be disabled and lockdep must be up-to-date with this.

2) Userspace should be treated as having IRQs enabled from the PoV of both lockdep and tracing.

3) As context_tracking_user_enter() stops RCU from watching, we cannot use RCU after calling it.

4) IRQ flag tracing and lockdep have state that must be manipulated before RCU is disabled.

... with similar constraints applying for user->kernel transitions, with the ordering reversed.

The generic entry code has enter_from_user_mode() and exit_to_user_mode() helpers to handle this. We can't use those directly, so we add arm64 copies for now (without the instrumentation markers which aren't used on arm64). These replace the existing user_exit() and user_exit_irqoff() calls spread throughout handlers, and the exception unmasking is left as-is.

Note that:

* The accounting for debug exceptions from userspace now happens in el0_dbg() and ret_to_user(), so this is removed from debug_exception_enter() and debug_exception_exit(). As user_exit_irqoff() wakes RCU, the userspace-specific check is removed.

* The accounting for syscalls now happens in el0_svc(), el0_svc_compat(), and ret_to_user(), so this is removed from el0_svc_common(). This does not adversely affect the workaround for erratum 1463225, as this does not depend on any of the state tracking.

* In ret_to_user() we mask interrupts with local_daif_mask(), and so we need to inform lockdep and tracing. Here a trace_hardirqs_off() is sufficient and safe as we have not yet exited kernel context and RCU is usable.

* As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only needs to check for the latter.

* EL0 SError handling will be dealt with in a subsequent patch, as this needs to be treated as an NMI.

Prior to this patch, booting an appropriately-configured kernel would result in splats as below:

| DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())
| WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0
| Modules linked in:
| CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--)
| pc : check_flags.part.54+0x1dc/0x1f0
| lr : check_flags.part.54+0x1dc/0x1f0
| sp : ffff80001003bd80
| x29: ffff80001003bd80 x28: ffff66ce801e0000
| x27: 00000000ffffffff x26: 00000000000003c0
| x25: 0000000000000000 x24: ffffc31842527258
| x23: ffffc31842491368 x22: ffffc3184282d000
| x21: 0000000000000000 x20: 0000000000000001
| x19: ffffc318432ce000 x18: 0080000000000000
| x17: 0000000000000000 x16: ffffc31840f18a78
| x15: 0000000000000001 x14: ffffc3184285c810
| x13: 0000000000000001 x12: 0000000000000000
| x11: ffffc318415857a0 x10: ffffc318406614c0
| x9 : ffffc318415857a0 x8 : ffffc31841f1d000
| x7 : 647261685f706564 x6 : ffffc3183ff7c66c
| x5 : ffff66ce801e0000 x4 : 0000000000000000
| x3 : ffffc3183fe00000 x2 : ffffc31841500000
| x1 : e956dc24146b3500 x0 : 0000000000000000
| Call trace:
| check_flags.part.54+0x1dc/0x1f0
| lock_is_held_type+0x10c/0x188
| rcu_read_lock_sched_held+0x70/0x98
| __context_tracking_enter+0x310/0x350
| context_tracking_enter.part.3+0x5c/0xc8
| context_tracking_user_enter+0x6c/0x80
| finish_ret_to_user+0x2c/0x13c

Signed-off-by: Mark Rutland <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: James Morse <[email protected]>
Cc: Will Deacon <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Will Deacon <[email protected]>
1 parent 105fc33 commit 2352904

File tree

5 files changed

+51
-48
lines changed

5 files changed

+51
-48
lines changed

arch/arm64/include/asm/exception.h

+1
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr)
3434
asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
3535
asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
3636
asmlinkage void enter_from_user_mode(void);
37+
asmlinkage void exit_to_user_mode(void);
3738
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
3839
void do_undefinstr(struct pt_regs *regs);
3940
void do_bti(struct pt_regs *regs);

arch/arm64/kernel/entry-common.c

+26-14
Original file line number | Diff line number | Diff line change
@@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
119119

120120
asmlinkage void noinstr enter_from_user_mode(void)
121121
{
122+
lockdep_hardirqs_off(CALLER_ADDR0);
122123
CT_WARN_ON(ct_state() != CONTEXT_USER);
123124
user_exit_irqoff();
125+
trace_hardirqs_off_finish();
126+
}
127+
128+
asmlinkage void noinstr exit_to_user_mode(void)
129+
{
130+
trace_hardirqs_on_prepare();
131+
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
132+
user_enter_irqoff();
133+
lockdep_hardirqs_on(CALLER_ADDR0);
124134
}
125135

126136
static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
127137
{
128138
unsigned long far = read_sysreg(far_el1);
129139

130-
user_exit_irqoff();
140+
enter_from_user_mode();
131141
local_daif_restore(DAIF_PROCCTX);
132142
far = untagged_addr(far);
133143
do_mem_abort(far, esr, regs);
@@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
145155
if (!is_ttbr0_addr(far))
146156
arm64_apply_bp_hardening();
147157

148-
user_exit_irqoff();
158+
enter_from_user_mode();
149159
local_daif_restore(DAIF_PROCCTX);
150160
do_mem_abort(far, esr, regs);
151161
}
152162

153163
static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
154164
{
155-
user_exit_irqoff();
165+
enter_from_user_mode();
156166
local_daif_restore(DAIF_PROCCTX);
157167
do_fpsimd_acc(esr, regs);
158168
}
159169

160170
static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
161171
{
162-
user_exit_irqoff();
172+
enter_from_user_mode();
163173
local_daif_restore(DAIF_PROCCTX);
164174
do_sve_acc(esr, regs);
165175
}
166176

167177
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
168178
{
169-
user_exit_irqoff();
179+
enter_from_user_mode();
170180
local_daif_restore(DAIF_PROCCTX);
171181
do_fpsimd_exc(esr, regs);
172182
}
173183

174184
static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
175185
{
176-
user_exit_irqoff();
186+
enter_from_user_mode();
177187
local_daif_restore(DAIF_PROCCTX);
178188
do_sysinstr(esr, regs);
179189
}
@@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
185195
if (!is_ttbr0_addr(instruction_pointer(regs)))
186196
arm64_apply_bp_hardening();
187197

188-
user_exit_irqoff();
198+
enter_from_user_mode();
189199
local_daif_restore(DAIF_PROCCTX);
190200
do_sp_pc_abort(far, esr, regs);
191201
}
192202

193203
static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
194204
{
195-
user_exit_irqoff();
205+
enter_from_user_mode();
196206
local_daif_restore(DAIF_PROCCTX);
197207
do_sp_pc_abort(regs->sp, esr, regs);
198208
}
199209

200210
static void noinstr el0_undef(struct pt_regs *regs)
201211
{
202-
user_exit_irqoff();
212+
enter_from_user_mode();
203213
local_daif_restore(DAIF_PROCCTX);
204214
do_undefinstr(regs);
205215
}
206216

207217
static void noinstr el0_bti(struct pt_regs *regs)
208218
{
209-
user_exit_irqoff();
219+
enter_from_user_mode();
210220
local_daif_restore(DAIF_PROCCTX);
211221
do_bti(regs);
212222
}
213223

214224
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
215225
{
216-
user_exit_irqoff();
226+
enter_from_user_mode();
217227
local_daif_restore(DAIF_PROCCTX);
218228
bad_el0_sync(regs, 0, esr);
219229
}
@@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
226236
if (system_uses_irq_prio_masking())
227237
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
228238

229-
user_exit_irqoff();
239+
enter_from_user_mode();
230240
do_debug_exception(far, esr, regs);
231241
local_daif_restore(DAIF_PROCCTX_NOIRQ);
232242
}
@@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs)
236246
if (system_uses_irq_prio_masking())
237247
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
238248

249+
enter_from_user_mode();
239250
do_el0_svc(regs);
240251
}
241252

242253
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
243254
{
244-
user_exit_irqoff();
255+
enter_from_user_mode();
245256
local_daif_restore(DAIF_PROCCTX);
246257
do_ptrauth_fault(regs, esr);
247258
}
@@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
302313
#ifdef CONFIG_COMPAT
303314
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
304315
{
305-
user_exit_irqoff();
316+
enter_from_user_mode();
306317
local_daif_restore(DAIF_PROCCTX);
307318
do_cp15instr(esr, regs);
308319
}
@@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
312323
if (system_uses_irq_prio_masking())
313324
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
314325

326+
enter_from_user_mode();
315327
do_el0_svc_compat(regs);
316328
}
317329

arch/arm64/kernel/entry.S

+13-22
Original file line number | Diff line number | Diff line change
@@ -30,18 +30,18 @@
3030
#include <asm/unistd.h>
3131

3232
/*
33-
* Context tracking subsystem. Used to instrument transitions
34-
* between user and kernel mode.
33+
* Context tracking and irqflag tracing need to instrument transitions between
34+
* user and kernel mode.
3535
*/
36-
.macro ct_user_exit_irqoff
37-
#ifdef CONFIG_CONTEXT_TRACKING
36+
.macro user_exit_irqoff
37+
#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
3838
bl enter_from_user_mode
3939
#endif
4040
.endm
4141

42-
.macro ct_user_enter
43-
#ifdef CONFIG_CONTEXT_TRACKING
44-
bl context_tracking_user_enter
42+
.macro user_enter_irqoff
43+
#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
44+
bl exit_to_user_mode
4545
#endif
4646
.endm
4747

@@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
298298
alternative_else_nop_endif
299299

300300
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
301-
.if \el == 0
302-
ct_user_enter
303-
.endif
304301

305302
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
306303
alternative_if_not ARM64_HAS_PAN
@@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
700697
kernel_entry 0
701698
el0_irq_naked:
702699
gic_prio_irq_setup pmr=x20, tmp=x0
703-
ct_user_exit_irqoff
700+
user_exit_irqoff
704701
enable_da_f
705702

706-
#ifdef CONFIG_TRACE_IRQFLAGS
707-
bl trace_hardirqs_off
708-
#endif
709-
710703
tbz x22, #55, 1f
711704
bl do_el0_irq_bp_hardening
712705
1:
713706
irq_handler
714707

715-
#ifdef CONFIG_TRACE_IRQFLAGS
716-
bl trace_hardirqs_on
717-
#endif
718708
b ret_to_user
719709
SYM_CODE_END(el0_irq)
720710

@@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error)
733723
el0_error_naked:
734724
mrs x25, esr_el1
735725
gic_prio_kentry_setup tmp=x2
736-
ct_user_exit_irqoff
726+
user_exit_irqoff
737727
enable_dbg
738728
mov x0, sp
739729
mov x1, x25
@@ -748,10 +738,14 @@ SYM_CODE_END(el0_error)
748738
SYM_CODE_START_LOCAL(ret_to_user)
749739
disable_daif
750740
gic_prio_kentry_setup tmp=x3
741+
#ifdef CONFIG_TRACE_IRQFLAGS
742+
bl trace_hardirqs_off
743+
#endif
751744
ldr x19, [tsk, #TSK_TI_FLAGS]
752745
and x2, x19, #_TIF_WORK_MASK
753746
cbnz x2, work_pending
754747
finish_ret_to_user:
748+
user_enter_irqoff
755749
/* Ignore asynchronous tag check faults in the uaccess routines */
756750
clear_mte_async_tcf
757751
enable_step_tsk x19, x2
@@ -767,9 +761,6 @@ work_pending:
767761
mov x0, sp // 'regs'
768762
mov x1, x19
769763
bl do_notify_resume
770-
#ifdef CONFIG_TRACE_IRQFLAGS
771-
bl trace_hardirqs_on // enabled while in userspace
772-
#endif
773764
ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
774765
b finish_ret_to_user
775766
SYM_CODE_END(ret_to_user)

arch/arm64/kernel/syscall.c

-1
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
120120
*/
121121

122122
cortex_a76_erratum_1463225_svc_handler();
123-
user_exit_irqoff();
124123
local_daif_restore(DAIF_PROCCTX);
125124

126125
if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {

arch/arm64/mm/fault.c

+11-11
Original file line number | Diff line number | Diff line change
@@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr,
789789
*/
790790
static void debug_exception_enter(struct pt_regs *regs)
791791
{
792-
/*
793-
* Tell lockdep we disabled irqs in entry.S. Do nothing if they were
794-
* already disabled to preserve the last enabled/disabled addresses.
795-
*/
796-
if (interrupts_enabled(regs))
797-
trace_hardirqs_off();
792+
if (!user_mode(regs)) {
793+
/*
794+
* Tell lockdep we disabled irqs in entry.S. Do nothing if they were
795+
* already disabled to preserve the last enabled/disabled addresses.
796+
*/
797+
if (interrupts_enabled(regs))
798+
trace_hardirqs_off();
798799

799-
if (user_mode(regs)) {
800-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
801-
} else {
802800
/*
803801
* We might have interrupted pretty much anything. In
804802
* fact, if we're a debug exception, we can even interrupt
@@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs)
819817
{
820818
preempt_enable_no_resched();
821819

822-
if (!user_mode(regs))
823-
rcu_nmi_exit();
820+
if (user_mode(regs))
821+
return;
822+
823+
rcu_nmi_exit();
824824

825825
if (interrupts_enabled(regs))
826826
trace_hardirqs_on();

0 commit comments

Comments
 (0)