
Commit 6fd5de0

fweisbec authored and gregkh committed
irq_work: Force raised irq work to run on irq work interrupt
commit 76a3306 upstream.

The nohz full kick, which restarts the tick when any resource depends on
it, can't be executed anywhere given the operation it does on timers.
If it is called from the scheduler or timers code, chances are that
we run into a deadlock.

This is why we run the nohz full kick from an irq work. That way we make
sure that the kick runs on a virgin context.

However, while that's the case when the irq work runs in its own dedicated
self-IPI, things are different for the big bunch of archs that don't
support the self-triggered way. In order to support them, irq works are
also handled by the timer interrupt as fallback.

Now when irq works run on the timer interrupt, the context isn't blank.
More precisely, they can run in the context of the hrtimer that runs the
tick. But the nohz kick cancels and restarts this hrtimer and cancelling
an hrtimer from itself isn't allowed. This is why we run in an endless
loop:

	Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 2
	CPU: 2 PID: 7538 Comm: kworker/u8:8 Not tainted 3.16.0+ #34
	Workqueue: btrfs-endio-write normal_work_helper [btrfs]
	 ffff880244c06c88 000000001b486fe1 ffff880244c06bf0 ffffffff8a7f1e37
	 ffffffff8ac52a18 ffff880244c06c78 ffffffff8a7ef928 0000000000000010
	 ffff880244c06c88 ffff880244c06c20 000000001b486fe1 0000000000000000
	Call Trace:
	 <NMI> [<ffffffff8a7f1e37>] dump_stack+0x4e/0x7a
	 [<ffffffff8a7ef928>] panic+0xd4/0x207
	 [<ffffffff8a1450e8>] watchdog_overflow_callback+0x118/0x120
	 [<ffffffff8a186b0e>] __perf_event_overflow+0xae/0x350
	 [<ffffffff8a184f80>] ? perf_event_task_disable+0xa0/0xa0
	 [<ffffffff8a01a4cf>] ? x86_perf_event_set_period+0xbf/0x150
	 [<ffffffff8a187934>] perf_event_overflow+0x14/0x20
	 [<ffffffff8a020386>] intel_pmu_handle_irq+0x206/0x410
	 [<ffffffff8a01937b>] perf_event_nmi_handler+0x2b/0x50
	 [<ffffffff8a007b72>] nmi_handle+0xd2/0x390
	 [<ffffffff8a007aa5>] ? nmi_handle+0x5/0x390
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 [<ffffffff8a008062>] default_do_nmi+0x72/0x1c0
	 [<ffffffff8a008268>] do_nmi+0xb8/0x100
	 [<ffffffff8a7ff66a>] end_repeat_nmi+0x1e/0x2e
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 <<EOE>> <IRQ> [<ffffffff8a0ccd2f>] lock_acquired+0xaf/0x450
	 [<ffffffff8a0f74c5>] ? lock_hrtimer_base.isra.20+0x25/0x50
	 [<ffffffff8a7fc678>] _raw_spin_lock_irqsave+0x78/0x90
	 [<ffffffff8a0f74c5>] ? lock_hrtimer_base.isra.20+0x25/0x50
	 [<ffffffff8a0f74c5>] lock_hrtimer_base.isra.20+0x25/0x50
	 [<ffffffff8a0f7723>] hrtimer_try_to_cancel+0x33/0x1e0
	 [<ffffffff8a0f78ea>] hrtimer_cancel+0x1a/0x30
	 [<ffffffff8a109237>] tick_nohz_restart+0x17/0x90
	 [<ffffffff8a10a213>] __tick_nohz_full_check+0xc3/0x100
	 [<ffffffff8a10a25e>] nohz_full_kick_work_func+0xe/0x10
	 [<ffffffff8a17c884>] irq_work_run_list+0x44/0x70
	 [<ffffffff8a17c8da>] irq_work_run+0x2a/0x50
	 [<ffffffff8a0f700b>] update_process_times+0x5b/0x70
	 [<ffffffff8a109005>] tick_sched_handle.isra.21+0x25/0x60
	 [<ffffffff8a109b81>] tick_sched_timer+0x41/0x60
	 [<ffffffff8a0f7aa2>] __run_hrtimer+0x72/0x470
	 [<ffffffff8a109b40>] ? tick_sched_do_timer+0xb0/0xb0
	 [<ffffffff8a0f8707>] hrtimer_interrupt+0x117/0x270
	 [<ffffffff8a034357>] local_apic_timer_interrupt+0x37/0x60
	 [<ffffffff8a80010f>] smp_apic_timer_interrupt+0x3f/0x50
	 [<ffffffff8a7fe52f>] apic_timer_interrupt+0x6f/0x80

To fix this we force non-lazy irq works to run on irq work self-IPIs
when available. Whether the arch can trigger irq work self-IPIs is
reported by arch_irq_work_has_interrupt().
Reported-by: Catalin Iacob <[email protected]>
Reported-by: Dave Jones <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
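Editor's note: as a reader's aid, and not part of the commit itself, the following is a minimal user-space C model of the two delivery paths described above and of the decision this patch introduces. The names arch_has_irq_work_interrupt, raised_pending and lazy_pending are illustrative stand-ins for the kernel's arch_irq_work_has_interrupt() and the per-CPU raised_list/lazy_list; this is a sketch of the control flow, not kernel code.

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-ins for the per-CPU raised (non-lazy) and lazy llists. */
	static bool raised_pending, lazy_pending;

	/* Stand-in for arch_irq_work_has_interrupt(): does this arch
	 * provide a dedicated irq work self-IPI? */
	static bool arch_has_irq_work_interrupt = true;

	/* Self-IPI handler: a clean interrupt context, safe for works
	 * (like the nohz full kick) that cancel/restart the tick hrtimer. */
	static void irq_work_interrupt_handler(void)
	{
		if (raised_pending) {
			printf("self-IPI: running raised works\n");
			raised_pending = false;
		}
	}

	/* Timer tick fallback, modelled on the new irq_work_tick():
	 * only run raised works here when no self-IPI exists, because
	 * this path runs inside the tick hrtimer and must not touch it. */
	static void tick_handler(void)
	{
		if (raised_pending && !arch_has_irq_work_interrupt) {
			printf("tick: running raised works (fallback)\n");
			raised_pending = false;
		}
		if (lazy_pending) {
			printf("tick: running lazy works\n");
			lazy_pending = false;
		}
	}

	int main(void)
	{
		raised_pending = lazy_pending = true;

		if (arch_has_irq_work_interrupt)
			irq_work_interrupt_handler();	/* self-IPI fires first */
		tick_handler();				/* next timer tick */
		return 0;
	}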
1 parent: b677a76

3 files changed: 15 additions & 3 deletions

include/linux/irq_work.h

Lines changed: 1 addition & 0 deletions

@@ -39,6 +39,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu);
 #endif
 
 void irq_work_run(void);
+void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK

kernel/irq_work.c

Lines changed: 13 additions & 2 deletions

@@ -115,8 +115,10 @@ bool irq_work_needs_cpu(void)
 
 	raised = &__get_cpu_var(raised_list);
 	lazy = &__get_cpu_var(lazy_list);
-	if (llist_empty(raised) && llist_empty(lazy))
-		return false;
+
+	if (llist_empty(raised) || arch_irq_work_has_interrupt())
+		if (llist_empty(lazy))
+			return false;
 
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
@@ -171,6 +173,15 @@ void irq_work_run(void)
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
+void irq_work_tick(void)
+{
+	struct llist_head *raised = &__get_cpu_var(raised_list);
+
+	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+		irq_work_run_list(raised);
+	irq_work_run_list(&__get_cpu_var(lazy_list));
+}
+
 /*
  * Synchronize against the irq_work @entry, ensures the entry is not
  * currently in use.
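Editor's note: the nested condition added to irq_work_needs_cpu() above is terse; read it as "the tick may be stopped only when there is no lazy work pending and any raised work will be delivered by the self-IPI anyway". A flattened equivalent, purely as a reader's aid and not part of the patch:

	/* Equivalent form of the new check: keep the tick alive if lazy
	 * work is pending, or if raised work is pending and only the
	 * tick can run it (no irq work self-IPI on this arch). */
	if (llist_empty(lazy) &&
	    (llist_empty(raised) || arch_irq_work_has_interrupt()))
		return false;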

kernel/time/timer.c

Lines changed: 1 addition & 1 deletion

@@ -1385,7 +1385,7 @@ void update_process_times(int user_tick)
 	rcu_check_callbacks(cpu, user_tick);
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
-		irq_work_run();
+		irq_work_tick();
 #endif
 	scheduler_tick();
 	run_posix_cpu_timers(p);
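Editor's note: with this change the tick-side fallback becomes explicit. update_process_times() now calls irq_work_tick() instead of irq_work_run(): on architectures that provide an irq work self-IPI, the tick only flushes the lazy list and leaves raised (non-lazy) works to the dedicated irq work interrupt, while architectures without one keep flushing both lists from the tick as before.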
