Commit b89e427

andyross authored and jgl-meta committed
kernel/sched: Rename/redocument wait_for_switch() -> z_sched_switch_spin()
This trick turns out also to be needed by the abort/join code. Promote it to a more formal-looking internal API and clean up the documentation to (hopefully) clarify the exact behavior and better explain the need.

This is one of the more... enchanted bits of the scheduler, and while the trick is IMHO pretty clean, it remains a big SMP footgun.

Signed-off-by: Andy Ross <[email protected]>
1 parent d8d119f commit b89e427
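
The abort/join need mentioned in the message can be made concrete with a rough, hypothetical sketch: before one CPU tears down a thread that may be mid-switch on another CPU, it must spin until that thread's context save has completed. Everything here except z_sched_switch_spin() itself (the lock name, halt_and_reclaim(), and the overall shape) is a placeholder, not the actual Zephyr abort path.

#include <zephyr/kernel.h>
#include <kswap.h>   /* kernel-internal header that declares z_sched_switch_spin() */

/* Assumed name for the kernel's internal scheduler lock. */
extern struct k_spinlock sched_spinlock;

/* Placeholder for the real teardown work done by the abort path. */
static void halt_and_reclaim(struct k_thread *thread)
{
	ARG_UNUSED(thread);
	/* ...release the thread's resources, wake joiners, etc... */
}

void thread_abort_sketch(struct k_thread *thread)
{
	k_spinlock_key_t key = k_spin_lock(&sched_spinlock);

	/* The victim may already be visible as "not current" to this CPU
	 * while its registers are still being saved elsewhere.  Per the
	 * contract documented in kswap.h, spin (with the scheduler lock
	 * held) until arch_switch() publishes thread->switch_handle before
	 * touching its state.
	 */
	z_sched_switch_spin(thread);

	halt_and_reclaim(thread);

	k_spin_unlock(&sched_spinlock, key);
}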

4 files changed (+32, -14 lines)

arch/arc/include/swap_macros.h

Lines changed: 2 additions & 2 deletions
@@ -416,11 +416,11 @@ fpu_skip_load :
 .macro _store_old_thread_callee_regs
 
 	_save_callee_saved_regs
-	/* Save old thread into switch handle which is required by wait_for_switch.
+	/* Save old thread into switch handle which is required by z_sched_switch_spin.
 	 * NOTE: we shouldn't save anything related to old thread context after this point!
 	 * TODO: we should add SMP write-after-write data memory barrier here, as we want all
 	 * previous writes completed before setting switch_handle which is polled by other cores
-	 * in wait_for_switch in case of SMP. Though it's not likely that this issue
+	 * in z_sched_switch_spin in case of SMP. Though it's not likely that this issue
 	 * will reproduce in real world as there is some gap before reading switch_handle and
 	 * reading rest of the data we've stored before.
 	 */

arch/arm64/core/switch.S

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 #endif
 
 	/* save old thread into switch handle which is required by
-	 * wait_for_switch
+	 * z_sched_switch_spin()
 	 */
 	str	x1, [x1, #___thread_t_switch_handle_OFFSET]
 
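
Both architecture diffs touch the same point in the context-switch path: the outgoing thread's switch handle must be published only after all of its state has been saved, because other CPUs poll that field in z_sched_switch_spin(). The following is a rough C-level sketch of that ordering contract; arch_switch_sketch() and the save/restore helpers are placeholders standing in for the real per-architecture assembly, not Zephyr functions.

#include <zephyr/kernel.h>

/* Placeholders for the per-architecture register save/restore assembly
 * (e.g. _save_callee_saved_regs on ARC, the store/load sequences on arm64).
 */
extern void save_outgoing_context(struct k_thread *old_thread);
extern void restore_incoming_context(struct k_thread *new_thread);

void arch_switch_sketch(struct k_thread *old_thread, struct k_thread *new_thread)
{
	/* 1. Save every last piece of the outgoing thread's state. */
	save_outgoing_context(old_thread);

	/* 2. Only then publish the switch handle.  This store is what other
	 *    CPUs spin on in z_sched_switch_spin(), so nothing belonging to
	 *    the old thread may be written after it; per the TODO in the ARC
	 *    macro, weakly ordered SMP targets would also want a write
	 *    barrier immediately before this point.
	 */
	old_thread->switch_handle = old_thread;

	/* 3. Restore the incoming thread's context and resume it. */
	restore_incoming_context(new_thread);
}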

kernel/include/kswap.h

Lines changed: 28 additions & 10 deletions
@@ -30,16 +30,30 @@ void z_smp_release_global_lock(struct k_thread *thread);
 /* context switching and scheduling-related routines */
 #ifdef CONFIG_USE_SWITCH
 
-/* There is an unavoidable SMP race when threads swap -- their thread
- * record is in the queue (and visible to other CPUs) before
- * arch_switch() finishes saving state. We must spin for the switch
- * handle before entering a new thread. See docs on arch_switch().
+/* Spin, with the scheduler lock held (!), on a thread that is known
+ * (!!) to have released the lock and be on a path where it will
+ * deterministically (!!!) reach arch_switch() in very small constant
+ * time.
+ *
+ * This exists to treat an unavoidable SMP race when threads swap --
+ * their thread record is in the queue (and visible to other CPUs)
+ * before arch_switch() finishes saving state. We must spin for the
+ * switch handle before entering a new thread. See docs on
+ * arch_switch().
+ *
+ * Stated differently: there's a chicken and egg bug with the question
+ * of "is a thread running or not?". The thread needs to mark itself
+ * "not running" from its own context, but at that moment it obviously
+ * is still running until it reaches arch_switch()! Locking can't
+ * treat this because the scheduler lock can't be released by the
+ * switched-to thread, which is going to (obviously) be running its
+ * own code and doesn't know it was switched out.
  *
  * Note: future SMP architectures may need a fence/barrier or cache
  * invalidation here. Current ones don't, and sadly Zephyr doesn't
  * have a framework for that yet.
  */
-static inline void wait_for_switch(struct k_thread *thread)
+static inline void z_sched_switch_spin(struct k_thread *thread)
 {
 #ifdef CONFIG_SMP
 	volatile void **shp = (void *)&thread->switch_handle;

@@ -117,7 +131,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 	}
 #endif
 	z_thread_mark_switched_out();
-	wait_for_switch(new_thread);
+	z_sched_switch_spin(new_thread);
 	_current_cpu->current = new_thread;
 
 #ifdef CONFIG_TIMESLICING

@@ -131,10 +145,9 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 	arch_cohere_stacks(old_thread, NULL, new_thread);
 
 #ifdef CONFIG_SMP
-	/* Add _current back to the run queue HERE. After
-	 * wait_for_switch() we are guaranteed to reach the
-	 * context switch in finite time, avoiding a potential
-	 * deadlock.
+	/* Now add _current back to the run queue, once we are
+	 * guaranteed to reach the context switch in finite
+	 * time. See z_sched_switch_spin().
 	 */
 	z_requeue_current(old_thread);
 #endif

@@ -178,6 +191,11 @@ static inline void z_swap_unlocked(void)
 
 extern int arch_swap(unsigned int key);
 
+static inline void z_sched_switch_spin(struct k_thread *thread)
+{
+	ARG_UNUSED(thread);
+}
+
 static inline int z_swap_irqlock(unsigned int key)
 {
 	int ret;
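
For reference, the hunk above shows only the first line of the SMP body of z_sched_switch_spin(). A minimal sketch of what that body does follows; the shape of the loop is an assumption drawn from the surrounding comments, not a copy of the real function.

static inline void z_sched_switch_spin(struct k_thread *thread)
{
#ifdef CONFIG_SMP
	volatile void **shp = (void *)&thread->switch_handle;

	/* Busy-wait until the other CPU's arch_switch() publishes the
	 * switch handle, i.e. until the outgoing context is fully saved.
	 * Spinning with the scheduler lock held is tolerable only because,
	 * per the comment above, the thread is known to reach arch_switch()
	 * in very small constant time.
	 */
	while (*shp == NULL) {
		/* spin; a CPU relax hint could be added here */
	}
#else
	ARG_UNUSED(thread);
#endif
}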

kernel/sched.c

Lines changed: 1 addition & 1 deletion
@@ -1095,7 +1095,7 @@ void *z_get_next_switch_handle(void *interrupted)
 
 	if (old_thread != new_thread) {
 		update_metairq_preempt(new_thread);
-		wait_for_switch(new_thread);
+		z_sched_switch_spin(new_thread);
 		arch_cohere_stacks(old_thread, interrupted, new_thread);
 
 		_current_cpu->swap_ok = 0;
