
Commit da6d9f8

[libcpu/arm64] add C11 atomic ticket spinlock
Replace the former flag-based spinlock implementation, which is unfair. Besides, the C11 atomic implementation is more readable (it is plain C, after all) and more maintainable, since the toolchain can apply its built-in optimizations and tune for different micro-architectures. For example, Armv8.5 introduces a better instruction for this; the compiler can take advantage of it when it knows the target platform supports it. Signed-off-by: Shell <[email protected]>
1 parent 12ad170 commit da6d9f8
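For background, the fairness claim comes from the ticket discipline itself: each locker atomically takes a ticket and is served strictly in ticket order. Below is a minimal, self-contained C11 sketch of that idea (illustrative only; the names demo_ticket_lock, demo_lock and demo_unlock are not part of this commit, whose actual implementation is in libcpu/aarch64/common/cpu.c further down):

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative ticket lock: lockers take a ticket from "next" and spin
 * until "owner" reaches it, so the lock is granted in FIFO order. */
struct demo_ticket_lock
{
    _Atomic(uint16_t) owner; /* ticket currently being served */
    _Atomic(uint16_t) next;  /* next ticket to hand out */
};

static void demo_lock(struct demo_ticket_lock *l)
{
    /* relaxed is enough for taking a ticket; the acquire load of "owner"
     * below is what orders the critical section */
    uint16_t ticket = atomic_fetch_add_explicit(&l->next, 1,
                                                memory_order_relaxed);
    while (atomic_load_explicit(&l->owner, memory_order_acquire) != ticket)
    {
        /* busy-wait; the real port sleeps with WFE instead of spinning hot */
    }
}

static void demo_unlock(struct demo_ticket_lock *l)
{
    /* hand over to the next ticket; release pairs with the acquire above */
    atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
}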

File tree: 10 files changed, +260 -105 lines

Diff for: examples/utest/testcases/kernel/sched_sem_tc.c

+3-1
@@ -29,7 +29,7 @@
 #error the thread priority should at least be greater than idle
 #endif

-static rt_atomic_t _star_counter = 1;
+static rt_atomic_t _star_counter;
 static struct rt_semaphore _thr_exit_sem;
 static struct rt_semaphore _level_waiting[TEST_LEVEL_COUNTS];
 static rt_thread_t _thread_matrix[TEST_LEVEL_COUNTS][KERN_TEST_CONCURRENT_THREADS];
@@ -157,6 +157,8 @@ static void scheduler_tc(void)
 static rt_err_t utest_tc_init(void)
 {
     LOG_I("Setup environment...");
+    _star_counter = 1;
+    rt_memset(_load_average, 0, sizeof(_load_average));
     rt_sem_init(&_thr_exit_sem, "test", 0, RT_IPC_FLAG_PRIO);

     for (size_t i = 0; i < TEST_LEVEL_COUNTS; i++)

Diff for: libcpu/Kconfig

+3
@@ -12,6 +12,9 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
 config ARCH_HAVE_EFFICIENT_UNALIGNED_ACCESS
     bool
     default y
+config ARCH_USING_GENERIC_CPUID
+    bool "Using generic cpuid implemenation"
+    default n
 endmenu
 endif

Diff for: libcpu/aarch64/common/context_gcc.S

+4
@@ -44,7 +44,11 @@ int rt_hw_cpu_id(void)
 .weak rt_hw_cpu_id
 .type rt_hw_cpu_id, @function
 rt_hw_cpu_id:
+#if RT_CPUS_NR > 1
     mrs x0, tpidr_el1
+#else
+    mov x0, xzr
+#endif
     ret

 /*

Diff for: libcpu/aarch64/common/cpu.c

+75-38
@@ -8,6 +8,7 @@
  * 2011-09-15     Bernard      first version
  * 2019-07-28     zdzn         add smp support
  * 2023-02-21     GuEe-GUI     mov cpu ofw init to setup
+ * 2024-04-29     Shell        Add generic ticket spinlock using C11 atomic
  */

 #include <rthw.h>
@@ -55,65 +56,101 @@ rt_weak rt_uint64_t rt_cpu_mpidr_early[] =
 };
 #endif /* RT_USING_SMART */

-static inline void arch_spin_lock(arch_spinlock_t *lock)
+/* in support of C11 atomic */
+#if __STDC_VERSION__ >= 201112L
+#include <stdatomic.h>
+
+union _spinlock
+{
+    _Atomic(rt_uint32_t) _value;
+    struct
+    {
+        _Atomic(rt_uint16_t) owner;
+        _Atomic(rt_uint16_t) next;
+    } ticket;
+};
+
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
 {
-    unsigned int tmp;
-
-    asm volatile(
-    "   sevl\n"
-    "1: wfe\n"
-    "2: ldaxr   %w0, %1\n"
-    "   cbnz    %w0, 1b\n"
-    "   stxr    %w0, %w2, %1\n"
-    "   cbnz    %w0, 2b\n"
-    : "=&r" (tmp), "+Q" (lock->lock)
-    : "r" (1)
-    : "cc", "memory");
+    union _spinlock *lock = (void *)_lock;
+
+    /**
+     * just a dummy note that this is an atomic operation, though it alway is
+     * even without usage of atomic API in arm64
+     */
+    atomic_store_explicit(&lock->_value, 0, memory_order_relaxed);
 }

-static inline int arch_spin_trylock(arch_spinlock_t *lock)
+rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *_lock)
 {
-    unsigned int tmp;
-
-    asm volatile(
-    "   ldaxr   %w0, %1\n"
-    "   cbnz    %w0, 1f\n"
-    "   stxr    %w0, %w2, %1\n"
-    "1:\n"
-    : "=&r" (tmp), "+Q" (lock->lock)
-    : "r" (1)
-    : "cc", "memory");
-
-    return !tmp;
+    rt_bool_t rc;
+    rt_uint32_t readonce;
+    union _spinlock temp;
+    union _spinlock *lock = (void *)_lock;
+
+    readonce = atomic_load_explicit(&lock->_value, memory_order_acquire);
+    temp._value = readonce;
+
+    if (temp.ticket.owner != temp.ticket.next)
+    {
+        rc = RT_FALSE;
+    }
+    else
+    {
+        temp.ticket.next += 1;
+        rc = atomic_compare_exchange_strong_explicit(
+            &lock->_value, &readonce, temp._value,
+            memory_order_acquire, memory_order_relaxed);
+    }
+    return rc;
 }

-static inline void arch_spin_unlock(arch_spinlock_t *lock)
+rt_inline rt_base_t _load_acq_exclusive(_Atomic(rt_uint16_t) *halfword)
 {
-    asm volatile(
-    "   stlr    %w1, %0\n"
-    : "=Q" (lock->lock) : "r" (0) : "memory");
+    rt_uint32_t old;
+    __asm__ volatile("ldaxrh %w0, [%1]"
+                     : "=&r"(old)
+                     : "r"(halfword)
+                     : "memory");
+    return old;
 }

-void rt_hw_spin_lock_init(arch_spinlock_t *lock)
+rt_inline void _send_event_local(void)
 {
-    lock->lock = 0;
+    __asm__ volatile("sevl");
 }

-void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+rt_inline void _wait_for_event(void)
 {
-    arch_spin_lock(lock);
+    __asm__ volatile("wfe" ::: "memory");
 }

-void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_lock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_unlock(lock);
+    union _spinlock *lock = (void *)_lock;
+    rt_uint16_t ticket =
+        atomic_fetch_add_explicit(&lock->ticket.next, 1, memory_order_relaxed);
+
+    if (atomic_load_explicit(&lock->ticket.owner, memory_order_acquire) !=
+        ticket)
+    {
+        _send_event_local();
+        do
+        {
+            _wait_for_event();
+        }
+        while (_load_acq_exclusive(&lock->ticket.owner) != ticket);
+    }
 }

-rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_unlock(rt_hw_spinlock_t *_lock)
 {
-    return arch_spin_trylock(lock);
+    union _spinlock *lock = (void *)_lock;
+    atomic_fetch_add_explicit(&lock->ticket.owner, 1, memory_order_release);
 }

+#endif
+
 static int _cpus_init_data_hardcoded(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_ops_t *cpu_ops[])
 {
     // load in cpu_hw_ids in cpuid_to_hwid,
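For orientation, here is a hedged usage sketch of the spinlock API reworked above (the demo_* caller code is illustrative and not part of this commit; only rt_hw_spinlock_t and the rt_hw_spin_* functions come from the diff):

#include <rthw.h>
#include <rtthread.h>

static rt_hw_spinlock_t _demo_lock;  /* protects _demo_counter */
static int _demo_counter;

static void demo_init(void)
{
    rt_hw_spin_lock_init(&_demo_lock);
}

static void demo_increment(void)
{
    /* with the ticket lock, contending CPUs are granted the lock in
     * FIFO (ticket) order rather than whichever wins the race */
    rt_hw_spin_lock(&_demo_lock);
    _demo_counter++;
    rt_hw_spin_unlock(&_demo_lock);
}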
