Skip to content

Commit 8116b54

Browse files
manfred-colorfutorvalds
authored andcommitted
ipc/sem.c: document and update memory barriers
Document and update the memory barriers in ipc/sem.c: - Add smp_store_release() to wake_up_sem_queue_prepare() and document why it is needed. - Read q->status using READ_ONCE+smp_acquire__after_ctrl_dep(). as the pair for the barrier inside wake_up_sem_queue_prepare(). - Add comments to all barriers, and mention the rules in the block regarding locking. - Switch to using wake_q_add_safe(). Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Manfred Spraul <[email protected]> Cc: Waiman Long <[email protected]> Cc: Davidlohr Bueso <[email protected]> Cc: <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Will Deacon <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 0d97a82 commit 8116b54

File tree

1 file changed

+41
-25
lines changed

1 file changed

+41
-25
lines changed

ipc/sem.c

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -205,15 +205,38 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
205205
*
206206
* Memory ordering:
207207
* Most ordering is enforced by using spin_lock() and spin_unlock().
208-
* The special case is use_global_lock:
208+
*
209+
* Exceptions:
210+
* 1) use_global_lock: (SEM_BARRIER_1)
209211
* Setting it from non-zero to 0 is a RELEASE, this is ensured by
210-
* using smp_store_release().
212+
* using smp_store_release(): Immediately after setting it to 0,
213+
* a simple op can start.
211214
* Testing if it is non-zero is an ACQUIRE, this is ensured by using
212215
* smp_load_acquire().
213216
* Setting it from 0 to non-zero must be ordered with regards to
214217
* this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
215218
* is inside a spin_lock() and after a write from 0 to non-zero a
216219
* spin_lock()+spin_unlock() is done.
220+
*
221+
* 2) queue.status: (SEM_BARRIER_2)
222+
* Initialization is done while holding sem_lock(), so no further barrier is
223+
* required.
224+
* Setting it to a result code is a RELEASE, this is ensured by both a
225+
* smp_store_release() (for case a) and while holding sem_lock()
226+
* (for case b).
227+
* The AQUIRE when reading the result code without holding sem_lock() is
228+
* achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep().
229+
* (case a above).
230+
* Reading the result code while holding sem_lock() needs no further barriers,
231+
* the locks inside sem_lock() enforce ordering (case b above)
232+
*
233+
* 3) current->state:
234+
* current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
235+
* The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
236+
* happen immediately after calling wake_q_add. As wake_q_add_safe() is called
237+
* when holding sem_lock(), no further barriers are required.
238+
*
239+
* See also ipc/mqueue.c for more details on the covered races.
217240
*/
218241

219242
#define sc_semmsl sem_ctls[0]
@@ -344,12 +367,8 @@ static void complexmode_tryleave(struct sem_array *sma)
344367
return;
345368
}
346369
if (sma->use_global_lock == 1) {
347-
/*
348-
* Immediately after setting use_global_lock to 0,
349-
* a simple op can start. Thus: all memory writes
350-
* performed by the current operation must be visible
351-
* before we set use_global_lock to 0.
352-
*/
370+
371+
/* See SEM_BARRIER_1 for purpose/pairing */
353372
smp_store_release(&sma->use_global_lock, 0);
354373
} else {
355374
sma->use_global_lock--;
@@ -400,7 +419,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
400419
*/
401420
spin_lock(&sem->lock);
402421

403-
/* pairs with smp_store_release() */
422+
/* see SEM_BARRIER_1 for purpose/pairing */
404423
if (!smp_load_acquire(&sma->use_global_lock)) {
405424
/* fast path successful! */
406425
return sops->sem_num;
@@ -766,15 +785,12 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
766785
static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
767786
struct wake_q_head *wake_q)
768787
{
769-
wake_q_add(wake_q, q->sleeper);
770-
/*
771-
* Rely on the above implicit barrier, such that we can
772-
* ensure that we hold reference to the task before setting
773-
* q->status. Otherwise we could race with do_exit if the
774-
* task is awoken by an external event before calling
775-
* wake_up_process().
776-
*/
777-
WRITE_ONCE(q->status, error);
788+
get_task_struct(q->sleeper);
789+
790+
/* see SEM_BARRIER_2 for purpuse/pairing */
791+
smp_store_release(&q->status, error);
792+
793+
wake_q_add_safe(wake_q, q->sleeper);
778794
}
779795

780796
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -2148,9 +2164,11 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
21482164
}
21492165

21502166
do {
2167+
/* memory ordering ensured by the lock in sem_lock() */
21512168
WRITE_ONCE(queue.status, -EINTR);
21522169
queue.sleeper = current;
21532170

2171+
/* memory ordering is ensured by the lock in sem_lock() */
21542172
__set_current_state(TASK_INTERRUPTIBLE);
21552173
sem_unlock(sma, locknum);
21562174
rcu_read_unlock();
@@ -2173,13 +2191,8 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
21732191
*/
21742192
error = READ_ONCE(queue.status);
21752193
if (error != -EINTR) {
2176-
/*
2177-
* User space could assume that semop() is a memory
2178-
* barrier: Without the mb(), the cpu could
2179-
* speculatively read in userspace stale data that was
2180-
* overwritten by the previous owner of the semaphore.
2181-
*/
2182-
smp_mb();
2194+
/* see SEM_BARRIER_2 for purpose/pairing */
2195+
smp_acquire__after_ctrl_dep();
21832196
goto out_free;
21842197
}
21852198

@@ -2189,6 +2202,9 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
21892202
if (!ipc_valid_object(&sma->sem_perm))
21902203
goto out_unlock_free;
21912204

2205+
/*
2206+
* No necessity for any barrier: We are protect by sem_lock()
2207+
*/
21922208
error = READ_ONCE(queue.status);
21932209

21942210
/*

0 commit comments

Comments
 (0)