Skip to content

Commit 765a3f4

Browse files
committed
rcu: Provide grace-period piggybacking API
The following pattern is currently not well supported by RCU:

1. Make data element inaccessible to RCU readers.
2. Do work that probably lasts for more than one grace period.
3. Do something to make sure RCU readers in flight before #1 above have completed.

Here are some things that could currently be done:

a. Do a synchronize_rcu() unconditionally at either #1 or #3 above. This works, but imposes needless work and latency.

b. Post an RCU callback at #1 above that does a wakeup, then wait for the wakeup at #3. This works well, but likely results in an extra unneeded grace period. Open-coding this is also a bit more semi-tricky code than would be good.

This commit therefore adds get_state_synchronize_rcu() and cond_synchronize_rcu() APIs. Call get_state_synchronize_rcu() at #1 above and pass its return value to cond_synchronize_rcu() at #3 above. This results in a call to synchronize_rcu() if no grace period has elapsed between #1 and #3, but requires only a load, comparison, and memory barrier if a full grace period did elapse.

Requested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
1 parent f5604f6 commit 765a3f4

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

include/linux/rcutiny.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@
2727

2828
#include <linux/cache.h>
2929

30+
/*
 * Snapshot current RCU state for a later cond_synchronize_rcu() — Tiny
 * RCU stub.  The Tiny cond_synchronize_rcu() below ignores its cookie
 * argument entirely, so any constant works here; 0 is returned as the
 * trivially valid cookie.  (Presumably the rcutiny.h build has no
 * grace-period counter worth sampling — confirm against kernel/rcu/tiny.c.)
 */
static inline unsigned long get_state_synchronize_rcu(void)
{
	return 0;
}
34+
35+
/*
 * Conditionally wait for an RCU grace period — Tiny RCU stub.
 *
 * @oldstate: cookie from an earlier get_state_synchronize_rcu()
 *            (always 0 in this implementation; ignored here).
 *
 * No waiting is performed in this configuration.  Only the
 * might_sleep() debug check remains, preserving the API contract
 * that callers must be in a context where sleeping is legal, so
 * code is portable to the rcutree.h implementation that can block.
 */
static inline void cond_synchronize_rcu(unsigned long oldstate)
{
	might_sleep();
}
39+
3040
static inline void rcu_barrier_bh(void)
3141
{
3242
wait_rcu_gp(call_rcu_bh);

include/linux/rcutree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ static inline void synchronize_rcu_bh_expedited(void)
7676
void rcu_barrier(void);
7777
void rcu_barrier_bh(void);
7878
void rcu_barrier_sched(void);
79+
unsigned long get_state_synchronize_rcu(void);
80+
void cond_synchronize_rcu(unsigned long oldstate);
7981

8082
extern unsigned long rcutorture_testseq;
8183
extern unsigned long rcutorture_vernum;

kernel/rcu/tree.c

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,13 +1421,14 @@ static int rcu_gp_init(struct rcu_state *rsp)
14211421

14221422
/* Advance to a new grace period and initialize state. */
14231423
record_gp_stall_check_time(rsp);
1424-
smp_wmb(); /* Record GP times before starting GP. */
1425-
rsp->gpnum++;
1424+
/* Record GP times before starting GP, hence smp_store_release(). */
1425+
smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
14261426
trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
14271427
raw_spin_unlock_irq(&rnp->lock);
14281428

14291429
/* Exclude any concurrent CPU-hotplug operations. */
14301430
mutex_lock(&rsp->onoff_mutex);
1431+
smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */
14311432

14321433
/*
14331434
* Set the quiescent-state-needed bits in all the rcu_node
@@ -1555,10 +1556,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
15551556
}
15561557
rnp = rcu_get_root(rsp);
15571558
raw_spin_lock_irq(&rnp->lock);
1558-
smp_mb__after_unlock_lock();
1559+
smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
15591560
rcu_nocb_gp_set(rnp, nocb);
15601561

1561-
rsp->completed = rsp->gpnum; /* Declare grace period done. */
1562+
/* Declare grace period done. */
1563+
ACCESS_ONCE(rsp->completed) = rsp->gpnum;
15621564
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
15631565
rsp->fqs_state = RCU_GP_IDLE;
15641566
rdp = this_cpu_ptr(rsp->rda);
@@ -2637,6 +2639,58 @@ void synchronize_rcu_bh(void)
26372639
}
26382640
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
26392641

2642+
/**
 * get_state_synchronize_rcu - Snapshot current RCU state
 *
 * Returns a cookie that is used by a later call to cond_synchronize_rcu()
 * to determine whether or not a full grace period has elapsed in the
 * meantime.  The cookie is a snapshot of ->gpnum, the number of the
 * most recently started grace period.
 */
unsigned long get_state_synchronize_rcu(void)
{
	/*
	 * Any prior manipulation of RCU-protected data must happen
	 * before the load from ->gpnum.  A full barrier (not merely a
	 * load-acquire, which orders only later accesses) is needed to
	 * keep those earlier accesses from sliding past the snapshot.
	 */
	smp_mb();  /* ^^^ */

	/*
	 * Make sure this load happens before the purportedly
	 * time-consuming work between get_state_synchronize_rcu()
	 * and cond_synchronize_rcu().  The acquire semantics order the
	 * ->gpnum load before everything the caller does afterwards.
	 */
	return smp_load_acquire(&rcu_state->gpnum);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
2665+
2666+
/**
 * cond_synchronize_rcu - Conditionally wait for an RCU grace period
 *
 * @oldstate: return value from earlier call to get_state_synchronize_rcu()
 *
 * If a full RCU grace period has elapsed since the earlier call to
 * get_state_synchronize_rcu(), just return.  Otherwise, invoke
 * synchronize_rcu() to wait for a full grace period.
 *
 * Yes, this function does not take counter wrap into account.  But
 * counter wrap is harmless.  If the counter wraps, we have waited for
 * more than 2 billion grace periods (and way more on a 64-bit system!),
 * so waiting for one additional grace period should be just fine.
 */
void cond_synchronize_rcu(unsigned long oldstate)
{
	unsigned long newstate;

	/*
	 * Ensure that this load happens before any RCU-destructive
	 * actions the caller might carry out after we return.
	 */
	newstate = smp_load_acquire(&rcu_state->completed);

	/*
	 * ->completed advancing past the snapshotted ->gpnum means a
	 * grace period that started after the snapshot has fully ended,
	 * so no wait is needed.  Otherwise (oldstate >= newstate, in
	 * wrap-tolerant arithmetic), fall back to a full wait.
	 */
	if (ULONG_CMP_GE(oldstate, newstate))
		synchronize_rcu();
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
2693+
26402694
static int synchronize_sched_expedited_cpu_stop(void *data)
26412695
{
26422696
/*

0 commit comments

Comments
 (0)