Branch data Line data Source code
1 : : // SPDX-License-Identifier: GPL-2.0+
2 : : /*
3 : : * Sleepable Read-Copy Update mechanism for mutual exclusion.
4 : : *
5 : : * Copyright (C) IBM Corporation, 2006
6 : : * Copyright (C) Fujitsu, 2012
7 : : *
8 : : * Author: Paul McKenney <paulmck@linux.ibm.com>
9 : : * Lai Jiangshan <laijs@cn.fujitsu.com>
10 : : *
11 : : * For detailed explanation of Read-Copy Update mechanism see -
12 : : * Documentation/RCU/ *.txt
13 : : *
14 : : */
15 : :
16 : : #define pr_fmt(fmt) "rcu: " fmt
17 : :
18 : : #include <linux/export.h>
19 : : #include <linux/mutex.h>
20 : : #include <linux/percpu.h>
21 : : #include <linux/preempt.h>
22 : : #include <linux/rcupdate_wait.h>
23 : : #include <linux/sched.h>
24 : : #include <linux/smp.h>
25 : : #include <linux/delay.h>
26 : : #include <linux/module.h>
27 : : #include <linux/srcu.h>
28 : :
29 : : #include "rcu.h"
30 : : #include "rcu_segcblist.h"
31 : :
32 : : /* Holdoff in nanoseconds for auto-expediting. */
33 : : #define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
34 : : static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
35 : : module_param(exp_holdoff, ulong, 0444);
36 : :
37 : : /* Overflow-check frequency. N bits roughly says every 2**N grace periods. */
38 : : static ulong counter_wrap_check = (ULONG_MAX >> 2);
39 : : module_param(counter_wrap_check, ulong, 0444);
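
Both parameters use mode 0444, so they are read-only at run time but, with this code built into the kernel, they can be set at boot under the "srcutree." prefix. A hypothetical command-line fragment (values chosen purely for illustration):

	srcutree.exp_holdoff=50000 srcutree.counter_wrap_check=4611686018427387903

The first raises the auto-expedite holdoff to 50 microseconds; the second is simply ULONG_MAX >> 2 written out for a 64-bit build, i.e. the compiled-in default.
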
40 : :
41 : : /* Early-boot callback-management, so early that no lock is required! */
42 : : static LIST_HEAD(srcu_boot_list);
43 : : static bool __read_mostly srcu_init_done;
44 : :
45 : : static void srcu_invoke_callbacks(struct work_struct *work);
46 : : static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
47 : : static void process_srcu(struct work_struct *work);
48 : : static void srcu_delay_timer(struct timer_list *t);
49 : :
50 : : /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
51 : : #define spin_lock_rcu_node(p) \
52 : : do { \
53 : : spin_lock(&ACCESS_PRIVATE(p, lock)); \
54 : : smp_mb__after_unlock_lock(); \
55 : : } while (0)
56 : :
57 : : #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
58 : :
59 : : #define spin_lock_irq_rcu_node(p) \
60 : : do { \
61 : : spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
62 : : smp_mb__after_unlock_lock(); \
63 : : } while (0)
64 : :
65 : : #define spin_unlock_irq_rcu_node(p) \
66 : : spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
67 : :
68 : : #define spin_lock_irqsave_rcu_node(p, flags) \
69 : : do { \
70 : : spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
71 : : smp_mb__after_unlock_lock(); \
72 : : } while (0)
73 : :
74 : : #define spin_unlock_irqrestore_rcu_node(p, flags) \
      75                 :            :         spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
76 : :
77 : : /*
78 : : * Initialize SRCU combining tree. Note that statically allocated
79 : : * srcu_struct structures might already have srcu_read_lock() and
80 : : * srcu_read_unlock() running against them. So if the is_static parameter
81 : : * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
82 : : */
83 : 105 : static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
84 : : {
85 : 105 : int cpu;
86 : 105 : int i;
87 : 105 : int level = 0;
88 : 105 : int levelspread[RCU_NUM_LVLS];
89 : 105 : struct srcu_data *sdp;
90 : 105 : struct srcu_node *snp;
91 : 105 : struct srcu_node *snp_first;
92 : :
93 : : /* Work out the overall tree geometry. */
94 : 105 : ssp->level[0] = &ssp->node[0];
95 [ - + ]: 105 : for (i = 1; i < rcu_num_lvls; i++)
96 : 0 : ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1];
97 : 105 : rcu_init_levelspread(levelspread, num_rcu_lvl);
98 : :
99 : : /* Each pass through this loop initializes one srcu_node structure. */
100 [ + + ]: 315 : srcu_for_each_node_breadth_first(ssp, snp) {
101 : 105 : spin_lock_init(&ACCESS_PRIVATE(snp, lock));
102 : 105 : WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
103 : : ARRAY_SIZE(snp->srcu_data_have_cbs));
104 [ + + ]: 525 : for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
105 : 420 : snp->srcu_have_cbs[i] = 0;
106 : 420 : snp->srcu_data_have_cbs[i] = 0;
107 : : }
108 : 105 : snp->srcu_gp_seq_needed_exp = 0;
109 : 105 : snp->grplo = -1;
110 : 105 : snp->grphi = -1;
111 [ + - ]: 105 : if (snp == &ssp->node[0]) {
112 : : /* Root node, special case. */
113 : 105 : snp->srcu_parent = NULL;
114 : 105 : continue;
115 : : }
116 : :
117 : : /* Non-root node. */
118 [ # # ]: 0 : if (snp == ssp->level[level + 1])
119 : 0 : level++;
120 : 0 : snp->srcu_parent = ssp->level[level - 1] +
121 : 0 : (snp - ssp->level[level]) /
122 : 0 : levelspread[level - 1];
123 : : }
124 : :
125 : : /*
126 : : * Initialize the per-CPU srcu_data array, which feeds into the
127 : : * leaves of the srcu_node tree.
128 : : */
129 : 105 : WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
130 : : ARRAY_SIZE(sdp->srcu_unlock_count));
131 : 105 : level = rcu_num_lvls - 1;
132 : 105 : snp_first = ssp->level[level];
133 [ + + ]: 210 : for_each_possible_cpu(cpu) {
134 : 105 : sdp = per_cpu_ptr(ssp->sda, cpu);
135 : 105 : spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
136 : 105 : rcu_segcblist_init(&sdp->srcu_cblist);
137 : 105 : sdp->srcu_cblist_invoking = false;
138 : 105 : sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
139 : 105 : sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
140 : 105 : sdp->mynode = &snp_first[cpu / levelspread[level]];
141 [ + + ]: 210 : for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
142 [ + - ]: 105 : if (snp->grplo < 0)
143 : 105 : snp->grplo = cpu;
144 : 105 : snp->grphi = cpu;
145 : : }
146 : 105 : sdp->cpu = cpu;
147 : 105 : INIT_WORK(&sdp->work, srcu_invoke_callbacks);
148 : 105 : timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
149 : 105 : sdp->ssp = ssp;
150 : 105 : sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
151 [ + + ]: 105 : if (is_static)
152 : 42 : continue;
153 : :
154 : : /* Dynamically allocated, better be no srcu_read_locks()! */
155 [ + + ]: 189 : for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
156 : 126 : sdp->srcu_lock_count[i] = 0;
157 : 126 : sdp->srcu_unlock_count[i] = 0;
158 : : }
159 : : }
160 : 105 : }
161 : :
162 : : /*
163 : : * Initialize non-compile-time initialized fields, including the
164 : : * associated srcu_node and srcu_data structures. The is_static
165 : : * parameter is passed through to init_srcu_struct_nodes(), and
166 : : * also tells us that ->sda has already been wired up to srcu_data.
167 : : */
168 : 105 : static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
169 : : {
170 : 105 : mutex_init(&ssp->srcu_cb_mutex);
171 : 105 : mutex_init(&ssp->srcu_gp_mutex);
172 : 105 : ssp->srcu_idx = 0;
173 : 105 : ssp->srcu_gp_seq = 0;
174 : 105 : ssp->srcu_barrier_seq = 0;
175 : 105 : mutex_init(&ssp->srcu_barrier_mutex);
176 : 105 : atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
177 : 105 : INIT_DELAYED_WORK(&ssp->work, process_srcu);
178 [ + + ]: 105 : if (!is_static)
179 : 63 : ssp->sda = alloc_percpu(struct srcu_data);
180 : 105 : init_srcu_struct_nodes(ssp, is_static);
181 : 105 : ssp->srcu_gp_seq_needed_exp = 0;
182 : 105 : ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
183 [ - + ]: 105 : smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
184 [ - + ]: 105 : return ssp->sda ? 0 : -ENOMEM;
185 : : }
186 : :
187 : : #ifdef CONFIG_DEBUG_LOCK_ALLOC
188 : :
189 : : int __init_srcu_struct(struct srcu_struct *ssp, const char *name,
190 : : struct lock_class_key *key)
191 : : {
192 : : /* Don't re-initialize a lock while it is held. */
193 : : debug_check_no_locks_freed((void *)ssp, sizeof(*ssp));
194 : : lockdep_init_map(&ssp->dep_map, name, key, 0);
195 : : spin_lock_init(&ACCESS_PRIVATE(ssp, lock));
196 : : return init_srcu_struct_fields(ssp, false);
197 : : }
198 : : EXPORT_SYMBOL_GPL(__init_srcu_struct);
199 : :
200 : : #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
201 : :
202 : : /**
203 : : * init_srcu_struct - initialize a sleep-RCU structure
204 : : * @ssp: structure to initialize.
205 : : *
206 : : * Must invoke this on a given srcu_struct before passing that srcu_struct
207 : : * to any other function. Each srcu_struct represents a separate domain
208 : : * of SRCU protection.
209 : : */
210 : 63 : int init_srcu_struct(struct srcu_struct *ssp)
211 : : {
212 : 63 : spin_lock_init(&ACCESS_PRIVATE(ssp, lock));
213 : 63 : return init_srcu_struct_fields(ssp, false);
214 : : }
215 : : EXPORT_SYMBOL_GPL(init_srcu_struct);
216 : :
217 : : #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
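
A minimal usage sketch for a dynamically initialized SRCU domain, assuming a hypothetical "foo" object that embeds its own srcu_struct (the names below are illustrative and do not come from this file):

	#include <linux/slab.h>
	#include <linux/srcu.h>

	struct foo {
		struct srcu_struct srcu;	/* One SRCU domain per foo instance. */
		/* ... reader-visible state ... */
	};

	static struct foo *foo_create(void)
	{
		struct foo *fp = kzalloc(sizeof(*fp), GFP_KERNEL);

		if (!fp)
			return NULL;
		if (init_srcu_struct(&fp->srcu)) {	/* Must precede all other SRCU calls on this domain. */
			kfree(fp);
			return NULL;
		}
		return fp;
	}

	static void foo_destroy(struct foo *fp)
	{
		cleanup_srcu_struct(&fp->srcu);		/* Otherwise the per-CPU srcu_data is leaked. */
		kfree(fp);
	}
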
218 : :
219 : : /*
220 : : * First-use initialization of statically allocated srcu_struct
221 : : * structure. Wiring up the combining tree is more than can be
222 : : * done with compile-time initialization, so this check is added
223 : : * to each update-side SRCU primitive. Use ssp->lock, which -is-
224 : : * compile-time initialized, to resolve races involving multiple
225 : : * CPUs trying to garner first-use privileges.
226 : : */
227 : 1542 : static void check_init_srcu_struct(struct srcu_struct *ssp)
228 : : {
229 : 1542 : unsigned long flags;
230 : :
231 : : /* The smp_load_acquire() pairs with the smp_store_release(). */
232 [ + + ]: 1542 : if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/
233 : : return; /* Already initialized. */
234 : 42 : spin_lock_irqsave_rcu_node(ssp, flags);
235 [ - + ]: 42 : if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) {
236 : 0 : spin_unlock_irqrestore_rcu_node(ssp, flags);
237 : 0 : return;
238 : : }
239 : 42 : init_srcu_struct_fields(ssp, true);
240 : 42 : spin_unlock_irqrestore_rcu_node(ssp, flags);
241 : : }
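
For the statically allocated case that this first-use check exists to serve, the usual pattern is the DEFINE_SRCU()/DEFINE_STATIC_SRCU() macros. A minimal sketch with an illustrative domain name:

	#include <linux/srcu.h>

	/*
	 * Compile-time initialized; the combining tree is wired up lazily by
	 * the first update-side operation via check_init_srcu_struct() above.
	 * No init_srcu_struct() or cleanup_srcu_struct() calls are needed.
	 */
	DEFINE_STATIC_SRCU(my_srcu);
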
242 : :
243 : : /*
244 : : * Returns approximate total of the readers' ->srcu_lock_count[] values
245 : : * for the rank of per-CPU counters specified by idx.
246 : : */
247 : : static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
248 : : {
249 : : int cpu;
250 : : unsigned long sum = 0;
251 : :
252 : : for_each_possible_cpu(cpu) {
253 : : struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
254 : :
255 : : sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
256 : : }
257 : : return sum;
258 : : }
259 : :
260 : : /*
261 : : * Returns approximate total of the readers' ->srcu_unlock_count[] values
262 : : * for the rank of per-CPU counters specified by idx.
263 : : */
264 : : static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
265 : : {
266 : : int cpu;
267 : : unsigned long sum = 0;
268 : :
269 : : for_each_possible_cpu(cpu) {
270 : : struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
271 : :
272 : : sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
273 : : }
274 : : return sum;
275 : : }
276 : :
277 : : /*
278 : : * Return true if the number of pre-existing readers is determined to
279 : : * be zero.
280 : : */
281 : 1542 : static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
282 : : {
283 : 1542 : unsigned long unlocks;
284 : :
285 : 1542 : unlocks = srcu_readers_unlock_idx(ssp, idx);
286 : :
287 : : /*
288 : : * Make sure that a lock is always counted if the corresponding
289 : : * unlock is counted. Needs to be a smp_mb() as the read side may
290 : : * contain a read from a variable that is written to before the
291 : : * synchronize_srcu() in the write side. In this case smp_mb()s
292 : : * A and B act like the store buffering pattern.
293 : : *
294 : : * This smp_mb() also pairs with smp_mb() C to prevent accesses
295 : : * after the synchronize_srcu() from being executed before the
296 : : * grace period ends.
297 : : */
298 : 1542 : smp_mb(); /* A */
299 : :
300 : : /*
301 : : * If the locks are the same as the unlocks, then there must have
302 : : * been no readers on this index at some time in between. This does
303 : : * not mean that there are no more readers, as one could have read
304 : : * the current index but not have incremented the lock counter yet.
305 : : *
306 : : * So suppose that the updater is preempted here for so long
307 : : * that more than ULONG_MAX non-nested readers come and go in
308 : : * the meantime. It turns out that this cannot result in overflow
309 : : * because if a reader modifies its unlock count after we read it
310 : : * above, then that reader's next load of ->srcu_idx is guaranteed
311 : : * to get the new value, which will cause it to operate on the
312 : : * other bank of counters, where it cannot contribute to the
313 : : * overflow of these counters. This means that there is a maximum
314 : : * of 2*NR_CPUS increments, which cannot overflow given current
315 : : * systems, especially not on 64-bit systems.
316 : : *
317 : : * OK, how about nesting? This does impose a limit on nesting
318 : : * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
319 : : * especially on 64-bit systems.
320 : : */
321 : 1542 : return srcu_readers_lock_idx(ssp, idx) == unlocks;
322 : : }
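
The unlock-then-lock summation order above is the load-bearing detail. A freestanding toy sketch of the same check, written as ordinary userspace C (every name here is illustrative and nothing below comes from this file), shows the shape of the comparison:

	#include <stdbool.h>
	#include <stdio.h>

	#define TOY_NR_CPUS 4

	/* Two banks of per-CPU counters, indexed like ->srcu_idx & 1. */
	static unsigned long toy_lock_count[2][TOY_NR_CPUS];
	static unsigned long toy_unlock_count[2][TOY_NR_CPUS];

	/* Sum unlocks first, fence, then sum locks, mirroring smp_mb() "A". */
	static bool toy_readers_gone(int idx)
	{
		unsigned long locks = 0, unlocks = 0;
		int cpu;

		for (cpu = 0; cpu < TOY_NR_CPUS; cpu++)
			unlocks += toy_unlock_count[idx][cpu];
		__sync_synchronize();		/* Stand-in for smp_mb(). */
		for (cpu = 0; cpu < TOY_NR_CPUS; cpu++)
			locks += toy_lock_count[idx][cpu];
		return locks == unlocks;	/* Equal sums: no pre-existing readers remain. */
	}

	int main(void)
	{
		toy_lock_count[0][1] = 2;	/* CPU 1 entered twice on bank 0 ... */
		toy_unlock_count[0][3] = 2;	/* ... and the matching exits landed on CPU 3. */
		printf("bank 0 drained: %d\n", toy_readers_gone(0));	/* Prints 1: sums match. */
		toy_lock_count[0][2] = 1;	/* A reader is still inside bank 0. */
		printf("bank 0 drained: %d\n", toy_readers_gone(0));	/* Prints 0 now. */
		return 0;
	}
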
323 : :
324 : : /**
     325                 :            :  * srcu_readers_active - returns true if there are readers, and false
326 : : * otherwise
327 : : * @ssp: which srcu_struct to count active readers (holding srcu_read_lock).
328 : : *
329 : : * Note that this is not an atomic primitive, and can therefore suffer
330 : : * severe errors when invoked on an active srcu_struct. That said, it
331 : : * can be useful as an error check at cleanup time.
332 : : */
333 : : static bool srcu_readers_active(struct srcu_struct *ssp)
334 : : {
335 : : int cpu;
336 : : unsigned long sum = 0;
337 : :
338 : : for_each_possible_cpu(cpu) {
339 : : struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
340 : :
341 : : sum += READ_ONCE(cpuc->srcu_lock_count[0]);
342 : : sum += READ_ONCE(cpuc->srcu_lock_count[1]);
343 : : sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
344 : : sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
345 : : }
346 : : return sum;
347 : : }
348 : :
349 : : #define SRCU_INTERVAL 1
350 : :
351 : : /*
352 : : * Return grace-period delay, zero if there are expedited grace
353 : : * periods pending, SRCU_INTERVAL otherwise.
354 : : */
355 : 2263 : static unsigned long srcu_get_delay(struct srcu_struct *ssp)
356 : : {
357 : 2263 : if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
358 : : READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
359 : 1408 : return 0;
360 : : return SRCU_INTERVAL;
361 : : }
362 : :
363 : : /**
364 : : * cleanup_srcu_struct - deconstruct a sleep-RCU structure
365 : : * @ssp: structure to clean up.
366 : : *
367 : : * Must invoke this after you are finished using a given srcu_struct that
368 : : * was initialized via init_srcu_struct(), else you leak memory.
369 : : */
370 : 0 : void cleanup_srcu_struct(struct srcu_struct *ssp)
371 : : {
372 : 0 : int cpu;
373 : :
374 [ # # # # : 0 : if (WARN_ON(!srcu_get_delay(ssp)))
# # ]
375 : : return; /* Just leak it! */
376 [ # # # # ]: 0 : if (WARN_ON(srcu_readers_active(ssp)))
377 : : return; /* Just leak it! */
378 : 0 : flush_delayed_work(&ssp->work);
379 [ # # ]: 0 : for_each_possible_cpu(cpu) {
380 : 0 : struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
381 : :
382 : 0 : del_timer_sync(&sdp->delay_work);
383 : 0 : flush_work(&sdp->work);
384 [ # # # # ]: 0 : if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
385 : : return; /* Forgot srcu_barrier(), so just leak it! */
386 : : }
387 [ # # # # ]: 0 : if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
388 [ # # # # ]: 0 : WARN_ON(srcu_readers_active(ssp))) {
389 : 0 : pr_info("%s: Active srcu_struct %p state: %d\n",
390 : : __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)));
391 : 0 : return; /* Caller forgot to stop doing call_srcu()? */
392 : : }
393 : 0 : free_percpu(ssp->sda);
394 : 0 : ssp->sda = NULL;
395 : : }
396 : : EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
397 : :
398 : : /*
399 : : * Counts the new reader in the appropriate per-CPU element of the
400 : : * srcu_struct.
401 : : * Returns an index that must be passed to the matching srcu_read_unlock().
402 : : */
403 : 136579 : int __srcu_read_lock(struct srcu_struct *ssp)
404 : : {
405 : 136579 : int idx;
406 : :
407 : 136579 : idx = READ_ONCE(ssp->srcu_idx) & 0x1;
408 : 136579 : this_cpu_inc(ssp->sda->srcu_lock_count[idx]);
409 : 136579 : smp_mb(); /* B */ /* Avoid leaking the critical section. */
410 : 771 : return idx;
411 : : }
412 : : EXPORT_SYMBOL_GPL(__srcu_read_lock);
413 : :
414 : : /*
415 : : * Removes the count for the old reader from the appropriate per-CPU
416 : : * element of the srcu_struct. Note that this may well be a different
417 : : * CPU than that which was incremented by the corresponding srcu_read_lock().
418 : : */
419 : 136579 : void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
420 : : {
421 : 136579 : smp_mb(); /* C */ /* Avoid leaking the critical section. */
422 : 136579 : this_cpu_inc(ssp->sda->srcu_unlock_count[idx]);
423 : 135808 : }
424 : : EXPORT_SYMBOL_GPL(__srcu_read_unlock);
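
__srcu_read_lock() and __srcu_read_unlock() are normally reached through the srcu_read_lock()/srcu_read_unlock() wrappers. A minimal reader-side sketch, assuming a hypothetical global domain and a hypothetical SRCU-protected pointer (none of these names come from this file):

	#include <linux/rcupdate.h>
	#include <linux/srcu.h>

	struct foo { int val; };			/* Illustrative payload. */

	DEFINE_STATIC_SRCU(foo_srcu);			/* Illustrative domain. */
	static struct foo __rcu *foo_ptr;		/* Illustrative protected pointer. */

	static int foo_read_val(void)
	{
		struct foo *fp;
		int idx, val = -1;

		idx = srcu_read_lock(&foo_srcu);		/* SRCU readers may block inside the section. */
		fp = srcu_dereference(foo_ptr, &foo_srcu);	/* Checked against this domain by lockdep. */
		if (fp)
			val = fp->val;
		srcu_read_unlock(&foo_srcu, idx);		/* Must be passed srcu_read_lock()'s index. */
		return val;
	}
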
425 : :
426 : : /*
427 : : * We use an adaptive strategy for synchronize_srcu() and especially for
428 : : * synchronize_srcu_expedited(). We spin for a fixed time period
429 : : * (defined below) to allow SRCU readers to exit their read-side critical
430 : : * sections. If there are still some readers after a few microseconds,
431 : : * we repeatedly block for 1-millisecond time periods.
432 : : */
433 : : #define SRCU_RETRY_CHECK_DELAY 5
434 : :
435 : : /*
436 : : * Start an SRCU grace period.
437 : : */
438 : 771 : static void srcu_gp_start(struct srcu_struct *ssp)
439 : : {
440 : 771 : struct srcu_data *sdp = this_cpu_ptr(ssp->sda);
441 : 771 : int state;
442 : :
443 : 771 : lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
444 [ - + ]: 771 : WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
445 : 771 : spin_lock_rcu_node(sdp); /* Interrupts already disabled. */
446 : 771 : rcu_segcblist_advance(&sdp->srcu_cblist,
447 : : rcu_seq_current(&ssp->srcu_gp_seq));
448 : 771 : (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
449 : : rcu_seq_snap(&ssp->srcu_gp_seq));
450 : 771 : spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
451 : 771 : smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
452 : 771 : rcu_seq_start(&ssp->srcu_gp_seq);
453 [ - + ]: 771 : state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
454 [ - + ]: 771 : WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
455 : 771 : }
456 : :
457 : :
458 : 65 : static void srcu_delay_timer(struct timer_list *t)
459 : : {
460 : 65 : struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
461 : :
462 : 65 : queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
463 : 65 : }
464 : :
465 : 771 : static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
466 : : unsigned long delay)
467 : : {
468 [ + + ]: 771 : if (!delay) {
469 : 704 : queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
470 : 704 : return;
471 : : }
472 : :
473 : 67 : timer_reduce(&sdp->delay_work, jiffies + delay);
474 : : }
475 : :
476 : : /*
477 : : * Schedule callback invocation for the specified srcu_data structure,
478 : : * if possible, on the corresponding CPU.
479 : : */
480 : 0 : static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
481 : : {
482 : 0 : srcu_queue_delayed_work_on(sdp, delay);
483 : 0 : }
484 : :
485 : : /*
486 : : * Schedule callback invocation for all srcu_data structures associated
487 : : * with the specified srcu_node structure that have callbacks for the
488 : : * just-completed grace period, the one corresponding to idx. If possible,
489 : : * schedule this invocation on the corresponding CPUs.
490 : : */
491 : : static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp,
492 : : unsigned long mask, unsigned long delay)
493 : : {
494 : : int cpu;
495 : :
496 : : for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
497 : : if (!(mask & (1 << (cpu - snp->grplo))))
498 : : continue;
499 : : srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay);
500 : : }
501 : : }
502 : :
503 : : /*
504 : : * Note the end of an SRCU grace period. Initiates callback invocation
505 : : * and starts a new grace period if needed.
506 : : *
507 : : * The ->srcu_cb_mutex acquisition does not protect any data, but
508 : : * instead prevents more than one grace period from starting while we
509 : : * are initiating callback invocation. This allows the ->srcu_have_cbs[]
510 : : * array to have a finite number of elements.
511 : : */
512 : 771 : static void srcu_gp_end(struct srcu_struct *ssp)
513 : : {
514 : 771 : unsigned long cbdelay;
515 : 771 : bool cbs;
516 : 771 : bool last_lvl;
517 : 771 : int cpu;
518 : 771 : unsigned long flags;
519 : 771 : unsigned long gpseq;
520 : 771 : int idx;
521 : 771 : unsigned long mask;
522 : 771 : struct srcu_data *sdp;
523 : 771 : struct srcu_node *snp;
524 : :
525 : : /* Prevent more than one additional grace period. */
526 : 771 : mutex_lock(&ssp->srcu_cb_mutex);
527 : :
528 : : /* End the current grace period. */
529 : 771 : spin_lock_irq_rcu_node(ssp);
530 [ - + ]: 771 : idx = rcu_seq_state(ssp->srcu_gp_seq);
531 [ - + ]: 771 : WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
532 [ + + ]: 771 : cbdelay = srcu_get_delay(ssp);
533 : 771 : WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
534 : 771 : rcu_seq_end(&ssp->srcu_gp_seq);
535 [ + + ]: 771 : gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
536 [ + + ]: 771 : if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
537 : 67 : ssp->srcu_gp_seq_needed_exp = gpseq;
538 : 771 : spin_unlock_irq_rcu_node(ssp);
539 : 771 : mutex_unlock(&ssp->srcu_gp_mutex);
540 : : /* A new grace period can start at this point. But only one. */
541 : :
542 : : /* Initiate callback invocation as needed. */
543 : 771 : idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
544 [ + + ]: 1542 : srcu_for_each_node_breadth_first(ssp, snp) {
545 : 771 : spin_lock_irq_rcu_node(snp);
546 : 771 : cbs = false;
547 : 771 : last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
548 [ + - ]: 771 : if (last_lvl)
549 : 771 : cbs = snp->srcu_have_cbs[idx] == gpseq;
550 : 771 : snp->srcu_have_cbs[idx] = gpseq;
551 [ + + ]: 771 : rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
552 [ + + ]: 771 : if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
553 : 67 : snp->srcu_gp_seq_needed_exp = gpseq;
554 : 771 : mask = snp->srcu_data_have_cbs[idx];
555 : 771 : snp->srcu_data_have_cbs[idx] = 0;
556 : 771 : spin_unlock_irq_rcu_node(snp);
557 [ + - ]: 771 : if (cbs)
558 : 771 : srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
559 : :
560 : : /* Occasionally prevent srcu_data counter wrap. */
561 [ - + - - ]: 771 : if (!(gpseq & counter_wrap_check) && last_lvl)
562 [ # # ]: 0 : for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
563 : 0 : sdp = per_cpu_ptr(ssp->sda, cpu);
564 : 0 : spin_lock_irqsave_rcu_node(sdp, flags);
565 [ # # ]: 0 : if (ULONG_CMP_GE(gpseq,
566 : : sdp->srcu_gp_seq_needed + 100))
567 : 0 : sdp->srcu_gp_seq_needed = gpseq;
568 [ # # ]: 0 : if (ULONG_CMP_GE(gpseq,
569 : : sdp->srcu_gp_seq_needed_exp + 100))
570 : 0 : sdp->srcu_gp_seq_needed_exp = gpseq;
571 : 0 : spin_unlock_irqrestore_rcu_node(sdp, flags);
572 : : }
573 : : }
574 : :
575 : : /* Callback initiation done, allow grace periods after next. */
576 : 771 : mutex_unlock(&ssp->srcu_cb_mutex);
577 : :
578 : : /* Start a new grace period if needed. */
579 : 771 : spin_lock_irq_rcu_node(ssp);
580 [ + - ]: 771 : gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
581 [ + - ]: 771 : if (!rcu_seq_state(gpseq) &&
582 [ + + ]: 771 : ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) {
583 : 50 : srcu_gp_start(ssp);
584 : 50 : spin_unlock_irq_rcu_node(ssp);
585 : 50 : srcu_reschedule(ssp, 0);
586 : : } else {
587 : 721 : spin_unlock_irq_rcu_node(ssp);
588 : : }
589 : 771 : }
590 : :
591 : : /*
592 : : * Funnel-locking scheme to scalably mediate many concurrent expedited
593 : : * grace-period requests. This function is invoked for the first known
594 : : * expedited request for a grace period that has already been requested,
595 : : * but without expediting. To start a completely new grace period,
596 : : * whether expedited or not, use srcu_funnel_gp_start() instead.
597 : : */
598 : 0 : static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp,
599 : : unsigned long s)
600 : : {
601 : 0 : unsigned long flags;
602 : :
603 [ # # ]: 0 : for (; snp != NULL; snp = snp->srcu_parent) {
604 [ # # # # ]: 0 : if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
605 [ # # ]: 0 : ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
606 : : return;
607 : 0 : spin_lock_irqsave_rcu_node(snp, flags);
608 [ # # ]: 0 : if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
609 : 0 : spin_unlock_irqrestore_rcu_node(snp, flags);
610 : 0 : return;
611 : : }
612 : 0 : WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
613 : 0 : spin_unlock_irqrestore_rcu_node(snp, flags);
614 : : }
615 : 0 : spin_lock_irqsave_rcu_node(ssp, flags);
616 [ # # ]: 0 : if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
617 : 0 : ssp->srcu_gp_seq_needed_exp = s;
618 : 0 : spin_unlock_irqrestore_rcu_node(ssp, flags);
619 : : }
620 : :
621 : : /*
622 : : * Funnel-locking scheme to scalably mediate many concurrent grace-period
623 : : * requests. The winner has to do the work of actually starting grace
624 : : * period s. Losers must either ensure that their desired grace-period
625 : : * number is recorded on at least their leaf srcu_node structure, or they
626 : : * must take steps to invoke their own callbacks.
627 : : *
628 : : * Note that this function also does the work of srcu_funnel_exp_start(),
629 : : * in some cases by directly invoking it.
630 : : */
631 : 771 : static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
632 : : unsigned long s, bool do_norm)
633 : : {
634 : 771 : unsigned long flags;
635 : 771 : int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
636 : 771 : struct srcu_node *snp = sdp->mynode;
637 : 771 : unsigned long snp_seq;
638 : :
639 : : /* Each pass through the loop does one level of the srcu_node tree. */
640 [ + + ]: 1542 : for (; snp != NULL; snp = snp->srcu_parent) {
641 [ - + - - ]: 771 : if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode)
642 : : return; /* GP already done and CBs recorded. */
643 : 771 : spin_lock_irqsave_rcu_node(snp, flags);
644 [ - + ]: 771 : if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
645 : 0 : snp_seq = snp->srcu_have_cbs[idx];
646 [ # # # # ]: 0 : if (snp == sdp->mynode && snp_seq == s)
647 : 0 : snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
648 : 0 : spin_unlock_irqrestore_rcu_node(snp, flags);
649 [ # # # # ]: 0 : if (snp == sdp->mynode && snp_seq != s) {
650 : 0 : srcu_schedule_cbs_sdp(sdp, do_norm
651 : : ? SRCU_INTERVAL
652 : : : 0);
653 : 0 : return;
654 : : }
655 [ # # ]: 0 : if (!do_norm)
656 : 0 : srcu_funnel_exp_start(ssp, snp, s);
657 : 0 : return;
658 : : }
659 : 771 : snp->srcu_have_cbs[idx] = s;
660 [ + - ]: 771 : if (snp == sdp->mynode)
661 : 771 : snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
662 [ + + + - ]: 771 : if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
663 : 704 : snp->srcu_gp_seq_needed_exp = s;
664 : 771 : spin_unlock_irqrestore_rcu_node(snp, flags);
665 : : }
666 : :
667 : : /* Top of tree, must ensure the grace period will be started. */
668 : 771 : spin_lock_irqsave_rcu_node(ssp, flags);
669 [ + - ]: 771 : if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
670 : : /*
671 : : * Record need for grace period s. Pair with load
672 : : * acquire setting up for initialization.
673 : : */
674 : 771 : smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/
675 : : }
676 [ + + + - ]: 771 : if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
677 : 704 : ssp->srcu_gp_seq_needed_exp = s;
678 : :
679 : : /* If grace period not already done and none in progress, start it. */
680 [ + - + + ]: 771 : if (!rcu_seq_done(&ssp->srcu_gp_seq, s) &&
681 [ + + ]: 771 : rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
682 [ - + ]: 721 : WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
683 : 721 : srcu_gp_start(ssp);
684 [ + - ]: 721 : if (likely(srcu_init_done))
685 [ + + ]: 1425 : queue_delayed_work(rcu_gp_wq, &ssp->work,
686 : : srcu_get_delay(ssp));
687 [ # # ]: 0 : else if (list_empty(&ssp->work.work.entry))
688 : 0 : list_add(&ssp->work.work.entry, &srcu_boot_list);
689 : : }
690 : 771 : spin_unlock_irqrestore_rcu_node(ssp, flags);
691 : : }
692 : :
693 : : /*
694 : : * Wait until all readers counted by array index idx complete, but
695 : : * loop an additional time if there is an expedited grace period pending.
696 : : * The caller must ensure that ->srcu_idx is not changed while checking.
697 : : */
698 : 1542 : static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
699 : : {
700 : 1542 : for (;;) {
701 [ - + ]: 1542 : if (srcu_readers_active_idx_check(ssp, idx))
702 : : return true;
703 [ # # # # ]: 0 : if (--trycount + !srcu_get_delay(ssp) <= 0)
704 : : return false;
705 : 0 : udelay(SRCU_RETRY_CHECK_DELAY);
706 : : }
707 : : }
708 : :
709 : : /*
710 : : * Increment the ->srcu_idx counter so that future SRCU readers will
711 : : * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
712 : : * us to wait for pre-existing readers in a starvation-free manner.
713 : : */
714 : 771 : static void srcu_flip(struct srcu_struct *ssp)
715 : : {
716 : : /*
717 : : * Ensure that if this updater saw a given reader's increment
718 : : * from __srcu_read_lock(), that reader was using an old value
719 : : * of ->srcu_idx. Also ensure that if a given reader sees the
720 : : * new value of ->srcu_idx, this updater's earlier scans cannot
721 : : * have seen that reader's increments (which is OK, because this
722 : : * grace period need not wait on that reader).
723 : : */
724 : 771 : smp_mb(); /* E */ /* Pairs with B and C. */
725 : :
726 : 771 : WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
727 : :
728 : : /*
729 : : * Ensure that if the updater misses an __srcu_read_unlock()
730 : : * increment, that task's next __srcu_read_lock() will see the
731 : : * above counter update. Note that both this memory barrier
732 : : * and the one in srcu_readers_active_idx_check() provide the
733 : : * guarantee for __srcu_read_lock().
734 : : */
735 : 771 : smp_mb(); /* D */ /* Pairs with C. */
736 : : }
737 : :
738 : : /*
739 : : * If SRCU is likely idle, return true, otherwise return false.
740 : : *
     741                 :            :  * Note that it is OK for several current from-idle requests for a new
     742                 :            :  * grace period to specify expediting because they will all end
743 : : * up requesting the same grace period anyhow. So no loss.
744 : : *
745 : : * Note also that if any CPU (including the current one) is still invoking
746 : : * callbacks, this function will nevertheless say "idle". This is not
747 : : * ideal, but the overhead of checking all CPUs' callback lists is even
748 : : * less ideal, especially on large systems. Furthermore, the wakeup
749 : : * can happen before the callback is fully removed, so we have no choice
750 : : * but to accept this type of error.
751 : : *
752 : : * This function is also subject to counter-wrap errors, but let's face
753 : : * it, if this function was preempted for enough time for the counters
754 : : * to wrap, it really doesn't matter whether or not we expedite the grace
755 : : * period. The extra overhead of a needlessly expedited grace period is
     756                 :            :  * negligible when amortized over that time period, and the extra latency
757 : : * of a needlessly non-expedited grace period is similarly negligible.
758 : : */
759 : 771 : static bool srcu_might_be_idle(struct srcu_struct *ssp)
760 : : {
761 : 771 : unsigned long curseq;
762 : 771 : unsigned long flags;
763 : 771 : struct srcu_data *sdp;
764 : 771 : unsigned long t;
765 : 771 : unsigned long tlast;
766 : :
767 : : /* If the local srcu_data structure has callbacks, not idle. */
768 : 771 : local_irq_save(flags);
769 : 771 : sdp = this_cpu_ptr(ssp->sda);
770 [ + + ]: 771 : if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
771 : 51 : local_irq_restore(flags);
772 : 51 : return false; /* Callbacks already present, so not idle. */
773 : : }
774 : 720 : local_irq_restore(flags);
775 : :
776 : : /*
     777                 :            :          * No local callbacks, so probabilistically probe global state.
     778                 :            :          * Exact information would require acquiring locks, which would
     779                 :            :          * kill scalability, hence the probabilistic nature of the probe.
780 : : */
781 : :
782 : : /* First, see if enough time has passed since the last GP. */
783 : 720 : t = ktime_get_mono_fast_ns();
784 [ + - ]: 720 : tlast = READ_ONCE(ssp->srcu_last_gp_end);
785 [ + - ]: 720 : if (exp_holdoff == 0 ||
786 [ + - + + ]: 720 : time_in_range_open(t, tlast, tlast + exp_holdoff))
787 : : return false; /* Too soon after last GP. */
788 : :
789 : : /* Next, check for probable idleness. */
790 : 643 : curseq = rcu_seq_current(&ssp->srcu_gp_seq);
791 : 643 : smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
792 [ + - ]: 643 : if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed)))
793 : : return false; /* Grace period in progress, so not idle. */
794 : 643 : smp_mb(); /* Order ->srcu_gp_seq with prior access. */
795 [ - + ]: 643 : if (curseq != rcu_seq_current(&ssp->srcu_gp_seq))
796 : 0 : return false; /* GP # changed, so not idle. */
797 : : return true; /* With reasonable probability, idle! */
798 : : }
799 : :
800 : : /*
801 : : * SRCU callback function to leak a callback.
802 : : */
803 : : static void srcu_leak_callback(struct rcu_head *rhp)
804 : : {
805 : : }
806 : :
807 : : /*
808 : : * Enqueue an SRCU callback on the srcu_data structure associated with
809 : : * the current CPU and the specified srcu_struct structure, initiating
810 : : * grace-period processing if it is not already running.
811 : : *
812 : : * Note that all CPUs must agree that the grace period extended beyond
     813                 :            :  * all pre-existing SRCU read-side critical sections.  On systems with
814 : : * more than one CPU, this means that when "func()" is invoked, each CPU
815 : : * is guaranteed to have executed a full memory barrier since the end of
816 : : * its last corresponding SRCU read-side critical section whose beginning
817 : : * preceded the call to call_srcu(). It also means that each CPU executing
818 : : * an SRCU read-side critical section that continues beyond the start of
819 : : * "func()" must have executed a memory barrier after the call_srcu()
820 : : * but before the beginning of that SRCU read-side critical section.
821 : : * Note that these guarantees include CPUs that are offline, idle, or
822 : : * executing in user mode, as well as CPUs that are executing in the kernel.
823 : : *
824 : : * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
825 : : * resulting SRCU callback function "func()", then both CPU A and CPU
826 : : * B are guaranteed to execute a full memory barrier during the time
827 : : * interval between the call to call_srcu() and the invocation of "func()".
828 : : * This guarantee applies even if CPU A and CPU B are the same CPU (but
829 : : * again only if the system has more than one CPU).
830 : : *
831 : : * Of course, these guarantees apply only for invocations of call_srcu(),
832 : : * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
833 : : * srcu_struct structure.
834 : : */
835 : 771 : static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
836 : : rcu_callback_t func, bool do_norm)
837 : : {
838 : 771 : unsigned long flags;
839 : 771 : int idx;
840 : 771 : bool needexp = false;
841 : 771 : bool needgp = false;
842 : 771 : unsigned long s;
843 : 771 : struct srcu_data *sdp;
844 : :
845 : 771 : check_init_srcu_struct(ssp);
846 : 771 : if (debug_rcu_head_queue(rhp)) {
847 : : /* Probable double call_srcu(), so leak the callback. */
848 : : WRITE_ONCE(rhp->func, srcu_leak_callback);
849 : : WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
850 : : return;
851 : : }
852 : 771 : rhp->func = func;
853 : 771 : idx = srcu_read_lock(ssp);
854 : 771 : local_irq_save(flags);
855 : 771 : sdp = this_cpu_ptr(ssp->sda);
856 : 771 : spin_lock_rcu_node(sdp);
857 : 771 : rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
858 : 771 : rcu_segcblist_advance(&sdp->srcu_cblist,
859 : : rcu_seq_current(&ssp->srcu_gp_seq));
860 : 771 : s = rcu_seq_snap(&ssp->srcu_gp_seq);
861 : 771 : (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
862 [ + - ]: 771 : if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
863 : 771 : sdp->srcu_gp_seq_needed = s;
864 : 771 : needgp = true;
865 : : }
866 [ + + + - ]: 771 : if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
867 : 704 : sdp->srcu_gp_seq_needed_exp = s;
868 : 704 : needexp = true;
869 : : }
870 : 771 : spin_unlock_irqrestore_rcu_node(sdp, flags);
871 [ + - ]: 771 : if (needgp)
872 : 771 : srcu_funnel_gp_start(ssp, sdp, s, do_norm);
873 [ # # ]: 0 : else if (needexp)
874 : 0 : srcu_funnel_exp_start(ssp, sdp->mynode, s);
875 : 771 : srcu_read_unlock(ssp, idx);
876 : : }
877 : :
878 : : /**
879 : : * call_srcu() - Queue a callback for invocation after an SRCU grace period
     880                 :            :  * @ssp: srcu_struct on which to queue the callback
881 : : * @rhp: structure to be used for queueing the SRCU callback.
882 : : * @func: function to be invoked after the SRCU grace period
883 : : *
884 : : * The callback function will be invoked some time after a full SRCU
885 : : * grace period elapses, in other words after all pre-existing SRCU
886 : : * read-side critical sections have completed. However, the callback
887 : : * function might well execute concurrently with other SRCU read-side
888 : : * critical sections that started after call_srcu() was invoked. SRCU
889 : : * read-side critical sections are delimited by srcu_read_lock() and
890 : : * srcu_read_unlock(), and may be nested.
891 : : *
892 : : * The callback will be invoked from process context, but must nevertheless
893 : : * be fast and must not block.
894 : : */
895 : 0 : void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
896 : : rcu_callback_t func)
897 : : {
898 : 0 : __call_srcu(ssp, rhp, func, true);
899 : 0 : }
900 : : EXPORT_SYMBOL_GPL(call_srcu);
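
A minimal call_srcu() sketch, again with illustrative names only: the rcu_head is embedded in the object so the callback can recover the enclosing structure with container_of() and free it once all pre-existing readers are done.

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/srcu.h>

	DEFINE_STATIC_SRCU(foo_srcu);			/* Illustrative domain. */

	struct foo {
		struct rcu_head rh;			/* Storage for the pending callback. */
		int val;
	};

	static void foo_reclaim(struct rcu_head *rhp)
	{
		struct foo *fp = container_of(rhp, struct foo, rh);

		kfree(fp);				/* Runs after all pre-existing readers finish. */
	}

	static void foo_retire(struct foo *fp)
	{
		/* Caller has already unpublished fp from all reader-visible pointers. */
		call_srcu(&foo_srcu, &fp->rh, foo_reclaim);
	}
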
901 : :
902 : : /*
903 : : * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
904 : : */
905 : 771 : static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm)
906 : : {
907 : 771 : struct rcu_synchronize rcu;
908 : :
909 : 771 : RCU_LOCKDEP_WARN(lock_is_held(&ssp->dep_map) ||
910 : : lock_is_held(&rcu_bh_lock_map) ||
911 : : lock_is_held(&rcu_lock_map) ||
912 : : lock_is_held(&rcu_sched_lock_map),
913 : : "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
914 : :
915 [ - + ]: 771 : if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
916 : 0 : return;
917 : 771 : might_sleep();
918 : 771 : check_init_srcu_struct(ssp);
919 : 771 : init_completion(&rcu.completion);
920 : 771 : init_rcu_head_on_stack(&rcu.head);
921 : 771 : __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm);
922 : 771 : wait_for_completion(&rcu.completion);
923 : 771 : destroy_rcu_head_on_stack(&rcu.head);
924 : :
925 : : /*
926 : : * Make sure that later code is ordered after the SRCU grace
927 : : * period. This pairs with the spin_lock_irq_rcu_node()
928 : : * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
929 : : * because the current CPU might have been totally uninvolved with
930 : : * (and thus unordered against) that grace period.
931 : : */
932 : 771 : smp_mb();
933 : : }
934 : :
935 : : /**
936 : : * synchronize_srcu_expedited - Brute-force SRCU grace period
937 : : * @ssp: srcu_struct with which to synchronize.
938 : : *
939 : : * Wait for an SRCU grace period to elapse, but be more aggressive about
940 : : * spinning rather than blocking when waiting.
941 : : *
942 : : * Note that synchronize_srcu_expedited() has the same deadlock and
943 : : * memory-ordering properties as does synchronize_srcu().
944 : : */
945 : 704 : void synchronize_srcu_expedited(struct srcu_struct *ssp)
946 : : {
947 : 704 : __synchronize_srcu(ssp, rcu_gp_is_normal());
948 : 704 : }
949 : : EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
950 : :
951 : : /**
952 : : * synchronize_srcu - wait for prior SRCU read-side critical-section completion
953 : : * @ssp: srcu_struct with which to synchronize.
954 : : *
     955                 :            :  * Wait for the counts of both indexes to drain to zero.  To avoid
     956                 :            :  * possible starvation of synchronize_srcu(), it first waits for the count
     957                 :            :  * of index=((->srcu_idx & 1) ^ 1) to drain to zero,
     958                 :            :  * and then flips ->srcu_idx and waits for the count of the other index.
959 : : *
960 : : * Can block; must be called from process context.
961 : : *
962 : : * Note that it is illegal to call synchronize_srcu() from the corresponding
963 : : * SRCU read-side critical section; doing so will result in deadlock.
964 : : * However, it is perfectly legal to call synchronize_srcu() on one
965 : : * srcu_struct from some other srcu_struct's read-side critical section,
966 : : * as long as the resulting graph of srcu_structs is acyclic.
967 : : *
968 : : * There are memory-ordering constraints implied by synchronize_srcu().
969 : : * On systems with more than one CPU, when synchronize_srcu() returns,
970 : : * each CPU is guaranteed to have executed a full memory barrier since
971 : : * the end of its last corresponding SRCU read-side critical section
972 : : * whose beginning preceded the call to synchronize_srcu(). In addition,
973 : : * each CPU having an SRCU read-side critical section that extends beyond
974 : : * the return from synchronize_srcu() is guaranteed to have executed a
975 : : * full memory barrier after the beginning of synchronize_srcu() and before
976 : : * the beginning of that SRCU read-side critical section. Note that these
977 : : * guarantees include CPUs that are offline, idle, or executing in user mode,
978 : : * as well as CPUs that are executing in the kernel.
979 : : *
980 : : * Furthermore, if CPU A invoked synchronize_srcu(), which returned
981 : : * to its caller on CPU B, then both CPU A and CPU B are guaranteed
982 : : * to have executed a full memory barrier during the execution of
983 : : * synchronize_srcu(). This guarantee applies even if CPU A and CPU B
984 : : * are the same CPU, but again only if the system has more than one CPU.
985 : : *
986 : : * Of course, these memory-ordering guarantees apply only when
987 : : * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
988 : : * passed the same srcu_struct structure.
989 : : *
990 : : * If SRCU is likely idle, expedite the first request. This semantic
991 : : * was provided by Classic SRCU, and is relied upon by its users, so TREE
992 : : * SRCU must also provide it. Note that detecting idleness is heuristic
993 : : * and subject to both false positives and negatives.
994 : : */
995 : 771 : void synchronize_srcu(struct srcu_struct *ssp)
996 : : {
997 [ + + + + ]: 771 : if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited())
998 : 704 : synchronize_srcu_expedited(ssp);
999 : : else
1000 : 67 : __synchronize_srcu(ssp, true);
1001 : 771 : }
1002 : : EXPORT_SYMBOL_GPL(synchronize_srcu);
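
A minimal update-side sketch of the classic unpublish/wait/free pattern, using the same illustrative names as the earlier sketches (nothing here comes from this file):

	#include <linux/mutex.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/srcu.h>

	struct foo { int val; };			/* Illustrative payload. */

	DEFINE_STATIC_SRCU(foo_srcu);			/* Illustrative domain. */
	static struct foo __rcu *foo_ptr;		/* Illustrative protected pointer. */
	static DEFINE_MUTEX(foo_mutex);			/* Serializes updaters. */

	static void foo_replace(struct foo *new_fp)
	{
		struct foo *old_fp;

		mutex_lock(&foo_mutex);
		old_fp = rcu_dereference_protected(foo_ptr, lockdep_is_held(&foo_mutex));
		rcu_assign_pointer(foo_ptr, new_fp);	/* Publish the replacement. */
		mutex_unlock(&foo_mutex);
		synchronize_srcu(&foo_srcu);		/* Wait out every pre-existing reader. */
		kfree(old_fp);				/* No reader can still reference old_fp. */
	}
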
1003 : :
1004 : : /*
1005 : : * Callback function for srcu_barrier() use.
1006 : : */
1007 : 0 : static void srcu_barrier_cb(struct rcu_head *rhp)
1008 : : {
1009 : 0 : struct srcu_data *sdp;
1010 : 0 : struct srcu_struct *ssp;
1011 : :
1012 : 0 : sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
1013 : 0 : ssp = sdp->ssp;
1014 [ # # ]: 0 : if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
1015 : 0 : complete(&ssp->srcu_barrier_completion);
1016 : 0 : }
1017 : :
1018 : : /**
1019 : : * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
1020 : : * @ssp: srcu_struct on which to wait for in-flight callbacks.
1021 : : */
1022 : 0 : void srcu_barrier(struct srcu_struct *ssp)
1023 : : {
1024 : 0 : int cpu;
1025 : 0 : struct srcu_data *sdp;
1026 : 0 : unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq);
1027 : :
1028 : 0 : check_init_srcu_struct(ssp);
1029 : 0 : mutex_lock(&ssp->srcu_barrier_mutex);
1030 [ # # ]: 0 : if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) {
1031 : 0 : smp_mb(); /* Force ordering following return. */
1032 : 0 : mutex_unlock(&ssp->srcu_barrier_mutex);
1033 : 0 : return; /* Someone else did our work for us. */
1034 : : }
1035 : 0 : rcu_seq_start(&ssp->srcu_barrier_seq);
1036 : 0 : init_completion(&ssp->srcu_barrier_completion);
1037 : :
1038 : : /* Initial count prevents reaching zero until all CBs are posted. */
1039 : 0 : atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
1040 : :
1041 : : /*
1042 : : * Each pass through this loop enqueues a callback, but only
1043 : : * on CPUs already having callbacks enqueued. Note that if
    1044                 :            :          * a CPU already has callbacks enqueued, it must have already
1045 : : * registered the need for a future grace period, so all we
1046 : : * need do is enqueue a callback that will use the same
1047 : : * grace period as the last callback already in the queue.
1048 : : */
1049 [ # # ]: 0 : for_each_possible_cpu(cpu) {
1050 : 0 : sdp = per_cpu_ptr(ssp->sda, cpu);
1051 : 0 : spin_lock_irq_rcu_node(sdp);
1052 : 0 : atomic_inc(&ssp->srcu_barrier_cpu_cnt);
1053 : 0 : sdp->srcu_barrier_head.func = srcu_barrier_cb;
1054 : 0 : debug_rcu_head_queue(&sdp->srcu_barrier_head);
1055 [ # # ]: 0 : if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
1056 : : &sdp->srcu_barrier_head)) {
1057 : 0 : debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
1058 : 0 : atomic_dec(&ssp->srcu_barrier_cpu_cnt);
1059 : : }
1060 : 0 : spin_unlock_irq_rcu_node(sdp);
1061 : : }
1062 : :
1063 : : /* Remove the initial count, at which point reaching zero can happen. */
1064 [ # # ]: 0 : if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
1065 : 0 : complete(&ssp->srcu_barrier_completion);
1066 : 0 : wait_for_completion(&ssp->srcu_barrier_completion);
1067 : :
1068 : 0 : rcu_seq_end(&ssp->srcu_barrier_seq);
1069 : 0 : mutex_unlock(&ssp->srcu_barrier_mutex);
1070 : : }
1071 : : EXPORT_SYMBOL_GPL(srcu_barrier);
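
A sketch of the usual teardown ordering for a domain that has used call_srcu(); the quiescing step and all names are hypothetical, and the domain is assumed to have been set up with init_srcu_struct():

	#include <linux/srcu.h>

	static struct srcu_struct foo_srcu;	/* Assumed initialized via init_srcu_struct(). */

	static void foo_stop_new_callbacks(void)
	{
		/* Hypothetical step guaranteeing no further call_srcu() on foo_srcu. */
	}

	static void foo_shutdown(void)
	{
		foo_stop_new_callbacks();		/* 1. No new callbacks can be queued. */
		srcu_barrier(&foo_srcu);		/* 2. Every already-queued callback has run. */
		cleanup_srcu_struct(&foo_srcu);		/* 3. Only now is it safe to tear down the domain. */
	}
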
1072 : :
1073 : : /**
1074 : : * srcu_batches_completed - return batches completed.
1075 : : * @ssp: srcu_struct on which to report batch completion.
1076 : : *
1077 : : * Report the number of batches, correlated with, but not necessarily
1078 : : * precisely the same as, the number of grace periods that have elapsed.
1079 : : */
1080 : 0 : unsigned long srcu_batches_completed(struct srcu_struct *ssp)
1081 : : {
1082 : 0 : return ssp->srcu_idx;
1083 : : }
1084 : : EXPORT_SYMBOL_GPL(srcu_batches_completed);
1085 : :
1086 : : /*
1087 : : * Core SRCU state machine. Push state bits of ->srcu_gp_seq
1088 : : * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
1089 : : * completed in that state.
1090 : : */
1091 : 771 : static void srcu_advance_state(struct srcu_struct *ssp)
1092 : : {
1093 : 771 : int idx;
1094 : :
1095 : 771 : mutex_lock(&ssp->srcu_gp_mutex);
1096 : :
1097 : : /*
1098 : : * Because readers might be delayed for an extended period after
1099 : : * fetching ->srcu_idx for their index, at any point in time there
1100 : : * might well be readers using both idx=0 and idx=1. We therefore
1101 : : * need to wait for readers to clear from both index values before
1102 : : * invoking a callback.
1103 : : *
1104 : : * The load-acquire ensures that we see the accesses performed
1105 : : * by the prior grace period.
1106 : : */
1107 [ - + ]: 771 : idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */
1108 [ - + ]: 771 : if (idx == SRCU_STATE_IDLE) {
1109 : 0 : spin_lock_irq_rcu_node(ssp);
1110 [ # # ]: 0 : if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
1111 [ # # ]: 0 : WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq));
1112 : 0 : spin_unlock_irq_rcu_node(ssp);
1113 : 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1114 : 0 : return;
1115 : : }
1116 [ # # ]: 0 : idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
1117 [ # # ]: 0 : if (idx == SRCU_STATE_IDLE)
1118 : 0 : srcu_gp_start(ssp);
1119 : 0 : spin_unlock_irq_rcu_node(ssp);
1120 [ # # ]: 0 : if (idx != SRCU_STATE_IDLE) {
1121 : 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1122 : 0 : return; /* Someone else started the grace period. */
1123 : : }
1124 : : }
1125 : :
1126 [ + - ]: 771 : if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
1127 : 771 : idx = 1 ^ (ssp->srcu_idx & 1);
1128 [ - + ]: 771 : if (!try_check_zero(ssp, idx, 1)) {
1129 : 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1130 : 0 : return; /* readers present, retry later. */
1131 : : }
1132 : 771 : srcu_flip(ssp);
1133 : 771 : rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
1134 : : }
1135 : :
1136 [ + - ]: 771 : if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
1137 : :
1138 : : /*
1139 : : * SRCU read-side critical sections are normally short,
1140 : : * so check at least twice in quick succession after a flip.
1141 : : */
1142 : 771 : idx = 1 ^ (ssp->srcu_idx & 1);
1143 [ - + ]: 771 : if (!try_check_zero(ssp, idx, 2)) {
1144 : 0 : mutex_unlock(&ssp->srcu_gp_mutex);
1145 : 0 : return; /* readers present, retry later. */
1146 : : }
1147 : 771 : srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
1148 : : }
1149 : : }
1150 : :
1151 : : /*
1152 : : * Invoke a limited number of SRCU callbacks that have passed through
1153 : : * their grace period. If there are more to do, SRCU will reschedule
1154 : : * the workqueue. Note that needed memory barriers have been executed
1155 : : * in this task's context by srcu_readers_active_idx_check().
1156 : : */
1157 : 769 : static void srcu_invoke_callbacks(struct work_struct *work)
1158 : : {
1159 : 769 : bool more;
1160 : 769 : struct rcu_cblist ready_cbs;
1161 : 769 : struct rcu_head *rhp;
1162 : 769 : struct srcu_data *sdp;
1163 : 769 : struct srcu_struct *ssp;
1164 : :
1165 : 769 : sdp = container_of(work, struct srcu_data, work);
1166 : :
1167 : 769 : ssp = sdp->ssp;
1168 : 769 : rcu_cblist_init(&ready_cbs);
1169 : 769 : spin_lock_irq_rcu_node(sdp);
1170 : 769 : rcu_segcblist_advance(&sdp->srcu_cblist,
1171 : : rcu_seq_current(&ssp->srcu_gp_seq));
1172 [ + - - + ]: 1538 : if (sdp->srcu_cblist_invoking ||
1173 : 769 : !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1174 : 0 : spin_unlock_irq_rcu_node(sdp);
1175 : 0 : return; /* Someone else on the job or nothing to do. */
1176 : : }
1177 : :
1178 : : /* We are on the job! Extract and invoke ready callbacks. */
1179 : 769 : sdp->srcu_cblist_invoking = true;
1180 : 769 : rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1181 : 769 : spin_unlock_irq_rcu_node(sdp);
1182 : 769 : rhp = rcu_cblist_dequeue(&ready_cbs);
1183 [ + + ]: 2309 : for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
1184 : 771 : debug_rcu_head_unqueue(rhp);
1185 : 771 : local_bh_disable();
1186 : 771 : rhp->func(rhp);
1187 : 771 : local_bh_enable();
1188 : : }
1189 : :
1190 : : /*
1191 : : * Update counts, accelerate new callbacks, and if needed,
1192 : : * schedule another round of callback invocation.
1193 : : */
1194 : 769 : spin_lock_irq_rcu_node(sdp);
1195 : 769 : rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
1196 : 769 : (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
1197 : : rcu_seq_snap(&ssp->srcu_gp_seq));
1198 : 769 : sdp->srcu_cblist_invoking = false;
1199 : 769 : more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1200 : 769 : spin_unlock_irq_rcu_node(sdp);
1201 [ - + ]: 769 : if (more)
1202 : 0 : srcu_schedule_cbs_sdp(sdp, 0);
1203 : : }
1204 : :
1205 : : /*
1206 : : * Finished one round of SRCU grace period. Start another if there are
1207 : : * more SRCU callbacks queued, otherwise put SRCU into not-running state.
1208 : : */
1209 : 821 : static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
1210 : : {
1211 : 821 : bool pushgp = true;
1212 : :
1213 : 821 : spin_lock_irq_rcu_node(ssp);
1214 [ + + ]: 821 : if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
1215 [ - + + - ]: 721 : if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) {
1216 : : /* All requests fulfilled, time to go idle. */
1217 : 721 : pushgp = false;
1218 : : }
1219 [ - + ]: 100 : } else if (!rcu_seq_state(ssp->srcu_gp_seq)) {
1220 : : /* Outstanding request and no GP. Start one. */
1221 : 0 : srcu_gp_start(ssp);
1222 : : }
1223 : 821 : spin_unlock_irq_rcu_node(ssp);
1224 : :
1225 [ + + ]: 821 : if (pushgp)
1226 : 100 : queue_delayed_work(rcu_gp_wq, &ssp->work, delay);
1227 : 821 : }
1228 : :
1229 : : /*
1230 : : * This is the work-queue function that handles SRCU grace periods.
1231 : : */
1232 : 771 : static void process_srcu(struct work_struct *work)
1233 : : {
1234 : 771 : struct srcu_struct *ssp;
1235 : :
1236 : 771 : ssp = container_of(work, struct srcu_struct, work.work);
1237 : :
1238 : 771 : srcu_advance_state(ssp);
1239 [ - + ]: 771 : srcu_reschedule(ssp, srcu_get_delay(ssp));
1240 : 771 : }
1241 : :
1242 : 0 : void srcutorture_get_gp_data(enum rcutorture_type test_type,
1243 : : struct srcu_struct *ssp, int *flags,
1244 : : unsigned long *gp_seq)
1245 : : {
1246 [ # # ]: 0 : if (test_type != SRCU_FLAVOR)
1247 : : return;
1248 : 0 : *flags = 0;
1249 : 0 : *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq);
1250 : : }
1251 : : EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1252 : :
1253 : 0 : void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
1254 : : {
1255 : 0 : int cpu;
1256 : 0 : int idx;
1257 : 0 : unsigned long s0 = 0, s1 = 0;
1258 : :
1259 : 0 : idx = ssp->srcu_idx & 0x1;
1260 : 0 : pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):",
1261 : : tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx);
1262 [ # # ]: 0 : for_each_possible_cpu(cpu) {
1263 : 0 : unsigned long l0, l1;
1264 : 0 : unsigned long u0, u1;
1265 : 0 : long c0, c1;
1266 : 0 : struct srcu_data *sdp;
1267 : :
1268 : 0 : sdp = per_cpu_ptr(ssp->sda, cpu);
1269 : 0 : u0 = sdp->srcu_unlock_count[!idx];
1270 : 0 : u1 = sdp->srcu_unlock_count[idx];
1271 : :
1272 : : /*
1273 : : * Make sure that a lock is always counted if the corresponding
1274 : : * unlock is counted.
1275 : : */
1276 : 0 : smp_rmb();
1277 : :
1278 : 0 : l0 = sdp->srcu_lock_count[!idx];
1279 : 0 : l1 = sdp->srcu_lock_count[idx];
1280 : :
1281 : 0 : c0 = l0 - u0;
1282 : 0 : c1 = l1 - u1;
1283 : 0 : pr_cont(" %d(%ld,%ld %c)",
1284 : : cpu, c0, c1,
1285 : : "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
1286 : 0 : s0 += c0;
1287 : 0 : s1 += c1;
1288 : : }
1289 : 0 : pr_cont(" T(%ld,%ld)\n", s0, s1);
1290 : 0 : }
1291 : : EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
1292 : :
1293 : 21 : static int __init srcu_bootup_announce(void)
1294 : : {
1295 : 21 : pr_info("Hierarchical SRCU implementation.\n");
1296 [ - + ]: 21 : if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
1297 : 0 : pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
1298 : 21 : return 0;
1299 : : }
1300 : : early_initcall(srcu_bootup_announce);
1301 : :
1302 : 21 : void __init srcu_init(void)
1303 : : {
1304 : 21 : struct srcu_struct *ssp;
1305 : :
1306 : 21 : srcu_init_done = true;
1307 [ - + ]: 21 : while (!list_empty(&srcu_boot_list)) {
1308 : 0 : ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
1309 : : work.work.entry);
1310 : 0 : check_init_srcu_struct(ssp);
1311 : 0 : list_del_init(&ssp->work.work.entry);
1312 : 0 : queue_work(rcu_gp_wq, &ssp->work.work);
1313 : : }
1314 : 21 : }
1315 : :
1316 : : #ifdef CONFIG_MODULES
1317 : :
1318 : : /* Initialize any global-scope srcu_struct structures used by this module. */
1319 : : static int srcu_module_coming(struct module *mod)
1320 : : {
1321 : : int i;
1322 : : struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1323 : : int ret;
1324 : :
1325 : : for (i = 0; i < mod->num_srcu_structs; i++) {
1326 : : ret = init_srcu_struct(*(sspp++));
1327 : : if (WARN_ON_ONCE(ret))
1328 : : return ret;
1329 : : }
1330 : : return 0;
1331 : : }
1332 : :
1333 : : /* Clean up any global-scope srcu_struct structures used by this module. */
1334 : 0 : static void srcu_module_going(struct module *mod)
1335 : : {
1336 : 0 : int i;
1337 : 0 : struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1338 : :
1339 [ # # ]: 0 : for (i = 0; i < mod->num_srcu_structs; i++)
1340 : 0 : cleanup_srcu_struct(*(sspp++));
1341 : : }
1342 : :
1343 : : /* Handle one module, either coming or going. */
1344 : 126 : static int srcu_module_notify(struct notifier_block *self,
1345 : : unsigned long val, void *data)
1346 : : {
1347 : 126 : struct module *mod = data;
1348 : 126 : int ret = 0;
1349 : :
1350 [ + - + ]: 126 : switch (val) {
1351 : 63 : case MODULE_STATE_COMING:
1352 : 63 : ret = srcu_module_coming(mod);
1353 : 63 : break;
1354 : 0 : case MODULE_STATE_GOING:
1355 : 0 : srcu_module_going(mod);
1356 : : break;
1357 : : default:
1358 : : break;
1359 : : }
1360 : 126 : return ret;
1361 : : }
1362 : :
1363 : : static struct notifier_block srcu_module_nb = {
1364 : : .notifier_call = srcu_module_notify,
1365 : : .priority = 0,
1366 : : };
1367 : :
1368 : 21 : static __init int init_srcu_module_notifier(void)
1369 : : {
1370 : 21 : int ret;
1371 : :
1372 : 21 : ret = register_module_notifier(&srcu_module_nb);
1373 [ - + ]: 21 : if (ret)
1374 : 0 : pr_warn("Failed to register srcu module notifier\n");
1375 : 21 : return ret;
1376 : : }
1377 : : late_initcall(init_srcu_module_notifier);
1378 : :
1379 : : #endif /* #ifdef CONFIG_MODULES */