// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters. Initially, we
 * don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine; when we go to shutdown,
 * the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative the sum of all the
 * percpu_count vars will be equal to what it would have been if all the gets
 * and puts were done to a single integer, even if some of the percpu integers
 * overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non percpu mode before the initial ref is dropped everything
 * works.
 *
 * Converting to non percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */

#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}
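
/*
 * Illustrative sketch of the wraparound argument above. The helper below is
 * hypothetical (not used anywhere in this file): it only demonstrates that,
 * because unsigned addition is modulo 2^BITS_PER_LONG and modular addition is
 * commutative, per-cpu counters that individually underflow or overflow still
 * sum to the true net reference count. PERCPU_COUNT_BIAS exists for the same
 * reason: the atomic counter starts out biased so it cannot reach 0 until the
 * percpu sum has been folded in and the bias removed.
 */
static unsigned long example_sum_of_wrapped_counters(void)
{
	/* Pretend two CPUs: CPU0 performed 3 gets, CPU1 performed 2 puts. */
	unsigned long cpu0 = 0, cpu1 = 0;

	cpu0 += 3;	/* three gets on CPU0 */
	cpu1 -= 2;	/* two puts on CPU1: wraps to ULONG_MAX - 1 */

	/*
	 * cpu1 has "underflowed", yet the modular sum is still the net
	 * count: (3 + 2^BITS_PER_LONG - 2) mod 2^BITS_PER_LONG == 1.
	 */
	return cpu0 + cpu1;
}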

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref. If @flags is zero, @ref starts in percpu mode with a
 * refcount of 1; analogous to atomic_long_set(ref, 1). See the
 * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    unsigned int flags, gfp_t gfp)
{
	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
			     __alignof__(unsigned long));
	unsigned long start_count = 0;

	ref->percpu_count_ptr = (unsigned long)
		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
	ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;

	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
		ref->allow_reinit = true;
	} else {
		start_count += PERCPU_COUNT_BIAS;
	}

	if (flags & PERCPU_REF_INIT_DEAD)
		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	else
		start_count++;

	atomic_long_set(&ref->count, start_count);

	ref->release = release;
	ref->confirm_switch = NULL;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref. The caller is responsible for ensuring that
 * @ref is no longer in active use. The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		/* non-NULL confirm_switch indicates switching in progress */
		WARN_ON_ONCE(ref->confirm_switch);
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
	}
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
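
/*
 * Illustrative sketch of the usual embedding pattern described above. The
 * "foo" object, foo_release(), foo_setup() and foo_teardown() are
 * hypothetical examples, not part of this file: the object owns the initial
 * reference taken by percpu_ref_init(), percpu_ref_exit() handles the init
 * failure path, and percpu_ref_kill() drops the initial reference.
 */
struct foo {
	struct percpu_ref ref;
};

static void foo_release(struct percpu_ref *ref)
{
	struct foo *foo = container_of(ref, struct foo, ref);

	pr_debug("foo %p released", foo);	/* real teardown would go here */
}

static int foo_setup(struct foo *foo, bool rest_of_init_ok)
{
	int ret;

	ret = percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
	if (ret)
		return ret;

	if (!rest_of_init_ok) {
		/*
		 * Init failure path: percpu_ref_init() succeeded but the
		 * embedding object could not be fully set up, so undo the
		 * ref directly instead of going through kill/release.
		 */
		percpu_ref_exit(&foo->ref);
		return -ENOMEM;
	}
	return 0;
}

static void foo_teardown(struct foo *foo)
{
	/* Drop the initial ref; foo_release() runs once all users are gone. */
	percpu_ref_kill(&foo->ref);
}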

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);

	ref->confirm_switch(ref);
	ref->confirm_switch = NULL;
	wake_up_all(&percpu_ref_switch_waitq);

	if (!ref->allow_reinit)
		percpu_ref_exit(ref);

	/* drop ref from percpu_ref_switch_to_atomic() */
	percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %ld percpu %ld",
		 atomic_long_read(&ref->count), (long)count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
		  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
		  ref->release, atomic_long_read(&ref->count));

	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
	percpu_ref_call_confirm_rcu(rcu);
}

static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress. Use noop one if unspecified.
	 */
	ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);

	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
		return;

	if (WARN_ON_ONCE(!ref->allow_reinit))
		return;

	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);

	/*
	 * Restore per-cpu operation. smp_store_release() is paired
	 * with READ_ONCE() in __ref_is_percpu() and guarantees that the
	 * zeroing is visible to all percpu accesses which can see the
	 * following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}
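
/*
 * Illustrative sketch of the tagged-pointer scheme used by
 * ref->percpu_count_ptr. The two helpers are hypothetical, for explanation
 * only: percpu_ref_init() aligns the percpu allocation to at least
 * 1 << __PERCPU_REF_FLAG_BITS bytes, so the low bits of the address are
 * always zero and can carry the __PERCPU_REF_ATOMIC and __PERCPU_REF_DEAD
 * mode flags that the switch functions above set and clear;
 * percpu_count_ptr() recovers the real pointer by masking them off.
 */
static unsigned long example_tag_flags(unsigned long __percpu *p,
				       unsigned long flags)
{
	/* @flags must fit below the allocation's alignment */
	return (unsigned long)p | flags;
}

static unsigned long __percpu *example_untag_flags(unsigned long tagged)
{
	return (unsigned long __percpu *)(tagged & ~__PERCPU_REF_ATOMIC_DEAD);
}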

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
				     percpu_ref_func_t *confirm_switch)
{
	lockdep_assert_held(&percpu_ref_switch_lock);

	/*
	 * If the previous ATOMIC switching hasn't finished yet, wait for
	 * its completion. If the caller ensures that ATOMIC switching
	 * isn't in progress, this function can be called from any context.
	 */
	wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
			    percpu_ref_switch_lock);

	if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
		__percpu_ref_switch_to_atomic(ref, confirm_switch);
	else
		__percpu_ref_switch_to_percpu(ref);
}

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode. All its percpu counts will
 * be collected to the main atomic counter. On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked. This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations. Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = true;
	__percpu_ref_switch_mode(ref, confirm_switch);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete. Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
	percpu_ref_switch_to_atomic(ref, NULL);
	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
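
/*
 * Illustrative call pattern for the two mode-switching entry points above.
 * The caller and its reason for forcing atomic mode are hypothetical; as the
 * kernel-doc above notes, the caller must also guarantee that nothing else
 * switches @ref's mode concurrently.
 */
static void example_run_in_atomic_mode(struct percpu_ref *ref)
{
	/* Blocks until every CPU uses the single atomic counter. */
	percpu_ref_switch_to_atomic_sync(ref);

	/* ... do work that relies on gets/puts hitting ref->count ... */

	/* Clear the sticky atomic state and resume percpu operation. */
	percpu_ref_switch_to_percpu(ref);
}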

/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode. This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations. This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = false;
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs, at which point all
 * further invocations of percpu_ref_tryget_live() will fail. See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * There are no implied RCU grace periods between kill and release.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
		  "%s called more than once on %ps!", __func__, ref->release);

	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_mode(ref, confirm_kill);
	percpu_ref_put(ref);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	percpu_ref_resurrect(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);
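
/*
 * Illustrative kill/reinit cycle, e.g. to quiesce an object and bring it
 * back. The "foo_gate" object and its helpers are hypothetical and assume
 * <linux/completion.h>; the important points from the kernel-doc above are
 * that the ref must keep its percpu counters across the kill (hence
 * PERCPU_REF_ALLOW_REINIT) and must have reached 0 before percpu_ref_reinit().
 */
struct foo_gate {
	struct percpu_ref ref;
	struct completion drained;
};

static void foo_gate_release(struct percpu_ref *ref)
{
	struct foo_gate *g = container_of(ref, struct foo_gate, ref);

	complete(&g->drained);
}

static int foo_gate_init(struct foo_gate *g)
{
	init_completion(&g->drained);
	/* ALLOW_REINIT keeps the percpu counters around across kill() */
	return percpu_ref_init(&g->ref, foo_gate_release,
			       PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
}

static void foo_gate_freeze_thaw(struct foo_gate *g)
{
	percpu_ref_kill(&g->ref);		/* drop the initial ref */
	wait_for_completion(&g->drained);	/* ref has reached zero */

	/* ... @g is quiescent here, no references remain ... */

	reinit_completion(&g->drained);
	percpu_ref_reinit(&g->ref);		/* live again, percpu mode */
}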

/**
 * percpu_ref_resurrect - modify a percpu refcount from dead to live
 * @ref: percpu_ref to resurrect
 *
 * Modify @ref so that it's in the same state as before percpu_ref_kill() was
 * called. @ref must be dead but must not yet have exited.
 *
 * If @ref->release() frees @ref then the caller is responsible for
 * guaranteeing that @ref->release() does not get called while this
 * function is in progress.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_resurrect(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));

	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
	percpu_ref_get(ref);
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
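
/*
 * Illustrative use of a kill confirmation callback. The "foo_ctx" object and
 * its helpers are hypothetical and assume <linux/completion.h>: per the
 * percpu_ref_kill_and_confirm() kernel-doc, @confirm_kill fires once every
 * CPU sees the ref as dead, i.e. once percpu_ref_tryget_live() can no longer
 * succeed, which makes it a convenient point to start draining users.
 */
struct foo_ctx {
	struct percpu_ref ref;
	struct completion confirmed;
};

static void foo_ctx_confirm_kill(struct percpu_ref *ref)
{
	struct foo_ctx *ctx = container_of(ref, struct foo_ctx, ref);

	complete(&ctx->confirmed);	/* may not block here */
}

static void foo_ctx_shutdown(struct foo_ctx *ctx)
{
	percpu_ref_kill_and_confirm(&ctx->ref, foo_ctx_confirm_kill);
	wait_for_completion(&ctx->confirmed);

	/* From here on, new percpu_ref_tryget_live() callers always fail. */
}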