// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>

#include "rwsem.h"

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
			const char *name, struct lock_class_key *rwsem_key)
{
	sem->read_count = alloc_percpu(int);
	if (unlikely(!sem->read_count))
		return -ENOMEM;

	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
	rcu_sync_init(&sem->rss);
	__init_rwsem(&sem->rw_sem, name, rwsem_key);
	rcuwait_init(&sem->writer);
	sem->readers_block = 0;
	return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
	/*
	 * XXX: temporary kludge. The error path in alloc_super()
	 * assumes that percpu_free_rwsem() is safe after kzalloc().
	 */
	if (!sem->read_count)
		return;

	rcu_sync_dtor(&sem->rss);
	free_percpu(sem->read_count);
	sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);

int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
	/*
	 * Due to having preemption disabled the decrement happens on
	 * the same CPU as the increment, avoiding the
	 * increment-on-one-CPU-and-decrement-on-another problem.
	 *
	 * If the reader misses the writer's assignment of readers_block, then
	 * the writer is guaranteed to see the reader's increment.
	 *
	 * Conversely, any readers that increment their sem->read_count after
	 * the writer looks are guaranteed to see the readers_block value,
	 * which in turn means that they are guaranteed to immediately
	 * decrement their sem->read_count, so that it doesn't matter that the
	 * writer missed them.
	 */

	smp_mb(); /* A matches D */

	/*
	 * If !readers_block the critical section starts here, matched by the
	 * release in percpu_up_write().
	 */
	if (likely(!smp_load_acquire(&sem->readers_block)))
		return 1;

	/*
	 * Per the above comment; we still have preemption disabled and
	 * will thus decrement on the same CPU as we incremented.
	 */
	__percpu_up_read(sem);

	if (try)
		return 0;

	/*
	 * We either call schedule() in the wait, or we'll fall through
	 * and reschedule on the preempt_enable() in percpu_down_read().
	 */
	preempt_enable_no_resched();

	/*
	 * Avoid lockdep for the down/up_read(); we already have them.
	 */
	__down_read(&sem->rw_sem);
	this_cpu_inc(*sem->read_count);
	__up_read(&sem->rw_sem);

	preempt_disable();
	return 1;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
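/*
 * For context, a rough sketch of the reader fast path that falls back into
 * the slow path above. This approximates the inline percpu_down_read()
 * from include/linux/percpu-rwsem.h and is not part of this file: the
 * reader bumps its per-CPU counter with preemption disabled, and only
 * takes __percpu_down_read() while the rcu_sync state says a writer may
 * be active:
 *
 *	static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 *	{
 *		might_sleep();
 *		preempt_disable();
 *		__this_cpu_inc(*sem->read_count);
 *		if (unlikely(!rcu_sync_is_idle(&sem->rss)))
 *			__percpu_down_read(sem, false); // unconditional smp_mb()
 *		preempt_enable();
 *	}
 */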
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
	smp_mb(); /* B matches C */
	/*
	 * In other words, if they see our decrement (presumably to aggregate
	 * zero, as that is the only time it matters) they will also see our
	 * critical section.
	 */
	__this_cpu_dec(*sem->read_count);

	/* Prod writer to recheck readers_active */
	rcuwait_wake_up(&sem->writer);
}
EXPORT_SYMBOL_GPL(__percpu_up_read);

#define per_cpu_sum(var)						\
({									\
	typeof(var) __sum = 0;						\
	int cpu;							\
	compiletime_assert_atomic_type(__sum);				\
	for_each_possible_cpu(cpu)					\
		__sum += per_cpu(var, cpu);				\
	__sum;								\
})

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero. If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
	if (per_cpu_sum(*sem->read_count) != 0)
		return false;

	/*
	 * If we observed the decrement; ensure we see the entire critical
	 * section.
	 */

	smp_mb(); /* C matches B */

	return true;
}

void percpu_down_write(struct percpu_rw_semaphore *sem)
{
	/* Notify readers to take the slow path. */
	rcu_sync_enter(&sem->rss);

	down_write(&sem->rw_sem);

	/*
	 * Notify new readers to block; up until now, and thus throughout the
	 * longish rcu_sync_enter() above, new readers could still come in.
	 */
	WRITE_ONCE(sem->readers_block, 1);

	smp_mb(); /* D matches A */

	/*
	 * If they don't see our write to readers_block, then we are
	 * guaranteed to see their sem->read_count increment, and therefore
	 * will wait for them.
	 */

	/* Wait for all now active readers to complete. */
	rcuwait_wait_event(&sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
	/*
	 * Signal the writer is done; no fast path yet.
	 *
	 * One reason that we cannot just immediately flip to readers_fast is
	 * that new readers might fail to see the results of this writer's
	 * critical section.
	 *
	 * Therefore we force it through the slow path which guarantees an
	 * acquire and thereby guarantees the critical section's consistency.
	 */
	smp_store_release(&sem->readers_block, 0);

	/*
	 * Release the write lock; this will allow readers back in the game.
	 */
	up_write(&sem->rw_sem);

	/*
	 * Once this completes (at least one RCU-sched grace period hence) the
	 * reader fast path will be available again. Safe to use outside the
	 * exclusive write lock because it's counting.
	 */
	rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
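/*
 * Usage sketch (illustrative only; foo_sem and the foo_* functions are
 * hypothetical, not part of this file). Readers pair percpu_down_read()
 * with percpu_up_read() and stay on the per-CPU fast path unless a writer
 * is active; the writer side is deliberately heavyweight and waits for
 * every in-flight reader before its exclusive section begins:
 *
 *	DEFINE_STATIC_PERCPU_RWSEM(foo_sem);
 *
 *	void foo_read(void)
 *	{
 *		percpu_down_read(&foo_sem);
 *		// read-side critical section; writer is excluded
 *		percpu_up_read(&foo_sem);
 *	}
 *
 *	void foo_modify(void)
 *	{
 *		percpu_down_write(&foo_sem);	// waits for all readers
 *		// exclusive critical section
 *		percpu_up_write(&foo_sem);
 *	}
 */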