// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET3:	Garbage Collector For AF_UNIX sockets
 *
 * Garbage Collector:
 *	Copyright (C) Barak A. Pearlmutter.
 *
 * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
 * If it doesn't work blame me, it worked when Barak sent it.
 *
 * Assumptions:
 *
 *  - object w/ a bit
 *  - free list
 *
 * Current optimizations:
 *
 *  - explicit stack instead of recursion
 *  - tail recurse on first born instead of immediate push/pop
 *  - we gather the stuff that should not be killed into tree
 *    and stack is just a path from root to the current pointer.
 *
 *  Future optimizations:
 *
 *  - don't just push entire root set; process in place
 *
 *  Fixes:
 *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
 *					Cope with changing max_files.
 *	Al Viro		11 Oct 1998
 *		Graph may have cycles. That is, we can send the descriptor
 *		of foo to bar and vice versa. Current code chokes on that.
 *		Fix: move SCM_RIGHTS ones into the separate list and then
 *		skb_free() them all instead of doing explicit fput's.
 *		Another problem: since fput() may block somebody may
 *		create a new unix_socket when we are in the middle of sweep
 *		phase. Fix: revert the logic wrt MARKED. Mark everything
 *		upon the beginning and unmark non-junk ones.
 *
 *		[12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
 *		sent to connect()'ed but still not accept()'ed sockets.
 *		Fixed. Old code had slightly different problem here:
 *		extra fput() in situation when we passed the descriptor via
 *		such socket and closed it (descriptor). That would happen on
 *		each unix_gc() until the accept(). Since the struct file in
 *		question would go to the free list and might be reused...
 *		That might be the reason of random oopses on filp_close()
 *		in unrelated processes.
 *
 *	AV		28 Feb 1999
 *		Kill the explicit allocation of stack. Now we keep the tree
 *		with root in dummy + pointer (gc_current) to one of the nodes.
 *		Stack is represented as path from gc_current to dummy. Unmark
 *		now means "add to tree". Push == "make it a son of gc_current".
 *		Pop == "move gc_current to parent". We keep only pointers to
 *		parents (->gc_tree).
 *	AV		1 Mar 1999
 *		Damn. Added missing check for ->dead in listen queues scanning.
 *
 *	Miklos Szeredi 25 Jun 2007
 *		Reimplement with a cycle collecting algorithm. This should
 *		solve several problems with the previous code, like being racy
 *		wrt receive and holding up unrelated socket operations.
 */
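
/* Illustration (a minimal user-space sketch, kept in this comment so it
 * stays out of the build; assumes <sys/socket.h>, <string.h> and
 * <unistd.h>): the smallest graph this collector exists for is a
 * socketpair where each end is sent in-flight across the pair and both
 * descriptors are then closed.  No process can reach either socket any
 * more, yet each file is kept alive by the SCM_RIGHTS skb queued on the
 * other, so only breaking the cycle here can free them.
 *
 *	int fd[2];
 *	char dummy = 0;
 *	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *	union {
 *		char buf[CMSG_SPACE(sizeof(int))];
 *		struct cmsghdr align;
 *	} ctl = {};
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = ctl.buf, .msg_controllen = sizeof(ctl.buf),
 *	};
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	socketpair(AF_UNIX, SOCK_DGRAM, 0, fd);
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd[0], sizeof(int));
 *	sendmsg(fd[0], &msg, 0);
 *	memcpy(CMSG_DATA(cmsg), &fd[1], sizeof(int));
 *	sendmsg(fd[1], &msg, 0);
 *	close(fd[0]);
 *	close(fd[1]);
 *
 * After the first sendmsg() a reference to fd[0]'s file sits queued on
 * fd[1]; after the second, a reference to fd[1]'s file sits queued on
 * fd[0].  Once both descriptors are closed, the two files reference
 * only each other.
 */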

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/wait.h>

#include <net/sock.h>
#include <net/af_unix.h>
#include <net/scm.h>
#include <net/tcp_states.h>

#include "scm.h"

/* Internal data structures and random procedures: */

static LIST_HEAD(gc_candidates);
static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);

static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
			  struct sk_buff_head *hitlist)
{
	struct sk_buff *skb;
	struct sk_buff *next;

	spin_lock(&x->sk_receive_queue.lock);
	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
		/* Do we have file descriptors ? */
		if (UNIXCB(skb).fp) {
			bool hit = false;
			/* Process the descriptors of this socket */
			int nfd = UNIXCB(skb).fp->count;
			struct file **fp = UNIXCB(skb).fp->fp;

			while (nfd--) {
				/* Get the socket the fd matches if it indeed does so */
				struct sock *sk = unix_get_socket(*fp++);

				if (sk) {
					struct unix_sock *u = unix_sk(sk);

					/* Ignore non-candidates, they could
					 * have been added to the queues after
					 * starting the garbage collection
					 */
					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
						hit = true;

						func(u);
					}
				}
			}
			if (hit && hitlist != NULL) {
				__skb_unlink(skb, &x->sk_receive_queue);
				__skb_queue_tail(hitlist, skb);
			}
		}
	}
	spin_unlock(&x->sk_receive_queue.lock);
}
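
/* Note on the (func, hitlist) pair above: unix_gc() drives every phase
 * of the collection through scan_children() below with different
 * arguments.  The counting passes run with a NULL hitlist and only
 * apply func to each candidate found, e.g.
 *
 *	scan_children(&u->sk, dec_inflight, NULL);
 *
 * while the final pass supplies a real hitlist, so the skbs that pin a
 * cycle are unlinked from the receive queue for later purging.
 */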

static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
			  struct sk_buff_head *hitlist)
{
	if (x->sk_state != TCP_LISTEN) {
		scan_inflight(x, func, hitlist);
	} else {
		struct sk_buff *skb;
		struct sk_buff *next;
		struct unix_sock *u;
		LIST_HEAD(embryos);

		/* For a listening socket collect the queued embryos
		 * and perform a scan on them as well.
		 */
		spin_lock(&x->sk_receive_queue.lock);
		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
			u = unix_sk(skb->sk);

			/* An embryo cannot be in-flight, so it's safe
			 * to use the list link.
			 */
			BUG_ON(!list_empty(&u->link));
			list_add_tail(&u->link, &embryos);
		}
		spin_unlock(&x->sk_receive_queue.lock);

		while (!list_empty(&embryos)) {
			u = list_entry(embryos.next, struct unix_sock, link);
			scan_inflight(&u->sk, func, hitlist);
			list_del_init(&u->link);
		}
	}
}

static void dec_inflight(struct unix_sock *usk)
{
	atomic_long_dec(&usk->inflight);
}

static void inc_inflight(struct unix_sock *usk)
{
	atomic_long_inc(&usk->inflight);
}

static void inc_inflight_move_tail(struct unix_sock *u)
{
	atomic_long_inc(&u->inflight);
	/* If this still might be part of a cycle, move it to the end
	 * of the list, so that it's checked even if it was already
	 * passed over
	 */
	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
		list_move_tail(&u->link, &gc_candidates);
}

static bool gc_in_progress;
#define UNIX_INFLIGHT_TRIGGER_GC 16000

void wait_for_unix_gc(void)
{
	/* If number of inflight sockets is insane,
	 * force a garbage collect right now.
	 */
	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
		unix_gc();
	wait_event(unix_gc_wait, gc_in_progress == false);
}
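
/* Worked example for the candidate test in unix_gc() below: a socket
 * whose file has file_count() == 2 with ->inflight == 1 still has a
 * descriptor installed in some process, so it is externally reachable
 * and must be skipped.  One with file_count() == 1 and ->inflight == 1
 * is referenced only by SCM_RIGHTS skbs sitting on receive queues, so
 * it becomes a candidate: it is garbage if those queues are themselves
 * unreachable.
 */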

/* The external entry point: unix_gc() */
void unix_gc(void)
{
	struct unix_sock *u;
	struct unix_sock *next;
	struct sk_buff_head hitlist;
	struct list_head cursor;
	LIST_HEAD(not_cycle_list);

	spin_lock(&unix_gc_lock);

	/* Avoid a recursive GC. */
	if (gc_in_progress)
		goto out;

	gc_in_progress = true;
	/* First, select candidates for garbage collection. Only
	 * in-flight sockets are considered, and from those only ones
	 * which don't have any external reference.
	 *
	 * Holding unix_gc_lock will protect these candidates from
	 * being detached, and hence from gaining an external
	 * reference. Since there are no possible receivers, all
	 * buffers currently on the candidates' queues stay there
	 * during the garbage collection.
	 *
	 * We also know that no new candidate can be added onto the
	 * receive queues. Other, non candidate sockets _can_ be
	 * added to queue, so we must make sure only to touch
	 * candidates.
	 */
	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
		long total_refs;
		long inflight_refs;

		total_refs = file_count(u->sk.sk_socket->file);
		inflight_refs = atomic_long_read(&u->inflight);

		BUG_ON(inflight_refs < 1);
		BUG_ON(total_refs < inflight_refs);
		if (total_refs == inflight_refs) {
			list_move_tail(&u->link, &gc_candidates);
			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
		}
	}

	/* Now remove all internal in-flight reference to children of
	 * the candidates.
	 */
	list_for_each_entry(u, &gc_candidates, link)
		scan_children(&u->sk, dec_inflight, NULL);

	/* Restore the references for children of all candidates,
	 * which have remaining references.  Do this recursively, so
	 * only those remain, which form cyclic references.
	 *
	 * Use a "cursor" link, to make the list traversal safe, even
	 * though elements might be moved about.
	 */
	list_add(&cursor, &gc_candidates);
	while (cursor.next != &gc_candidates) {
		u = list_entry(cursor.next, struct unix_sock, link);

		/* Move cursor to after the current position. */
		list_move(&cursor, &u->link);

		if (atomic_long_read(&u->inflight) > 0) {
			list_move_tail(&u->link, &not_cycle_list);
			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
			scan_children(&u->sk, inc_inflight_move_tail, NULL);
		}
	}
	list_del(&cursor);

	/* Now gc_candidates contains only garbage. Restore original
	 * inflight counters for these as well, and remove the skbuffs
	 * which are creating the cycle(s).
	 */
	skb_queue_head_init(&hitlist);
	list_for_each_entry(u, &gc_candidates, link)
		scan_children(&u->sk, inc_inflight, &hitlist);

	/* not_cycle_list contains those sockets which do not make up a
	 * cycle. Restore these to the inflight list.
	 */
	while (!list_empty(&not_cycle_list)) {
		u = list_entry(not_cycle_list.next, struct unix_sock, link);
		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
		list_move_tail(&u->link, &gc_inflight_list);
	}

	spin_unlock(&unix_gc_lock);

	/* Here we are. Hitlist is filled. Die. */
	__skb_queue_purge(&hitlist);

	spin_lock(&unix_gc_lock);

	/* All candidates should have been detached by now. */
	BUG_ON(!list_empty(&gc_candidates));
	gc_in_progress = false;
	wake_up(&unix_gc_wait);

 out:
	spin_unlock(&unix_gc_lock);
}
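
/* End-to-end trace (hedged, following the socketpair sketch in the
 * header comment): after both close() calls each socket has
 * file_count() == 1 == ->inflight, so both become candidates.  The
 * dec_inflight pass drops both counters to zero through the skb each
 * holds for the other; the cursor pass then finds no candidate with a
 * remaining reference, so nothing moves to not_cycle_list.  Finally
 * both skbs are moved to the hitlist and __skb_queue_purge() drops the
 * last references to the two files, destroying both sockets.
 */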