Branch data Line data Source code
1 : : /* SPDX-License-Identifier: GPL-2.0 */ 2 : : /* 3 : : * include/linux/writeback.h 4 : : */ 5 : : #ifndef WRITEBACK_H 6 : : #define WRITEBACK_H 7 : : 8 : : #include <linux/sched.h> 9 : : #include <linux/workqueue.h> 10 : : #include <linux/fs.h> 11 : : #include <linux/flex_proportions.h> 12 : : #include <linux/backing-dev-defs.h> 13 : : #include <linux/blk_types.h> 14 : : #include <linux/blk-cgroup.h> 15 : : 16 : : struct bio; 17 : : 18 : : DECLARE_PER_CPU(int, dirty_throttle_leaks); 19 : : 20 : : /* 21 : : * The 1/4 region under the global dirty thresh is for smooth dirty throttling: 22 : : * 23 : : * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) 24 : : * 25 : : * Further beyond, all dirtier tasks will enter a loop waiting (possibly long 26 : : * time) for the dirty pages to drop, unless written enough pages. 27 : : * 28 : : * The global dirty threshold is normally equal to the global dirty limit, 29 : : * except when the system suddenly allocates a lot of anonymous memory and 30 : : * knocks down the global dirty threshold quickly, in which case the global 31 : : * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. 32 : : */ 33 : : #define DIRTY_SCOPE 8 34 : : #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) 35 : : 36 : : struct backing_dev_info; 37 : : 38 : : /* 39 : : * fs/fs-writeback.c 40 : : */ 41 : : enum writeback_sync_modes { 42 : : WB_SYNC_NONE, /* Don't wait on anything */ 43 : : WB_SYNC_ALL, /* Wait on every mapping */ 44 : : }; 45 : : 46 : : /* 47 : : * A control structure which tells the writeback code what to do. These are 48 : : * always on the stack, and hence need no locking. They are always initialised 49 : : * in a manner such that unspecified fields are set to zero. 50 : : */ 51 : : struct writeback_control { 52 : : long nr_to_write; /* Write this many pages, and decrement 53 : : this for each page written */ 54 : : long pages_skipped; /* Pages which were not written */ 55 : : 56 : : /* 57 : : * For a_ops->writepages(): if start or end are non-zero then this is 58 : : * a hint that the filesystem need only write out the pages inside that 59 : : * byterange. The byte at `end' is included in the writeout request. 60 : : */ 61 : : loff_t range_start; 62 : : loff_t range_end; 63 : : 64 : : enum writeback_sync_modes sync_mode; 65 : : 66 : : unsigned for_kupdate:1; /* A kupdate writeback */ 67 : : unsigned for_background:1; /* A background writeback */ 68 : : unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ 69 : : unsigned for_reclaim:1; /* Invoked from the page allocator */ 70 : : unsigned range_cyclic:1; /* range_start is cyclic */ 71 : : unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ 72 : : 73 : : /* 74 : : * When writeback IOs are bounced through async layers, only the 75 : : * initial synchronous phase should be accounted towards inode 76 : : * cgroup ownership arbitration to avoid confusion. Later stages 77 : : * can set the following flag to disable the accounting. 78 : : */ 79 : : unsigned no_cgroup_owner:1; 80 : : 81 : : unsigned punt_to_cgroup:1; /* cgrp punting, see __REQ_CGROUP_PUNT */ 82 : : 83 : : #ifdef CONFIG_CGROUP_WRITEBACK 84 : : struct bdi_writeback *wb; /* wb this writeback is issued under */ 85 : : struct inode *inode; /* inode being written out */ 86 : : 87 : : /* foreign inode detection, see wbc_detach_inode() */ 88 : : int wb_id; /* current wb id */ 89 : : int wb_lcand_id; /* last foreign candidate wb id */ 90 : : int wb_tcand_id; /* this foreign candidate wb id */ 91 : : size_t wb_bytes; /* bytes written by current wb */ 92 : : size_t wb_lcand_bytes; /* bytes written by last candidate */ 93 : : size_t wb_tcand_bytes; /* bytes written by this candidate */ 94 : : #endif 95 : : }; 96 : : 97 : : static inline int wbc_to_write_flags(struct writeback_control *wbc) 98 : : { 99 : : int flags = 0; 100 : : 101 : 3 : if (wbc->punt_to_cgroup) 102 : : flags = REQ_CGROUP_PUNT; 103 : : 104 : 3 : if (wbc->sync_mode == WB_SYNC_ALL) 105 : 3 : flags |= REQ_SYNC; 106 : 3 : else if (wbc->for_kupdate || wbc->for_background) 107 : 3 : flags |= REQ_BACKGROUND; 108 : : 109 : : return flags; 110 : : } 111 : : 112 : : static inline struct cgroup_subsys_state * 113 : : wbc_blkcg_css(struct writeback_control *wbc) 114 : : { 115 : : #ifdef CONFIG_CGROUP_WRITEBACK 116 : : if (wbc->wb) 117 : : return wbc->wb->blkcg_css; 118 : : #endif 119 : : return blkcg_root_css; 120 : : } 121 : : 122 : : /* 123 : : * A wb_domain represents a domain that wb's (bdi_writeback's) belong to 124 : : * and are measured against each other in. There always is one global 125 : : * domain, global_wb_domain, that every wb in the system is a member of. 126 : : * This allows measuring the relative bandwidth of each wb to distribute 127 : : * dirtyable memory accordingly. 128 : : */ 129 : : struct wb_domain { 130 : : spinlock_t lock; 131 : : 132 : : /* 133 : : * Scale the writeback cache size proportional to the relative 134 : : * writeout speed. 135 : : * 136 : : * We do this by keeping a floating proportion between BDIs, based 137 : : * on page writeback completions [end_page_writeback()]. Those 138 : : * devices that write out pages fastest will get the larger share, 139 : : * while the slower will get a smaller share. 140 : : * 141 : : * We use page writeout completions because we are interested in 142 : : * getting rid of dirty pages. Having them written out is the 143 : : * primary goal. 144 : : * 145 : : * We introduce a concept of time, a period over which we measure 146 : : * these events, because demand can/will vary over time. The length 147 : : * of this period itself is measured in page writeback completions. 148 : : */ 149 : : struct fprop_global completions; 150 : : struct timer_list period_timer; /* timer for aging of completions */ 151 : : unsigned long period_time; 152 : : 153 : : /* 154 : : * The dirtyable memory and dirty threshold could be suddenly 155 : : * knocked down by a large amount (eg. on the startup of KVM in a 156 : : * swapless system). This may throw the system into deep dirty 157 : : * exceeded state and throttle heavy/light dirtiers alike. To 158 : : * retain good responsiveness, maintain global_dirty_limit for 159 : : * tracking slowly down to the knocked down dirty threshold. 160 : : * 161 : : * Both fields are protected by ->lock. 162 : : */ 163 : : unsigned long dirty_limit_tstamp; 164 : : unsigned long dirty_limit; 165 : : }; 166 : : 167 : : /** 168 : : * wb_domain_size_changed - memory available to a wb_domain has changed 169 : : * @dom: wb_domain of interest 170 : : * 171 : : * This function should be called when the amount of memory available to 172 : : * @dom has changed. It resets @dom's dirty limit parameters to prevent 173 : : * the past values which don't match the current configuration from skewing 174 : : * dirty throttling. Without this, when memory size of a wb_domain is 175 : : * greatly reduced, the dirty throttling logic may allow too many pages to 176 : : * be dirtied leading to consecutive unnecessary OOMs and may get stuck in 177 : : * that situation. 178 : : */ 179 : 3 : static inline void wb_domain_size_changed(struct wb_domain *dom) 180 : : { 181 : : spin_lock(&dom->lock); 182 : 3 : dom->dirty_limit_tstamp = jiffies; 183 : 3 : dom->dirty_limit = 0; 184 : : spin_unlock(&dom->lock); 185 : 3 : } 186 : : 187 : : /* 188 : : * fs/fs-writeback.c 189 : : */ 190 : : struct bdi_writeback; 191 : : void writeback_inodes_sb(struct super_block *, enum wb_reason reason); 192 : : void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, 193 : : enum wb_reason reason); 194 : : void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason); 195 : : void sync_inodes_sb(struct super_block *); 196 : : void wakeup_flusher_threads(enum wb_reason reason); 197 : : void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, 198 : : enum wb_reason reason); 199 : : void inode_wait_for_writeback(struct inode *inode); 200 : : 201 : : /* writeback.h requires fs.h; it, too, is not included from here. */ 202 : : static inline void wait_on_inode(struct inode *inode) 203 : : { 204 : 3 : might_sleep(); 205 : 3 : wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); 206 : : } 207 : : 208 : : #ifdef CONFIG_CGROUP_WRITEBACK 209 : : 210 : : #include <linux/cgroup.h> 211 : : #include <linux/bio.h> 212 : : 213 : : void __inode_attach_wb(struct inode *inode, struct page *page); 214 : : void wbc_attach_and_unlock_inode(struct writeback_control *wbc, 215 : : struct inode *inode) 216 : : __releases(&inode->i_lock); 217 : : void wbc_detach_inode(struct writeback_control *wbc); 218 : : void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, 219 : : size_t bytes); 220 : : int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages, 221 : : enum wb_reason reason, struct wb_completion *done); 222 : : void cgroup_writeback_umount(void); 223 : : 224 : : /** 225 : : * inode_attach_wb - associate an inode with its wb 226 : : * @inode: inode of interest 227 : : * @page: page being dirtied (may be NULL) 228 : : * 229 : : * If @inode doesn't have its wb, associate it with the wb matching the 230 : : * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o 231 : : * @inode->i_lock. 232 : : */ 233 : : static inline void inode_attach_wb(struct inode *inode, struct page *page) 234 : : { 235 : 3 : if (!inode->i_wb) 236 : 3 : __inode_attach_wb(inode, page); 237 : : } 238 : : 239 : : /** 240 : : * inode_detach_wb - disassociate an inode from its wb 241 : : * @inode: inode of interest 242 : : * 243 : : * @inode is being freed. Detach from its wb. 244 : : */ 245 : 3 : static inline void inode_detach_wb(struct inode *inode) 246 : : { 247 : 3 : if (inode->i_wb) { 248 : 3 : WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); 249 : 3 : wb_put(inode->i_wb); 250 : 3 : inode->i_wb = NULL; 251 : : } 252 : 3 : } 253 : : 254 : : /** 255 : : * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite 256 : : * @wbc: writeback_control of interest 257 : : * @inode: target inode 258 : : * 259 : : * This function is to be used by __filemap_fdatawrite_range(), which is an 260 : : * alternative entry point into writeback code, and first ensures @inode is 261 : : * associated with a bdi_writeback and attaches it to @wbc. 262 : : */ 263 : 3 : static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, 264 : : struct inode *inode) 265 : : { 266 : : spin_lock(&inode->i_lock); 267 : : inode_attach_wb(inode, NULL); 268 : 3 : wbc_attach_and_unlock_inode(wbc, inode); 269 : 3 : } 270 : : 271 : : /** 272 : : * wbc_init_bio - writeback specific initializtion of bio 273 : : * @wbc: writeback_control for the writeback in progress 274 : : * @bio: bio to be initialized 275 : : * 276 : : * @bio is a part of the writeback in progress controlled by @wbc. Perform 277 : : * writeback specific initialization. This is used to apply the cgroup 278 : : * writeback context. Must be called after the bio has been associated with 279 : : * a device. 280 : : */ 281 : : static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 282 : : { 283 : : /* 284 : : * pageout() path doesn't attach @wbc to the inode being written 285 : : * out. This is intentional as we don't want the function to block 286 : : * behind a slow cgroup. Ultimately, we want pageout() to kick off 287 : : * regular writeback instead of writing things out itself. 288 : : */ 289 : 3 : if (wbc->wb) 290 : 1 : bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); 291 : : } 292 : : 293 : : #else /* CONFIG_CGROUP_WRITEBACK */ 294 : : 295 : : static inline void inode_attach_wb(struct inode *inode, struct page *page) 296 : : { 297 : : } 298 : : 299 : : static inline void inode_detach_wb(struct inode *inode) 300 : : { 301 : : } 302 : : 303 : : static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, 304 : : struct inode *inode) 305 : : __releases(&inode->i_lock) 306 : : { 307 : : spin_unlock(&inode->i_lock); 308 : : } 309 : : 310 : : static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, 311 : : struct inode *inode) 312 : : { 313 : : } 314 : : 315 : : static inline void wbc_detach_inode(struct writeback_control *wbc) 316 : : { 317 : : } 318 : : 319 : : static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 320 : : { 321 : : } 322 : : 323 : : static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, 324 : : struct page *page, size_t bytes) 325 : : { 326 : : } 327 : : 328 : : static inline void cgroup_writeback_umount(void) 329 : : { 330 : : } 331 : : 332 : : #endif /* CONFIG_CGROUP_WRITEBACK */ 333 : : 334 : : /* 335 : : * mm/page-writeback.c 336 : : */ 337 : : #ifdef CONFIG_BLOCK 338 : : void laptop_io_completion(struct backing_dev_info *info); 339 : : void laptop_sync_completion(void); 340 : : void laptop_mode_sync(struct work_struct *work); 341 : : void laptop_mode_timer_fn(struct timer_list *t); 342 : : #else 343 : : static inline void laptop_sync_completion(void) { } 344 : : #endif 345 : : bool node_dirty_ok(struct pglist_data *pgdat); 346 : : int wb_domain_init(struct wb_domain *dom, gfp_t gfp); 347 : : #ifdef CONFIG_CGROUP_WRITEBACK 348 : : void wb_domain_exit(struct wb_domain *dom); 349 : : #endif 350 : : 351 : : extern struct wb_domain global_wb_domain; 352 : : 353 : : /* These are exported to sysctl. */ 354 : : extern int dirty_background_ratio; 355 : : extern unsigned long dirty_background_bytes; 356 : : extern int vm_dirty_ratio; 357 : : extern unsigned long vm_dirty_bytes; 358 : : extern unsigned int dirty_writeback_interval; 359 : : extern unsigned int dirty_expire_interval; 360 : : extern unsigned int dirtytime_expire_interval; 361 : : extern int vm_highmem_is_dirtyable; 362 : : extern int block_dump; 363 : : extern int laptop_mode; 364 : : 365 : : extern int dirty_background_ratio_handler(struct ctl_table *table, int write, 366 : : void __user *buffer, size_t *lenp, 367 : : loff_t *ppos); 368 : : extern int dirty_background_bytes_handler(struct ctl_table *table, int write, 369 : : void __user *buffer, size_t *lenp, 370 : : loff_t *ppos); 371 : : extern int dirty_ratio_handler(struct ctl_table *table, int write, 372 : : void __user *buffer, size_t *lenp, 373 : : loff_t *ppos); 374 : : extern int dirty_bytes_handler(struct ctl_table *table, int write, 375 : : void __user *buffer, size_t *lenp, 376 : : loff_t *ppos); 377 : : int dirtytime_interval_handler(struct ctl_table *table, int write, 378 : : void __user *buffer, size_t *lenp, loff_t *ppos); 379 : : 380 : : struct ctl_table; 381 : : int dirty_writeback_centisecs_handler(struct ctl_table *, int, 382 : : void __user *, size_t *, loff_t *); 383 : : 384 : : void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); 385 : : unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); 386 : : 387 : : void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); 388 : : void balance_dirty_pages_ratelimited(struct address_space *mapping); 389 : : bool wb_over_bg_thresh(struct bdi_writeback *wb); 390 : : 391 : : typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, 392 : : void *data); 393 : : 394 : : int generic_writepages(struct address_space *mapping, 395 : : struct writeback_control *wbc); 396 : : void tag_pages_for_writeback(struct address_space *mapping, 397 : : pgoff_t start, pgoff_t end); 398 : : int write_cache_pages(struct address_space *mapping, 399 : : struct writeback_control *wbc, writepage_t writepage, 400 : : void *data); 401 : : int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 402 : : void writeback_set_ratelimit(void); 403 : : void tag_pages_for_writeback(struct address_space *mapping, 404 : : pgoff_t start, pgoff_t end); 405 : : 406 : : void account_page_redirty(struct page *page); 407 : : 408 : : void sb_mark_inode_writeback(struct inode *inode); 409 : : void sb_clear_inode_writeback(struct inode *inode); 410 : : 411 : : #endif /* WRITEBACK_H */