LCOV - combined.info - fs/buffer.c

LCOV - code coverage report

Current view:	top level - fs - buffer.c (source / functions)		Hit	Total	Coverage
Test:	combined.info	Lines:	623	1500	41.5 %
Date:	2022-04-01 14:35:51	Functions:	53	94	56.4 %
		Branches:	218	941	23.2 %

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: GPL-2.0-only
       2                 :            : /*
       3                 :            :  *  linux/fs/buffer.c
       4                 :            :  *
       5                 :            :  *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
       6                 :            :  */
       7                 :            : 
       8                 :            : /*
       9                 :            :  * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
      10                 :            :  *
      11                 :            :  * Removed a lot of unnecessary code and simplified things now that
      12                 :            :  * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
      13                 :            :  *
      14                 :            :  * Speed up hash, lru, and free list operations.  Use gfp() for allocating
      15                 :            :  * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
      16                 :            :  *
      17                 :            :  * Added 32k buffer block sizes - these are required by older ARM systems. - RMK
      18                 :            :  *
      19                 :            :  * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
      20                 :            :  */
      21                 :            : 
      22                 :            : #include <linux/kernel.h>
      23                 :            : #include <linux/sched/signal.h>
      24                 :            : #include <linux/syscalls.h>
      25                 :            : #include <linux/fs.h>
      26                 :            : #include <linux/iomap.h>
      27                 :            : #include <linux/mm.h>
      28                 :            : #include <linux/percpu.h>
      29                 :            : #include <linux/slab.h>
      30                 :            : #include <linux/capability.h>
      31                 :            : #include <linux/blkdev.h>
      32                 :            : #include <linux/file.h>
      33                 :            : #include <linux/quotaops.h>
      34                 :            : #include <linux/highmem.h>
      35                 :            : #include <linux/export.h>
      36                 :            : #include <linux/backing-dev.h>
      37                 :            : #include <linux/writeback.h>
      38                 :            : #include <linux/hash.h>
      39                 :            : #include <linux/suspend.h>
      40                 :            : #include <linux/buffer_head.h>
      41                 :            : #include <linux/task_io_accounting_ops.h>
      42                 :            : #include <linux/bio.h>
      43                 :            : #include <linux/cpu.h>
      44                 :            : #include <linux/bitops.h>
      45                 :            : #include <linux/mpage.h>
      46                 :            : #include <linux/bit_spinlock.h>
      47                 :            : #include <linux/pagevec.h>
      48                 :            : #include <linux/sched/mm.h>
      49                 :            : #include <trace/events/block.h>
      50                 :            : #include <linux/fscrypt.h>
      51                 :            : 
      52                 :            : #include "internal.h"
      53                 :            : 
      54                 :            : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
      55                 :            : static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
      56                 :            :                          enum rw_hint hint, struct writeback_control *wbc);
      57                 :            : 
      58                 :            : #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
      59                 :            : 
      60                 :     188648 : inline void touch_buffer(struct buffer_head *bh)
      61                 :            : {
      62                 :          0 :         trace_block_touch_buffer(bh);
      63                 :     188648 :         mark_page_accessed(bh->b_page);
      64                 :     188648 : }
      65                 :            : EXPORT_SYMBOL(touch_buffer);
      66                 :            : 
      67                 :         44 : void __lock_buffer(struct buffer_head *bh)
      68                 :            : {
      69                 :         44 :         wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
      70                 :         43 : }
      71                 :            : EXPORT_SYMBOL(__lock_buffer);
      72                 :            : 
      73                 :      26392 : void unlock_buffer(struct buffer_head *bh)
      74                 :            : {
      75                 :      12909 :         clear_bit_unlock(BH_Lock, &bh->b_state);
      76                 :      26392 :         smp_mb__after_atomic();
      77                 :      26307 :         wake_up_bit(&bh->b_state, BH_Lock);
      78                 :        168 : }
      79                 :            : EXPORT_SYMBOL(unlock_buffer);
      80                 :            : 
      81                 :            : /*
      82                 :            :  * Reports whether the page has dirty or writeback buffers. If all the buffers
      83                 :            :  * are unlocked and clean then the PageDirty information is stale. If
      84                 :            :  * any of the buffers are locked, it is assumed they are locked for IO.
      85                 :            :  */
      86                 :          0 : void buffer_check_dirty_writeback(struct page *page,
      87                 :            :                                      bool *dirty, bool *writeback)
      88                 :            : {
      89                 :          0 :         struct buffer_head *head, *bh;
      90                 :          0 :         *dirty = false;
      91                 :          0 :         *writeback = false;
      92                 :            : 
      93   [ #  #  #  # ]:          0 :         BUG_ON(!PageLocked(page));
      94                 :            : 
      95         [ #  # ]:          0 :         if (!page_has_buffers(page))
      96                 :            :                 return;
      97                 :            : 
      98   [ #  #  #  # ]:          0 :         if (PageWriteback(page))
      99                 :          0 :                 *writeback = true;
     100                 :            : 
     101         [ #  # ]:          0 :         head = page_buffers(page);
     102                 :          0 :         bh = head;
     103                 :          0 :         do {
     104         [ #  # ]:          0 :                 if (buffer_locked(bh))
     105                 :          0 :                         *writeback = true;
     106                 :            : 
     107         [ #  # ]:          0 :                 if (buffer_dirty(bh))
     108                 :          0 :                         *dirty = true;
     109                 :            : 
     110                 :          0 :                 bh = bh->b_this_page;
     111         [ #  # ]:          0 :         } while (bh != head);
     112                 :            : }
     113                 :            : EXPORT_SYMBOL(buffer_check_dirty_writeback);
     114                 :            : 
     115                 :            : /*
     116                 :            :  * Block until a buffer comes unlocked.  This doesn't stop it
     117                 :            :  * from becoming locked again - you have to lock it yourself
     118                 :            :  * if you want to preserve its state.
     119                 :            :  */
     120                 :       2631 : void __wait_on_buffer(struct buffer_head * bh)
     121                 :            : {
     122                 :       2631 :         wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
     123                 :        153 : }
     124                 :            : EXPORT_SYMBOL(__wait_on_buffer);
     125                 :            : 
     126                 :            : static void
     127                 :         84 : __clear_page_buffers(struct page *page)
     128                 :            : {
     129                 :         84 :         ClearPagePrivate(page);
     130                 :         84 :         set_page_private(page, 0);
     131                 :         84 :         put_page(page);
     132                 :            : }
     133                 :            : 
     134                 :          0 : static void buffer_io_error(struct buffer_head *bh, char *msg)
     135                 :            : {
     136         [ #  # ]:          0 :         if (!test_bit(BH_Quiet, &bh->b_state))
     137         [ #  # ]:          0 :                 printk_ratelimited(KERN_ERR
     138                 :            :                         "Buffer I/O error on dev %pg, logical block %llu%s\n",
     139                 :            :                         bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
     140                 :          0 : }
     141                 :            : 
     142                 :            : /*
     143                 :            :  * End-of-IO handler helper function which does not touch the bh after
     144                 :            :  * unlocking it.
     145                 :            :  * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
     146                 :            :  * a race there is benign: unlock_buffer() only uses the bh's address for
     147                 :            :  * hashing after unlocking the buffer, so it doesn't actually touch the bh
     148                 :            :  * itself.
     149                 :            :  */
     150                 :      12789 : static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
     151                 :            : {
     152         [ +  - ]:      12789 :         if (uptodate) {
     153                 :      12789 :                 set_buffer_uptodate(bh);
     154                 :            :         } else {
     155                 :            :                 /* This happens, due to failed read-ahead attempts. */
     156                 :          0 :                 clear_buffer_uptodate(bh);
     157                 :            :         }
     158                 :      12789 :         unlock_buffer(bh);
     159                 :      12789 : }
     160                 :            : 
     161                 :            : /*
     162                 :            :  * Default synchronous end-of-IO handler.  Just mark it up-to-date and
     163                 :            :  * unlock the buffer. This is what ll_rw_block uses too.
     164                 :            :  */
     165                 :      12789 : void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
     166                 :            : {
     167                 :      12789 :         __end_buffer_read_notouch(bh, uptodate);
     168                 :      12789 :         put_bh(bh);
     169                 :      12789 : }
     170                 :            : EXPORT_SYMBOL(end_buffer_read_sync);
     171                 :            : 
     172                 :         63 : void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
     173                 :            : {
     174         [ +  - ]:         63 :         if (uptodate) {
     175                 :         63 :                 set_buffer_uptodate(bh);
     176                 :            :         } else {
     177                 :          0 :                 buffer_io_error(bh, ", lost sync page write");
     178                 :          0 :                 mark_buffer_write_io_error(bh);
     179                 :          0 :                 clear_buffer_uptodate(bh);
     180                 :            :         }
     181                 :         63 :         unlock_buffer(bh);
     182                 :         63 :         put_bh(bh);
     183                 :         63 : }
     184                 :            : EXPORT_SYMBOL(end_buffer_write_sync);
     185                 :            : 
     186                 :            : /*
     187                 :            :  * Various filesystems appear to want __find_get_block to be non-blocking.
     188                 :            :  * But it's the page lock which protects the buffers.  To get around this,
     189                 :            :  * we get exclusion from try_to_free_buffers with the blockdev mapping's
     190                 :            :  * private_lock.
     191                 :            :  *
     192                 :            :  * Hack idea: for the blockdev mapping, private_lock contention
     193                 :            :  * may be quite high.  This code could TryLock the page, and if that
     194                 :            :  * succeeds, there is no need to take private_lock.
     195                 :            :  */
     196                 :            : static struct buffer_head *
     197                 :      71066 : __find_get_block_slow(struct block_device *bdev, sector_t block)
     198                 :            : {
     199                 :      71066 :         struct inode *bd_inode = bdev->bd_inode;
     200                 :      71066 :         struct address_space *bd_mapping = bd_inode->i_mapping;
     201                 :      71066 :         struct buffer_head *ret = NULL;
     202                 :      71066 :         pgoff_t index;
     203                 :      71066 :         struct buffer_head *bh;
     204                 :      71066 :         struct buffer_head *head;
     205                 :      71066 :         struct page *page;
     206                 :      71066 :         int all_mapped = 1;
     207                 :      71066 :         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
     208                 :            : 
     209                 :      71066 :         index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
     210                 :      71066 :         page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
     211         [ +  + ]:      71066 :         if (!page)
     212                 :      38358 :                 goto out;
     213                 :            : 
     214                 :      32708 :         spin_lock(&bd_mapping->private_lock);
     215         [ -  + ]:      32708 :         if (!page_has_buffers(page))
     216                 :          0 :                 goto out_unlock;
     217         [ -  + ]:      32708 :         head = page_buffers(page);
     218                 :      32708 :         bh = head;
     219                 :      32792 :         do {
     220         [ +  - ]:      32792 :                 if (!buffer_mapped(bh))
     221                 :            :                         all_mapped = 0;
     222         [ +  + ]:      32792 :                 else if (bh->b_blocknr == block) {
     223                 :      32708 :                         ret = bh;
     224                 :      32708 :                         get_bh(bh);
     225                 :      32708 :                         goto out_unlock;
     226                 :            :                 }
     227                 :         84 :                 bh = bh->b_this_page;
     228         [ +  - ]:         84 :         } while (bh != head);
     229                 :            : 
     230                 :            :         /* we might be here because some of the buffers on this page are
     231                 :            :          * not mapped.  This is due to various races between
     232                 :            :          * file io on the block device and getblk.  It gets dealt with
     233                 :            :          * elsewhere, don't buffer_error if we had some unmapped buffers
     234                 :            :          */
     235         [ #  # ]:          0 :         ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
     236   [ #  #  #  # ]:          0 :         if (all_mapped && __ratelimit(&last_warned)) {
     237                 :          0 :                 printk("__find_get_block_slow() failed. block=%llu, "
     238                 :            :                        "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
     239                 :            :                        "device %pg blocksize: %d\n",
     240                 :            :                        (unsigned long long)block,
     241                 :          0 :                        (unsigned long long)bh->b_blocknr,
     242                 :            :                        bh->b_state, bh->b_size, bdev,
     243                 :          0 :                        1 << bd_inode->i_blkbits);
     244                 :            :         }
     245                 :          0 : out_unlock:
     246                 :      32708 :         spin_unlock(&bd_mapping->private_lock);
     247                 :      32708 :         put_page(page);
     248                 :      71066 : out:
     249                 :      71066 :         return ret;
     250                 :            : }
     251                 :            : 
     252                 :         42 : static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
     253                 :            : {
     254                 :         42 :         unsigned long flags;
     255                 :         42 :         struct buffer_head *first;
     256                 :         42 :         struct buffer_head *tmp;
     257                 :         42 :         struct page *page;
     258                 :         42 :         int page_uptodate = 1;
     259                 :            : 
     260         [ -  + ]:         42 :         BUG_ON(!buffer_async_read(bh));
     261                 :            : 
     262                 :         42 :         page = bh->b_page;
     263         [ +  - ]:         42 :         if (uptodate) {
     264                 :         42 :                 set_buffer_uptodate(bh);
     265                 :            :         } else {
     266                 :          0 :                 clear_buffer_uptodate(bh);
     267                 :          0 :                 buffer_io_error(bh, ", async page read");
     268         [ #  # ]:          0 :                 SetPageError(page);
     269                 :            :         }
     270                 :            : 
     271                 :            :         /*
     272                 :            :          * Be _very_ careful from here on. Bad things can happen if
     273                 :            :          * two buffer heads end IO at almost the same time and both
     274                 :            :          * decide that the page is now completely done.
     275                 :            :          */
     276         [ -  + ]:         42 :         first = page_buffers(page);
     277                 :         42 :         local_irq_save(flags);
     278                 :         42 :         bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
     279                 :         42 :         clear_buffer_async_read(bh);
     280                 :         42 :         unlock_buffer(bh);
     281                 :         42 :         tmp = bh;
     282                 :         42 :         do {
     283         [ -  + ]:         42 :                 if (!buffer_uptodate(tmp))
     284                 :          0 :                         page_uptodate = 0;
     285         [ -  + ]:         42 :                 if (buffer_async_read(tmp)) {
     286         [ #  # ]:          0 :                         BUG_ON(!buffer_locked(tmp));
     287                 :          0 :                         goto still_busy;
     288                 :            :                 }
     289                 :         42 :                 tmp = tmp->b_this_page;
     290         [ -  + ]:         42 :         } while (tmp != bh);
     291                 :         42 :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     292                 :         42 :         local_irq_restore(flags);
     293                 :            : 
     294                 :            :         /*
     295                 :            :          * If none of the buffers had errors and they are all
     296                 :            :          * uptodate then we can set the page uptodate.
     297                 :            :          */
     298   [ +  -  +  - ]:         84 :         if (page_uptodate && !PageError(page))
     299                 :         42 :                 SetPageUptodate(page);
     300                 :         42 :         unlock_page(page);
     301                 :         42 :         return;
     302                 :            : 
     303                 :            : still_busy:
     304                 :          0 :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     305                 :          0 :         local_irq_restore(flags);
     306                 :            :         return;
     307                 :            : }
     308                 :            : 
     309                 :            : struct decrypt_bh_ctx {
     310                 :            :         struct work_struct work;
     311                 :            :         struct buffer_head *bh;
     312                 :            : };
     313                 :            : 
     314                 :            : static void decrypt_bh(struct work_struct *work)
     315                 :            : {
     316                 :            :         struct decrypt_bh_ctx *ctx =
     317                 :            :                 container_of(work, struct decrypt_bh_ctx, work);
     318                 :            :         struct buffer_head *bh = ctx->bh;
     319                 :            :         int err;
     320                 :            : 
     321                 :            :         err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
     322                 :            :                                                bh_offset(bh));
     323                 :            :         end_buffer_async_read(bh, err == 0);
     324                 :            :         kfree(ctx);
     325                 :            : }
     326                 :            : 
     327                 :            : /*
     328                 :            :  * I/O completion handler for block_read_full_page() - pages
     329                 :            :  * which come unlocked at the end of I/O.
     330                 :            :  */
     331                 :         42 : static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
     332                 :            : {
     333                 :            :         /* Decrypt if needed */
     334                 :         42 :         if (uptodate && IS_ENABLED(CONFIG_FS_ENCRYPTION) &&
     335                 :            :             IS_ENCRYPTED(bh->b_page->mapping->host) &&
     336                 :            :             S_ISREG(bh->b_page->mapping->host->i_mode)) {
     337                 :            :                 struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
     338                 :            : 
     339                 :            :                 if (ctx) {
     340                 :            :                         INIT_WORK(&ctx->work, decrypt_bh);
     341                 :            :                         ctx->bh = bh;
     342                 :            :                         fscrypt_enqueue_decrypt_work(&ctx->work);
     343                 :            :                         return;
     344                 :            :                 }
     345                 :            :                 uptodate = 0;
     346                 :            :         }
     347                 :         42 :         end_buffer_async_read(bh, uptodate);
     348                 :            : }
     349                 :            : 
     350                 :            : /*
     351                 :            :  * Completion handler for block_write_full_page() - pages which are unlocked
     352                 :            :  * during I/O, and which have PageWriteback cleared upon I/O completion.
     353                 :            :  */
     354                 :          0 : void end_buffer_async_write(struct buffer_head *bh, int uptodate)
     355                 :            : {
     356                 :          0 :         unsigned long flags;
     357                 :          0 :         struct buffer_head *first;
     358                 :          0 :         struct buffer_head *tmp;
     359                 :          0 :         struct page *page;
     360                 :            : 
     361         [ #  # ]:          0 :         BUG_ON(!buffer_async_write(bh));
     362                 :            : 
     363                 :          0 :         page = bh->b_page;
     364         [ #  # ]:          0 :         if (uptodate) {
     365                 :          0 :                 set_buffer_uptodate(bh);
     366                 :            :         } else {
     367                 :          0 :                 buffer_io_error(bh, ", lost async page write");
     368                 :          0 :                 mark_buffer_write_io_error(bh);
     369                 :          0 :                 clear_buffer_uptodate(bh);
     370         [ #  # ]:          0 :                 SetPageError(page);
     371                 :            :         }
     372                 :            : 
     373         [ #  # ]:          0 :         first = page_buffers(page);
     374                 :          0 :         local_irq_save(flags);
     375                 :          0 :         bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
     376                 :            : 
     377                 :          0 :         clear_buffer_async_write(bh);
     378                 :          0 :         unlock_buffer(bh);
     379                 :          0 :         tmp = bh->b_this_page;
     380         [ #  # ]:          0 :         while (tmp != bh) {
     381         [ #  # ]:          0 :                 if (buffer_async_write(tmp)) {
     382         [ #  # ]:          0 :                         BUG_ON(!buffer_locked(tmp));
     383                 :          0 :                         goto still_busy;
     384                 :            :                 }
     385                 :          0 :                 tmp = tmp->b_this_page;
     386                 :            :         }
     387                 :          0 :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     388                 :          0 :         local_irq_restore(flags);
     389                 :          0 :         end_page_writeback(page);
     390                 :          0 :         return;
     391                 :            : 
     392                 :            : still_busy:
     393                 :          0 :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     394                 :          0 :         local_irq_restore(flags);
     395                 :            :         return;
     396                 :            : }
     397                 :            : EXPORT_SYMBOL(end_buffer_async_write);
     398                 :            : 
     399                 :            : /*
     400                 :            :  * If a page's buffers are under async readin (end_buffer_async_read
     401                 :            :  * completion) then there is a possibility that another thread of
     402                 :            :  * control could lock one of the buffers after it has completed
     403                 :            :  * but while some of the other buffers have not completed.  This
     404                 :            :  * locked buffer would confuse end_buffer_async_read() into not unlocking
     405                 :            :  * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
     406                 :            :  * that this buffer is not under async I/O.
     407                 :            :  *
     408                 :            :  * The page comes unlocked when it has no locked buffer_async buffers
     409                 :            :  * left.
     410                 :            :  *
     411                 :            :  * PageLocked prevents anyone from starting new async I/O reads on any of
     412                 :            :  * the buffers.
     413                 :            :  *
     414                 :            :  * PageWriteback is used to prevent simultaneous writeout of the same
     415                 :            :  * page.
     416                 :            :  *
     417                 :            :  * PageLocked prevents anyone from starting writeback of a page which is
     418                 :            :  * under read I/O (PageWriteback is only ever set against a locked page).
     419                 :            :  */
     420                 :         42 : static void mark_buffer_async_read(struct buffer_head *bh)
     421                 :            : {
     422                 :         42 :         bh->b_end_io = end_buffer_async_read_io;
     423                 :         42 :         set_buffer_async_read(bh);
     424                 :         42 : }
     425                 :            : 
     426                 :          0 : static void mark_buffer_async_write_endio(struct buffer_head *bh,
     427                 :            :                                           bh_end_io_t *handler)
     428                 :            : {
     429                 :          0 :         bh->b_end_io = handler;
     430                 :          0 :         set_buffer_async_write(bh);
     431                 :          0 : }
     432                 :            : 
     433                 :          0 : void mark_buffer_async_write(struct buffer_head *bh)
     434                 :            : {
     435                 :          0 :         mark_buffer_async_write_endio(bh, end_buffer_async_write);
     436                 :          0 : }
     437                 :            : EXPORT_SYMBOL(mark_buffer_async_write);
     438                 :            : 
     439                 :            : 
     440                 :            : /*
     441                 :            :  * fs/buffer.c contains helper functions for buffer-backed address space's
     442                 :            :  * fsync functions.  A common requirement for buffer-based filesystems is
     443                 :            :  * that certain data from the backing blockdev needs to be written out for
     444                 :            :  * a successful fsync().  For example, ext2 indirect blocks need to be
     445                 :            :  * written back and waited upon before fsync() returns.
     446                 :            :  *
     447                 :            :  * The functions mark_buffer_dirty_inode(), sync_mapping_buffers(),
     448                 :            :  * inode_has_buffers() and invalidate_inode_buffers() are provided for the
     449                 :            :  * management of a list of dependent buffers at ->i_mapping->private_list.
     450                 :            :  *
     451                 :            :  * Locking is a little subtle: try_to_free_buffers() will remove buffers
     452                 :            :  * from their controlling inode's queue when they are being freed.  But
     453                 :            :  * try_to_free_buffers() will be operating against the *blockdev* mapping
     454                 :            :  * at the time, not against the S_ISREG file which depends on those buffers.
     455                 :            :  * So the locking for private_list is via the private_lock in the address_space
     456                 :            :  * which backs the buffers.  Which is different from the address_space 
     457                 :            :  * against which the buffers are listed.  So for a particular address_space,
     458                 :            :  * mapping->private_lock does *not* protect mapping->private_list!  In fact,
     459                 :            :  * mapping->private_list will always be protected by the backing blockdev's
     460                 :            :  * ->private_lock.
     461                 :            :  *
     462                 :            :  * Which introduces a requirement: all buffers on an address_space's
     463                 :            :  * ->private_list must be from the same address_space: the blockdev's.
     464                 :            :  *
     465                 :            :  * address_spaces which do not place buffers at ->private_list via these
     466                 :            :  * utility functions are free to use private_lock and private_list for
     467                 :            :  * whatever they want.  The only requirement is that list_empty(private_list)
     468                 :            :  * be true at clear_inode() time.
     469                 :            :  *
     470                 :            :  * FIXME: clear_inode should not call invalidate_inode_buffers().  The
     471                 :            :  * filesystems should do that.  invalidate_inode_buffers() should just go
     472                 :            :  * BUG_ON(!list_empty).
     473                 :            :  *
     474                 :            :  * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
     475                 :            :  * take an address_space, not an inode.  And it should be called
     476                 :            :  * mark_buffer_dirty_fsync() to clearly define why those buffers are being
     477                 :            :  * queued up.
     478                 :            :  *
     479                 :            :  * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
     480                 :            :  * list if it is already on a list.  Because if the buffer is on a list,
     481                 :            :  * it *must* already be on the right one.  If not, the filesystem is being
     482                 :            :  * silly.  This will save a ton of locking.  But first we have to ensure
     483                 :            :  * that buffers are taken *off* the old inode's list when they are freed
     484                 :            :  * (presumably in truncate).  That requires careful auditing of all
     485                 :            :  * filesystems (do it inside bforget()).  It could also be done by bringing
     486                 :            :  * b_inode back.
     487                 :            :  */
     488                 :            : 
     489                 :            : /*
     490                 :            :  * The buffer's backing address_space's private_lock must be held
     491                 :            :  */
     492                 :          0 : static void __remove_assoc_queue(struct buffer_head *bh)
     493                 :            : {
     494                 :          0 :         list_del_init(&bh->b_assoc_buffers);
     495   [ #  #  #  #  :          0 :         WARN_ON(!bh->b_assoc_map);
          #  #  #  #  #  
                      # ]
     496                 :          0 :         bh->b_assoc_map = NULL;
     497                 :          0 : }
     498                 :            : 
     499                 :      39784 : int inode_has_buffers(struct inode *inode)
     500                 :            : {
     501                 :      39784 :         return !list_empty(&inode->i_data.private_list);
     502                 :            : }
     503                 :            : 
     504                 :            : /*
     505                 :            :  * osync is designed to support O_SYNC io.  It waits synchronously for
     506                 :            :  * all already-submitted IO to complete, but does not queue any new
     507                 :            :  * writes to the disk.
     508                 :            :  *
     509                 :            :  * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
     510                 :            :  * you dirty the buffers, and then use osync_inode_buffers to wait for
     511                 :            :  * completion.  Any other dirty buffers which are not yet queued for
     512                 :            :  * write will not be flushed to disk by the osync.
     513                 :            :  */
     514                 :          0 : static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
     515                 :            : {
     516                 :          0 :         struct buffer_head *bh;
     517                 :          0 :         struct list_head *p;
     518                 :          0 :         int err = 0;
     519                 :            : 
     520                 :          0 :         spin_lock(lock);
     521                 :          0 : repeat:
     522         [ #  # ]:          0 :         list_for_each_prev(p, list) {
     523                 :          0 :                 bh = BH_ENTRY(p);
     524         [ #  # ]:          0 :                 if (buffer_locked(bh)) {
     525                 :          0 :                         get_bh(bh);
     526                 :          0 :                         spin_unlock(lock);
     527                 :          0 :                         wait_on_buffer(bh);
     528         [ #  # ]:          0 :                         if (!buffer_uptodate(bh))
     529                 :          0 :                                 err = -EIO;
     530         [ #  # ]:          0 :                         brelse(bh);
     531                 :          0 :                         spin_lock(lock);
     532                 :          0 :                         goto repeat;
     533                 :            :                 }
     534                 :            :         }
     535                 :          0 :         spin_unlock(lock);
     536                 :          0 :         return err;
     537                 :            : }
     538                 :            : 
     539                 :          0 : void emergency_thaw_bdev(struct super_block *sb)
     540                 :            : {
     541   [ #  #  #  # ]:          0 :         while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
     542                 :          0 :                 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
     543                 :          0 : }
     544                 :            : 
     545                 :            : /**
     546                 :            :  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
     547                 :            :  * @mapping: the mapping which wants those buffers written
     548                 :            :  *
     549                 :            :  * Starts I/O against the buffers at mapping->private_list, and waits upon
     550                 :            :  * that I/O.
     551                 :            :  *
     552                 :            :  * Basically, this is a convenience function for fsync().
     553                 :            :  * @mapping is a file or directory which needs those buffers to be written for
     554                 :            :  * a successful fsync().
     555                 :            :  */
     556                 :          0 : int sync_mapping_buffers(struct address_space *mapping)
     557                 :            : {
     558                 :          0 :         struct address_space *buffer_mapping = mapping->private_data;
     559                 :            : 
     560   [ #  #  #  # ]:          0 :         if (buffer_mapping == NULL || list_empty(&mapping->private_list))
     561                 :            :                 return 0;
     562                 :            : 
     563                 :          0 :         return fsync_buffers_list(&buffer_mapping->private_lock,
     564                 :            :                                         &mapping->private_list);
     565                 :            : }
     566                 :            : EXPORT_SYMBOL(sync_mapping_buffers);
     567                 :            : 
     568                 :            : /*
     569                 :            :  * Called when we've recently written block `bblock', and it is known that
     570                 :            :  * `bblock' was for a buffer_boundary() buffer.  This means that the block at
     571                 :            :  * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
     572                 :            :  * dirty, schedule it for IO.  So that indirects merge nicely with their data.
     573                 :            :  */
     574                 :          0 : void write_boundary_block(struct block_device *bdev,
     575                 :            :                         sector_t bblock, unsigned blocksize)
     576                 :            : {
     577                 :          0 :         struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
     578         [ #  # ]:          0 :         if (bh) {
     579         [ #  # ]:          0 :                 if (buffer_dirty(bh))
     580                 :          0 :                         ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
     581                 :          0 :                 put_bh(bh);
     582                 :            :         }
     583                 :          0 : }
     584                 :            : 
     585                 :          0 : void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
     586                 :            : {
     587                 :          0 :         struct address_space *mapping = inode->i_mapping;
     588                 :          0 :         struct address_space *buffer_mapping = bh->b_page->mapping;
     589                 :            : 
     590                 :          0 :         mark_buffer_dirty(bh);
     591         [ #  # ]:          0 :         if (!mapping->private_data) {
     592                 :          0 :                 mapping->private_data = buffer_mapping;
     593                 :            :         } else {
     594         [ #  # ]:          0 :                 BUG_ON(mapping->private_data != buffer_mapping);
     595                 :            :         }
     596         [ #  # ]:          0 :         if (!bh->b_assoc_map) {
     597                 :          0 :                 spin_lock(&buffer_mapping->private_lock);
     598                 :          0 :                 list_move_tail(&bh->b_assoc_buffers,
     599                 :            :                                 &mapping->private_list);
     600                 :          0 :                 bh->b_assoc_map = mapping;
     601                 :          0 :                 spin_unlock(&buffer_mapping->private_lock);
     602                 :            :         }
     603                 :          0 : }
     604                 :            : EXPORT_SYMBOL(mark_buffer_dirty_inode);
     605                 :            : 
     606                 :            : /*
     607                 :            :  * Mark the page dirty, and set it dirty in the page cache, and mark the inode
     608                 :            :  * dirty.
     609                 :            :  *
     610                 :            :  * If warn is true, then emit a warning if the page is not uptodate and has
     611                 :            :  * not been truncated.
     612                 :            :  *
     613                 :            :  * The caller must hold lock_page_memcg().
     614                 :            :  */
     615                 :      13027 : void __set_page_dirty(struct page *page, struct address_space *mapping,
     616                 :            :                              int warn)
     617                 :            : {
     618                 :      13027 :         unsigned long flags;
     619                 :            : 
     620                 :      13027 :         xa_lock_irqsave(&mapping->i_pages, flags);
     621         [ +  - ]:      13027 :         if (page->mapping) { /* Race with truncate? */
     622   [ -  +  -  -  :      13027 :                 WARN_ON_ONCE(warn && !PageUptodate(page));
                   -  + ]
     623                 :      13027 :                 account_page_dirtied(page, mapping);
     624                 :      13027 :                 __xa_set_mark(&mapping->i_pages, page_index(page),
     625                 :            :                                 PAGECACHE_TAG_DIRTY);
     626                 :            :         }
     627                 :      13027 :         xa_unlock_irqrestore(&mapping->i_pages, flags);
     628                 :      13027 : }
     629                 :            : EXPORT_SYMBOL_GPL(__set_page_dirty);
     630                 :            : 
     631                 :            : /*
     632                 :            :  * Add a page to the dirty page list.
     633                 :            :  *
     634                 :            :  * It is a sad fact of life that this function is called from several places
     635                 :            :  * deeply under spinlocking.  It may not sleep.
     636                 :            :  *
     637                 :            :  * If the page has buffers, the uptodate buffers are set dirty, to preserve
     638                 :            :  * dirty-state coherency between the page and the buffers.  If the page does
     639                 :            :  * not have buffers then when they are later attached they will all be set
     640                 :            :  * dirty.
     641                 :            :  *
     642                 :            :  * The buffers are dirtied before the page is dirtied.  There's a small race
     643                 :            :  * window in which a writepage caller may see the page cleanness but not the
     644                 :            :  * buffer dirtiness.  That's fine.  If this code were to set the page dirty
     645                 :            :  * before the buffers, a concurrent writepage caller could clear the page dirty
     646                 :            :  * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
     647                 :            :  * page on the dirty page list.
     648                 :            :  *
     649                 :            :  * We use private_lock to lock against try_to_free_buffers while using the
     650                 :            :  * page's buffer list.  Also use this to protect against clean buffers being
     651                 :            :  * added to the page after it was set dirty.
     652                 :            :  *
     653                 :            :  * FIXME: may need to call ->reservepage here as well.  That's rather up to the
     654                 :            :  * address_space though.
     655                 :            :  */
     656                 :          0 : int __set_page_dirty_buffers(struct page *page)
     657                 :            : {
     658                 :          0 :         int newly_dirty;
     659                 :          0 :         struct address_space *mapping = page_mapping(page);
     660                 :            : 
     661         [ #  # ]:          0 :         if (unlikely(!mapping))
     662         [ #  # ]:          0 :                 return !TestSetPageDirty(page);
     663                 :            : 
     664                 :          0 :         spin_lock(&mapping->private_lock);
     665         [ #  # ]:          0 :         if (page_has_buffers(page)) {
     666         [ #  # ]:          0 :                 struct buffer_head *head = page_buffers(page);
     667                 :          0 :                 struct buffer_head *bh = head;
     668                 :            : 
     669                 :          0 :                 do {
     670                 :          0 :                         set_buffer_dirty(bh);
     671                 :          0 :                         bh = bh->b_this_page;
     672         [ #  # ]:          0 :                 } while (bh != head);
     673                 :            :         }
     674                 :            :         /*
     675                 :            :          * Lock out page->mem_cgroup migration to keep PageDirty
     676                 :            :          * synchronized with per-memcg dirty page counters.
     677                 :            :          */
     678         [ #  # ]:          0 :         lock_page_memcg(page);
     679         [ #  # ]:          0 :         newly_dirty = !TestSetPageDirty(page);
     680                 :          0 :         spin_unlock(&mapping->private_lock);
     681                 :            : 
     682         [ #  # ]:          0 :         if (newly_dirty)
     683                 :          0 :                 __set_page_dirty(page, mapping, 1);
     684                 :            : 
     685         [ #  # ]:          0 :         unlock_page_memcg(page);
     686                 :            : 
     687         [ #  # ]:          0 :         if (newly_dirty)
     688                 :          0 :                 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
     689                 :            : 
     690                 :            :         return newly_dirty;
     691                 :            : }
     692                 :            : EXPORT_SYMBOL(__set_page_dirty_buffers);
     693                 :            : 
     694                 :            : /*
     695                 :            :  * Write out and wait upon a list of buffers.
     696                 :            :  *
     697                 :            :  * We have conflicting pressures: we want to make sure that all
     698                 :            :  * initially dirty buffers get waited on, but that any subsequently
     699                 :            :  * dirtied buffers don't.  After all, we don't want fsync to last
     700                 :            :  * forever if somebody is actively writing to the file.
     701                 :            :  *
     702                 :            :  * Do this in two main stages: first we copy dirty buffers to a
     703                 :            :  * temporary inode list, queueing the writes as we go.  Then we clean
     704                 :            :  * up, waiting for those writes to complete.
     705                 :            :  * 
     706                 :            :  * During this second stage, any subsequent updates to the file may end
     707                 :            :  * up refiling the buffer on the original inode's dirty list again, so
     708                 :            :  * there is a chance we will end up with a buffer queued for write but
     709                 :            :  * not yet completed on that list.  So, as a final cleanup we go through
     710                 :            :  * the osync code to catch these locked, dirty buffers without requeuing
     711                 :            :  * any newly dirty buffers for write.
     712                 :            :  */
     713                 :          0 : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
     714                 :            : {
     715                 :          0 :         struct buffer_head *bh;
     716                 :          0 :         struct list_head tmp;
     717                 :          0 :         struct address_space *mapping;
     718                 :          0 :         int err = 0, err2;
     719                 :          0 :         struct blk_plug plug;
     720                 :            : 
     721                 :          0 :         INIT_LIST_HEAD(&tmp);
     722                 :          0 :         blk_start_plug(&plug);
     723                 :            : 
     724                 :          0 :         spin_lock(lock);
     725         [ #  # ]:          0 :         while (!list_empty(list)) {
     726                 :          0 :                 bh = BH_ENTRY(list->next);
     727                 :          0 :                 mapping = bh->b_assoc_map;
     728         [ #  # ]:          0 :                 __remove_assoc_queue(bh);
     729                 :            :                 /* Avoid race with mark_buffer_dirty_inode() which does
     730                 :            :                  * a lockless check and we rely on seeing the dirty bit */
     731                 :          0 :                 smp_mb();
     732   [ #  #  #  # ]:          0 :                 if (buffer_dirty(bh) || buffer_locked(bh)) {
     733                 :          0 :                         list_add(&bh->b_assoc_buffers, &tmp);
     734                 :          0 :                         bh->b_assoc_map = mapping;
     735         [ #  # ]:          0 :                         if (buffer_dirty(bh)) {
     736                 :          0 :                                 get_bh(bh);
     737                 :          0 :                                 spin_unlock(lock);
     738                 :            :                                 /*
     739                 :            :                                  * Ensure any pending I/O completes so that
     740                 :            :                                  * write_dirty_buffer() actually writes the
     741                 :            :                                  * current contents - it is a noop if I/O is
     742                 :            :                                  * still in flight on potentially older
     743                 :            :                                  * contents.
     744                 :            :                                  */
     745                 :          0 :                                 write_dirty_buffer(bh, REQ_SYNC);
     746                 :            : 
     747                 :            :                                 /*
     748                 :            :                                  * Kick off IO for the previous mapping. Note
     749                 :            :                                  * that we will not run the very last mapping,
     750                 :            :                                  * wait_on_buffer() will do that for us
     751                 :            :                                  * through sync_buffer().
     752                 :            :                                  */
     753         [ #  # ]:          0 :                                 brelse(bh);
     754                 :          0 :                                 spin_lock(lock);
     755                 :            :                         }
     756                 :            :                 }
     757                 :            :         }
     758                 :            : 
     759                 :          0 :         spin_unlock(lock);
     760                 :          0 :         blk_finish_plug(&plug);
     761                 :          0 :         spin_lock(lock);
     762                 :            : 
     763         [ #  # ]:          0 :         while (!list_empty(&tmp)) {
     764                 :          0 :                 bh = BH_ENTRY(tmp.prev);
     765                 :          0 :                 get_bh(bh);
     766                 :          0 :                 mapping = bh->b_assoc_map;
     767         [ #  # ]:          0 :                 __remove_assoc_queue(bh);
     768                 :            :                 /* Avoid race with mark_buffer_dirty_inode() which does
     769                 :            :                  * a lockless check and we rely on seeing the dirty bit */
     770                 :          0 :                 smp_mb();
     771         [ #  # ]:          0 :                 if (buffer_dirty(bh)) {
     772                 :          0 :                         list_add(&bh->b_assoc_buffers,
     773                 :            :                                  &mapping->private_list);
     774                 :          0 :                         bh->b_assoc_map = mapping;
     775                 :            :                 }
     776                 :          0 :                 spin_unlock(lock);
     777                 :          0 :                 wait_on_buffer(bh);
     778         [ #  # ]:          0 :                 if (!buffer_uptodate(bh))
     779                 :          0 :                         err = -EIO;
     780         [ #  # ]:          0 :                 brelse(bh);
     781                 :          0 :                 spin_lock(lock);
     782                 :            :         }
     783                 :            :         
     784                 :          0 :         spin_unlock(lock);
     785                 :          0 :         err2 = osync_buffers_list(lock, list);
     786         [ #  # ]:          0 :         if (err)
     787                 :            :                 return err;
     788                 :            :         else
     789                 :          0 :                 return err2;
     790                 :            : }
     791                 :            : 
     792                 :            : /*
     793                 :            :  * Invalidate any and all dirty buffers on a given inode.  We are
     794                 :            :  * probably unmounting the fs, but that doesn't mean we have already
     795                 :            :  * done a sync().  Just drop the buffers from the inode list.
     796                 :            :  *
     797                 :            :  * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
     798                 :            :  * assumes that all the buffers are against the blockdev.  Not true
     799                 :            :  * for reiserfs.
     800                 :            :  */
     801                 :        252 : void invalidate_inode_buffers(struct inode *inode)
     802                 :            : {
     803         [ -  + ]:        252 :         if (inode_has_buffers(inode)) {
     804                 :          0 :                 struct address_space *mapping = &inode->i_data;
     805                 :          0 :                 struct list_head *list = &mapping->private_list;
     806                 :          0 :                 struct address_space *buffer_mapping = mapping->private_data;
     807                 :            : 
     808                 :          0 :                 spin_lock(&buffer_mapping->private_lock);
     809         [ #  # ]:          0 :                 while (!list_empty(list))
     810         [ #  # ]:          0 :                         __remove_assoc_queue(BH_ENTRY(list->next));
     811                 :          0 :                 spin_unlock(&buffer_mapping->private_lock);
     812                 :            :         }
     813                 :        252 : }
     814                 :            : EXPORT_SYMBOL(invalidate_inode_buffers);
     815                 :            : 
     816                 :            : /*
     817                 :            :  * Remove any clean buffers from the inode's buffer list.  This is called
     818                 :            :  * when we're trying to free the inode itself.  Those buffers can pin it.
     819                 :            :  *
     820                 :            :  * Returns true if all buffers were removed.
     821                 :            :  */
     822                 :          0 : int remove_inode_buffers(struct inode *inode)
     823                 :            : {
     824                 :          0 :         int ret = 1;
     825                 :            : 
     826         [ #  # ]:          0 :         if (inode_has_buffers(inode)) {
     827                 :          0 :                 struct address_space *mapping = &inode->i_data;
     828                 :          0 :                 struct list_head *list = &mapping->private_list;
     829                 :          0 :                 struct address_space *buffer_mapping = mapping->private_data;
     830                 :            : 
     831                 :          0 :                 spin_lock(&buffer_mapping->private_lock);
     832         [ #  # ]:          0 :                 while (!list_empty(list)) {
     833                 :          0 :                         struct buffer_head *bh = BH_ENTRY(list->next);
     834         [ #  # ]:          0 :                         if (buffer_dirty(bh)) {
     835                 :            :                                 ret = 0;
     836                 :            :                                 break;
     837                 :            :                         }
     838         [ #  # ]:          0 :                         __remove_assoc_queue(bh);
     839                 :            :                 }
     840                 :          0 :                 spin_unlock(&buffer_mapping->private_lock);
     841                 :            :         }
     842                 :          0 :         return ret;
     843                 :            : }
     844                 :            : 
     845                 :            : /*
     846                 :            :  * Create the appropriate buffers when given a page for data area and
     847                 :            :  * the size of each buffer.  Use the bh->b_this_page linked list to
     848                 :            :  * follow the buffers created.  Return NULL if unable to create more
     849                 :            :  * buffers.
     850                 :            :  *
     851                 :            :  * The retry flag is used to differentiate async IO (paging, swapping)
     852                 :            :  * which may not fail from ordinary buffer allocations.
     853                 :            :  */
     854                 :      31366 : struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
     855                 :            :                 bool retry)
     856                 :            : {
     857                 :      31366 :         struct buffer_head *bh, *head;
     858                 :      31366 :         gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
     859                 :      31366 :         long offset;
     860                 :      31366 :         struct mem_cgroup *memcg;
     861                 :            : 
     862         [ +  - ]:      31366 :         if (retry)
     863                 :      31366 :                 gfp |= __GFP_NOFAIL;
     864                 :            : 
     865                 :      31366 :         memcg = get_mem_cgroup_from_page(page);
     866                 :      31366 :         memalloc_use_memcg(memcg);
     867                 :            : 
     868                 :      31366 :         head = NULL;
     869                 :      31366 :         offset = PAGE_SIZE;
     870         [ +  + ]:      62984 :         while ((offset -= size) >= 0) {
     871                 :      31618 :                 bh = alloc_buffer_head(gfp);
     872         [ -  + ]:      31618 :                 if (!bh)
     873                 :          0 :                         goto no_grow;
     874                 :            : 
     875                 :      31618 :                 bh->b_this_page = head;
     876                 :      31618 :                 bh->b_blocknr = -1;
     877                 :      31618 :                 head = bh;
     878                 :            : 
     879                 :      31618 :                 bh->b_size = size;
     880                 :            : 
     881                 :            :                 /* Link the buffer to its page */
     882                 :      31618 :                 set_bh_page(bh, page, offset);
     883                 :            :         }
     884                 :      31366 : out:
     885                 :      31366 :         memalloc_unuse_memcg();
     886                 :      31366 :         mem_cgroup_put(memcg);
     887                 :      31366 :         return head;
     888                 :            : /*
     889                 :            :  * In case anything failed, we just free everything we got.
     890                 :            :  */
     891                 :            : no_grow:
     892         [ #  # ]:          0 :         if (head) {
     893                 :          0 :                 do {
     894                 :          0 :                         bh = head;
     895                 :          0 :                         head = head->b_this_page;
     896                 :          0 :                         free_buffer_head(bh);
     897         [ #  # ]:          0 :                 } while (head);
     898                 :            :         }
     899                 :            : 
     900                 :          0 :         goto out;
     901                 :            : }
     902                 :            : EXPORT_SYMBOL_GPL(alloc_page_buffers);
     903                 :            : 
     904                 :            : static inline void
     905                 :            : link_dev_buffers(struct page *page, struct buffer_head *head)
     906                 :            : {
     907                 :            :         struct buffer_head *bh, *tail;
     908                 :            : 
     909                 :            :         bh = head;
     910                 :      19431 :         do {
     911                 :      19431 :                 tail = bh;
     912                 :      19431 :                 bh = bh->b_this_page;
     913         [ +  + ]:      19431 :         } while (bh);
     914                 :      19179 :         tail->b_this_page = head;
     915                 :      19179 :         attach_page_buffers(page, head);
     916                 :            : }
     917                 :            : 
     918                 :      19179 : static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
     919                 :            : {
     920                 :      19179 :         sector_t retval = ~((sector_t)0);
     921                 :      19179 :         loff_t sz = i_size_read(bdev->bd_inode);
     922                 :            : 
     923         [ +  - ]:      19179 :         if (sz) {
     924                 :            :                 unsigned int sizebits = blksize_bits(size);
     925                 :      19179 :                 retval = (sz >> sizebits);
     926                 :            :         }
     927                 :      19179 :         return retval;
     928                 :            : }
     929                 :            : 
     930                 :            : /*
     931                 :            :  * Initialise the state of a blockdev page's buffers.
     932                 :            :  */ 
     933                 :            : static sector_t
     934                 :      19179 : init_page_buffers(struct page *page, struct block_device *bdev,
     935                 :            :                         sector_t block, int size)
     936                 :            : {
     937         [ -  + ]:      19179 :         struct buffer_head *head = page_buffers(page);
     938                 :      19179 :         struct buffer_head *bh = head;
     939                 :      19179 :         int uptodate = PageUptodate(page);
     940         [ +  - ]:      19179 :         sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
     941                 :            : 
     942                 :      19431 :         do {
     943         [ +  - ]:      19431 :                 if (!buffer_mapped(bh)) {
     944                 :      19431 :                         bh->b_end_io = NULL;
     945                 :      19431 :                         bh->b_private = NULL;
     946                 :      19431 :                         bh->b_bdev = bdev;
     947                 :      19431 :                         bh->b_blocknr = block;
     948         [ -  + ]:      19431 :                         if (uptodate)
     949                 :          0 :                                 set_buffer_uptodate(bh);
     950         [ +  - ]:      19431 :                         if (block < end_block)
     951                 :      19431 :                                 set_buffer_mapped(bh);
     952                 :            :                 }
     953                 :      19431 :                 block++;
     954                 :      19431 :                 bh = bh->b_this_page;
     955         [ +  + ]:      19431 :         } while (bh != head);
     956                 :            : 
     957                 :            :         /*
     958                 :            :          * Caller needs to validate requested block against end of device.
     959                 :            :          */
     960                 :      19179 :         return end_block;
     961                 :            : }
     962                 :            : 
     963                 :            : /*
     964                 :            :  * Create the page-cache page that contains the requested block.
     965                 :            :  *
     966                 :            :  * This is used purely for blockdev mappings.
     967                 :            :  */
     968                 :            : static int
     969                 :      19179 : grow_dev_page(struct block_device *bdev, sector_t block,
     970                 :            :               pgoff_t index, int size, int sizebits, gfp_t gfp)
     971                 :            : {
     972                 :      19179 :         struct inode *inode = bdev->bd_inode;
     973                 :      19179 :         struct page *page;
     974                 :      19179 :         struct buffer_head *bh;
     975                 :      19179 :         sector_t end_block;
     976                 :      19179 :         int ret = 0;            /* Will call free_more_memory() */
     977                 :      19179 :         gfp_t gfp_mask;
     978                 :            : 
     979                 :      19179 :         gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
     980                 :            : 
     981                 :            :         /*
     982                 :            :          * XXX: __getblk_slow() can not really deal with failure and
     983                 :            :          * will endlessly loop on improvised global reclaim.  Prefer
     984                 :            :          * looping in the allocator rather than here, at least that
     985                 :            :          * code knows what it's doing.
     986                 :            :          */
     987                 :      19179 :         gfp_mask |= __GFP_NOFAIL;
     988                 :            : 
     989                 :      19179 :         page = find_or_create_page(inode->i_mapping, index, gfp_mask);
     990                 :            : 
     991   [ -  +  -  + ]:      38358 :         BUG_ON(!PageLocked(page));
     992                 :            : 
     993         [ -  + ]:      19179 :         if (page_has_buffers(page)) {
     994         [ #  # ]:          0 :                 bh = page_buffers(page);
     995         [ #  # ]:          0 :                 if (bh->b_size == size) {
     996                 :          0 :                         end_block = init_page_buffers(page, bdev,
     997                 :            :                                                 (sector_t)index << sizebits,
     998                 :            :                                                 size);
     999                 :          0 :                         goto done;
    1000                 :            :                 }
    1001         [ #  # ]:          0 :                 if (!try_to_free_buffers(page))
    1002                 :          0 :                         goto failed;
    1003                 :            :         }
    1004                 :            : 
    1005                 :            :         /*
    1006                 :            :          * Allocate some buffers for this page
    1007                 :            :          */
    1008                 :      19179 :         bh = alloc_page_buffers(page, size, true);
    1009                 :            : 
    1010                 :            :         /*
    1011                 :            :          * Link the page to the buffers and initialise them.  Take the
    1012                 :            :          * lock to be atomic wrt __find_get_block(), which does not
    1013                 :            :          * run under the page lock.
    1014                 :            :          */
    1015                 :      19179 :         spin_lock(&inode->i_mapping->private_lock);
    1016                 :            :         link_dev_buffers(page, bh);
    1017                 :      19179 :         end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
    1018                 :            :                         size);
    1019                 :      19179 :         spin_unlock(&inode->i_mapping->private_lock);
    1020                 :      19179 : done:
    1021         [ -  + ]:      19179 :         ret = (block < end_block) ? 1 : -ENXIO;
    1022                 :      19179 : failed:
    1023                 :      19179 :         unlock_page(page);
    1024                 :      19179 :         put_page(page);
    1025                 :      19179 :         return ret;
    1026                 :            : }
    1027                 :            : 
    1028                 :            : /*
    1029                 :            :  * Create buffers for the specified block device block's page.  If
    1030                 :            :  * that page was dirty, the buffers are set dirty also.
    1031                 :            :  */
    1032                 :            : static int
    1033                 :      19179 : grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
    1034                 :            : {
    1035                 :      19179 :         pgoff_t index;
    1036                 :      19179 :         int sizebits;
    1037                 :            : 
    1038                 :      19179 :         sizebits = -1;
    1039                 :      19347 :         do {
    1040                 :      19347 :                 sizebits++;
    1041         [ +  + ]:      19347 :         } while ((size << sizebits) < PAGE_SIZE);
    1042                 :            : 
    1043                 :      19179 :         index = block >> sizebits;
    1044                 :            : 
    1045                 :            :         /*
    1046                 :            :          * Check for a block which wants to lie outside our maximum possible
    1047                 :            :          * pagecache index.  (this comparison is done using sector_t types).
    1048                 :            :          */
    1049                 :      19179 :         if (unlikely(index != block >> sizebits)) {
    1050                 :            :                 printk(KERN_ERR "%s: requested out-of-range block %llu for "
    1051                 :            :                         "device %pg\n",
    1052                 :            :                         __func__, (unsigned long long)block,
    1053                 :            :                         bdev);
    1054                 :            :                 return -EIO;
    1055                 :            :         }
    1056                 :            : 
    1057                 :            :         /* Create a page with the proper size buffers.. */
    1058                 :      19179 :         return grow_dev_page(bdev, block, index, size, sizebits, gfp);
    1059                 :            : }
    1060                 :            : 
    1061                 :            : static struct buffer_head *
    1062                 :      19179 : __getblk_slow(struct block_device *bdev, sector_t block,
    1063                 :            :              unsigned size, gfp_t gfp)
    1064                 :            : {
    1065                 :            :         /* Size must be multiple of hard sectorsize */
    1066   [ +  -  +  -  :      38358 :         if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
                   -  + ]
    1067                 :            :                         (size < 512 || size > PAGE_SIZE))) {
    1068                 :          0 :                 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
    1069                 :            :                                         size);
    1070         [ #  # ]:          0 :                 printk(KERN_ERR "logical block size: %d\n",
    1071                 :            :                                         bdev_logical_block_size(bdev));
    1072                 :            : 
    1073                 :          0 :                 dump_stack();
    1074                 :          0 :                 return NULL;
    1075                 :            :         }
    1076                 :            : 
    1077                 :      38358 :         for (;;) {
    1078                 :      38358 :                 struct buffer_head *bh;
    1079                 :      38358 :                 int ret;
    1080                 :            : 
    1081                 :      38358 :                 bh = __find_get_block(bdev, block, size);
    1082         [ +  + ]:      38358 :                 if (bh)
    1083                 :      19179 :                         return bh;
    1084                 :            : 
    1085                 :      19179 :                 ret = grow_buffers(bdev, block, size, gfp);
    1086         [ +  - ]:      19179 :                 if (ret < 0)
    1087                 :            :                         return NULL;
    1088                 :            :         }
    1089                 :            : }
    1090                 :            : 
    1091                 :            : /*
    1092                 :            :  * The relationship between dirty buffers and dirty pages:
    1093                 :            :  *
    1094                 :            :  * Whenever a page has any dirty buffers, the page's dirty bit is set, and
    1095                 :            :  * the page is tagged dirty in the page cache.
    1096                 :            :  *
    1097                 :            :  * At all times, the dirtiness of the buffers represents the dirtiness of
    1098                 :            :  * subsections of the page.  If the page has buffers, the page dirty bit is
    1099                 :            :  * merely a hint about the true dirty state.
    1100                 :            :  *
    1101                 :            :  * When a page is set dirty in its entirety, all its buffers are marked dirty
    1102                 :            :  * (if the page has buffers).
    1103                 :            :  *
    1104                 :            :  * When a buffer is marked dirty, its page is dirtied, but the page's other
    1105                 :            :  * buffers are not.
    1106                 :            :  *
    1107                 :            :  * Also.  When blockdev buffers are explicitly read with bread(), they
    1108                 :            :  * individually become uptodate.  But their backing page remains not
    1109                 :            :  * uptodate - even if all of its buffers are uptodate.  A subsequent
    1110                 :            :  * block_read_full_page() against that page will discover all the uptodate
    1111                 :            :  * buffers, will set the page uptodate and will perform no I/O.
    1112                 :            :  */
    1113                 :            : 
    1114                 :            : /**
    1115                 :            :  * mark_buffer_dirty - mark a buffer_head as needing writeout
    1116                 :            :  * @bh: the buffer_head to mark dirty
    1117                 :            :  *
    1118                 :            :  * mark_buffer_dirty() will set the dirty bit against the buffer, then set
    1119                 :            :  * its backing page dirty, then tag the page as dirty in the page cache
    1120                 :            :  * and then attach the address_space's inode to its superblock's dirty
    1121                 :            :  * inode list.
    1122                 :            :  *
    1123                 :            :  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
    1124                 :            :  * i_pages lock and mapping->host->i_lock.
    1125                 :            :  */
    1126                 :      16887 : void mark_buffer_dirty(struct buffer_head *bh)
    1127                 :            : {
    1128         [ -  + ]:      16887 :         WARN_ON_ONCE(!buffer_uptodate(bh));
    1129                 :            : 
    1130                 :      16887 :         trace_block_dirty_buffer(bh);
    1131                 :            : 
    1132                 :            :         /*
    1133                 :            :          * Very *carefully* optimize the it-is-already-dirty case.
    1134                 :            :          *
    1135                 :            :          * Don't let the final "is it dirty" escape to before we
    1136                 :            :          * perhaps modified the buffer.
    1137                 :            :          */
    1138         [ +  + ]:      16887 :         if (buffer_dirty(bh)) {
    1139                 :       2888 :                 smp_mb();
    1140         [ -  + ]:       2888 :                 if (buffer_dirty(bh))
    1141                 :            :                         return;
    1142                 :            :         }
    1143                 :            : 
    1144         [ +  - ]:      13999 :         if (!test_set_buffer_dirty(bh)) {
    1145                 :      13999 :                 struct page *page = bh->b_page;
    1146                 :      13999 :                 struct address_space *mapping = NULL;
    1147                 :            : 
    1148         [ -  + ]:      13999 :                 lock_page_memcg(page);
    1149   [ -  +  +  + ]:      27998 :                 if (!TestSetPageDirty(page)) {
    1150                 :      13027 :                         mapping = page_mapping(page);
    1151         [ +  - ]:      13027 :                         if (mapping)
    1152                 :      13027 :                                 __set_page_dirty(page, mapping, 0);
    1153                 :            :                 }
    1154         [ +  - ]:      13027 :                 unlock_page_memcg(page);
    1155         [ +  - ]:      13027 :                 if (mapping)
    1156                 :      13027 :                         __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
    1157                 :            :         }
    1158                 :            : }
    1159                 :            : EXPORT_SYMBOL(mark_buffer_dirty);
    1160                 :            : 
    1161                 :          0 : void mark_buffer_write_io_error(struct buffer_head *bh)
    1162                 :            : {
    1163                 :          0 :         set_buffer_write_io_error(bh);
    1164                 :            :         /* FIXME: do we need to set this in both places? */
    1165   [ #  #  #  # ]:          0 :         if (bh->b_page && bh->b_page->mapping)
    1166                 :          0 :                 mapping_set_error(bh->b_page->mapping, -EIO);
    1167         [ #  # ]:          0 :         if (bh->b_assoc_map)
    1168                 :          0 :                 mapping_set_error(bh->b_assoc_map, -EIO);
    1169                 :          0 : }
    1170                 :            : EXPORT_SYMBOL(mark_buffer_write_io_error);
    1171                 :            : 
    1172                 :            : /*
    1173                 :            :  * Decrement a buffer_head's reference count.  If all buffers against a page
    1174                 :            :  * have zero reference count, are clean and unlocked, and if the page is clean
    1175                 :            :  * and unlocked then try_to_free_buffers() may strip the buffers from the page
    1176                 :            :  * in preparation for freeing it (sometimes, rarely, buffers are removed from
    1177                 :            :  * a page but it ends up not being freed, and buffers may later be reattached).
    1178                 :            :  */
    1179                 :     258918 : void __brelse(struct buffer_head * buf)
    1180                 :            : {
    1181         [ +  - ]:     258918 :         if (atomic_read(&buf->b_count)) {
    1182                 :     258918 :                 put_bh(buf);
    1183                 :     258918 :                 return;
    1184                 :            :         }
    1185                 :          0 :         WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
    1186                 :            : }
    1187                 :            : EXPORT_SYMBOL(__brelse);
    1188                 :            : 
    1189                 :            : /*
    1190                 :            :  * bforget() is like brelse(), except it discards any
    1191                 :            :  * potentially dirty data.
    1192                 :            :  */
    1193                 :          0 : void __bforget(struct buffer_head *bh)
    1194                 :            : {
    1195                 :          0 :         clear_buffer_dirty(bh);
    1196         [ #  # ]:          0 :         if (bh->b_assoc_map) {
    1197                 :          0 :                 struct address_space *buffer_mapping = bh->b_page->mapping;
    1198                 :            : 
    1199                 :          0 :                 spin_lock(&buffer_mapping->private_lock);
    1200                 :          0 :                 list_del_init(&bh->b_assoc_buffers);
    1201                 :          0 :                 bh->b_assoc_map = NULL;
    1202                 :          0 :                 spin_unlock(&buffer_mapping->private_lock);
    1203                 :            :         }
    1204                 :          0 :         __brelse(bh);
    1205                 :          0 : }
    1206                 :            : EXPORT_SYMBOL(__bforget);
    1207                 :            : 
    1208                 :        169 : static struct buffer_head *__bread_slow(struct buffer_head *bh)
    1209                 :            : {
    1210                 :        169 :         lock_buffer(bh);
    1211         [ +  + ]:        169 :         if (buffer_uptodate(bh)) {
    1212                 :         43 :                 unlock_buffer(bh);
    1213                 :         43 :                 return bh;
    1214                 :            :         } else {
    1215                 :        126 :                 get_bh(bh);
    1216                 :        126 :                 bh->b_end_io = end_buffer_read_sync;
    1217                 :        126 :                 submit_bh(REQ_OP_READ, 0, bh);
    1218                 :        126 :                 wait_on_buffer(bh);
    1219         [ -  + ]:        126 :                 if (buffer_uptodate(bh))
    1220                 :            :                         return bh;
    1221                 :            :         }
    1222         [ #  # ]:          0 :         brelse(bh);
    1223                 :            :         return NULL;
    1224                 :            : }
    1225                 :            : 
    1226                 :            : /*
    1227                 :            :  * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
    1228                 :            :  * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
    1229                 :            :  * refcount elevated by one when they're in an LRU.  A buffer can only appear
    1230                 :            :  * once in a particular CPU's LRU.  A single buffer can be present in multiple
    1231                 :            :  * CPU's LRUs at the same time.
    1232                 :            :  *
    1233                 :            :  * This is a transparent caching front-end to sb_bread(), sb_getblk() and
    1234                 :            :  * sb_find_get_block().
    1235                 :            :  *
    1236                 :            :  * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
    1237                 :            :  * a local interrupt disable for that.
    1238                 :            :  */
    1239                 :            : 
    1240                 :            : #define BH_LRU_SIZE     16
    1241                 :            : 
    1242                 :            : struct bh_lru {
    1243                 :            :         struct buffer_head *bhs[BH_LRU_SIZE];
    1244                 :            : };
    1245                 :            : 
    1246                 :            : static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
    1247                 :            : 
    1248                 :            : #ifdef CONFIG_SMP
    1249                 :            : #define bh_lru_lock()   local_irq_disable()
    1250                 :            : #define bh_lru_unlock() local_irq_enable()
    1251                 :            : #else
    1252                 :            : #define bh_lru_lock()   preempt_disable()
    1253                 :            : #define bh_lru_unlock() preempt_enable()
    1254                 :            : #endif
    1255                 :            : 
    1256                 :     292422 : static inline void check_irqs_on(void)
    1257                 :            : {
    1258                 :            : #ifdef irqs_disabled
    1259   [ -  +  -  + ]:     292422 :         BUG_ON(irqs_disabled());
    1260                 :            : #endif
    1261                 :            : }
    1262                 :            : 
    1263                 :            : /*
    1264                 :            :  * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
    1265                 :            :  * inserted at the front, and the buffer_head at the back if any is evicted.
    1266                 :            :  * Or, if already in the LRU it is moved to the front.
    1267                 :            :  */
    1268                 :      32708 : static void bh_lru_install(struct buffer_head *bh)
    1269                 :            : {
    1270                 :      32708 :         struct buffer_head *evictee = bh;
    1271                 :      32708 :         struct bh_lru *b;
    1272                 :      32708 :         int i;
    1273                 :            : 
    1274                 :      32708 :         check_irqs_on();
    1275                 :      32708 :         bh_lru_lock();
    1276                 :            : 
    1277                 :      32708 :         b = this_cpu_ptr(&bh_lrus);
    1278         [ +  + ]:     556036 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1279                 :     523328 :                 swap(evictee, b->bhs[i]);
    1280         [ -  + ]:     523328 :                 if (evictee == bh) {
    1281                 :          0 :                         bh_lru_unlock();
    1282                 :          0 :                         return;
    1283                 :            :                 }
    1284                 :            :         }
    1285                 :            : 
    1286                 :      32708 :         get_bh(bh);
    1287                 :      32708 :         bh_lru_unlock();
    1288         [ +  + ]:      32708 :         brelse(evictee);
    1289                 :            : }
    1290                 :            : 
    1291                 :            : /*
    1292                 :            :  * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
    1293                 :            :  */
    1294                 :            : static struct buffer_head *
    1295                 :     259714 : lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
    1296                 :            : {
    1297                 :     259714 :         struct buffer_head *ret = NULL;
    1298                 :     259714 :         unsigned int i;
    1299                 :            : 
    1300                 :     259714 :         check_irqs_on();
    1301                 :     259714 :         bh_lru_lock();
    1302         [ +  + ]:    2041611 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1303         [ +  + ]:    1710831 :                 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
    1304                 :            : 
    1305   [ +  +  +  +  :    1710831 :                 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
                   +  - ]
    1306         [ +  - ]:     188648 :                     bh->b_size == size) {
    1307         [ +  + ]:     188648 :                         if (i) {
    1308         [ +  + ]:     512676 :                                 while (i) {
    1309                 :     385127 :                                         __this_cpu_write(bh_lrus.bhs[i],
    1310                 :            :                                                 __this_cpu_read(bh_lrus.bhs[i - 1]));
    1311                 :     385127 :                                         i--;
    1312                 :            :                                 }
    1313                 :     188648 :                                 __this_cpu_write(bh_lrus.bhs[0], bh);
    1314                 :            :                         }
    1315                 :     188648 :                         get_bh(bh);
    1316                 :     188648 :                         ret = bh;
    1317                 :     188648 :                         break;
    1318                 :            :                 }
    1319                 :            :         }
    1320                 :     259714 :         bh_lru_unlock();
    1321                 :     259714 :         return ret;
    1322                 :            : }
    1323                 :            : 
    1324                 :            : /*
    1325                 :            :  * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
    1326                 :            :  * it in the LRU and mark it as accessed.  If it is not present then return
    1327                 :            :  * NULL
    1328                 :            :  */
    1329                 :            : struct buffer_head *
    1330                 :     259714 : __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
    1331                 :            : {
    1332                 :     259714 :         struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
    1333                 :            : 
    1334         [ +  + ]:     259714 :         if (bh == NULL) {
    1335                 :            :                 /* __find_get_block_slow will mark the page accessed */
    1336                 :      71066 :                 bh = __find_get_block_slow(bdev, block);
    1337         [ +  + ]:      71066 :                 if (bh)
    1338                 :      32708 :                         bh_lru_install(bh);
    1339                 :            :         } else
    1340                 :     188648 :                 touch_buffer(bh);
    1341                 :            : 
    1342                 :     259714 :         return bh;
    1343                 :            : }
    1344                 :            : EXPORT_SYMBOL(__find_get_block);
    1345                 :            : 
    1346                 :            : /*
    1347                 :            :  * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
    1348                 :            :  * which corresponds to the passed block_device, block and size. The
    1349                 :            :  * returned buffer has its reference count incremented.
    1350                 :            :  *
    1351                 :            :  * __getblk_gfp() will lock up the machine if grow_dev_page's
    1352                 :            :  * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
    1353                 :            :  */
    1354                 :            : struct buffer_head *
    1355                 :     213393 : __getblk_gfp(struct block_device *bdev, sector_t block,
    1356                 :            :              unsigned size, gfp_t gfp)
    1357                 :            : {
    1358                 :     213393 :         struct buffer_head *bh = __find_get_block(bdev, block, size);
    1359                 :            : 
    1360                 :     213393 :         might_sleep();
    1361         [ +  + ]:     213393 :         if (bh == NULL)
    1362                 :      19179 :                 bh = __getblk_slow(bdev, block, size, gfp);
    1363                 :     213393 :         return bh;
    1364                 :            : }
    1365                 :            : EXPORT_SYMBOL(__getblk_gfp);
    1366                 :            : 
    1367                 :            : /*
    1368                 :            :  * Do async read-ahead on a buffer..
    1369                 :            :  */
    1370                 :      10500 : void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
    1371                 :            : {
    1372                 :      10500 :         struct buffer_head *bh = __getblk(bdev, block, size);
    1373         [ +  - ]:      10500 :         if (likely(bh)) {
    1374                 :      10500 :                 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
    1375         [ +  - ]:      10500 :                 brelse(bh);
    1376                 :            :         }
    1377                 :      10500 : }
    1378                 :            : EXPORT_SYMBOL(__breadahead);
    1379                 :            : 
    1380                 :            : /**
    1381                 :            :  *  __bread_gfp() - reads a specified block and returns the bh
    1382                 :            :  *  @bdev: the block_device to read from
    1383                 :            :  *  @block: number of block
    1384                 :            :  *  @size: size (in bytes) to read
    1385                 :            :  *  @gfp: page allocation flag
    1386                 :            :  *
    1387                 :            :  *  Reads a specified block, and returns buffer head that contains it.
    1388                 :            :  *  The page cache can be allocated from non-movable area
    1389                 :            :  *  not to prevent page migration if you set gfp to zero.
    1390                 :            :  *  It returns NULL if the block was unreadable.
    1391                 :            :  */
    1392                 :            : struct buffer_head *
    1393                 :        189 : __bread_gfp(struct block_device *bdev, sector_t block,
    1394                 :            :                    unsigned size, gfp_t gfp)
    1395                 :            : {
    1396                 :        189 :         struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
    1397                 :            : 
    1398   [ +  -  +  + ]:        378 :         if (likely(bh) && !buffer_uptodate(bh))
    1399                 :        169 :                 bh = __bread_slow(bh);
    1400                 :        189 :         return bh;
    1401                 :            : }
    1402                 :            : EXPORT_SYMBOL(__bread_gfp);
    1403                 :            : 
    1404                 :            : /*
    1405                 :            :  * invalidate_bh_lrus() is called rarely - but not only at unmount.
    1406                 :            :  * This doesn't race because it runs in each cpu either in irq
    1407                 :            :  * or with preempt disabled.
    1408                 :            :  */
    1409                 :        126 : static void invalidate_bh_lru(void *arg)
    1410                 :            : {
    1411                 :        126 :         struct bh_lru *b = &get_cpu_var(bh_lrus);
    1412                 :        126 :         int i;
    1413                 :            : 
    1414         [ +  + ]:       2142 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1415         [ +  + ]:       2016 :                 brelse(b->bhs[i]);
    1416                 :       2016 :                 b->bhs[i] = NULL;
    1417                 :            :         }
    1418                 :        126 :         put_cpu_var(bh_lrus);
    1419                 :        126 : }
    1420                 :            : 
    1421                 :        168 : static bool has_bh_in_lru(int cpu, void *dummy)
    1422                 :            : {
    1423                 :        168 :         struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
    1424                 :        168 :         int i;
    1425                 :            :         
    1426         [ +  + ]:        840 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1427         [ +  + ]:        798 :                 if (b->bhs[i])
    1428                 :            :                         return true;
    1429                 :            :         }
    1430                 :            : 
    1431                 :            :         return false;
    1432                 :            : }
    1433                 :            : 
    1434                 :        168 : void invalidate_bh_lrus(void)
    1435                 :            : {
    1436                 :        168 :         on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
    1437                 :        168 : }
    1438                 :            : EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
    1439                 :            : 
    1440                 :      33433 : void set_bh_page(struct buffer_head *bh,
    1441                 :            :                 struct page *page, unsigned long offset)
    1442                 :            : {
    1443                 :      33433 :         bh->b_page = page;
    1444   [ -  +  -  + ]:      33433 :         BUG_ON(offset >= PAGE_SIZE);
    1445                 :      31618 :         if (PageHighMem(page))
    1446                 :            :                 /*
    1447                 :            :                  * This catches illegal uses and preserves the offset:
    1448                 :            :                  */
    1449                 :            :                 bh->b_data = (char *)(0 + offset);
    1450                 :            :         else
    1451                 :      31618 :                 bh->b_data = page_address(page) + offset;
    1452                 :      31618 : }
    1453                 :            : EXPORT_SYMBOL(set_bh_page);
    1454                 :            : 
    1455                 :            : /*
    1456                 :            :  * Called when truncating a buffer on a page completely.
    1457                 :            :  */
    1458                 :            : 
    1459                 :            : /* Bits that are cleared during an invalidate */
    1460                 :            : #define BUFFER_FLAGS_DISCARD \
    1461                 :            :         (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
    1462                 :            :          1 << BH_Delay | 1 << BH_Unwritten)
    1463                 :            : 
    1464                 :        378 : static void discard_buffer(struct buffer_head * bh)
    1465                 :            : {
    1466                 :        378 :         unsigned long b_state, b_state_old;
    1467                 :            : 
    1468                 :        378 :         lock_buffer(bh);
    1469                 :        378 :         clear_buffer_dirty(bh);
    1470                 :        378 :         bh->b_bdev = NULL;
    1471                 :        378 :         b_state = bh->b_state;
    1472                 :        378 :         for (;;) {
    1473                 :        378 :                 b_state_old = cmpxchg(&bh->b_state, b_state,
    1474                 :            :                                       (b_state & ~BUFFER_FLAGS_DISCARD));
    1475         [ -  + ]:        378 :                 if (b_state_old == b_state)
    1476                 :            :                         break;
    1477                 :            :                 b_state = b_state_old;
    1478                 :            :         }
    1479                 :        378 :         unlock_buffer(bh);
    1480                 :        378 : }
    1481                 :            : 
    1482                 :            : /**
    1483                 :            :  * block_invalidatepage - invalidate part or all of a buffer-backed page
    1484                 :            :  *
    1485                 :            :  * @page: the page which is affected
    1486                 :            :  * @offset: start of the range to invalidate
    1487                 :            :  * @length: length of the range to invalidate
    1488                 :            :  *
    1489                 :            :  * block_invalidatepage() is called when all or part of the page has become
    1490                 :            :  * invalidated by a truncate operation.
    1491                 :            :  *
    1492                 :            :  * block_invalidatepage() does not have to release all buffers, but it must
    1493                 :            :  * ensure that no dirty buffer is left outside @offset and that no I/O
    1494                 :            :  * is underway against any of the blocks which are outside the truncation
    1495                 :            :  * point.  Because the caller is about to free (and possibly reuse) those
    1496                 :            :  * blocks on-disk.
    1497                 :            :  */
    1498                 :        126 : void block_invalidatepage(struct page *page, unsigned int offset,
    1499                 :            :                           unsigned int length)
    1500                 :            : {
    1501                 :        126 :         struct buffer_head *head, *bh, *next;
    1502                 :        126 :         unsigned int curr_off = 0;
    1503                 :        126 :         unsigned int stop = length + offset;
    1504                 :            : 
    1505   [ -  +  -  + ]:        252 :         BUG_ON(!PageLocked(page));
    1506         [ -  + ]:        126 :         if (!page_has_buffers(page))
    1507                 :          0 :                 goto out;
    1508                 :            : 
    1509                 :            :         /*
    1510                 :            :          * Check for overflow
    1511                 :            :          */
    1512         [ -  + ]:        126 :         BUG_ON(stop > PAGE_SIZE || stop < length);
    1513                 :            : 
    1514         [ -  + ]:        126 :         head = page_buffers(page);
    1515                 :        126 :         bh = head;
    1516                 :        378 :         do {
    1517                 :        378 :                 unsigned int next_off = curr_off + bh->b_size;
    1518                 :        378 :                 next = bh->b_this_page;
    1519                 :            : 
    1520                 :            :                 /*
    1521                 :            :                  * Are we still fully in range ?
    1522                 :            :                  */
    1523         [ -  + ]:        378 :                 if (next_off > stop)
    1524                 :          0 :                         goto out;
    1525                 :            : 
    1526                 :            :                 /*
    1527                 :            :                  * is this block fully invalidated?
    1528                 :            :                  */
    1529         [ +  - ]:        378 :                 if (offset <= curr_off)
    1530                 :        378 :                         discard_buffer(bh);
    1531                 :        378 :                 curr_off = next_off;
    1532                 :        378 :                 bh = next;
    1533         [ +  + ]:        378 :         } while (bh != head);
    1534                 :            : 
    1535                 :            :         /*
    1536                 :            :          * We release buffers only if the entire page is being invalidated.
    1537                 :            :          * The get_block cached value has been unconditionally invalidated,
    1538                 :            :          * so real IO is not possible anymore.
    1539                 :            :          */
    1540         [ -  + ]:        126 :         if (length == PAGE_SIZE)
    1541                 :        126 :                 try_to_release_page(page, 0);
    1542                 :          0 : out:
    1543                 :        126 :         return;
    1544                 :            : }
    1545                 :            : EXPORT_SYMBOL(block_invalidatepage);
    1546                 :            : 
    1547                 :            : 
    1548                 :            : /*
    1549                 :            :  * We attach and possibly dirty the buffers atomically wrt
    1550                 :            :  * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
    1551                 :            :  * is already excluded via the page lock.
    1552                 :            :  */
    1553                 :      12187 : void create_empty_buffers(struct page *page,
    1554                 :            :                         unsigned long blocksize, unsigned long b_state)
    1555                 :            : {
    1556                 :      12187 :         struct buffer_head *bh, *head, *tail;
    1557                 :            : 
    1558                 :      12187 :         head = alloc_page_buffers(page, blocksize, true);
    1559                 :      12187 :         bh = head;
    1560                 :      12187 :         do {
    1561                 :      12187 :                 bh->b_state |= b_state;
    1562                 :      12187 :                 tail = bh;
    1563                 :      12187 :                 bh = bh->b_this_page;
    1564         [ -  + ]:      12187 :         } while (bh);
    1565                 :      12187 :         tail->b_this_page = head;
    1566                 :            : 
    1567                 :      12187 :         spin_lock(&page->mapping->private_lock);
    1568   [ +  +  -  + ]:      24353 :         if (PageUptodate(page) || PageDirty(page)) {
    1569                 :            :                 bh = head;
    1570                 :         21 :                 do {
    1571   [ -  +  -  + ]:         42 :                         if (PageDirty(page))
    1572                 :          0 :                                 set_buffer_dirty(bh);
    1573         [ +  - ]:         21 :                         if (PageUptodate(page))
    1574                 :         21 :                                 set_buffer_uptodate(bh);
    1575                 :         21 :                         bh = bh->b_this_page;
    1576         [ -  + ]:         21 :                 } while (bh != head);
    1577                 :            :         }
    1578                 :      12187 :         attach_page_buffers(page, head);
    1579                 :      12187 :         spin_unlock(&page->mapping->private_lock);
    1580                 :      12187 : }
    1581                 :            : EXPORT_SYMBOL(create_empty_buffers);
    1582                 :            : 
    1583                 :            : /**
    1584                 :            :  * clean_bdev_aliases: clean a range of buffers in block device
    1585                 :            :  * @bdev: Block device to clean buffers in
    1586                 :            :  * @block: Start of a range of blocks to clean
    1587                 :            :  * @len: Number of blocks to clean
    1588                 :            :  *
    1589                 :            :  * We are taking a range of blocks for data and we don't want writeback of any
    1590                 :            :  * buffer-cache aliases starting from return from this function and until the
    1591                 :            :  * moment when something will explicitly mark the buffer dirty (hopefully that
    1592                 :            :  * will not happen until we will free that block ;-) We don't even need to mark
    1593                 :            :  * it not-uptodate - nobody can expect anything from a newly allocated buffer
    1594                 :            :  * anyway. We used to use unmap_buffer() for such invalidation, but that was
    1595                 :            :  * wrong. We definitely don't want to mark the alias unmapped, for example - it
    1596                 :            :  * would confuse anyone who might pick it with bread() afterwards...
    1597                 :            :  *
    1598                 :            :  * Also..  Note that bforget() doesn't lock the buffer.  So there can be
    1599                 :            :  * writeout I/O going on against recently-freed buffers.  We don't wait on that
    1600                 :            :  * I/O in bforget() - it's more efficient to wait on the I/O only if we really
    1601                 :            :  * need to.  That happens here.
    1602                 :            :  */
    1603                 :      12145 : void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
    1604                 :            : {
    1605                 :      12145 :         struct inode *bd_inode = bdev->bd_inode;
    1606                 :      12145 :         struct address_space *bd_mapping = bd_inode->i_mapping;
    1607                 :      12145 :         struct pagevec pvec;
    1608                 :      12145 :         pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
    1609                 :      12145 :         pgoff_t end;
    1610                 :      12145 :         int i, count;
    1611                 :      12145 :         struct buffer_head *bh;
    1612                 :      12145 :         struct buffer_head *head;
    1613                 :            : 
    1614                 :      12145 :         end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
    1615                 :      12145 :         pagevec_init(&pvec);
    1616         [ -  + ]:      12145 :         while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
    1617                 :          0 :                 count = pagevec_count(&pvec);
    1618         [ #  # ]:          0 :                 for (i = 0; i < count; i++) {
    1619                 :          0 :                         struct page *page = pvec.pages[i];
    1620                 :            : 
    1621         [ #  # ]:          0 :                         if (!page_has_buffers(page))
    1622                 :          0 :                                 continue;
    1623                 :            :                         /*
    1624                 :            :                          * We use page lock instead of bd_mapping->private_lock
    1625                 :            :                          * to pin buffers here since we can afford to sleep and
    1626                 :            :                          * it scales better than a global spinlock lock.
    1627                 :            :                          */
    1628                 :          0 :                         lock_page(page);
    1629                 :            :                         /* Recheck when the page is locked which pins bhs */
    1630         [ #  # ]:          0 :                         if (!page_has_buffers(page))
    1631                 :          0 :                                 goto unlock_page;
    1632         [ #  # ]:          0 :                         head = page_buffers(page);
    1633                 :          0 :                         bh = head;
    1634                 :          0 :                         do {
    1635   [ #  #  #  # ]:          0 :                                 if (!buffer_mapped(bh) || (bh->b_blocknr < block))
    1636                 :          0 :                                         goto next;
    1637         [ #  # ]:          0 :                                 if (bh->b_blocknr >= block + len)
    1638                 :            :                                         break;
    1639                 :          0 :                                 clear_buffer_dirty(bh);
    1640                 :          0 :                                 wait_on_buffer(bh);
    1641                 :          0 :                                 clear_buffer_req(bh);
    1642                 :          0 : next:
    1643                 :          0 :                                 bh = bh->b_this_page;
    1644         [ #  # ]:          0 :                         } while (bh != head);
    1645                 :          0 : unlock_page:
    1646                 :          0 :                         unlock_page(page);
    1647                 :            :                 }
    1648         [ #  # ]:          0 :                 pagevec_release(&pvec);
    1649                 :          0 :                 cond_resched();
    1650                 :            :                 /* End of range already reached? */
    1651   [ #  #  #  # ]:          0 :                 if (index > end || !index)
    1652                 :            :                         break;
    1653                 :            :         }
    1654                 :      12145 : }
    1655                 :            : EXPORT_SYMBOL(clean_bdev_aliases);
    1656                 :            : 
    1657                 :            : /*
    1658                 :            :  * Size is a power-of-two in the range 512..PAGE_SIZE,
    1659                 :            :  * and the case we care about most is PAGE_SIZE.
    1660                 :            :  *
    1661                 :            :  * So this *could* possibly be written with those
    1662                 :            :  * constraints in mind (relevant mostly if some
    1663                 :            :  * architecture has a slow bit-scan instruction)
    1664                 :            :  */
    1665                 :      15117 : static inline int block_size_bits(unsigned int blocksize)
    1666                 :            : {
    1667   [ -  +  -  -  :      15117 :         return ilog2(blocksize);
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
             -  -  -  -  
                      - ]
    1668                 :            : }
    1669                 :            : 
    1670                 :      15117 : static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
    1671                 :            : {
    1672   [ -  +  -  + ]:      30234 :         BUG_ON(!PageLocked(page));
    1673                 :            : 
    1674         [ +  + ]:      15117 :         if (!page_has_buffers(page))
    1675                 :      12187 :                 create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
    1676                 :            :                                      b_state);
    1677         [ -  + ]:      15117 :         return page_buffers(page);
    1678                 :            : }
    1679                 :            : 
    1680                 :            : /*
    1681                 :            :  * NOTE! All mapped/uptodate combinations are valid:
    1682                 :            :  *
    1683                 :            :  *      Mapped  Uptodate        Meaning
    1684                 :            :  *
    1685                 :            :  *      No      No              "unknown" - must do get_block()
    1686                 :            :  *      No      Yes             "hole" - zero-filled
    1687                 :            :  *      Yes     No              "allocated" - allocated on disk, not read in
    1688                 :            :  *      Yes     Yes             "valid" - allocated and up-to-date in memory.
    1689                 :            :  *
    1690                 :            :  * "Dirty" is valid only with the last case (mapped+uptodate).
    1691                 :            :  */
    1692                 :            : 
    1693                 :            : /*
    1694                 :            :  * While block_write_full_page is writing back the dirty buffers under
    1695                 :            :  * the page lock, whoever dirtied the buffers may decide to clean them
    1696                 :            :  * again at any time.  We handle that by only looking at the buffer
    1697                 :            :  * state inside lock_buffer().
    1698                 :            :  *
    1699                 :            :  * If block_write_full_page() is called for regular writeback
    1700                 :            :  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
    1701                 :            :  * locked buffer.   This only can happen if someone has written the buffer
    1702                 :            :  * directly, with submit_bh().  At the address_space level PageWriteback
    1703                 :            :  * prevents this contention from occurring.
    1704                 :            :  *
    1705                 :            :  * If block_write_full_page() is called with wbc->sync_mode ==
    1706                 :            :  * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
    1707                 :            :  * causes the writes to be flagged as synchronous writes.
    1708                 :            :  */
    1709                 :          0 : int __block_write_full_page(struct inode *inode, struct page *page,
    1710                 :            :                         get_block_t *get_block, struct writeback_control *wbc,
    1711                 :            :                         bh_end_io_t *handler)
    1712                 :            : {
    1713                 :          0 :         int err;
    1714                 :          0 :         sector_t block;
    1715                 :          0 :         sector_t last_block;
    1716                 :          0 :         struct buffer_head *bh, *head;
    1717                 :          0 :         unsigned int blocksize, bbits;
    1718                 :          0 :         int nr_underway = 0;
    1719         [ #  # ]:          0 :         int write_flags = wbc_to_write_flags(wbc);
    1720                 :            : 
    1721                 :          0 :         head = create_page_buffers(page, inode,
    1722                 :            :                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
    1723                 :            : 
    1724                 :            :         /*
    1725                 :            :          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
    1726                 :            :          * here, and the (potentially unmapped) buffers may become dirty at
    1727                 :            :          * any time.  If a buffer becomes dirty here after we've inspected it
    1728                 :            :          * then we just miss that fact, and the page stays dirty.
    1729                 :            :          *
    1730                 :            :          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
    1731                 :            :          * handle that here by just cleaning them.
    1732                 :            :          */
    1733                 :            : 
    1734                 :          0 :         bh = head;
    1735                 :          0 :         blocksize = bh->b_size;
    1736                 :          0 :         bbits = block_size_bits(blocksize);
    1737                 :            : 
    1738                 :          0 :         block = (sector_t)page->index << (PAGE_SHIFT - bbits);
    1739                 :          0 :         last_block = (i_size_read(inode) - 1) >> bbits;
    1740                 :            : 
    1741                 :            :         /*
    1742                 :            :          * Get all the dirty buffers mapped to disk addresses and
    1743                 :            :          * handle any aliases from the underlying blockdev's mapping.
    1744                 :            :          */
    1745                 :          0 :         do {
    1746         [ #  # ]:          0 :                 if (block > last_block) {
    1747                 :            :                         /*
    1748                 :            :                          * mapped buffers outside i_size will occur, because
    1749                 :            :                          * this page can be outside i_size when there is a
    1750                 :            :                          * truncate in progress.
    1751                 :            :                          */
    1752                 :            :                         /*
    1753                 :            :                          * The buffer was zeroed by block_write_full_page()
    1754                 :            :                          */
    1755                 :          0 :                         clear_buffer_dirty(bh);
    1756                 :          0 :                         set_buffer_uptodate(bh);
    1757   [ #  #  #  #  :          0 :                 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
                   #  # ]
    1758                 :            :                            buffer_dirty(bh)) {
    1759         [ #  # ]:          0 :                         WARN_ON(bh->b_size != blocksize);
    1760                 :          0 :                         err = get_block(inode, block, bh, 1);
    1761         [ #  # ]:          0 :                         if (err)
    1762                 :          0 :                                 goto recover;
    1763                 :          0 :                         clear_buffer_delay(bh);
    1764         [ #  # ]:          0 :                         if (buffer_new(bh)) {
    1765                 :            :                                 /* blockdev mappings never come here */
    1766                 :          0 :                                 clear_buffer_new(bh);
    1767                 :          0 :                                 clean_bdev_bh_alias(bh);
    1768                 :            :                         }
    1769                 :            :                 }
    1770                 :          0 :                 bh = bh->b_this_page;
    1771                 :          0 :                 block++;
    1772         [ #  # ]:          0 :         } while (bh != head);
    1773                 :            : 
    1774                 :          0 :         do {
    1775         [ #  # ]:          0 :                 if (!buffer_mapped(bh))
    1776                 :          0 :                         continue;
    1777                 :            :                 /*
    1778                 :            :                  * If it's a fully non-blocking write attempt and we cannot
    1779                 :            :                  * lock the buffer then redirty the page.  Note that this can
    1780                 :            :                  * potentially cause a busy-wait loop from writeback threads
    1781                 :            :                  * and kswapd activity, but those code paths have their own
    1782                 :            :                  * higher-level throttling.
    1783                 :            :                  */
    1784         [ #  # ]:          0 :                 if (wbc->sync_mode != WB_SYNC_NONE) {
    1785                 :          0 :                         lock_buffer(bh);
    1786         [ #  # ]:          0 :                 } else if (!trylock_buffer(bh)) {
    1787                 :          0 :                         redirty_page_for_writepage(wbc, page);
    1788                 :          0 :                         continue;
    1789                 :            :                 }
    1790         [ #  # ]:          0 :                 if (test_clear_buffer_dirty(bh)) {
    1791                 :          0 :                         mark_buffer_async_write_endio(bh, handler);
    1792                 :            :                 } else {
    1793                 :          0 :                         unlock_buffer(bh);
    1794                 :            :                 }
    1795         [ #  # ]:          0 :         } while ((bh = bh->b_this_page) != head);
    1796                 :            : 
    1797                 :            :         /*
    1798                 :            :          * The page and its buffers are protected by PageWriteback(), so we can
    1799                 :            :          * drop the bh refcounts early.
    1800                 :            :          */
    1801   [ #  #  #  # ]:          0 :         BUG_ON(PageWriteback(page));
    1802                 :          0 :         set_page_writeback(page);
    1803                 :            : 
    1804                 :          0 :         do {
    1805                 :          0 :                 struct buffer_head *next = bh->b_this_page;
    1806         [ #  # ]:          0 :                 if (buffer_async_write(bh)) {
    1807                 :          0 :                         submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
    1808                 :          0 :                                         inode->i_write_hint, wbc);
    1809                 :          0 :                         nr_underway++;
    1810                 :            :                 }
    1811                 :          0 :                 bh = next;
    1812         [ #  # ]:          0 :         } while (bh != head);
    1813                 :          0 :         unlock_page(page);
    1814                 :            : 
    1815                 :          0 :         err = 0;
    1816                 :          0 : done:
    1817         [ #  # ]:          0 :         if (nr_underway == 0) {
    1818                 :            :                 /*
    1819                 :            :                  * The page was marked dirty, but the buffers were
    1820                 :            :                  * clean.  Someone wrote them back by hand with
    1821                 :            :                  * ll_rw_block/submit_bh.  A rare case.
    1822                 :            :                  */
    1823                 :          0 :                 end_page_writeback(page);
    1824                 :            : 
    1825                 :            :                 /*
    1826                 :            :                  * The page and buffer_heads can be released at any time from
    1827                 :            :                  * here on.
    1828                 :            :                  */
    1829                 :            :         }
    1830                 :          0 :         return err;
    1831                 :            : 
    1832                 :            : recover:
    1833                 :            :         /*
    1834                 :            :          * ENOSPC, or some other error.  We may already have added some
    1835                 :            :          * blocks to the file, so we need to write these out to avoid
    1836                 :            :          * exposing stale data.
    1837                 :            :          * The page is currently locked and not marked for writeback
    1838                 :            :          */
    1839                 :          0 :         bh = head;
    1840                 :            :         /* Recovery: lock and submit the mapped buffers */
    1841                 :          0 :         do {
    1842   [ #  #  #  #  :          0 :                 if (buffer_mapped(bh) && buffer_dirty(bh) &&
                   #  # ]
    1843                 :            :                     !buffer_delay(bh)) {
    1844                 :          0 :                         lock_buffer(bh);
    1845                 :          0 :                         mark_buffer_async_write_endio(bh, handler);
    1846                 :            :                 } else {
    1847                 :            :                         /*
    1848                 :            :                          * The buffer may have been set dirty during
    1849                 :            :                          * attachment to a dirty page.
    1850                 :            :                          */
    1851                 :          0 :                         clear_buffer_dirty(bh);
    1852                 :            :                 }
    1853         [ #  # ]:          0 :         } while ((bh = bh->b_this_page) != head);
    1854         [ #  # ]:          0 :         SetPageError(page);
    1855   [ #  #  #  # ]:          0 :         BUG_ON(PageWriteback(page));
    1856                 :          0 :         mapping_set_error(page->mapping, err);
    1857                 :          0 :         set_page_writeback(page);
    1858                 :          0 :         do {
    1859                 :          0 :                 struct buffer_head *next = bh->b_this_page;
    1860         [ #  # ]:          0 :                 if (buffer_async_write(bh)) {
    1861                 :          0 :                         clear_buffer_dirty(bh);
    1862                 :          0 :                         submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
    1863                 :          0 :                                         inode->i_write_hint, wbc);
    1864                 :          0 :                         nr_underway++;
    1865                 :            :                 }
    1866                 :          0 :                 bh = next;
    1867         [ #  # ]:          0 :         } while (bh != head);
    1868                 :          0 :         unlock_page(page);
    1869                 :          0 :         goto done;
    1870                 :            : }
    1871                 :            : EXPORT_SYMBOL(__block_write_full_page);
    1872                 :            : 
    1873                 :            : /*
    1874                 :            :  * If a page has any new buffers, zero them out here, and mark them uptodate
    1875                 :            :  * and dirty so they'll be written out (in order to prevent uninitialised
    1876                 :            :  * block data from leaking). And clear the new bit.
    1877                 :            :  */
    1878                 :          0 : void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
    1879                 :            : {
    1880                 :          0 :         unsigned int block_start, block_end;
    1881                 :          0 :         struct buffer_head *head, *bh;
    1882                 :            : 
    1883   [ #  #  #  # ]:          0 :         BUG_ON(!PageLocked(page));
    1884         [ #  # ]:          0 :         if (!page_has_buffers(page))
    1885                 :            :                 return;
    1886                 :            : 
    1887         [ #  # ]:          0 :         bh = head = page_buffers(page);
    1888                 :          0 :         block_start = 0;
    1889                 :          0 :         do {
    1890                 :          0 :                 block_end = block_start + bh->b_size;
    1891                 :            : 
    1892         [ #  # ]:          0 :                 if (buffer_new(bh)) {
    1893         [ #  # ]:          0 :                         if (block_end > from && block_start < to) {
    1894         [ #  # ]:          0 :                                 if (!PageUptodate(page)) {
    1895                 :          0 :                                         unsigned start, size;
    1896                 :            : 
    1897                 :          0 :                                         start = max(from, block_start);
    1898                 :          0 :                                         size = min(to, block_end) - start;
    1899                 :            : 
    1900                 :          0 :                                         zero_user(page, start, size);
    1901                 :          0 :                                         set_buffer_uptodate(bh);
    1902                 :            :                                 }
    1903                 :            : 
    1904                 :          0 :                                 clear_buffer_new(bh);
    1905                 :          0 :                                 mark_buffer_dirty(bh);
    1906                 :            :                         }
    1907                 :            :                 }
    1908                 :            : 
    1909                 :          0 :                 block_start = block_end;
    1910                 :          0 :                 bh = bh->b_this_page;
    1911         [ #  # ]:          0 :         } while (bh != head);
    1912                 :            : }
    1913                 :            : EXPORT_SYMBOL(page_zero_new_buffers);
    1914                 :            : 
    1915                 :            : static void
    1916                 :          0 : iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
    1917                 :            :                 struct iomap *iomap)
    1918                 :            : {
    1919                 :          0 :         loff_t offset = block << inode->i_blkbits;
    1920                 :            : 
    1921                 :          0 :         bh->b_bdev = iomap->bdev;
    1922                 :            : 
    1923                 :            :         /*
    1924                 :            :          * Block points to offset in file we need to map, iomap contains
    1925                 :            :          * the offset at which the map starts. If the map ends before the
    1926                 :            :          * current block, then do not map the buffer and let the caller
    1927                 :            :          * handle it.
    1928                 :            :          */
    1929         [ #  # ]:          0 :         BUG_ON(offset >= iomap->offset + iomap->length);
    1930                 :            : 
    1931   [ #  #  #  #  :          0 :         switch (iomap->type) {
                      # ]
    1932                 :            :         case IOMAP_HOLE:
    1933                 :            :                 /*
    1934                 :            :                  * If the buffer is not up to date or beyond the current EOF,
    1935                 :            :                  * we need to mark it as new to ensure sub-block zeroing is
    1936                 :            :                  * executed if necessary.
    1937                 :            :                  */
    1938   [ #  #  #  # ]:          0 :                 if (!buffer_uptodate(bh) ||
    1939         [ #  # ]:          0 :                     (offset >= i_size_read(inode)))
    1940                 :          0 :                         set_buffer_new(bh);
    1941                 :            :                 break;
    1942                 :            :         case IOMAP_DELALLOC:
    1943   [ #  #  #  # ]:          0 :                 if (!buffer_uptodate(bh) ||
    1944         [ #  # ]:          0 :                     (offset >= i_size_read(inode)))
    1945                 :          0 :                         set_buffer_new(bh);
    1946                 :          0 :                 set_buffer_uptodate(bh);
    1947                 :          0 :                 set_buffer_mapped(bh);
    1948                 :          0 :                 set_buffer_delay(bh);
    1949                 :            :                 break;
    1950                 :            :         case IOMAP_UNWRITTEN:
    1951                 :            :                 /*
    1952                 :            :                  * For unwritten regions, we always need to ensure that regions
    1953                 :            :                  * in the block we are not writing to are zeroed. Mark the
    1954                 :            :                  * buffer as new to ensure this.
    1955                 :            :                  */
    1956                 :          0 :                 set_buffer_new(bh);
    1957                 :          0 :                 set_buffer_unwritten(bh);
    1958                 :            :                 /* FALLTHRU */
    1959                 :          0 :         case IOMAP_MAPPED:
    1960   [ #  #  #  # ]:          0 :                 if ((iomap->flags & IOMAP_F_NEW) ||
    1961         [ #  # ]:          0 :                     offset >= i_size_read(inode))
    1962                 :          0 :                         set_buffer_new(bh);
    1963                 :          0 :                 bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
    1964                 :          0 :                                 inode->i_blkbits;
    1965                 :          0 :                 set_buffer_mapped(bh);
    1966                 :            :                 break;
    1967                 :            :         }
    1968                 :          0 : }
    1969                 :            : 
    1970                 :      15012 : int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
    1971                 :            :                 get_block_t *get_block, struct iomap *iomap)
    1972                 :            : {
    1973                 :      15012 :         unsigned from = pos & (PAGE_SIZE - 1);
    1974                 :      15012 :         unsigned to = from + len;
    1975                 :      15012 :         struct inode *inode = page->mapping->host;
    1976                 :      15012 :         unsigned block_start, block_end;
    1977                 :      15012 :         sector_t block;
    1978                 :      15012 :         int err = 0;
    1979                 :      15012 :         unsigned blocksize, bbits;
    1980                 :      15012 :         struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
    1981                 :            : 
    1982   [ -  +  -  + ]:      30024 :         BUG_ON(!PageLocked(page));
    1983                 :      15012 :         BUG_ON(from > PAGE_SIZE);
    1984         [ -  + ]:      15012 :         BUG_ON(to > PAGE_SIZE);
    1985         [ -  + ]:      15012 :         BUG_ON(from > to);
    1986                 :            : 
    1987                 :      15012 :         head = create_page_buffers(page, inode, 0);
    1988                 :      15012 :         blocksize = head->b_size;
    1989                 :      15012 :         bbits = block_size_bits(blocksize);
    1990                 :            : 
    1991                 :      15012 :         block = (sector_t)page->index << (PAGE_SHIFT - bbits);
    1992                 :            : 
    1993         [ +  + ]:      30024 :         for(bh = head, block_start = 0; bh != head || !block_start;
    1994                 :      15012 :             block++, block_start=block_end, bh = bh->b_this_page) {
    1995                 :      15012 :                 block_end = block_start + blocksize;
    1996         [ -  + ]:      15012 :                 if (block_end <= from || block_start >= to) {
    1997         [ #  # ]:          0 :                         if (PageUptodate(page)) {
    1998         [ #  # ]:          0 :                                 if (!buffer_uptodate(bh))
    1999                 :          0 :                                         set_buffer_uptodate(bh);
    2000                 :            :                         }
    2001                 :          0 :                         continue;
    2002                 :            :                 }
    2003         [ -  + ]:      15012 :                 if (buffer_new(bh))
    2004                 :          0 :                         clear_buffer_new(bh);
    2005         [ +  + ]:      15012 :                 if (!buffer_mapped(bh)) {
    2006         [ -  + ]:      12145 :                         WARN_ON(bh->b_size != blocksize);
    2007         [ +  - ]:      12145 :                         if (get_block) {
    2008                 :      12145 :                                 err = get_block(inode, block, bh, 1);
    2009         [ -  + ]:      12145 :                                 if (err)
    2010                 :            :                                         break;
    2011                 :            :                         } else {
    2012                 :          0 :                                 iomap_to_bh(inode, block, bh, iomap);
    2013                 :            :                         }
    2014                 :            : 
    2015         [ +  - ]:      12145 :                         if (buffer_new(bh)) {
    2016                 :      12145 :                                 clean_bdev_bh_alias(bh);
    2017         [ +  + ]:      12145 :                                 if (PageUptodate(page)) {
    2018                 :         21 :                                         clear_buffer_new(bh);
    2019                 :         21 :                                         set_buffer_uptodate(bh);
    2020                 :         21 :                                         mark_buffer_dirty(bh);
    2021                 :         21 :                                         continue;
    2022                 :            :                                 }
    2023         [ +  + ]:      12124 :                                 if (block_end > to || block_start < from)
    2024                 :       7369 :                                         zero_user_segments(page,
    2025                 :            :                                                 to, block_end,
    2026                 :            :                                                 block_start, from);
    2027                 :      12124 :                                 continue;
    2028                 :            :                         }
    2029                 :            :                 }
    2030         [ +  - ]:       2867 :                 if (PageUptodate(page)) {
    2031         [ -  + ]:       2867 :                         if (!buffer_uptodate(bh))
    2032                 :          0 :                                 set_buffer_uptodate(bh);
    2033                 :       2867 :                         continue; 
    2034                 :            :                 }
    2035   [ #  #  #  #  :          0 :                 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
                   #  # ]
    2036                 :          0 :                     !buffer_unwritten(bh) &&
    2037         [ #  # ]:          0 :                      (block_start < from || block_end > to)) {
    2038                 :          0 :                         ll_rw_block(REQ_OP_READ, 0, 1, &bh);
    2039                 :          0 :                         *wait_bh++=bh;
    2040                 :            :                 }
    2041                 :            :         }
    2042                 :            :         /*
    2043                 :            :          * If we issued read requests - let them complete.
    2044                 :            :          */
    2045         [ -  + ]:      15012 :         while(wait_bh > wait) {
    2046                 :          0 :                 wait_on_buffer(*--wait_bh);
    2047         [ #  # ]:          0 :                 if (!buffer_uptodate(*wait_bh))
    2048                 :          0 :                         err = -EIO;
    2049                 :            :         }
    2050         [ -  + ]:      15012 :         if (unlikely(err))
    2051                 :          0 :                 page_zero_new_buffers(page, from, to);
    2052                 :      15012 :         return err;
    2053                 :            : }
    2054                 :            : 
    2055                 :      15012 : int __block_write_begin(struct page *page, loff_t pos, unsigned len,
    2056                 :            :                 get_block_t *get_block)
    2057                 :            : {
    2058                 :      15012 :         return __block_write_begin_int(page, pos, len, get_block, NULL);
    2059                 :            : }
    2060                 :            : EXPORT_SYMBOL(__block_write_begin);
    2061                 :            : 
    2062                 :            : static int __block_commit_write(struct inode *inode, struct page *page,
    2063                 :            :                 unsigned from, unsigned to)
    2064                 :            : {
    2065                 :            :         unsigned block_start, block_end;
    2066                 :            :         int partial = 0;
    2067                 :            :         unsigned blocksize;
    2068                 :            :         struct buffer_head *bh, *head;
    2069                 :            : 
    2070                 :            :         bh = head = page_buffers(page);
    2071                 :            :         blocksize = bh->b_size;
    2072                 :            : 
    2073                 :            :         block_start = 0;
    2074                 :            :         do {
    2075                 :            :                 block_end = block_start + blocksize;
    2076                 :            :                 if (block_end <= from || block_start >= to) {
    2077                 :            :                         if (!buffer_uptodate(bh))
    2078                 :            :                                 partial = 1;
    2079                 :            :                 } else {
    2080                 :            :                         set_buffer_uptodate(bh);
    2081                 :            :                         mark_buffer_dirty(bh);
    2082                 :            :                 }
    2083                 :            :                 clear_buffer_new(bh);
    2084                 :            : 
    2085                 :            :                 block_start = block_end;
    2086                 :            :                 bh = bh->b_this_page;
    2087                 :            :         } while (bh != head);
    2088                 :            : 
    2089                 :            :         /*
    2090                 :            :          * If this is a partial write which happened to make all buffers
    2091                 :            :          * uptodate then we can optimize away a bogus readpage() for
    2092                 :            :          * the next read(). Here we 'discover' whether the page went
    2093                 :            :          * uptodate as a result of this (potentially partial) write.
    2094                 :            :          */
    2095                 :            :         if (!partial)
    2096                 :            :                 SetPageUptodate(page);
    2097                 :            :         return 0;
    2098                 :            : }
    2099                 :            : 
    2100                 :            : /*
    2101                 :            :  * block_write_begin takes care of the basic task of block allocation and
    2102                 :            :  * bringing partial write blocks uptodate first.
    2103                 :            :  *
    2104                 :            :  * The filesystem needs to handle block truncation upon failure.
    2105                 :            :  */
    2106                 :          0 : int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
    2107                 :            :                 unsigned flags, struct page **pagep, get_block_t *get_block)
    2108                 :            : {
    2109                 :          0 :         pgoff_t index = pos >> PAGE_SHIFT;
    2110                 :          0 :         struct page *page;
    2111                 :          0 :         int status;
    2112                 :            : 
    2113                 :          0 :         page = grab_cache_page_write_begin(mapping, index, flags);
    2114         [ #  # ]:          0 :         if (!page)
    2115                 :            :                 return -ENOMEM;
    2116                 :            : 
    2117                 :          0 :         status = __block_write_begin(page, pos, len, get_block);
    2118         [ #  # ]:          0 :         if (unlikely(status)) {
    2119                 :          0 :                 unlock_page(page);
    2120                 :          0 :                 put_page(page);
    2121                 :          0 :                 page = NULL;
    2122                 :            :         }
    2123                 :            : 
    2124                 :          0 :         *pagep = page;
    2125                 :          0 :         return status;
    2126                 :            : }
    2127                 :            : EXPORT_SYMBOL(block_write_begin);
    2128                 :            : 
    2129                 :      15012 : int block_write_end(struct file *file, struct address_space *mapping,
    2130                 :            :                         loff_t pos, unsigned len, unsigned copied,
    2131                 :            :                         struct page *page, void *fsdata)
    2132                 :            : {
    2133                 :      15012 :         struct inode *inode = mapping->host;
    2134                 :      15012 :         unsigned start;
    2135                 :            : 
    2136                 :      15012 :         start = pos & (PAGE_SIZE - 1);
    2137                 :            : 
    2138         [ -  + ]:      15012 :         if (unlikely(copied < len)) {
    2139                 :            :                 /*
    2140                 :            :                  * The buffers that were written will now be uptodate, so we
    2141                 :            :                  * don't have to worry about a readpage reading them and
    2142                 :            :                  * overwriting a partial write. However if we have encountered
    2143                 :            :                  * a short write and only partially written into a buffer, it
    2144                 :            :                  * will not be marked uptodate, so a readpage might come in and
    2145                 :            :                  * destroy our partial write.
    2146                 :            :                  *
    2147                 :            :                  * Do the simplest thing, and just treat any short write to a
    2148                 :            :                  * non uptodate page as a zero-length write, and force the
    2149                 :            :                  * caller to redo the whole thing.
    2150                 :            :                  */
    2151         [ #  # ]:          0 :                 if (!PageUptodate(page))
    2152                 :          0 :                         copied = 0;
    2153                 :            : 
    2154                 :          0 :                 page_zero_new_buffers(page, start+copied, start+len);
    2155                 :            :         }
    2156                 :      15012 :         flush_dcache_page(page);
    2157                 :            : 
    2158                 :            :         /* This could be a short (even 0-length) commit */
    2159                 :      15012 :         __block_commit_write(inode, page, start, start+copied);
    2160                 :            : 
    2161                 :      15012 :         return copied;
    2162                 :            : }
    2163                 :            : EXPORT_SYMBOL(block_write_end);
    2164                 :            : 
    2165                 :      15012 : int generic_write_end(struct file *file, struct address_space *mapping,
    2166                 :            :                         loff_t pos, unsigned len, unsigned copied,
    2167                 :            :                         struct page *page, void *fsdata)
    2168                 :            : {
    2169                 :      15012 :         struct inode *inode = mapping->host;
    2170                 :      15012 :         loff_t old_size = inode->i_size;
    2171                 :      15012 :         bool i_size_changed = false;
    2172                 :            : 
    2173                 :      15012 :         copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
    2174                 :            : 
    2175                 :            :         /*
    2176                 :            :          * No need to use i_size_read() here, the i_size cannot change under us
    2177                 :            :          * because we hold i_rwsem.
    2178                 :            :          *
    2179                 :            :          * But it's important to update i_size while still holding page lock:
    2180                 :            :          * page writeout could otherwise come in and zero beyond i_size.
    2181                 :            :          */
    2182         [ +  - ]:      15012 :         if (pos + copied > inode->i_size) {
    2183                 :      15012 :                 i_size_write(inode, pos + copied);
    2184                 :      15012 :                 i_size_changed = true;
    2185                 :            :         }
    2186                 :            : 
    2187                 :      15012 :         unlock_page(page);
    2188                 :      15012 :         put_page(page);
    2189                 :            : 
    2190         [ -  + ]:      15012 :         if (old_size < pos)
    2191                 :          0 :                 pagecache_isize_extended(inode, old_size, pos);
    2192                 :            :         /*
    2193                 :            :          * Don't mark the inode dirty under page lock. First, it unnecessarily
    2194                 :            :          * makes the holding time of page lock longer. Second, it forces lock
    2195                 :            :          * ordering of page lock and transaction start for journaling
    2196                 :            :          * filesystems.
    2197                 :            :          */
    2198         [ +  - ]:      15012 :         if (i_size_changed)
    2199                 :      15012 :                 mark_inode_dirty(inode);
    2200                 :      15012 :         return copied;
    2201                 :            : }
    2202                 :            : EXPORT_SYMBOL(generic_write_end);
    2203                 :            : 
    2204                 :            : /*
    2205                 :            :  * block_is_partially_uptodate checks whether buffers within a page are
    2206                 :            :  * uptodate or not.
    2207                 :            :  *
    2208                 :            :  * Returns true if all buffers which correspond to a file portion
    2209                 :            :  * we want to read are uptodate.
    2210                 :            :  */
    2211                 :          0 : int block_is_partially_uptodate(struct page *page, unsigned long from,
    2212                 :            :                                         unsigned long count)
    2213                 :            : {
    2214                 :          0 :         unsigned block_start, block_end, blocksize;
    2215                 :          0 :         unsigned to;
    2216                 :          0 :         struct buffer_head *bh, *head;
    2217                 :          0 :         int ret = 1;
    2218                 :            : 
    2219         [ #  # ]:          0 :         if (!page_has_buffers(page))
    2220                 :            :                 return 0;
    2221                 :            : 
    2222         [ #  # ]:          0 :         head = page_buffers(page);
    2223                 :          0 :         blocksize = head->b_size;
    2224                 :          0 :         to = min_t(unsigned, PAGE_SIZE - from, count);
    2225                 :          0 :         to = from + to;
    2226   [ #  #  #  # ]:          0 :         if (from < blocksize && to > PAGE_SIZE - blocksize)
    2227                 :            :                 return 0;
    2228                 :            : 
    2229                 :            :         bh = head;
    2230                 :            :         block_start = 0;
    2231                 :          0 :         do {
    2232                 :          0 :                 block_end = block_start + blocksize;
    2233   [ #  #  #  # ]:          0 :                 if (block_end > from && block_start < to) {
    2234         [ #  # ]:          0 :                         if (!buffer_uptodate(bh)) {
    2235                 :            :                                 ret = 0;
    2236                 :            :                                 break;
    2237                 :            :                         }
    2238         [ #  # ]:          0 :                         if (block_end >= to)
    2239                 :            :                                 break;
    2240                 :            :                 }
    2241                 :          0 :                 block_start = block_end;
    2242                 :          0 :                 bh = bh->b_this_page;
    2243         [ #  # ]:          0 :         } while (bh != head);
    2244                 :            : 
    2245                 :            :         return ret;
    2246                 :            : }
    2247                 :            : EXPORT_SYMBOL(block_is_partially_uptodate);
    2248                 :            : 
    2249                 :            : /*
    2250                 :            :  * Generic "read page" function for block devices that have the normal
    2251                 :            :  * get_block functionality. This is most of the block device filesystems.
    2252                 :            :  * Reads the page asynchronously --- the unlock_buffer() and
    2253                 :            :  * set/clear_buffer_uptodate() functions propagate buffer state into the
    2254                 :            :  * page struct once IO has completed.
    2255                 :            :  */
    2256                 :        105 : int block_read_full_page(struct page *page, get_block_t *get_block)
    2257                 :            : {
    2258                 :        105 :         struct inode *inode = page->mapping->host;
    2259                 :        105 :         sector_t iblock, lblock;
    2260                 :        105 :         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
    2261                 :        105 :         unsigned int blocksize, bbits;
    2262                 :        105 :         int nr, i;
    2263                 :        105 :         int fully_mapped = 1;
    2264                 :            : 
    2265                 :        105 :         head = create_page_buffers(page, inode, 0);
    2266                 :        105 :         blocksize = head->b_size;
    2267                 :        105 :         bbits = block_size_bits(blocksize);
    2268                 :            : 
    2269                 :        105 :         iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
    2270                 :        105 :         lblock = (i_size_read(inode)+blocksize-1) >> bbits;
    2271                 :        105 :         bh = head;
    2272                 :        105 :         nr = 0;
    2273                 :        105 :         i = 0;
    2274                 :            : 
    2275                 :        105 :         do {
    2276         [ +  + ]:        105 :                 if (buffer_uptodate(bh))
    2277                 :         63 :                         continue;
    2278                 :            : 
    2279         [ +  - ]:         42 :                 if (!buffer_mapped(bh)) {
    2280                 :         42 :                         int err = 0;
    2281                 :            : 
    2282                 :         42 :                         fully_mapped = 0;
    2283         [ +  - ]:         42 :                         if (iblock < lblock) {
    2284         [ -  + ]:         42 :                                 WARN_ON(bh->b_size != blocksize);
    2285                 :         42 :                                 err = get_block(inode, iblock, bh, 0);
    2286         [ -  + ]:         42 :                                 if (err)
    2287         [ #  # ]:          0 :                                         SetPageError(page);
    2288                 :            :                         }
    2289         [ -  + ]:         42 :                         if (!buffer_mapped(bh)) {
    2290                 :          0 :                                 zero_user(page, i * blocksize, blocksize);
    2291         [ #  # ]:          0 :                                 if (!err)
    2292                 :          0 :                                         set_buffer_uptodate(bh);
    2293                 :          0 :                                 continue;
    2294                 :            :                         }
    2295                 :            :                         /*
    2296                 :            :                          * get_block() might have updated the buffer
    2297                 :            :                          * synchronously
    2298                 :            :                          */
    2299         [ -  + ]:         42 :                         if (buffer_uptodate(bh))
    2300                 :          0 :                                 continue;
    2301                 :            :                 }
    2302                 :         42 :                 arr[nr++] = bh;
    2303         [ -  + ]:        105 :         } while (i++, iblock++, (bh = bh->b_this_page) != head);
    2304                 :            : 
    2305         [ +  + ]:        105 :         if (fully_mapped)
    2306         [ -  + ]:         63 :                 SetPageMappedToDisk(page);
    2307                 :            : 
    2308         [ +  + ]:        105 :         if (!nr) {
    2309                 :            :                 /*
    2310                 :            :                  * All buffers are uptodate - we can set the page uptodate
    2311                 :            :                  * as well. But not if get_block() returned an error.
    2312                 :            :                  */
    2313   [ -  +  +  - ]:        126 :                 if (!PageError(page))
    2314                 :         63 :                         SetPageUptodate(page);
    2315                 :         63 :                 unlock_page(page);
    2316                 :         63 :                 return 0;
    2317                 :            :         }
    2318                 :            : 
    2319                 :            :         /* Stage two: lock the buffers */
    2320         [ +  + ]:         84 :         for (i = 0; i < nr; i++) {
    2321                 :         42 :                 bh = arr[i];
    2322                 :         42 :                 lock_buffer(bh);
    2323                 :         42 :                 mark_buffer_async_read(bh);
    2324                 :            :         }
    2325                 :            : 
    2326                 :            :         /*
    2327                 :            :          * Stage 3: start the IO.  Check for uptodateness
    2328                 :            :          * inside the buffer lock in case another process reading
    2329                 :            :          * the underlying blockdev brought it uptodate (the sct fix).
    2330                 :            :          */
    2331         [ +  + ]:         84 :         for (i = 0; i < nr; i++) {
    2332                 :         42 :                 bh = arr[i];
    2333         [ -  + ]:         42 :                 if (buffer_uptodate(bh))
    2334                 :          0 :                         end_buffer_async_read(bh, 1);
    2335                 :            :                 else
    2336                 :         42 :                         submit_bh(REQ_OP_READ, 0, bh);
    2337                 :            :         }
    2338                 :            :         return 0;
    2339                 :            : }
    2340                 :            : EXPORT_SYMBOL(block_read_full_page);
    2341                 :            : 
    2342                 :            : /* utility function for filesystems that need to do work on expanding
    2343                 :            :  * truncates.  Uses filesystem pagecache writes to allow the filesystem to
    2344                 :            :  * deal with the hole.  
    2345                 :            :  */
    2346                 :          0 : int generic_cont_expand_simple(struct inode *inode, loff_t size)
    2347                 :            : {
    2348                 :          0 :         struct address_space *mapping = inode->i_mapping;
    2349                 :          0 :         struct page *page;
    2350                 :          0 :         void *fsdata;
    2351                 :          0 :         int err;
    2352                 :            : 
    2353                 :          0 :         err = inode_newsize_ok(inode, size);
    2354         [ #  # ]:          0 :         if (err)
    2355                 :          0 :                 goto out;
    2356                 :            : 
    2357                 :          0 :         err = pagecache_write_begin(NULL, mapping, size, 0,
    2358                 :            :                                     AOP_FLAG_CONT_EXPAND, &page, &fsdata);
    2359         [ #  # ]:          0 :         if (err)
    2360                 :          0 :                 goto out;
    2361                 :            : 
    2362                 :          0 :         err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
    2363         [ #  # ]:          0 :         BUG_ON(err > 0);
    2364                 :            : 
    2365                 :          0 : out:
    2366                 :          0 :         return err;
    2367                 :            : }
    2368                 :            : EXPORT_SYMBOL(generic_cont_expand_simple);
    2369                 :            : 
    2370                 :          0 : static int cont_expand_zero(struct file *file, struct address_space *mapping,
    2371                 :            :                             loff_t pos, loff_t *bytes)
    2372                 :            : {
    2373                 :          0 :         struct inode *inode = mapping->host;
    2374                 :          0 :         unsigned int blocksize = i_blocksize(inode);
    2375                 :          0 :         struct page *page;
    2376                 :          0 :         void *fsdata;
    2377                 :          0 :         pgoff_t index, curidx;
    2378                 :          0 :         loff_t curpos;
    2379                 :          0 :         unsigned zerofrom, offset, len;
    2380                 :          0 :         int err = 0;
    2381                 :            : 
    2382                 :          0 :         index = pos >> PAGE_SHIFT;
    2383                 :          0 :         offset = pos & ~PAGE_MASK;
    2384                 :            : 
    2385         [ #  # ]:          0 :         while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
    2386                 :          0 :                 zerofrom = curpos & ~PAGE_MASK;
    2387         [ #  # ]:          0 :                 if (zerofrom & (blocksize-1)) {
    2388                 :          0 :                         *bytes |= (blocksize-1);
    2389                 :          0 :                         (*bytes)++;
    2390                 :            :                 }
    2391                 :          0 :                 len = PAGE_SIZE - zerofrom;
    2392                 :            : 
    2393                 :          0 :                 err = pagecache_write_begin(file, mapping, curpos, len, 0,
    2394                 :            :                                             &page, &fsdata);
    2395         [ #  # ]:          0 :                 if (err)
    2396                 :          0 :                         goto out;
    2397                 :          0 :                 zero_user(page, zerofrom, len);
    2398                 :          0 :                 err = pagecache_write_end(file, mapping, curpos, len, len,
    2399                 :            :                                                 page, fsdata);
    2400         [ #  # ]:          0 :                 if (err < 0)
    2401                 :          0 :                         goto out;
    2402         [ #  # ]:          0 :                 BUG_ON(err != len);
    2403                 :          0 :                 err = 0;
    2404                 :            : 
    2405                 :          0 :                 balance_dirty_pages_ratelimited(mapping);
    2406                 :            : 
    2407         [ #  # ]:          0 :                 if (fatal_signal_pending(current)) {
    2408                 :          0 :                         err = -EINTR;
    2409                 :          0 :                         goto out;
    2410                 :            :                 }
    2411                 :            :         }
    2412                 :            : 
    2413                 :            :         /* page covers the boundary, find the boundary offset */
    2414         [ #  # ]:          0 :         if (index == curidx) {
    2415                 :          0 :                 zerofrom = curpos & ~PAGE_MASK;
    2416                 :            :                 /* if we will expand the thing last block will be filled */
    2417         [ #  # ]:          0 :                 if (offset <= zerofrom) {
    2418                 :          0 :                         goto out;
    2419                 :            :                 }
    2420         [ #  # ]:          0 :                 if (zerofrom & (blocksize-1)) {
    2421                 :          0 :                         *bytes |= (blocksize-1);
    2422                 :          0 :                         (*bytes)++;
    2423                 :            :                 }
    2424                 :          0 :                 len = offset - zerofrom;
    2425                 :            : 
    2426                 :          0 :                 err = pagecache_write_begin(file, mapping, curpos, len, 0,
    2427                 :            :                                             &page, &fsdata);
    2428         [ #  # ]:          0 :                 if (err)
    2429                 :          0 :                         goto out;
    2430                 :          0 :                 zero_user(page, zerofrom, len);
    2431                 :          0 :                 err = pagecache_write_end(file, mapping, curpos, len, len,
    2432                 :            :                                                 page, fsdata);
    2433         [ #  # ]:          0 :                 if (err < 0)
    2434                 :          0 :                         goto out;
    2435         [ #  # ]:          0 :                 BUG_ON(err != len);
    2436                 :            :                 err = 0;
    2437                 :            :         }
    2438                 :          0 : out:
    2439                 :          0 :         return err;
    2440                 :            : }
    2441                 :            : 
    2442                 :            : /*
    2443                 :            :  * For moronic filesystems that do not allow holes in file.
    2444                 :            :  * We may have to extend the file.
    2445                 :            :  */
    2446                 :          0 : int cont_write_begin(struct file *file, struct address_space *mapping,
    2447                 :            :                         loff_t pos, unsigned len, unsigned flags,
    2448                 :            :                         struct page **pagep, void **fsdata,
    2449                 :            :                         get_block_t *get_block, loff_t *bytes)
    2450                 :            : {
    2451                 :          0 :         struct inode *inode = mapping->host;
    2452                 :          0 :         unsigned int blocksize = i_blocksize(inode);
    2453                 :          0 :         unsigned int zerofrom;
    2454                 :          0 :         int err;
    2455                 :            : 
    2456                 :          0 :         err = cont_expand_zero(file, mapping, pos, bytes);
    2457         [ #  # ]:          0 :         if (err)
    2458                 :            :                 return err;
    2459                 :            : 
    2460                 :          0 :         zerofrom = *bytes & ~PAGE_MASK;
    2461   [ #  #  #  # ]:          0 :         if (pos+len > *bytes && zerofrom & (blocksize-1)) {
    2462                 :          0 :                 *bytes |= (blocksize-1);
    2463                 :          0 :                 (*bytes)++;
    2464                 :            :         }
    2465                 :            : 
    2466                 :          0 :         return block_write_begin(mapping, pos, len, flags, pagep, get_block);
    2467                 :            : }
    2468                 :            : EXPORT_SYMBOL(cont_write_begin);
    2469                 :            : 
    2470                 :          0 : int block_commit_write(struct page *page, unsigned from, unsigned to)
    2471                 :            : {
    2472                 :          0 :         struct inode *inode = page->mapping->host;
    2473                 :          0 :         __block_commit_write(inode,page,from,to);
    2474                 :          0 :         return 0;
    2475                 :            : }
    2476                 :            : EXPORT_SYMBOL(block_commit_write);
    2477                 :            : 
    2478                 :            : /*
    2479                 :            :  * block_page_mkwrite() is not allowed to change the file size as it gets
    2480                 :            :  * called from a page fault handler when a page is first dirtied. Hence we must
    2481                 :            :  * be careful to check for EOF conditions here. We set the page up correctly
    2482                 :            :  * for a written page which means we get ENOSPC checking when writing into
    2483                 :            :  * holes and correct delalloc and unwritten extent mapping on filesystems that
    2484                 :            :  * support these features.
    2485                 :            :  *
    2486                 :            :  * We are not allowed to take the i_mutex here so we have to play games to
    2487                 :            :  * protect against truncate races as the page could now be beyond EOF.  Because
    2488                 :            :  * truncate writes the inode size before removing pages, once we have the
    2489                 :            :  * page lock we can determine safely if the page is beyond EOF. If it is not
    2490                 :            :  * beyond EOF, then the page is guaranteed safe against truncation until we
    2491                 :            :  * unlock the page.
    2492                 :            :  *
    2493                 :            :  * Direct callers of this function should protect against filesystem freezing
    2494                 :            :  * using sb_start_pagefault() - sb_end_pagefault() functions.
    2495                 :            :  */
    2496                 :          0 : int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
    2497                 :            :                          get_block_t get_block)
    2498                 :            : {
    2499                 :          0 :         struct page *page = vmf->page;
    2500                 :          0 :         struct inode *inode = file_inode(vma->vm_file);
    2501                 :          0 :         unsigned long end;
    2502                 :          0 :         loff_t size;
    2503                 :          0 :         int ret;
    2504                 :            : 
    2505                 :          0 :         lock_page(page);
    2506         [ #  # ]:          0 :         size = i_size_read(inode);
    2507   [ #  #  #  # ]:          0 :         if ((page->mapping != inode->i_mapping) ||
    2508         [ #  # ]:          0 :             (page_offset(page) > size)) {
    2509                 :            :                 /* We overload EFAULT to mean page got truncated */
    2510                 :          0 :                 ret = -EFAULT;
    2511                 :          0 :                 goto out_unlock;
    2512                 :            :         }
    2513                 :            : 
    2514                 :            :         /* page is wholly or partially inside EOF */
    2515         [ #  # ]:          0 :         if (((page->index + 1) << PAGE_SHIFT) > size)
    2516                 :          0 :                 end = size & ~PAGE_MASK;
    2517                 :            :         else
    2518                 :            :                 end = PAGE_SIZE;
    2519                 :            : 
    2520                 :          0 :         ret = __block_write_begin(page, 0, end, get_block);
    2521         [ #  # ]:          0 :         if (!ret)
    2522                 :          0 :                 ret = block_commit_write(page, 0, end);
    2523                 :            : 
    2524         [ #  # ]:          0 :         if (unlikely(ret < 0))
    2525                 :          0 :                 goto out_unlock;
    2526                 :          0 :         set_page_dirty(page);
    2527                 :          0 :         wait_for_stable_page(page);
    2528                 :          0 :         return 0;
    2529                 :          0 : out_unlock:
    2530                 :          0 :         unlock_page(page);
    2531                 :          0 :         return ret;
    2532                 :            : }
    2533                 :            : EXPORT_SYMBOL(block_page_mkwrite);
    2534                 :            : 
    2535                 :            : /*
    2536                 :            :  * nobh_write_begin()'s prereads are special: the buffer_heads are freed
    2537                 :            :  * immediately, while under the page lock.  So it needs a special end_io
    2538                 :            :  * handler which does not touch the bh after unlocking it.
    2539                 :            :  */
    2540                 :          0 : static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
    2541                 :            : {
    2542                 :          0 :         __end_buffer_read_notouch(bh, uptodate);
    2543                 :          0 : }
    2544                 :            : 
    2545                 :            : /*
    2546                 :            :  * Attach the singly-linked list of buffers created by nobh_write_begin, to
    2547                 :            :  * the page (converting it to circular linked list and taking care of page
    2548                 :            :  * dirty races).
    2549                 :            :  */
    2550                 :          0 : static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
    2551                 :            : {
    2552                 :          0 :         struct buffer_head *bh;
    2553                 :            : 
    2554   [ #  #  #  # ]:          0 :         BUG_ON(!PageLocked(page));
    2555                 :            : 
    2556                 :          0 :         spin_lock(&page->mapping->private_lock);
    2557                 :          0 :         bh = head;
    2558                 :          0 :         do {
    2559   [ #  #  #  # ]:          0 :                 if (PageDirty(page))
    2560                 :          0 :                         set_buffer_dirty(bh);
    2561         [ #  # ]:          0 :                 if (!bh->b_this_page)
    2562                 :          0 :                         bh->b_this_page = head;
    2563                 :          0 :                 bh = bh->b_this_page;
    2564         [ #  # ]:          0 :         } while (bh != head);
    2565                 :          0 :         attach_page_buffers(page, head);
    2566                 :          0 :         spin_unlock(&page->mapping->private_lock);
    2567                 :          0 : }
    2568                 :            : 
    2569                 :            : /*
    2570                 :            :  * On entry, the page is fully not uptodate.
    2571                 :            :  * On exit the page is fully uptodate in the areas outside (from,to)
    2572                 :            :  * The filesystem needs to handle block truncation upon failure.
                         :            :  *
                         :            :  * @mapping:   address space being written; the inode is mapping->host
                         :            :  * @pos:       file offset of the write; selects the page (pos >> PAGE_SHIFT)
                         :            :  *             and the start offset inside it (from)
                         :            :  * @len:       number of bytes to be written; to = from + len
                         :            :  * @flags:     passed through to grab_cache_page_write_begin()
                         :            :  * @pagep:     receives the page on success; reset to NULL on the
                         :            :  *             out_release error path
                         :            :  * @fsdata:    set to NULL, or to the head of the unattached, NULL terminated
                         :            :  *             buffer list when this function allocated one and succeeded
                         :            :  * @get_block: filesystem block-mapping callback
                         :            :  *
                         :            :  * Returns 0 on success.  Returns -ENOMEM when the page or the buffers
                         :            :  * cannot be allocated, the error from get_block() or
                         :            :  * __block_write_begin(), or -EIO when a block that had to be read in
                         :            :  * did not become uptodate.
    2573                 :            :  */
    2574                 :          0 : int nobh_write_begin(struct address_space *mapping,
    2575                 :            :                         loff_t pos, unsigned len, unsigned flags,
    2576                 :            :                         struct page **pagep, void **fsdata,
    2577                 :            :                         get_block_t *get_block)
    2578                 :            : {
    2579                 :          0 :         struct inode *inode = mapping->host;
    2580                 :          0 :         const unsigned blkbits = inode->i_blkbits;
    2581                 :          0 :         const unsigned blocksize = 1 << blkbits;
    2582                 :          0 :         struct buffer_head *head, *bh;
    2583                 :          0 :         struct page *page;
    2584                 :          0 :         pgoff_t index;
    2585                 :          0 :         unsigned from, to;
    2586                 :          0 :         unsigned block_in_page;
    2587                 :          0 :         unsigned block_start, block_end;
    2588                 :          0 :         sector_t block_in_file;
    2589                 :          0 :         int nr_reads = 0;
    2590                 :          0 :         int ret = 0;
    2591                 :          0 :         int is_mapped_to_disk = 1;      /* cleared as soon as one block is unmapped */
    2592                 :            : 
    2593                 :          0 :         index = pos >> PAGE_SHIFT;
    2594                 :          0 :         from = pos & (PAGE_SIZE - 1);
    2595                 :          0 :         to = from + len;
    2596                 :            : 
    2597                 :          0 :         page = grab_cache_page_write_begin(mapping, index, flags);
    2598         [ #  # ]:          0 :         if (!page)
    2599                 :            :                 return -ENOMEM;         /* *pagep and *fsdata are left untouched here */
    2600                 :          0 :         *pagep = page;
    2601                 :          0 :         *fsdata = NULL;
    2602                 :            : 
    2603         [ #  # ]:          0 :         if (page_has_buffers(page)) {   /* buffers already attached: use the regular path */
    2604                 :          0 :                 ret = __block_write_begin(page, pos, len, get_block);
    2605         [ #  # ]:          0 :                 if (unlikely(ret))
    2606                 :          0 :                         goto out_release;
    2607                 :            :                 return ret;
    2608                 :            :         }
    2609                 :            : 
    2610   [ #  #  #  # ]:          0 :         if (PageMappedToDisk(page))
    2611                 :            :                 return 0;               /* no buffers are allocated; *fsdata stays NULL */
    2612                 :            : 
    2613                 :            :         /*
    2614                 :            :          * Allocate buffers so that we can keep track of state, and potentially
    2615                 :            :          * attach them to the page if an error occurs. In the common case of
    2616                 :            :          * no error, they will just be freed again without ever being attached
    2617                 :            :          * to the page (which is all OK, because we're under the page lock).
    2618                 :            :          *
    2619                 :            :          * Be careful: the buffer linked list is a NULL terminated one, rather
    2620                 :            :          * than the circular one we're used to.
    2621                 :            :          */
    2622                 :          0 :         head = alloc_page_buffers(page, blocksize, false);
    2623         [ #  # ]:          0 :         if (!head) {
    2624                 :          0 :                 ret = -ENOMEM;
    2625                 :          0 :                 goto out_release;
    2626                 :            :         }
    2627                 :            : 
    2628                 :          0 :         block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
    2629                 :            : 
    2630                 :            :         /*
    2631                 :            :          * We loop across all blocks in the page, whether or not they are
    2632                 :            :          * part of the affected region.  This is so we can discover if the
    2633                 :            :          * page is fully mapped-to-disk.
    2634                 :            :          */
    2635                 :          0 :         for (block_start = 0, block_in_page = 0, bh = head;
    2636         [ #  # ]:          0 :                   block_start < PAGE_SIZE;
    2637                 :          0 :                   block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
    2638                 :          0 :                 int create;
    2639                 :            : 
    2640                 :          0 :                 block_end = block_start + blocksize;
    2641                 :          0 :                 bh->b_state = 0;
    2642                 :          0 :                 create = 1;
    2643         [ #  # ]:          0 :                 if (block_start >= to)
    2644                 :          0 :                         create = 0;     /* block starts at or after the end of the write */
    2645                 :          0 :                 ret = get_block(inode, block_in_file + block_in_page,
    2646                 :            :                                         bh, create);
    2647         [ #  # ]:          0 :                 if (ret)
    2648                 :          0 :                         goto failed;
    2649         [ #  # ]:          0 :                 if (!buffer_mapped(bh))
    2650                 :          0 :                         is_mapped_to_disk = 0;
    2651         [ #  # ]:          0 :                 if (buffer_new(bh))
    2652                 :          0 :                         clean_bdev_bh_alias(bh);
    2653         [ #  # ]:          0 :                 if (PageUptodate(page)) {
    2654                 :          0 :                         set_buffer_uptodate(bh);
    2655                 :          0 :                         continue;
    2656                 :            :                 }
    2657   [ #  #  #  # ]:          0 :                 if (buffer_new(bh) || !buffer_mapped(bh)) {
    2658                 :          0 :                         zero_user_segments(page, block_start, from,
    2659                 :            :                                                         to, block_end);
    2660                 :          0 :                         continue;
    2661                 :            :                 }
    2662         [ #  # ]:          0 :                 if (buffer_uptodate(bh))
    2663                 :          0 :                         continue;       /* reiserfs does this */
    2664         [ #  # ]:          0 :                 if (block_start < from || block_end > to) {    /* block not fully covered by the write: read it */
    2665                 :          0 :                         lock_buffer(bh);
    2666                 :          0 :                         bh->b_end_io = end_buffer_read_nobh;
    2667                 :          0 :                         submit_bh(REQ_OP_READ, 0, bh);
    2668                 :          0 :                         nr_reads++;
    2669                 :            :                 }
    2670                 :            :         }
    2671                 :            : 
    2672         [ #  # ]:          0 :         if (nr_reads) {
    2673                 :            :                 /*
    2674                 :            :                  * The page is locked, so these buffers are protected from
    2675                 :            :                  * any VM or truncate activity.  Hence we don't need to care
    2676                 :            :                  * for the buffer_head refcounts.
    2677                 :            :                  */
    2678         [ #  # ]:          0 :                 for (bh = head; bh; bh = bh->b_this_page) {
    2679                 :          0 :                         wait_on_buffer(bh);
    2680         [ #  # ]:          0 :                         if (!buffer_uptodate(bh))
    2681                 :          0 :                                 ret = -EIO;     /* keep waiting on the rest before failing */
    2682                 :            :                 }
    2683         [ #  # ]:          0 :                 if (ret)
    2684                 :          0 :                         goto failed;
    2685                 :            :         }
    2686                 :            : 
    2687         [ #  # ]:          0 :         if (is_mapped_to_disk)
    2688         [ #  # ]:          0 :                 SetPageMappedToDisk(page);
    2689                 :            : 
    2690                 :          0 :         *fsdata = head; /* to be released by nobh_write_end */
    2691                 :            : 
    2692                 :          0 :         return 0;
    2693                 :            : 
    2694                 :          0 : failed:
    2695         [ #  # ]:          0 :         BUG_ON(!ret);
    2696                 :            :         /*
    2697                 :            :          * Error recovery is a bit difficult. We need to zero out blocks that
    2698                 :            :          * were newly allocated, and dirty them to ensure they get written out.
    2699                 :            :          * Buffers need to be attached to the page at this point, otherwise
    2700                 :            :          * the handling of potential IO errors during writeout would be hard
    2701                 :            :          * (could try doing synchronous writeout, but what if that fails too?)
    2702                 :            :          */
    2703                 :          0 :         attach_nobh_buffers(page, head);
    2704                 :          0 :         page_zero_new_buffers(page, from, to);
    2705                 :            : 
    2706                 :          0 : out_release:
    2707                 :          0 :         unlock_page(page);
    2708                 :          0 :         put_page(page);
    2709                 :          0 :         *pagep = NULL;
    2710                 :            : 
    2711                 :          0 :         return ret;
    2712                 :            : }
    2713                 :            : EXPORT_SYMBOL(nobh_write_begin);
    2714                 :            : /*
                         :            :  * nobh_write_end() - finish a write on @page; @fsdata is treated as the
                         :            :  * head of a NULL terminated buffer_head list, or NULL.
                         :            :  *
                         :            :  * If fewer bytes were copied than requested and a buffer list was passed
                         :            :  * in, the buffers are attached to the page.  Whenever the page has
                         :            :  * buffers, the work is handed to generic_write_end().
                         :            :  *
                         :            :  * Otherwise the page is marked uptodate and dirty, i_size is extended when
                         :            :  * the write went past it, the page is unlocked and released, and the
                         :            :  * buffer_heads in the list are freed.
                         :            :  *
                         :            :  * Returns @copied on the no-buffers path, else whatever
                         :            :  * generic_write_end() returns.
                         :            :  */
    2715                 :          0 : int nobh_write_end(struct file *file, struct address_space *mapping,
    2716                 :            :                         loff_t pos, unsigned len, unsigned copied,
    2717                 :            :                         struct page *page, void *fsdata)
    2718                 :            : {
    2719                 :          0 :         struct inode *inode = page->mapping->host;
    2720                 :          0 :         struct buffer_head *head = fsdata;
    2721                 :          0 :         struct buffer_head *bh;
    2722   [ #  #  #  # ]:          0 :         BUG_ON(fsdata != NULL && page_has_buffers(page));      /* a passed-in list must not coexist with attached buffers */
    2723                 :            : 
    2724   [ #  #  #  # ]:          0 :         if (unlikely(copied < len) && head)
    2725                 :          0 :                 attach_nobh_buffers(page, head);
    2726         [ #  # ]:          0 :         if (page_has_buffers(page))
    2727                 :          0 :                 return generic_write_end(file, mapping, pos, len,
    2728                 :            :                                         copied, page, fsdata);
    2729                 :            : 
    2730                 :          0 :         SetPageUptodate(page);
    2731                 :          0 :         set_page_dirty(page);
    2732         [ #  # ]:          0 :         if (pos+copied > inode->i_size) {
    2733                 :          0 :                 i_size_write(inode, pos+copied);
    2734                 :          0 :                 mark_inode_dirty(inode);
    2735                 :            :         }
    2736                 :            : 
    2737                 :          0 :         unlock_page(page);
    2738                 :          0 :         put_page(page);
    2739                 :            : 
    2740         [ #  # ]:          0 :         while (head) {                  /* free the never-attached buffer list */
    2741                 :          0 :                 bh = head;
    2742                 :          0 :                 head = head->b_this_page;       /* fetch the next link before freeing bh */
    2743                 :          0 :                 free_buffer_head(bh);
    2744                 :            :         }
    2745                 :            : 
    2746                 :          0 :         return copied;
    2747                 :            : }
    2748                 :            : EXPORT_SYMBOL(nobh_write_end);
    2749                 :            : 
    2750                 :            : /*
    2751                 :            :  * nobh_writepage() - based on block_full_write_page() except
    2752                 :            :  * that it tries to operate without attaching bufferheads to
    2753                 :            :  * the page.
                         :            :  *
                         :            :  * @page:      page to write; the inode is page->mapping->host
                         :            :  * @get_block: block-mapping callback, passed to mpage_writepage() and to
                         :            :  *             the __block_write_full_page() fallback
                         :            :  * @wbc:       writeback control, passed through unchanged
                         :            :  *
                         :            :  * A page entirely inside i_size is written as is.  A page entirely
                         :            :  * outside i_size is only unlocked and 0 is returned.  A page that
                         :            :  * straddles i_size has the part beyond i_size zeroed before it is written.
                         :            :  *
                         :            :  * Returns the result of mpage_writepage(), or of
                         :            :  * __block_write_full_page() when mpage_writepage() returned -EAGAIN.
    2754                 :            :  */
    2755                 :          0 : int nobh_writepage(struct page *page, get_block_t *get_block,
    2756                 :            :                         struct writeback_control *wbc)
    2757                 :            : {
    2758                 :          0 :         struct inode * const inode = page->mapping->host;
    2759         [ #  # ]:          0 :         loff_t i_size = i_size_read(inode);
    2760                 :          0 :         const pgoff_t end_index = i_size >> PAGE_SHIFT;
    2761                 :          0 :         unsigned offset;
    2762                 :          0 :         int ret;
    2763                 :            : 
    2764                 :            :         /* Is the page fully inside i_size? */
    2765         [ #  # ]:          0 :         if (page->index < end_index)
    2766                 :          0 :                 goto out;
    2767                 :            : 
    2768                 :            :         /* Is the page fully outside i_size? (truncate in progress) */
    2769                 :          0 :         offset = i_size & (PAGE_SIZE-1);
    2770   [ #  #  #  # ]:          0 :         if (page->index >= end_index+1 || !offset) {
    2771                 :            :                 /*
    2772                 :            :                  * The page may have dirty, unmapped buffers.  For example,
    2773                 :            :                  * they may have been added in ext3_writepage().  Make them
    2774                 :            :                  * freeable here, so the page does not leak.
    2775                 :            :                  */
    2776                 :            : #if 0
    2777                 :            :                 /* Not really sure about this  - do we need this ? */
    2778                 :            :                 if (page->mapping->a_ops->invalidatepage)
    2779                 :            :                         page->mapping->a_ops->invalidatepage(page, offset);
    2780                 :            : #endif
    2781                 :          0 :                 unlock_page(page);      /* the invalidate above is compiled out, so this is the only action */
    2782                 :          0 :                 return 0; /* don't care */
    2783                 :            :         }
    2784                 :            : 
    2785                 :            :         /*
    2786                 :            :          * The page straddles i_size.  It must be zeroed out on each and every
    2787                 :            :          * writepage invocation because it may be mmapped.  "A file is mapped
    2788                 :            :          * in multiples of the page size.  For a file that is not a multiple of
    2789                 :            :          * the  page size, the remaining memory is zeroed when mapped, and
    2790                 :            :          * writes to that region are not written out to the file."
    2791                 :            :          */
    2792                 :          0 :         zero_user_segment(page, offset, PAGE_SIZE);
    2793                 :          0 : out:
    2794                 :          0 :         ret = mpage_writepage(page, get_block, wbc);
    2795         [ #  # ]:          0 :         if (ret == -EAGAIN)             /* fall back to the buffer_head based writer */
    2796                 :          0 :                 ret = __block_write_full_page(inode, page, get_block, wbc,
    2797                 :            :                                               end_buffer_async_write);
    2798                 :            :         return ret;
    2799                 :            : }
    2800                 :            : EXPORT_SYMBOL(nobh_writepage);
    2801                 :            : /*
                         :            :  * nobh_truncate_page() - zero the page-cache bytes from @from up to the end
                         :            :  * of the block containing @from, without attaching buffer_heads to the page.
                         :            :  *
                         :            :  * @mapping:   address space of the file; the inode is mapping->host
                         :            :  * @from:      file offset at which zeroing starts
                         :            :  * @get_block: block-mapping callback, called with create == 0
                         :            :  *
                         :            :  * Nothing is done when @from is block aligned or when the block is a hole.
                         :            :  * If the page has buffers, either on entry or after it has been read in,
                         :            :  * the work is handed to block_truncate_page().
                         :            :  *
                         :            :  * Returns 0 on success or when there is nothing to do, -ENOMEM when the
                         :            :  * page cannot be obtained, the error from get_block() or ->readpage(), or
                         :            :  * -EIO when the page is still not uptodate after ->readpage().
                         :            :  */
    2802                 :          0 : int nobh_truncate_page(struct address_space *mapping,
    2803                 :            :                         loff_t from, get_block_t *get_block)
    2804                 :            : {
    2805                 :          0 :         pgoff_t index = from >> PAGE_SHIFT;
    2806                 :          0 :         unsigned offset = from & (PAGE_SIZE-1);
    2807                 :          0 :         unsigned blocksize;
    2808                 :          0 :         sector_t iblock;
    2809                 :          0 :         unsigned length, pos;
    2810                 :          0 :         struct inode *inode = mapping->host;
    2811                 :          0 :         struct page *page;
    2812                 :          0 :         struct buffer_head map_bh;
    2813                 :          0 :         int err;
    2814                 :            : 
    2815         [ #  # ]:          0 :         blocksize = i_blocksize(inode);
    2816                 :          0 :         length = offset & (blocksize - 1);      /* offset of @from inside its block */
    2817                 :            : 
    2818                 :            :         /* Block boundary? Nothing to do */
    2819         [ #  # ]:          0 :         if (!length)
    2820                 :            :                 return 0;
    2821                 :            : 
    2822                 :          0 :         length = blocksize - length;            /* now: bytes from @from to the end of the block */
    2823                 :          0 :         iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
    2824                 :            : 
    2825                 :          0 :         page = grab_cache_page(mapping, index);
    2826                 :          0 :         err = -ENOMEM;
    2827         [ #  # ]:          0 :         if (!page)
    2828                 :          0 :                 goto out;
    2829                 :            : 
    2830         [ #  # ]:          0 :         if (page_has_buffers(page)) {
    2831                 :          0 : has_buffers:
    2832                 :          0 :                 unlock_page(page);
    2833                 :          0 :                 put_page(page);
    2834                 :          0 :                 return block_truncate_page(mapping, from, get_block);
    2835                 :            :         }
    2836                 :            : 
    2837                 :            :         /* Find the buffer that contains "offset" */
    2838                 :            :         pos = blocksize;
    2839         [ #  # ]:          0 :         while (offset >= pos) {
    2840                 :          0 :                 iblock++;
    2841                 :          0 :                 pos += blocksize;
    2842                 :            :         }
    2843                 :            : 
    2844                 :          0 :         map_bh.b_size = blocksize;
    2845                 :          0 :         map_bh.b_state = 0;
    2846                 :          0 :         err = get_block(inode, iblock, &map_bh, 0);
    2847         [ #  # ]:          0 :         if (err)
    2848                 :          0 :                 goto unlock;
    2849                 :            :         /* unmapped? It's a hole - nothing to do */
    2850         [ #  # ]:          0 :         if (!buffer_mapped(&map_bh))
    2851                 :          0 :                 goto unlock;            /* err is 0 here, so the caller sees success */
    2852                 :            : 
    2853                 :            :         /* Ok, it's mapped. Make sure it's up-to-date */
    2854         [ #  # ]:          0 :         if (!PageUptodate(page)) {
    2855                 :          0 :                 err = mapping->a_ops->readpage(NULL, page);
    2856         [ #  # ]:          0 :                 if (err) {
    2857                 :          0 :                         put_page(page); /* NOTE(review): no unlock_page() here; presumably ->readpage() unlocks on error - confirm */
    2858                 :          0 :                         goto out;
    2859                 :            :                 }
    2860                 :          0 :                 lock_page(page);
    2861         [ #  # ]:          0 :                 if (!PageUptodate(page)) {
    2862                 :          0 :                         err = -EIO;
    2863                 :          0 :                         goto unlock;
    2864                 :            :                 }
    2865         [ #  # ]:          0 :                 if (page_has_buffers(page))
    2866                 :          0 :                         goto has_buffers;
    2867                 :            :         }
    2868                 :          0 :         zero_user(page, offset, length);
    2869                 :          0 :         set_page_dirty(page);
    2870                 :          0 :         err = 0;
    2871                 :            : 
    2872                 :          0 : unlock:
    2873                 :          0 :         unlock_page(page);
    2874                 :          0 :         put_page(page);
    2875                 :            : out:
    2876                 :            :         return err;
    2877                 :            : }
    2878                 :            : EXPORT_SYMBOL(nobh_truncate_page);
    2879                 :            : /*
                         :            :  * block_truncate_page() - zero the page-cache bytes from @from up to the
                         :            :  * end of the block containing @from, using the page's buffer_heads.
                         :            :  *
                         :            :  * @mapping:   address space of the file; the inode is mapping->host
                         :            :  * @from:      file offset at which zeroing starts
                         :            :  * @get_block: block-mapping callback, called with create == 0 when the
                         :            :  *             buffer is not yet mapped
                         :            :  *
                         :            :  * Empty buffers are created for the page if it has none.  Nothing is
                         :            :  * zeroed when @from is block aligned or when the block is a hole.
                         :            :  *
                         :            :  * Returns 0 on success or when there is nothing to do, -ENOMEM when the
                         :            :  * page cannot be obtained, the error from get_block(), or -EIO when the
                         :            :  * block had to be read and did not become uptodate.
                         :            :  */
    2880                 :          0 : int block_truncate_page(struct address_space *mapping,
    2881                 :            :                         loff_t from, get_block_t *get_block)
    2882                 :            : {
    2883                 :          0 :         pgoff_t index = from >> PAGE_SHIFT;
    2884                 :          0 :         unsigned offset = from & (PAGE_SIZE-1);
    2885                 :          0 :         unsigned blocksize;
    2886                 :          0 :         sector_t iblock;
    2887                 :          0 :         unsigned length, pos;
    2888                 :          0 :         struct inode *inode = mapping->host;
    2889                 :          0 :         struct page *page;
    2890                 :          0 :         struct buffer_head *bh;
    2891                 :          0 :         int err;
    2892                 :            : 
    2893         [ #  # ]:          0 :         blocksize = i_blocksize(inode);
    2894                 :          0 :         length = offset & (blocksize - 1);      /* offset of @from inside its block */
    2895                 :            : 
    2896                 :            :         /* Block boundary? Nothing to do */
    2897         [ #  # ]:          0 :         if (!length)
    2898                 :            :                 return 0;
    2899                 :            : 
    2900                 :          0 :         length = blocksize - length;            /* now: bytes from @from to the end of the block */
    2901                 :          0 :         iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
    2902                 :            :         
    2903                 :          0 :         page = grab_cache_page(mapping, index);
    2904                 :          0 :         err = -ENOMEM;
    2905         [ #  # ]:          0 :         if (!page)
    2906                 :          0 :                 goto out;
    2907                 :            : 
    2908         [ #  # ]:          0 :         if (!page_has_buffers(page))
    2909                 :          0 :                 create_empty_buffers(page, blocksize, 0);
    2910                 :            : 
    2911                 :            :         /* Find the buffer that contains "offset" */
    2912         [ #  # ]:          0 :         bh = page_buffers(page);
    2913                 :          0 :         pos = blocksize;
    2914         [ #  # ]:          0 :         while (offset >= pos) {
    2915                 :          0 :                 bh = bh->b_this_page;
    2916                 :          0 :                 iblock++;
    2917                 :          0 :                 pos += blocksize;
    2918                 :            :         }
    2919                 :            : 
    2920                 :          0 :         err = 0;
    2921         [ #  # ]:          0 :         if (!buffer_mapped(bh)) {
    2922         [ #  # ]:          0 :                 WARN_ON(bh->b_size != blocksize);
    2923                 :          0 :                 err = get_block(inode, iblock, bh, 0);
    2924         [ #  # ]:          0 :                 if (err)
    2925                 :          0 :                         goto unlock;
    2926                 :            :                 /* unmapped? It's a hole - nothing to do */
    2927         [ #  # ]:          0 :                 if (!buffer_mapped(bh))
    2928                 :          0 :                         goto unlock;
    2929                 :            :         }
    2930                 :            : 
    2931                 :            :         /* Ok, it's mapped. Make sure it's up-to-date */
    2932         [ #  # ]:          0 :         if (PageUptodate(page))
    2933                 :          0 :                 set_buffer_uptodate(bh);
    2934                 :            : 
    2935   [ #  #  #  #  :          0 :         if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
                   #  # ]
    2936                 :          0 :                 err = -EIO;             /* preset; stays in place if the read below fails */
    2937                 :          0 :                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
    2938                 :          0 :                 wait_on_buffer(bh);
    2939                 :            :                 /* Uhhuh. Read error. Complain and punt. */
    2940         [ #  # ]:          0 :                 if (!buffer_uptodate(bh))
    2941                 :          0 :                         goto unlock;
    2942                 :            :         }
    2943                 :            : 
    2944                 :          0 :         zero_user(page, offset, length);
    2945                 :          0 :         mark_buffer_dirty(bh);
    2946                 :          0 :         err = 0;
    2947                 :            : 
    2948                 :          0 : unlock:
    2949                 :          0 :         unlock_page(page);
    2950                 :          0 :         put_page(page);
    2951                 :            : out:
    2952                 :            :         return err;
    2953                 :            : }
    2954                 :            : EXPORT_SYMBOL(block_truncate_page);
    2955                 :            : 
    2956                 :            : /*
    2957                 :            :  * The generic ->writepage function for buffer-backed address_spaces
                         :            :  *
                         :            :  * @page:      page to write; the inode is page->mapping->host
                         :            :  * @get_block: block-mapping callback, passed to __block_write_full_page()
                         :            :  * @wbc:       writeback control, passed through unchanged
                         :            :  *
                         :            :  * A page entirely inside i_size is written as is.  A page entirely
                         :            :  * outside i_size is invalidated and unlocked, and 0 is returned.  A page
                         :            :  * that straddles i_size has the part beyond i_size zeroed before it is
                         :            :  * written.
                         :            :  *
                         :            :  * Returns 0 for a page outside i_size, otherwise the result of
                         :            :  * __block_write_full_page().
    2958                 :            :  */
    2959                 :          0 : int block_write_full_page(struct page *page, get_block_t *get_block,
    2960                 :            :                         struct writeback_control *wbc)
    2961                 :            : {
    2962                 :          0 :         struct inode * const inode = page->mapping->host;
    2963         [ #  # ]:          0 :         loff_t i_size = i_size_read(inode);
    2964                 :          0 :         const pgoff_t end_index = i_size >> PAGE_SHIFT;
    2965                 :          0 :         unsigned offset;
    2966                 :            : 
    2967                 :            :         /* Is the page fully inside i_size? */
    2968         [ #  # ]:          0 :         if (page->index < end_index)
    2969                 :          0 :                 return __block_write_full_page(inode, page, get_block, wbc,
    2970                 :            :                                                end_buffer_async_write);
    2971                 :            : 
    2972                 :            :         /* Is the page fully outside i_size? (truncate in progress) */
    2973                 :          0 :         offset = i_size & (PAGE_SIZE-1);
    2974   [ #  #  #  # ]:          0 :         if (page->index >= end_index+1 || !offset) {
    2975                 :            :                 /*
    2976                 :            :                  * The page may have dirty, unmapped buffers.  For example,
    2977                 :            :                  * they may have been added in ext3_writepage().  Make them
    2978                 :            :                  * freeable here, so the page does not leak.
    2979                 :            :                  */
    2980                 :          0 :                 do_invalidatepage(page, 0, PAGE_SIZE);
    2981                 :          0 :                 unlock_page(page);
    2982                 :          0 :                 return 0; /* don't care */
    2983                 :            :         }
    2984                 :            : 
    2985                 :            :         /*
    2986                 :            :          * The page straddles i_size.  It must be zeroed out on each and every
    2987                 :            :          * writepage invocation because it may be mmapped.  "A file is mapped
    2988                 :            :          * in multiples of the page size.  For a file that is not a multiple of
    2989                 :            :          * the  page size, the remaining memory is zeroed when mapped, and
    2990                 :            :          * writes to that region are not written out to the file."
    2991                 :            :          */
    2992                 :          0 :         zero_user_segment(page, offset, PAGE_SIZE);
    2993                 :          0 :         return __block_write_full_page(inode, page, get_block, wbc,
    2994                 :            :                                                         end_buffer_async_write);
    2995                 :            : }
    2996                 :            : EXPORT_SYMBOL(block_write_full_page);
    2997                 :            : /*
                         :            :  * generic_block_bmap() - look up the b_blocknr that @get_block reports
                         :            :  * for file block @block of the inode behind @mapping.
                         :            :  *
                         :            :  * The lookup uses an on-stack buffer_head with only b_size initialised
                         :            :  * (to the inode's block size) and calls @get_block with create == 0.
                         :            :  * The return value of @get_block is ignored.  All other fields of the
                         :            :  * buffer_head start out zeroed, so 0 is returned when @get_block leaves
                         :            :  * b_blocknr untouched.
                         :            :  */
    2998                 :       2157 : sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
    2999                 :            :                             get_block_t *get_block)
    3000                 :            : {
    3001                 :       2157 :         struct inode *inode = mapping->host;
    3002                 :       2157 :         struct buffer_head tmp = {
    3003                 :       2157 :                 .b_size = i_blocksize(inode),
    3004                 :            :         };
    3005                 :            : 
    3006                 :       2157 :         get_block(inode, block, &tmp, 0);       /* result deliberately unchecked, see above */
    3007                 :       2157 :         return tmp.b_blocknr;
    3008                 :            : }
    3009                 :            : EXPORT_SYMBOL(generic_block_bmap);
    3010                 :            : 
    3011                 :      15093 : static void end_bio_bh_io_sync(struct bio *bio)
    3012                 :            : {
    3013                 :      15093 :         struct buffer_head *bh = bio->bi_private;
    3014                 :            : 
    3015         [ -  + ]:      15093 :         if (unlikely(bio_flagged(bio, BIO_QUIET)))
    3016                 :          0 :                 set_bit(BH_Quiet, &bh->b_state);
    3017                 :            : 
    3018                 :      15093 :         bh->b_end_io(bh, !bio->bi_status);
    3019                 :      15093 :         bio_put(bio);
    3020                 :      15093 : }
    3021                 :            : 
    3022                 :            : /*
    3023                 :            :  * This allows us to do IO even on the odd last sectors
    3024                 :            :  * of a device, even if the block size is some multiple
    3025                 :            :  * of the physical sector size.
    3026                 :            :  *
    3027                 :            :  * We'll just truncate the bio to the size of the device,
    3028                 :            :  * and clear the end of the buffer head manually.
    3029                 :            :  *
    3030                 :            :  * Truly out-of-range accesses will turn into actual IO
    3031                 :            :  * errors, this only handles the "we need to be able to
    3032                 :            :  * do IO at the final sector" case.
    3033                 :            :  */
    3034                 :      18054 : void guard_bio_eod(struct bio *bio)
    3035                 :            : {
    3036                 :      18054 :         sector_t maxsector;
    3037                 :      18054 :         struct hd_struct *part;
    3038                 :            : 
    3039                 :      18054 :         rcu_read_lock();
    3040                 :      18054 :         part = __disk_get_part(bio->bi_disk, bio->bi_partno);
    3041         [ +  - ]:      18054 :         if (part)
    3042                 :      18054 :                 maxsector = part_nr_sects_read(part);
    3043                 :            :         else
    3044                 :          0 :                 maxsector = get_capacity(bio->bi_disk);
    3045                 :      18054 :         rcu_read_unlock();
    3046                 :            : 
    3047         [ +  - ]:      18054 :         if (!maxsector)
    3048                 :            :                 return;
    3049                 :            : 
    3050                 :            :         /*
    3051                 :            :          * If the *whole* IO is past the end of the device,
    3052                 :            :          * let it through, and the IO layer will turn it into
    3053                 :            :          * an EIO.
    3054                 :            :          */
    3055         [ +  - ]:      18054 :         if (unlikely(bio->bi_iter.bi_sector >= maxsector))
    3056                 :            :                 return;
    3057                 :            : 
    3058                 :      18054 :         maxsector -= bio->bi_iter.bi_sector;
    3059         [ -  + ]:      18054 :         if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
    3060                 :            :                 return;
    3061                 :            : 
    3062                 :          0 :         bio_truncate(bio, maxsector << 9);
    3063                 :            : }
    3064                 :            : 
    3065                 :            : static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
    3066                 :            :                          enum rw_hint write_hint, struct writeback_control *wbc)
    3067                 :            : {
    3068                 :            :         struct bio *bio;
    3069                 :            : 
    3070                 :            :         BUG_ON(!buffer_locked(bh));
    3071                 :            :         BUG_ON(!buffer_mapped(bh));
    3072                 :            :         BUG_ON(!bh->b_end_io);
    3073                 :            :         BUG_ON(buffer_delay(bh));
    3074                 :            :         BUG_ON(buffer_unwritten(bh));
    3075                 :            : 
    3076                 :            :         /*
    3077                 :            :          * Only clear out a write error when rewriting
    3078                 :            :          */
    3079                 :            :         if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
    3080                 :            :                 clear_buffer_write_io_error(bh);
    3081                 :            : 
    3082                 :            :         /*
    3083                 :            :          * from here on down, it's all bio -- do the initial mapping,
    3084                 :            :          * submit_bio -> generic_make_request may further map this bio around
    3085                 :            :          */
    3086                 :            :         bio = bio_alloc(GFP_NOIO, 1);
    3087                 :            : 
    3088                 :            :         bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
    3089                 :            :         bio_set_dev(bio, bh->b_bdev);
    3090                 :            :         bio->bi_write_hint = write_hint;
    3091                 :            : 
    3092                 :            :         bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
    3093                 :            :         BUG_ON(bio->bi_iter.bi_size != bh->b_size);
    3094                 :            : 
    3095                 :            :         bio->bi_end_io = end_bio_bh_io_sync;
    3096                 :            :         bio->bi_private = bh;
    3097                 :            : 
    3098                 :            :         if (buffer_meta(bh))
    3099                 :            :                 op_flags |= REQ_META;
    3100                 :            :         if (buffer_prio(bh))
    3101                 :            :                 op_flags |= REQ_PRIO;
    3102                 :            :         bio_set_op_attrs(bio, op, op_flags);
    3103                 :            : 
    3104                 :            :         /* Take care of bh's that straddle the end of the device */
    3105                 :            :         guard_bio_eod(bio);
    3106                 :            : 
    3107                 :            :         if (wbc) {
    3108                 :            :                 wbc_init_bio(wbc, bio);
    3109                 :            :                 wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
    3110                 :            :         }
    3111                 :            : 
    3112                 :            :         submit_bio(bio);
    3113                 :            :         return 0;
    3114                 :            : }
    3115                 :            : 
    3116                 :      15093 : int submit_bh(int op, int op_flags, struct buffer_head *bh)
    3117                 :            : {
    3118                 :       2745 :         return submit_bh_wbc(op, op_flags, bh, 0, NULL);
    3119                 :            : }
    3120                 :            : EXPORT_SYMBOL(submit_bh);
    3121                 :            : 
    3122                 :            : /**
    3123                 :            :  * ll_rw_block: low-level access to block devices (DEPRECATED)
    3124                 :            :  * @op: whether to %READ or %WRITE
    3125                 :            :  * @op_flags: req_flag_bits
    3126                 :            :  * @nr: number of &struct buffer_heads in the array
    3127                 :            :  * @bhs: array of pointers to &struct buffer_head
    3128                 :            :  *
    3129                 :            :  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
    3130                 :            :  * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
    3131                 :            :  * @op_flags contains flags modifying the detailed I/O behavior, most notably
    3132                 :            :  * %REQ_RAHEAD.
    3133                 :            :  *
    3134                 :            :  * This function drops any buffer that it cannot get a lock on (with the
    3135                 :            :  * BH_Lock state bit), any buffer that appears to be clean when doing a write
    3136                 :            :  * request, and any buffer that appears to be up-to-date when doing read
    3137                 :            :  * request.  Further it marks as clean buffers that are processed for
    3138                 :            :  * writing (the buffer cache won't assume that they are actually clean
    3139                 :            :  * until the buffer gets unlocked).
    3140                 :            :  *
    3141                 :            :  * ll_rw_block sets b_end_io to simple completion handler that marks
    3142                 :            :  * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
    3143                 :            :  * any waiters. 
    3144                 :            :  *
    3145                 :            :  * All of the buffers must be for the same device, and must also be a
    3146                 :            :  * multiple of the current approved size for the device.
    3147                 :            :  */
    3148                 :      12832 : void ll_rw_block(int op, int op_flags,  int nr, struct buffer_head *bhs[])
    3149                 :            : {
    3150                 :      12832 :         int i;
    3151                 :            : 
    3152         [ +  + ]:      25664 :         for (i = 0; i < nr; i++) {
    3153                 :      12832 :                 struct buffer_head *bh = bhs[i];
    3154                 :            : 
    3155         [ +  + ]:      12832 :                 if (!trylock_buffer(bh))
    3156                 :        358 :                         continue;
    3157         [ -  + ]:      12474 :                 if (op == WRITE) {
    3158         [ #  # ]:          0 :                         if (test_clear_buffer_dirty(bh)) {
    3159                 :          0 :                                 bh->b_end_io = end_buffer_write_sync;
    3160                 :          0 :                                 get_bh(bh);
    3161                 :          0 :                                 submit_bh(op, op_flags, bh);
    3162                 :          0 :                                 continue;
    3163                 :            :                         }
    3164                 :            :                 } else {
    3165         [ +  + ]:      12474 :                         if (!buffer_uptodate(bh)) {
    3166                 :      12306 :                                 bh->b_end_io = end_buffer_read_sync;
    3167                 :      12306 :                                 get_bh(bh);
    3168                 :      12306 :                                 submit_bh(op, op_flags, bh);
    3169                 :      12306 :                                 continue;
    3170                 :            :                         }
    3171                 :            :                 }
    3172                 :        168 :                 unlock_buffer(bh);
    3173                 :            :         }
    3174                 :      12832 : }
    3175                 :            : EXPORT_SYMBOL(ll_rw_block);
    3176                 :            : 
    3177                 :          0 : void write_dirty_buffer(struct buffer_head *bh, int op_flags)
    3178                 :            : {
    3179                 :          0 :         lock_buffer(bh);
    3180         [ #  # ]:          0 :         if (!test_clear_buffer_dirty(bh)) {
    3181                 :          0 :                 unlock_buffer(bh);
    3182                 :          0 :                 return;
    3183                 :            :         }
    3184                 :          0 :         bh->b_end_io = end_buffer_write_sync;
    3185                 :          0 :         get_bh(bh);
    3186                 :          0 :         submit_bh(REQ_OP_WRITE, op_flags, bh);
    3187                 :            : }
    3188                 :            : EXPORT_SYMBOL(write_dirty_buffer);
    3189                 :            : 
    3190                 :            : /*
    3191                 :            :  * For a data-integrity writeout, we need to wait upon any in-progress I/O
    3192                 :            :  * and then start new I/O and then wait upon it.  The caller must have a ref on
    3193                 :            :  * the buffer_head.
    3194                 :            :  */
    3195                 :         42 : int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
    3196                 :            : {
    3197                 :         42 :         int ret = 0;
    3198                 :            : 
    3199         [ -  + ]:         42 :         WARN_ON(atomic_read(&bh->b_count) < 1);
    3200                 :         42 :         lock_buffer(bh);
    3201         [ +  - ]:         42 :         if (test_clear_buffer_dirty(bh)) {
    3202                 :         42 :                 get_bh(bh);
    3203                 :         42 :                 bh->b_end_io = end_buffer_write_sync;
    3204                 :         42 :                 ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
    3205                 :         42 :                 wait_on_buffer(bh);
    3206   [ +  -  -  + ]:         84 :                 if (!ret && !buffer_uptodate(bh))
    3207                 :          0 :                         ret = -EIO;
    3208                 :            :         } else {
    3209                 :          0 :                 unlock_buffer(bh);
    3210                 :            :         }
    3211                 :         42 :         return ret;
    3212                 :            : }
    3213                 :            : EXPORT_SYMBOL(__sync_dirty_buffer);
    3214                 :            : 
    3215                 :          0 : int sync_dirty_buffer(struct buffer_head *bh)
    3216                 :            : {
    3217                 :          0 :         return __sync_dirty_buffer(bh, REQ_SYNC);
    3218                 :            : }
    3219                 :            : EXPORT_SYMBOL(sync_dirty_buffer);
    3220                 :            : 
    3221                 :            : /*
    3222                 :            :  * try_to_free_buffers() checks if all the buffers on this particular page
    3223                 :            :  * are unused, and releases them if so.
    3224                 :            :  *
    3225                 :            :  * Exclusion against try_to_free_buffers may be obtained by either
    3226                 :            :  * locking the page or by holding its mapping's private_lock.
    3227                 :            :  *
    3228                 :            :  * If the page is dirty but all the buffers are clean then we need to
    3229                 :            :  * be sure to mark the page clean as well.  This is because the page
    3230                 :            :  * may be against a block device, and a later reattachment of buffers
    3231                 :            :  * to a dirty page will set *all* buffers dirty.  Which would corrupt
    3232                 :            :  * filesystem data on the same device.
    3233                 :            :  *
    3234                 :            :  * The same applies to regular filesystem pages: if all the buffers are
    3235                 :            :  * clean then we set the page clean and proceed.  To do that, we require
    3236                 :            :  * total exclusion from __set_page_dirty_buffers().  That is obtained with
    3237                 :            :  * private_lock.
    3238                 :            :  *
    3239                 :            :  * try_to_free_buffers() is non-blocking.
    3240                 :            :  */
    3241                 :        294 : static inline int buffer_busy(struct buffer_head *bh)
    3242                 :            : {
    3243                 :        588 :         return atomic_read(&bh->b_count) |
    3244                 :        294 :                 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
    3245                 :            : }
    3246                 :            : 
    3247                 :            : static int
    3248                 :        126 : drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
    3249                 :            : {
    3250         [ -  + ]:        126 :         struct buffer_head *head = page_buffers(page);
    3251                 :        126 :         struct buffer_head *bh;
    3252                 :            : 
    3253                 :        126 :         bh = head;
    3254                 :        294 :         do {
    3255         [ +  + ]:        294 :                 if (buffer_busy(bh))
    3256                 :         42 :                         goto failed;
    3257                 :        252 :                 bh = bh->b_this_page;
    3258         [ +  + ]:        252 :         } while (bh != head);
    3259                 :            : 
    3260                 :        210 :         do {
    3261                 :        210 :                 struct buffer_head *next = bh->b_this_page;
    3262                 :            : 
    3263         [ -  + ]:        210 :                 if (bh->b_assoc_map)
    3264         [ #  # ]:          0 :                         __remove_assoc_queue(bh);
    3265                 :        210 :                 bh = next;
    3266         [ +  + ]:        210 :         } while (bh != head);
    3267                 :         84 :         *buffers_to_free = head;
    3268                 :         84 :         __clear_page_buffers(page);
    3269                 :         84 :         return 1;
    3270                 :            : failed:
    3271                 :         42 :         return 0;
    3272                 :            : }
    3273                 :            : 
    3274                 :        126 : int try_to_free_buffers(struct page *page)
    3275                 :            : {
    3276                 :        126 :         struct address_space * const mapping = page->mapping;
    3277                 :        126 :         struct buffer_head *buffers_to_free = NULL;
    3278                 :        126 :         int ret = 0;
    3279                 :            : 
    3280   [ -  +  -  + ]:        252 :         BUG_ON(!PageLocked(page));
    3281   [ -  +  +  - ]:        252 :         if (PageWriteback(page))
    3282                 :            :                 return 0;
    3283                 :            : 
    3284         [ -  + ]:        126 :         if (mapping == NULL) {          /* can this still happen? */
    3285                 :          0 :                 ret = drop_buffers(page, &buffers_to_free);
    3286                 :          0 :                 goto out;
    3287                 :            :         }
    3288                 :            : 
    3289                 :        126 :         spin_lock(&mapping->private_lock);
    3290                 :        126 :         ret = drop_buffers(page, &buffers_to_free);
    3291                 :            : 
    3292                 :            :         /*
    3293                 :            :          * If the filesystem writes its buffers by hand (eg ext3)
    3294                 :            :          * then we can have clean buffers against a dirty page.  We
    3295                 :            :          * clean the page here; otherwise the VM will never notice
    3296                 :            :          * that the filesystem did any IO at all.
    3297                 :            :          *
    3298                 :            :          * Also, during truncate, discard_buffer will have marked all
    3299                 :            :          * the page's buffers clean.  We discover that here and clean
    3300                 :            :          * the page also.
    3301                 :            :          *
    3302                 :            :          * private_lock must be held over this entire operation in order
    3303                 :            :          * to synchronise against __set_page_dirty_buffers and prevent the
    3304                 :            :          * dirty bit from being lost.
    3305                 :            :          */
    3306         [ +  + ]:        126 :         if (ret)
    3307                 :         84 :                 cancel_dirty_page(page);
    3308                 :        126 :         spin_unlock(&mapping->private_lock);
    3309                 :        126 : out:
    3310         [ +  + ]:        126 :         if (buffers_to_free) {
    3311                 :            :                 struct buffer_head *bh = buffers_to_free;
    3312                 :            : 
    3313                 :        210 :                 do {
    3314                 :        210 :                         struct buffer_head *next = bh->b_this_page;
    3315                 :        210 :                         free_buffer_head(bh);
    3316                 :        210 :                         bh = next;
    3317         [ +  + ]:        210 :                 } while (bh != buffers_to_free);
    3318                 :            :         }
    3319                 :            :         return ret;
    3320                 :            : }
    3321                 :            : EXPORT_SYMBOL(try_to_free_buffers);
    3322                 :            : 
    3323                 :            : /*
    3324                 :            :  * There are no bdflush tunables left.  But distributions are
    3325                 :            :  * still running obsolete flush daemons, so we terminate them here.
    3326                 :            :  *
    3327                 :            :  * Use of bdflush() is deprecated and will be removed in a future kernel.
    3328                 :            :  * The `flush-X' kernel threads fully replace bdflush daemons and this call.
    3329                 :            :  */
    3330                 :          0 : SYSCALL_DEFINE2(bdflush, int, func, long, data)
    3331                 :            : {
    3332                 :            :         static int msg_count;
    3333                 :            : 
    3334                 :            :         if (!capable(CAP_SYS_ADMIN))
    3335                 :            :                 return -EPERM;
    3336                 :            : 
    3337                 :            :         if (msg_count < 5) {
    3338                 :            :                 msg_count++;
    3339                 :            :                 printk(KERN_INFO
    3340                 :            :                         "warning: process `%s' used the obsolete bdflush"
    3341                 :            :                         " system call\n", current->comm);
    3342                 :            :                 printk(KERN_INFO "Fix your initscripts?\n");
    3343                 :            :         }
    3344                 :            : 
    3345                 :            :         if (func == 1)
    3346                 :            :                 do_exit(0);
    3347                 :            :         return 0;
    3348                 :            : }
    3349                 :            : 
    3350                 :            : /*
    3351                 :            :  * Buffer-head allocation
    3352                 :            :  */
    3353                 :            : static struct kmem_cache *bh_cachep __read_mostly;
    3354                 :            : 
    3355                 :            : /*
    3356                 :            :  * Once the number of bh's in the machine exceeds this level, we start
    3357                 :            :  * stripping them in writeback.
    3358                 :            :  */
    3359                 :            : static unsigned long max_buffer_heads;
    3360                 :            : 
    3361                 :            : int buffer_heads_over_limit;
    3362                 :            : 
    3363                 :            : struct bh_accounting {
    3364                 :            :         int nr;                 /* Number of live bh's */
    3365                 :            :         int ratelimit;          /* Limit cacheline bouncing */
    3366                 :            : };
    3367                 :            : 
    3368                 :            : static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
    3369                 :            : 
    3370                 :      35458 : static void recalc_bh_state(void)
    3371                 :            : {
    3372                 :      35458 :         int i;
    3373                 :      35458 :         int tot = 0;
    3374                 :            : 
    3375         [ -  + ]:      35458 :         if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
    3376                 :            :                 return;
    3377                 :          0 :         __this_cpu_write(bh_accounting.ratelimit, 0);
    3378         [ #  # ]:          0 :         for_each_online_cpu(i)
    3379                 :          0 :                 tot += per_cpu(bh_accounting, i).nr;
    3380                 :          0 :         buffer_heads_over_limit = (tot > max_buffer_heads);
    3381                 :            : }
    3382                 :            : 
    3383                 :      33433 : struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
    3384                 :            : {
    3385                 :      33433 :         struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
    3386         [ +  - ]:      33433 :         if (ret) {
    3387                 :      33433 :                 INIT_LIST_HEAD(&ret->b_assoc_buffers);
    3388                 :      33433 :                 preempt_disable();
    3389                 :      33433 :                 __this_cpu_inc(bh_accounting.nr);
    3390                 :      33433 :                 recalc_bh_state();
    3391                 :      33433 :                 preempt_enable();
    3392                 :            :         }
    3393                 :      33433 :         return ret;
    3394                 :            : }
    3395                 :            : EXPORT_SYMBOL(alloc_buffer_head);
    3396                 :            : 
    3397                 :       2025 : void free_buffer_head(struct buffer_head *bh)
    3398                 :            : {
    3399         [ -  + ]:       2025 :         BUG_ON(!list_empty(&bh->b_assoc_buffers));
    3400                 :       2025 :         kmem_cache_free(bh_cachep, bh);
    3401                 :       2025 :         preempt_disable();
    3402                 :       2025 :         __this_cpu_dec(bh_accounting.nr);
    3403                 :       2025 :         recalc_bh_state();
    3404                 :       2025 :         preempt_enable();
    3405                 :       2025 : }
    3406                 :            : EXPORT_SYMBOL(free_buffer_head);
    3407                 :            : 
    3408                 :          0 : static int buffer_exit_cpu_dead(unsigned int cpu)
    3409                 :            : {
    3410                 :          0 :         int i;
    3411                 :          0 :         struct bh_lru *b = &per_cpu(bh_lrus, cpu);
    3412                 :            : 
    3413         [ #  # ]:          0 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    3414         [ #  # ]:          0 :                 brelse(b->bhs[i]);
    3415                 :          0 :                 b->bhs[i] = NULL;
    3416                 :            :         }
    3417                 :          0 :         this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
    3418                 :          0 :         per_cpu(bh_accounting, cpu).nr = 0;
    3419                 :          0 :         return 0;
    3420                 :            : }
    3421                 :            : 
    3422                 :            : /**
    3423                 :            :  * bh_uptodate_or_lock - Test whether the buffer is uptodate
    3424                 :            :  * @bh: struct buffer_head
    3425                 :            :  *
    3426                 :            :  * Return true if the buffer is up-to-date and false,
    3427                 :            :  * with the buffer locked, if not.
    3428                 :            :  */
    3429                 :          0 : int bh_uptodate_or_lock(struct buffer_head *bh)
    3430                 :            : {
    3431         [ #  # ]:          0 :         if (!buffer_uptodate(bh)) {
    3432                 :          0 :                 lock_buffer(bh);
    3433         [ #  # ]:          0 :                 if (!buffer_uptodate(bh))
    3434                 :            :                         return 0;
    3435                 :          0 :                 unlock_buffer(bh);
    3436                 :            :         }
    3437                 :            :         return 1;
    3438                 :            : }
    3439                 :            : EXPORT_SYMBOL(bh_uptodate_or_lock);
    3440                 :            : 
    3441                 :            : /**
    3442                 :            :  * bh_submit_read - Submit a locked buffer for reading
    3443                 :            :  * @bh: struct buffer_head
    3444                 :            :  *
    3445                 :            :  * Returns zero on success and -EIO on error.
    3446                 :            :  */
    3447                 :          0 : int bh_submit_read(struct buffer_head *bh)
    3448                 :            : {
    3449         [ #  # ]:          0 :         BUG_ON(!buffer_locked(bh));
    3450                 :            : 
    3451         [ #  # ]:          0 :         if (buffer_uptodate(bh)) {
    3452                 :          0 :                 unlock_buffer(bh);
    3453                 :          0 :                 return 0;
    3454                 :            :         }
    3455                 :            : 
    3456                 :          0 :         get_bh(bh);
    3457                 :          0 :         bh->b_end_io = end_buffer_read_sync;
    3458                 :          0 :         submit_bh(REQ_OP_READ, 0, bh);
    3459                 :          0 :         wait_on_buffer(bh);
    3460         [ #  # ]:          0 :         if (buffer_uptodate(bh))
    3461                 :          0 :                 return 0;
    3462                 :            :         return -EIO;
    3463                 :            : }
    3464                 :            : EXPORT_SYMBOL(bh_submit_read);
    3465                 :            : 
    3466                 :         21 : void __init buffer_init(void)
    3467                 :            : {
    3468                 :         21 :         unsigned long nrpages;
    3469                 :         21 :         int ret;
    3470                 :            : 
    3471                 :         21 :         bh_cachep = kmem_cache_create("buffer_head",
    3472                 :            :                         sizeof(struct buffer_head), 0,
    3473                 :            :                                 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
    3474                 :            :                                 SLAB_MEM_SPREAD),
    3475                 :            :                                 NULL);
    3476                 :            : 
    3477                 :            :         /*
    3478                 :            :          * Limit the bh occupancy to 10% of ZONE_NORMAL
    3479                 :            :          */
    3480                 :         21 :         nrpages = (nr_free_buffer_pages() * 10) / 100;
    3481                 :         21 :         max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
    3482                 :         21 :         ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
    3483                 :            :                                         NULL, buffer_exit_cpu_dead);
    3484         [ -  + ]:         21 :         WARN_ON(ret < 0);
    3485                 :         21 : }

Generated by: LCOV version 1.14