LCOV - code coverage report
Current view: top level - fs - buffer.c (source / functions) Hit Total Coverage
Test: Real Lines: 618 1113 55.5 %
Date: 2020-10-17 15:46:16 Functions: 3 102 2.9 %
Legend: Neither, QEMU, Real, Both Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : // SPDX-License-Identifier: GPL-2.0-only
       2                 :            : /*
       3                 :            :  *  linux/fs/buffer.c
       4                 :            :  *
       5                 :            :  *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
       6                 :            :  */
       7                 :            : 
       8                 :            : /*
       9                 :            :  * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
      10                 :            :  *
      11                 :            :  * Removed a lot of unnecessary code and simplified things now that
      12                 :            :  * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
      13                 :            :  *
      14                 :            :  * Speed up hash, lru, and free list operations.  Use gfp() for allocating
      15                 :            :  * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
      16                 :            :  *
      17                 :            :  * Added 32k buffer block sizes - these are required older ARM systems. - RMK
      18                 :            :  *
      19                 :            :  * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
      20                 :            :  */
      21                 :            : 
      22                 :            : #include <linux/kernel.h>
      23                 :            : #include <linux/sched/signal.h>
      24                 :            : #include <linux/syscalls.h>
      25                 :            : #include <linux/fs.h>
      26                 :            : #include <linux/iomap.h>
      27                 :            : #include <linux/mm.h>
      28                 :            : #include <linux/percpu.h>
      29                 :            : #include <linux/slab.h>
      30                 :            : #include <linux/capability.h>
      31                 :            : #include <linux/blkdev.h>
      32                 :            : #include <linux/file.h>
      33                 :            : #include <linux/quotaops.h>
      34                 :            : #include <linux/highmem.h>
      35                 :            : #include <linux/export.h>
      36                 :            : #include <linux/backing-dev.h>
      37                 :            : #include <linux/writeback.h>
      38                 :            : #include <linux/hash.h>
      39                 :            : #include <linux/suspend.h>
      40                 :            : #include <linux/buffer_head.h>
      41                 :            : #include <linux/task_io_accounting_ops.h>
      42                 :            : #include <linux/bio.h>
      43                 :            : #include <linux/cpu.h>
      44                 :            : #include <linux/bitops.h>
      45                 :            : #include <linux/mpage.h>
      46                 :            : #include <linux/bit_spinlock.h>
      47                 :            : #include <linux/pagevec.h>
      48                 :            : #include <linux/sched/mm.h>
      49                 :            : #include <trace/events/block.h>
      50                 :            : 
      51                 :            : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
      52                 :            : static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
      53                 :            :                          enum rw_hint hint, struct writeback_control *wbc);
      54                 :            : 
      55                 :            : #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
      56                 :            : 
      57                 :          0 : inline void touch_buffer(struct buffer_head *bh)
      58                 :            : {
      59                 :          3 :         trace_block_touch_buffer(bh);
      60                 :          3 :         mark_page_accessed(bh->b_page);
      61                 :          0 : }
      62                 :            : EXPORT_SYMBOL(touch_buffer);
      63                 :            : 
      64                 :          3 : void __lock_buffer(struct buffer_head *bh)
      65                 :            : {
      66                 :          3 :         wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
      67                 :          3 : }
      68                 :            : EXPORT_SYMBOL(__lock_buffer);
      69                 :            : 
      70                 :          3 : void unlock_buffer(struct buffer_head *bh)
      71                 :            : {
      72                 :          3 :         clear_bit_unlock(BH_Lock, &bh->b_state);
      73                 :          3 :         smp_mb__after_atomic();
      74                 :          3 :         wake_up_bit(&bh->b_state, BH_Lock);
      75                 :          3 : }
      76                 :            : EXPORT_SYMBOL(unlock_buffer);
      77                 :            : 
      78                 :            : /*
      79                 :            :  * Returns if the page has dirty or writeback buffers. If all the buffers
      80                 :            :  * are unlocked and clean then the PageDirty information is stale. If
      81                 :            :  * any of the pages are locked, it is assumed they are locked for IO.
      82                 :            :  */
      83                 :          0 : void buffer_check_dirty_writeback(struct page *page,
      84                 :            :                                      bool *dirty, bool *writeback)
      85                 :            : {
      86                 :            :         struct buffer_head *head, *bh;
      87                 :          0 :         *dirty = false;
      88                 :          0 :         *writeback = false;
      89                 :            : 
      90                 :          0 :         BUG_ON(!PageLocked(page));
      91                 :            : 
      92                 :          0 :         if (!page_has_buffers(page))
      93                 :          0 :                 return;
      94                 :            : 
      95                 :          0 :         if (PageWriteback(page))
      96                 :          0 :                 *writeback = true;
      97                 :            : 
      98                 :          0 :         head = page_buffers(page);
      99                 :            :         bh = head;
     100                 :            :         do {
     101                 :          0 :                 if (buffer_locked(bh))
     102                 :          0 :                         *writeback = true;
     103                 :            : 
     104                 :          0 :                 if (buffer_dirty(bh))
     105                 :          0 :                         *dirty = true;
     106                 :            : 
     107                 :          0 :                 bh = bh->b_this_page;
     108                 :          0 :         } while (bh != head);
     109                 :            : }
     110                 :            : EXPORT_SYMBOL(buffer_check_dirty_writeback);
     111                 :            : 
     112                 :            : /*
     113                 :            :  * Block until a buffer comes unlocked.  This doesn't stop it
     114                 :            :  * from becoming locked again - you have to lock it yourself
     115                 :            :  * if you want to preserve its state.
     116                 :            :  */
     117                 :          3 : void __wait_on_buffer(struct buffer_head * bh)
     118                 :            : {
     119                 :          3 :         wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
     120                 :          3 : }
     121                 :            : EXPORT_SYMBOL(__wait_on_buffer);
     122                 :            : 
     123                 :            : static void
     124                 :          3 : __clear_page_buffers(struct page *page)
     125                 :            : {
     126                 :            :         ClearPagePrivate(page);
     127                 :          3 :         set_page_private(page, 0);
     128                 :          3 :         put_page(page);
     129                 :          3 : }
     130                 :            : 
     131                 :          0 : static void buffer_io_error(struct buffer_head *bh, char *msg)
     132                 :            : {
     133                 :          0 :         if (!test_bit(BH_Quiet, &bh->b_state))
     134                 :          0 :                 printk_ratelimited(KERN_ERR
     135                 :            :                         "Buffer I/O error on dev %pg, logical block %llu%s\n",
     136                 :            :                         bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
     137                 :          0 : }
     138                 :            : 
     139                 :            : /*
     140                 :            :  * End-of-IO handler helper function which does not touch the bh after
     141                 :            :  * unlocking it.
     142                 :            :  * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
     143                 :            :  * a race there is benign: unlock_buffer() only use the bh's address for
     144                 :            :  * hashing after unlocking the buffer, so it doesn't actually touch the bh
     145                 :            :  * itself.
     146                 :            :  */
     147                 :          3 : static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
     148                 :            : {
     149                 :          3 :         if (uptodate) {
     150                 :            :                 set_buffer_uptodate(bh);
     151                 :            :         } else {
     152                 :            :                 /* This happens, due to failed read-ahead attempts. */
     153                 :            :                 clear_buffer_uptodate(bh);
     154                 :            :         }
     155                 :          3 :         unlock_buffer(bh);
     156                 :          3 : }
     157                 :            : 
     158                 :            : /*
     159                 :            :  * Default synchronous end-of-IO handler..  Just mark it up-to-date and
     160                 :            :  * unlock the buffer. This is what ll_rw_block uses too.
     161                 :            :  */
     162                 :          3 : void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
     163                 :            : {
     164                 :          3 :         __end_buffer_read_notouch(bh, uptodate);
     165                 :          3 :         put_bh(bh);
     166                 :          3 : }
     167                 :            : EXPORT_SYMBOL(end_buffer_read_sync);
     168                 :            : 
     169                 :          3 : void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
     170                 :            : {
     171                 :          3 :         if (uptodate) {
     172                 :            :                 set_buffer_uptodate(bh);
     173                 :            :         } else {
     174                 :          0 :                 buffer_io_error(bh, ", lost sync page write");
     175                 :          0 :                 mark_buffer_write_io_error(bh);
     176                 :            :                 clear_buffer_uptodate(bh);
     177                 :            :         }
     178                 :          3 :         unlock_buffer(bh);
     179                 :          3 :         put_bh(bh);
     180                 :          3 : }
     181                 :            : EXPORT_SYMBOL(end_buffer_write_sync);
     182                 :            : 
     183                 :            : /*
     184                 :            :  * Various filesystems appear to want __find_get_block to be non-blocking.
     185                 :            :  * But it's the page lock which protects the buffers.  To get around this,
     186                 :            :  * we get exclusion from try_to_free_buffers with the blockdev mapping's
     187                 :            :  * private_lock.
     188                 :            :  *
     189                 :            :  * Hack idea: for the blockdev mapping, private_lock contention
     190                 :            :  * may be quite high.  This code could TryLock the page, and if that
     191                 :            :  * succeeds, there is no need to take private_lock.
     192                 :            :  */
     193                 :            : static struct buffer_head *
     194                 :          3 : __find_get_block_slow(struct block_device *bdev, sector_t block)
     195                 :            : {
     196                 :          3 :         struct inode *bd_inode = bdev->bd_inode;
     197                 :          3 :         struct address_space *bd_mapping = bd_inode->i_mapping;
     198                 :            :         struct buffer_head *ret = NULL;
     199                 :            :         pgoff_t index;
     200                 :            :         struct buffer_head *bh;
     201                 :            :         struct buffer_head *head;
     202                 :            :         struct page *page;
     203                 :            :         int all_mapped = 1;
     204                 :            :         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
     205                 :            : 
     206                 :          3 :         index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
     207                 :            :         page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
     208                 :          3 :         if (!page)
     209                 :            :                 goto out;
     210                 :            : 
     211                 :            :         spin_lock(&bd_mapping->private_lock);
     212                 :          3 :         if (!page_has_buffers(page))
     213                 :            :                 goto out_unlock;
     214                 :          3 :         head = page_buffers(page);
     215                 :            :         bh = head;
     216                 :            :         do {
     217                 :          3 :                 if (!buffer_mapped(bh))
     218                 :            :                         all_mapped = 0;
     219                 :          3 :                 else if (bh->b_blocknr == block) {
     220                 :          3 :                         ret = bh;
     221                 :            :                         get_bh(bh);
     222                 :            :                         goto out_unlock;
     223                 :            :                 }
     224                 :          3 :                 bh = bh->b_this_page;
     225                 :          3 :         } while (bh != head);
     226                 :            : 
     227                 :            :         /* we might be here because some of the buffers on this page are
     228                 :            :          * not mapped.  This is due to various races between
     229                 :            :          * file io on the block device and getblk.  It gets dealt with
     230                 :            :          * elsewhere, don't buffer_error if we had some unmapped buffers
     231                 :            :          */
     232                 :            :         ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
     233                 :          0 :         if (all_mapped && __ratelimit(&last_warned)) {
     234                 :          0 :                 printk("__find_get_block_slow() failed. block=%llu, "
     235                 :            :                        "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
     236                 :            :                        "device %pg blocksize: %d\n",
     237                 :            :                        (unsigned long long)block,
     238                 :            :                        (unsigned long long)bh->b_blocknr,
     239                 :            :                        bh->b_state, bh->b_size, bdev,
     240                 :          0 :                        1 << bd_inode->i_blkbits);
     241                 :            :         }
     242                 :            : out_unlock:
     243                 :            :         spin_unlock(&bd_mapping->private_lock);
     244                 :          3 :         put_page(page);
     245                 :            : out:
     246                 :          3 :         return ret;
     247                 :            : }
     248                 :            : 
     249                 :            : /*
     250                 :            :  * I/O completion handler for block_read_full_page() - pages
     251                 :            :  * which come unlocked at the end of I/O.
     252                 :            :  */
     253                 :          3 : static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
     254                 :            : {
     255                 :            :         unsigned long flags;
     256                 :            :         struct buffer_head *first;
     257                 :            :         struct buffer_head *tmp;
     258                 :            :         struct page *page;
     259                 :            :         int page_uptodate = 1;
     260                 :            : 
     261                 :          3 :         BUG_ON(!buffer_async_read(bh));
     262                 :            : 
     263                 :          3 :         page = bh->b_page;
     264                 :          3 :         if (uptodate) {
     265                 :            :                 set_buffer_uptodate(bh);
     266                 :            :         } else {
     267                 :            :                 clear_buffer_uptodate(bh);
     268                 :          0 :                 buffer_io_error(bh, ", async page read");
     269                 :            :                 SetPageError(page);
     270                 :            :         }
     271                 :            : 
     272                 :            :         /*
     273                 :            :          * Be _very_ careful from here on. Bad things can happen if
     274                 :            :          * two buffer heads end IO at almost the same time and both
     275                 :            :          * decide that the page is now completely done.
     276                 :            :          */
     277                 :          3 :         first = page_buffers(page);
     278                 :          3 :         local_irq_save(flags);
     279                 :          3 :         bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
     280                 :            :         clear_buffer_async_read(bh);
     281                 :          3 :         unlock_buffer(bh);
     282                 :            :         tmp = bh;
     283                 :            :         do {
     284                 :          3 :                 if (!buffer_uptodate(tmp))
     285                 :            :                         page_uptodate = 0;
     286                 :          3 :                 if (buffer_async_read(tmp)) {
     287                 :          2 :                         BUG_ON(!buffer_locked(tmp));
     288                 :            :                         goto still_busy;
     289                 :            :                 }
     290                 :          3 :                 tmp = tmp->b_this_page;
     291                 :          3 :         } while (tmp != bh);
     292                 :            :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     293                 :          3 :         local_irq_restore(flags);
     294                 :            : 
     295                 :            :         /*
     296                 :            :          * If none of the buffers had errors and they are all
     297                 :            :          * uptodate then we can set the page uptodate.
     298                 :            :          */
     299                 :          3 :         if (page_uptodate && !PageError(page))
     300                 :            :                 SetPageUptodate(page);
     301                 :          3 :         unlock_page(page);
     302                 :          3 :         return;
     303                 :            : 
     304                 :            : still_busy:
     305                 :            :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     306                 :          2 :         local_irq_restore(flags);
     307                 :            :         return;
     308                 :            : }
     309                 :            : 
     310                 :            : /*
     311                 :            :  * Completion handler for block_write_full_page() - pages which are unlocked
     312                 :            :  * during I/O, and which have PageWriteback cleared upon I/O completion.
     313                 :            :  */
     314                 :          3 : void end_buffer_async_write(struct buffer_head *bh, int uptodate)
     315                 :            : {
     316                 :            :         unsigned long flags;
     317                 :            :         struct buffer_head *first;
     318                 :            :         struct buffer_head *tmp;
     319                 :            :         struct page *page;
     320                 :            : 
     321                 :          3 :         BUG_ON(!buffer_async_write(bh));
     322                 :            : 
     323                 :          3 :         page = bh->b_page;
     324                 :          3 :         if (uptodate) {
     325                 :            :                 set_buffer_uptodate(bh);
     326                 :            :         } else {
     327                 :          0 :                 buffer_io_error(bh, ", lost async page write");
     328                 :          0 :                 mark_buffer_write_io_error(bh);
     329                 :            :                 clear_buffer_uptodate(bh);
     330                 :            :                 SetPageError(page);
     331                 :            :         }
     332                 :            : 
     333                 :          3 :         first = page_buffers(page);
     334                 :          3 :         local_irq_save(flags);
     335                 :          3 :         bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
     336                 :            : 
     337                 :            :         clear_buffer_async_write(bh);
     338                 :          3 :         unlock_buffer(bh);
     339                 :          3 :         tmp = bh->b_this_page;
     340                 :          3 :         while (tmp != bh) {
     341                 :          3 :                 if (buffer_async_write(tmp)) {
     342                 :          0 :                         BUG_ON(!buffer_locked(tmp));
     343                 :            :                         goto still_busy;
     344                 :            :                 }
     345                 :          3 :                 tmp = tmp->b_this_page;
     346                 :            :         }
     347                 :            :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     348                 :          3 :         local_irq_restore(flags);
     349                 :          3 :         end_page_writeback(page);
     350                 :          3 :         return;
     351                 :            : 
     352                 :            : still_busy:
     353                 :            :         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
     354                 :          0 :         local_irq_restore(flags);
     355                 :            :         return;
     356                 :            : }
     357                 :            : EXPORT_SYMBOL(end_buffer_async_write);
     358                 :            : 
     359                 :            : /*
     360                 :            :  * If a page's buffers are under async readin (end_buffer_async_read
     361                 :            :  * completion) then there is a possibility that another thread of
     362                 :            :  * control could lock one of the buffers after it has completed
     363                 :            :  * but while some of the other buffers have not completed.  This
     364                 :            :  * locked buffer would confuse end_buffer_async_read() into not unlocking
     365                 :            :  * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
     366                 :            :  * that this buffer is not under async I/O.
     367                 :            :  *
     368                 :            :  * The page comes unlocked when it has no locked buffer_async buffers
     369                 :            :  * left.
     370                 :            :  *
     371                 :            :  * PageLocked prevents anyone starting new async I/O reads any of
     372                 :            :  * the buffers.
     373                 :            :  *
     374                 :            :  * PageWriteback is used to prevent simultaneous writeout of the same
     375                 :            :  * page.
     376                 :            :  *
     377                 :            :  * PageLocked prevents anyone from starting writeback of a page which is
     378                 :            :  * under read I/O (PageWriteback is only ever set against a locked page).
     379                 :            :  */
     380                 :          3 : static void mark_buffer_async_read(struct buffer_head *bh)
     381                 :            : {
     382                 :          3 :         bh->b_end_io = end_buffer_async_read;
     383                 :            :         set_buffer_async_read(bh);
     384                 :          3 : }
     385                 :            : 
     386                 :          3 : static void mark_buffer_async_write_endio(struct buffer_head *bh,
     387                 :            :                                           bh_end_io_t *handler)
     388                 :            : {
     389                 :          3 :         bh->b_end_io = handler;
     390                 :            :         set_buffer_async_write(bh);
     391                 :          3 : }
     392                 :            : 
     393                 :          0 : void mark_buffer_async_write(struct buffer_head *bh)
     394                 :            : {
     395                 :          0 :         mark_buffer_async_write_endio(bh, end_buffer_async_write);
     396                 :          0 : }
     397                 :            : EXPORT_SYMBOL(mark_buffer_async_write);
     398                 :            : 
     399                 :            : 
     400                 :            : /*
     401                 :            :  * fs/buffer.c contains helper functions for buffer-backed address space's
     402                 :            :  * fsync functions.  A common requirement for buffer-based filesystems is
     403                 :            :  * that certain data from the backing blockdev needs to be written out for
     404                 :            :  * a successful fsync().  For example, ext2 indirect blocks need to be
     405                 :            :  * written back and waited upon before fsync() returns.
     406                 :            :  *
     407                 :            :  * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
     408                 :            :  * inode_has_buffers() and invalidate_inode_buffers() are provided for the
     409                 :            :  * management of a list of dependent buffers at ->i_mapping->private_list.
     410                 :            :  *
     411                 :            :  * Locking is a little subtle: try_to_free_buffers() will remove buffers
     412                 :            :  * from their controlling inode's queue when they are being freed.  But
     413                 :            :  * try_to_free_buffers() will be operating against the *blockdev* mapping
     414                 :            :  * at the time, not against the S_ISREG file which depends on those buffers.
     415                 :            :  * So the locking for private_list is via the private_lock in the address_space
     416                 :            :  * which backs the buffers.  Which is different from the address_space 
     417                 :            :  * against which the buffers are listed.  So for a particular address_space,
     418                 :            :  * mapping->private_lock does *not* protect mapping->private_list!  In fact,
     419                 :            :  * mapping->private_list will always be protected by the backing blockdev's
     420                 :            :  * ->private_lock.
     421                 :            :  *
     422                 :            :  * Which introduces a requirement: all buffers on an address_space's
     423                 :            :  * ->private_list must be from the same address_space: the blockdev's.
     424                 :            :  *
     425                 :            :  * address_spaces which do not place buffers at ->private_list via these
     426                 :            :  * utility functions are free to use private_lock and private_list for
     427                 :            :  * whatever they want.  The only requirement is that list_empty(private_list)
     428                 :            :  * be true at clear_inode() time.
     429                 :            :  *
     430                 :            :  * FIXME: clear_inode should not call invalidate_inode_buffers().  The
     431                 :            :  * filesystems should do that.  invalidate_inode_buffers() should just go
     432                 :            :  * BUG_ON(!list_empty).
     433                 :            :  *
     434                 :            :  * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
     435                 :            :  * take an address_space, not an inode.  And it should be called
     436                 :            :  * mark_buffer_dirty_fsync() to clearly define why those buffers are being
     437                 :            :  * queued up.
     438                 :            :  *
     439                 :            :  * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
     440                 :            :  * list if it is already on a list.  Because if the buffer is on a list,
     441                 :            :  * it *must* already be on the right one.  If not, the filesystem is being
     442                 :            :  * silly.  This will save a ton of locking.  But first we have to ensure
     443                 :            :  * that buffers are taken *off* the old inode's list when they are freed
     444                 :            :  * (presumably in truncate).  That requires careful auditing of all
     445                 :            :  * filesystems (do it inside bforget()).  It could also be done by bringing
     446                 :            :  * b_inode back.
     447                 :            :  */
     448                 :            : 
     449                 :            : /*
     450                 :            :  * The buffer's backing address_space's private_lock must be held
     451                 :            :  */
     452                 :          0 : static void __remove_assoc_queue(struct buffer_head *bh)
     453                 :            : {
     454                 :          0 :         list_del_init(&bh->b_assoc_buffers);
     455                 :          0 :         WARN_ON(!bh->b_assoc_map);
     456                 :          0 :         bh->b_assoc_map = NULL;
     457                 :          0 : }
     458                 :            : 
     459                 :          3 : int inode_has_buffers(struct inode *inode)
     460                 :            : {
     461                 :          3 :         return !list_empty(&inode->i_data.private_list);
     462                 :            : }
     463                 :            : 
     464                 :            : /*
     465                 :            :  * osync is designed to support O_SYNC io.  It waits synchronously for
     466                 :            :  * all already-submitted IO to complete, but does not queue any new
     467                 :            :  * writes to the disk.
     468                 :            :  *
     469                 :            :  * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
     470                 :            :  * you dirty the buffers, and then use osync_inode_buffers to wait for
     471                 :            :  * completion.  Any other dirty buffers which are not yet queued for
     472                 :            :  * write will not be flushed to disk by the osync.
     473                 :            :  */
     474                 :          0 : static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
     475                 :            : {
     476                 :            :         struct buffer_head *bh;
     477                 :            :         struct list_head *p;
     478                 :            :         int err = 0;
     479                 :            : 
     480                 :            :         spin_lock(lock);
     481                 :            : repeat:
     482                 :          0 :         list_for_each_prev(p, list) {
     483                 :          0 :                 bh = BH_ENTRY(p);
     484                 :          0 :                 if (buffer_locked(bh)) {
     485                 :            :                         get_bh(bh);
     486                 :            :                         spin_unlock(lock);
     487                 :          0 :                         wait_on_buffer(bh);
     488                 :          0 :                         if (!buffer_uptodate(bh))
     489                 :            :                                 err = -EIO;
     490                 :            :                         brelse(bh);
     491                 :            :                         spin_lock(lock);
     492                 :            :                         goto repeat;
     493                 :            :                 }
     494                 :            :         }
     495                 :            :         spin_unlock(lock);
     496                 :          0 :         return err;
     497                 :            : }
     498                 :            : 
     499                 :          0 : void emergency_thaw_bdev(struct super_block *sb)
     500                 :            : {
     501                 :          0 :         while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
     502                 :          0 :                 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
     503                 :          0 : }
     504                 :            : 
     505                 :            : /**
     506                 :            :  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
     507                 :            :  * @mapping: the mapping which wants those buffers written
     508                 :            :  *
     509                 :            :  * Starts I/O against the buffers at mapping->private_list, and waits upon
     510                 :            :  * that I/O.
     511                 :            :  *
     512                 :            :  * Basically, this is a convenience function for fsync().
     513                 :            :  * @mapping is a file or directory which needs those buffers to be written for
     514                 :            :  * a successful fsync().
     515                 :            :  */
     516                 :          0 : int sync_mapping_buffers(struct address_space *mapping)
     517                 :            : {
     518                 :          0 :         struct address_space *buffer_mapping = mapping->private_data;
     519                 :            : 
     520                 :          0 :         if (buffer_mapping == NULL || list_empty(&mapping->private_list))
     521                 :            :                 return 0;
     522                 :            : 
     523                 :          0 :         return fsync_buffers_list(&buffer_mapping->private_lock,
     524                 :            :                                         &mapping->private_list);
     525                 :            : }
     526                 :            : EXPORT_SYMBOL(sync_mapping_buffers);
     527                 :            : 
     528                 :            : /*
     529                 :            :  * Called when we've recently written block `bblock', and it is known that
     530                 :            :  * `bblock' was for a buffer_boundary() buffer.  This means that the block at
     531                 :            :  * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
     532                 :            :  * dirty, schedule it for IO.  So that indirects merge nicely with their data.
     533                 :            :  */
     534                 :          0 : void write_boundary_block(struct block_device *bdev,
     535                 :            :                         sector_t bblock, unsigned blocksize)
     536                 :            : {
     537                 :          0 :         struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
     538                 :          0 :         if (bh) {
     539                 :          0 :                 if (buffer_dirty(bh))
     540                 :          0 :                         ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
     541                 :          0 :                 put_bh(bh);
     542                 :            :         }
     543                 :          0 : }
     544                 :            : 
     545                 :          0 : void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
     546                 :            : {
     547                 :          0 :         struct address_space *mapping = inode->i_mapping;
     548                 :          0 :         struct address_space *buffer_mapping = bh->b_page->mapping;
     549                 :            : 
     550                 :          0 :         mark_buffer_dirty(bh);
     551                 :          0 :         if (!mapping->private_data) {
     552                 :          0 :                 mapping->private_data = buffer_mapping;
     553                 :            :         } else {
     554                 :          0 :                 BUG_ON(mapping->private_data != buffer_mapping);
     555                 :            :         }
     556                 :          0 :         if (!bh->b_assoc_map) {
     557                 :            :                 spin_lock(&buffer_mapping->private_lock);
     558                 :          0 :                 list_move_tail(&bh->b_assoc_buffers,
     559                 :            :                                 &mapping->private_list);
     560                 :          0 :                 bh->b_assoc_map = mapping;
     561                 :            :                 spin_unlock(&buffer_mapping->private_lock);
     562                 :            :         }
     563                 :          0 : }
     564                 :            : EXPORT_SYMBOL(mark_buffer_dirty_inode);
     565                 :            : 
     566                 :            : /*
     567                 :            :  * Mark the page dirty, and set it dirty in the page cache, and mark the inode
     568                 :            :  * dirty.
     569                 :            :  *
     570                 :            :  * If warn is true, then emit a warning if the page is not uptodate and has
     571                 :            :  * not been truncated.
     572                 :            :  *
     573                 :            :  * The caller must hold lock_page_memcg().
     574                 :            :  */
     575                 :          3 : void __set_page_dirty(struct page *page, struct address_space *mapping,
     576                 :            :                              int warn)
     577                 :            : {
     578                 :            :         unsigned long flags;
     579                 :            : 
     580                 :          3 :         xa_lock_irqsave(&mapping->i_pages, flags);
     581                 :          3 :         if (page->mapping) { /* Race with truncate? */
     582                 :          3 :                 WARN_ON_ONCE(warn && !PageUptodate(page));
     583                 :          3 :                 account_page_dirtied(page, mapping);
     584                 :          3 :                 __xa_set_mark(&mapping->i_pages, page_index(page),
     585                 :            :                                 PAGECACHE_TAG_DIRTY);
     586                 :            :         }
     587                 :            :         xa_unlock_irqrestore(&mapping->i_pages, flags);
     588                 :          3 : }
     589                 :            : EXPORT_SYMBOL_GPL(__set_page_dirty);
     590                 :            : 
     591                 :            : /*
     592                 :            :  * Add a page to the dirty page list.
     593                 :            :  *
     594                 :            :  * It is a sad fact of life that this function is called from several places
     595                 :            :  * deeply under spinlocking.  It may not sleep.
     596                 :            :  *
     597                 :            :  * If the page has buffers, the uptodate buffers are set dirty, to preserve
     598                 :            :  * dirty-state coherency between the page and the buffers.  It the page does
     599                 :            :  * not have buffers then when they are later attached they will all be set
     600                 :            :  * dirty.
     601                 :            :  *
     602                 :            :  * The buffers are dirtied before the page is dirtied.  There's a small race
     603                 :            :  * window in which a writepage caller may see the page cleanness but not the
     604                 :            :  * buffer dirtiness.  That's fine.  If this code were to set the page dirty
     605                 :            :  * before the buffers, a concurrent writepage caller could clear the page dirty
     606                 :            :  * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
     607                 :            :  * page on the dirty page list.
     608                 :            :  *
     609                 :            :  * We use private_lock to lock against try_to_free_buffers while using the
     610                 :            :  * page's buffer list.  Also use this to protect against clean buffers being
     611                 :            :  * added to the page after it was set dirty.
     612                 :            :  *
     613                 :            :  * FIXME: may need to call ->reservepage here as well.  That's rather up to the
     614                 :            :  * address_space though.
     615                 :            :  */
     616                 :          3 : int __set_page_dirty_buffers(struct page *page)
     617                 :            : {
     618                 :            :         int newly_dirty;
     619                 :          3 :         struct address_space *mapping = page_mapping(page);
     620                 :            : 
     621                 :          3 :         if (unlikely(!mapping))
     622                 :          0 :                 return !TestSetPageDirty(page);
     623                 :            : 
     624                 :            :         spin_lock(&mapping->private_lock);
     625                 :          3 :         if (page_has_buffers(page)) {
     626                 :          3 :                 struct buffer_head *head = page_buffers(page);
     627                 :            :                 struct buffer_head *bh = head;
     628                 :            : 
     629                 :            :                 do {
     630                 :            :                         set_buffer_dirty(bh);
     631                 :          3 :                         bh = bh->b_this_page;
     632                 :          3 :                 } while (bh != head);
     633                 :            :         }
     634                 :            :         /*
     635                 :            :          * Lock out page->mem_cgroup migration to keep PageDirty
     636                 :            :          * synchronized with per-memcg dirty page counters.
     637                 :            :          */
     638                 :          3 :         lock_page_memcg(page);
     639                 :          3 :         newly_dirty = !TestSetPageDirty(page);
     640                 :            :         spin_unlock(&mapping->private_lock);
     641                 :            : 
     642                 :          3 :         if (newly_dirty)
     643                 :          0 :                 __set_page_dirty(page, mapping, 1);
     644                 :            : 
     645                 :          3 :         unlock_page_memcg(page);
     646                 :            : 
     647                 :          3 :         if (newly_dirty)
     648                 :          0 :                 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
     649                 :            : 
     650                 :          3 :         return newly_dirty;
     651                 :            : }
     652                 :            : EXPORT_SYMBOL(__set_page_dirty_buffers);
     653                 :            : 
     654                 :            : /*
     655                 :            :  * Write out and wait upon a list of buffers.
     656                 :            :  *
     657                 :            :  * We have conflicting pressures: we want to make sure that all
     658                 :            :  * initially dirty buffers get waited on, but that any subsequently
     659                 :            :  * dirtied buffers don't.  After all, we don't want fsync to last
     660                 :            :  * forever if somebody is actively writing to the file.
     661                 :            :  *
     662                 :            :  * Do this in two main stages: first we copy dirty buffers to a
     663                 :            :  * temporary inode list, queueing the writes as we go.  Then we clean
     664                 :            :  * up, waiting for those writes to complete.
     665                 :            :  * 
     666                 :            :  * During this second stage, any subsequent updates to the file may end
     667                 :            :  * up refiling the buffer on the original inode's dirty list again, so
     668                 :            :  * there is a chance we will end up with a buffer queued for write but
     669                 :            :  * not yet completed on that list.  So, as a final cleanup we go through
     670                 :            :  * the osync code to catch these locked, dirty buffers without requeuing
     671                 :            :  * any newly dirty buffers for write.
     672                 :            :  */
     673                 :          0 : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
     674                 :            : {
     675                 :            :         struct buffer_head *bh;
     676                 :            :         struct list_head tmp;
     677                 :            :         struct address_space *mapping;
     678                 :            :         int err = 0, err2;
     679                 :            :         struct blk_plug plug;
     680                 :            : 
     681                 :            :         INIT_LIST_HEAD(&tmp);
     682                 :          0 :         blk_start_plug(&plug);
     683                 :            : 
     684                 :            :         spin_lock(lock);
     685                 :          0 :         while (!list_empty(list)) {
     686                 :          0 :                 bh = BH_ENTRY(list->next);
     687                 :          0 :                 mapping = bh->b_assoc_map;
     688                 :          0 :                 __remove_assoc_queue(bh);
     689                 :            :                 /* Avoid race with mark_buffer_dirty_inode() which does
     690                 :            :                  * a lockless check and we rely on seeing the dirty bit */
     691                 :          0 :                 smp_mb();
     692                 :          0 :                 if (buffer_dirty(bh) || buffer_locked(bh)) {
     693                 :          0 :                         list_add(&bh->b_assoc_buffers, &tmp);
     694                 :          0 :                         bh->b_assoc_map = mapping;
     695                 :          0 :                         if (buffer_dirty(bh)) {
     696                 :            :                                 get_bh(bh);
     697                 :            :                                 spin_unlock(lock);
     698                 :            :                                 /*
     699                 :            :                                  * Ensure any pending I/O completes so that
     700                 :            :                                  * write_dirty_buffer() actually writes the
     701                 :            :                                  * current contents - it is a noop if I/O is
     702                 :            :                                  * still in flight on potentially older
     703                 :            :                                  * contents.
     704                 :            :                                  */
     705                 :          0 :                                 write_dirty_buffer(bh, REQ_SYNC);
     706                 :            : 
     707                 :            :                                 /*
     708                 :            :                                  * Kick off IO for the previous mapping. Note
     709                 :            :                                  * that we will not run the very last mapping,
     710                 :            :                                  * wait_on_buffer() will do that for us
     711                 :            :                                  * through sync_buffer().
     712                 :            :                                  */
     713                 :            :                                 brelse(bh);
     714                 :            :                                 spin_lock(lock);
     715                 :            :                         }
     716                 :            :                 }
     717                 :            :         }
     718                 :            : 
     719                 :            :         spin_unlock(lock);
     720                 :          0 :         blk_finish_plug(&plug);
     721                 :            :         spin_lock(lock);
     722                 :            : 
     723                 :          0 :         while (!list_empty(&tmp)) {
     724                 :          0 :                 bh = BH_ENTRY(tmp.prev);
     725                 :            :                 get_bh(bh);
     726                 :          0 :                 mapping = bh->b_assoc_map;
     727                 :          0 :                 __remove_assoc_queue(bh);
     728                 :            :                 /* Avoid race with mark_buffer_dirty_inode() which does
     729                 :            :                  * a lockless check and we rely on seeing the dirty bit */
     730                 :          0 :                 smp_mb();
     731                 :          0 :                 if (buffer_dirty(bh)) {
     732                 :          0 :                         list_add(&bh->b_assoc_buffers,
     733                 :            :                                  &mapping->private_list);
     734                 :          0 :                         bh->b_assoc_map = mapping;
     735                 :            :                 }
     736                 :            :                 spin_unlock(lock);
     737                 :          0 :                 wait_on_buffer(bh);
     738                 :          0 :                 if (!buffer_uptodate(bh))
     739                 :            :                         err = -EIO;
     740                 :            :                 brelse(bh);
     741                 :            :                 spin_lock(lock);
     742                 :            :         }
     743                 :            :         
     744                 :            :         spin_unlock(lock);
     745                 :          0 :         err2 = osync_buffers_list(lock, list);
     746                 :          0 :         if (err)
     747                 :          0 :                 return err;
     748                 :            :         else
     749                 :            :                 return err2;
     750                 :            : }
     751                 :            : 
     752                 :            : /*
     753                 :            :  * Invalidate any and all dirty buffers on a given inode.  We are
     754                 :            :  * probably unmounting the fs, but that doesn't mean we have already
     755                 :            :  * done a sync().  Just drop the buffers from the inode list.
     756                 :            :  *
     757                 :            :  * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
     758                 :            :  * assumes that all the buffers are against the blockdev.  Not true
     759                 :            :  * for reiserfs.
     760                 :            :  */
     761                 :          3 : void invalidate_inode_buffers(struct inode *inode)
     762                 :            : {
     763                 :          3 :         if (inode_has_buffers(inode)) {
     764                 :            :                 struct address_space *mapping = &inode->i_data;
     765                 :            :                 struct list_head *list = &mapping->private_list;
     766                 :          0 :                 struct address_space *buffer_mapping = mapping->private_data;
     767                 :            : 
     768                 :            :                 spin_lock(&buffer_mapping->private_lock);
     769                 :          0 :                 while (!list_empty(list))
     770                 :          0 :                         __remove_assoc_queue(BH_ENTRY(list->next));
     771                 :            :                 spin_unlock(&buffer_mapping->private_lock);
     772                 :            :         }
     773                 :          3 : }
     774                 :            : EXPORT_SYMBOL(invalidate_inode_buffers);
     775                 :            : 
     776                 :            : /*
     777                 :            :  * Remove any clean buffers from the inode's buffer list.  This is called
     778                 :            :  * when we're trying to free the inode itself.  Those buffers can pin it.
     779                 :            :  *
     780                 :            :  * Returns true if all buffers were removed.
     781                 :            :  */
     782                 :          0 : int remove_inode_buffers(struct inode *inode)
     783                 :            : {
     784                 :            :         int ret = 1;
     785                 :            : 
     786                 :          0 :         if (inode_has_buffers(inode)) {
     787                 :            :                 struct address_space *mapping = &inode->i_data;
     788                 :            :                 struct list_head *list = &mapping->private_list;
     789                 :          0 :                 struct address_space *buffer_mapping = mapping->private_data;
     790                 :            : 
     791                 :            :                 spin_lock(&buffer_mapping->private_lock);
     792                 :          0 :                 while (!list_empty(list)) {
     793                 :          0 :                         struct buffer_head *bh = BH_ENTRY(list->next);
     794                 :          0 :                         if (buffer_dirty(bh)) {
     795                 :            :                                 ret = 0;
     796                 :            :                                 break;
     797                 :            :                         }
     798                 :          0 :                         __remove_assoc_queue(bh);
     799                 :            :                 }
     800                 :            :                 spin_unlock(&buffer_mapping->private_lock);
     801                 :            :         }
     802                 :          0 :         return ret;
     803                 :            : }
     804                 :            : 
     805                 :            : /*
     806                 :            :  * Create the appropriate buffers when given a page for data area and
     807                 :            :  * the size of each buffer.. Use the bh->b_this_page linked list to
     808                 :            :  * follow the buffers created.  Return NULL if unable to create more
     809                 :            :  * buffers.
     810                 :            :  *
     811                 :            :  * The retry flag is used to differentiate async IO (paging, swapping)
     812                 :            :  * which may not fail from ordinary buffer allocations.
     813                 :            :  */
     814                 :          3 : struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
     815                 :            :                 bool retry)
     816                 :            : {
     817                 :            :         struct buffer_head *bh, *head;
     818                 :            :         gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
     819                 :            :         long offset;
     820                 :            :         struct mem_cgroup *memcg;
     821                 :            : 
     822                 :          3 :         if (retry)
     823                 :            :                 gfp |= __GFP_NOFAIL;
     824                 :            : 
     825                 :          3 :         memcg = get_mem_cgroup_from_page(page);
     826                 :          3 :         memalloc_use_memcg(memcg);
     827                 :            : 
     828                 :            :         head = NULL;
     829                 :            :         offset = PAGE_SIZE;
     830                 :          3 :         while ((offset -= size) >= 0) {
     831                 :          3 :                 bh = alloc_buffer_head(gfp);
     832                 :          3 :                 if (!bh)
     833                 :            :                         goto no_grow;
     834                 :            : 
     835                 :          3 :                 bh->b_this_page = head;
     836                 :          3 :                 bh->b_blocknr = -1;
     837                 :            :                 head = bh;
     838                 :            : 
     839                 :          3 :                 bh->b_size = size;
     840                 :            : 
     841                 :            :                 /* Link the buffer to its page */
     842                 :          3 :                 set_bh_page(bh, page, offset);
     843                 :            :         }
     844                 :            : out:
     845                 :            :         memalloc_unuse_memcg();
     846                 :          3 :         mem_cgroup_put(memcg);
     847                 :          3 :         return head;
     848                 :            : /*
     849                 :            :  * In case anything failed, we just free everything we got.
     850                 :            :  */
     851                 :            : no_grow:
     852                 :          0 :         if (head) {
     853                 :            :                 do {
     854                 :            :                         bh = head;
     855                 :          0 :                         head = head->b_this_page;
     856                 :          0 :                         free_buffer_head(bh);
     857                 :          0 :                 } while (head);
     858                 :            :         }
     859                 :            : 
     860                 :            :         goto out;
     861                 :            : }
     862                 :            : EXPORT_SYMBOL_GPL(alloc_page_buffers);
     863                 :            : 
     864                 :            : static inline void
     865                 :          3 : link_dev_buffers(struct page *page, struct buffer_head *head)
     866                 :            : {
     867                 :            :         struct buffer_head *bh, *tail;
     868                 :            : 
     869                 :            :         bh = head;
     870                 :            :         do {
     871                 :            :                 tail = bh;
     872                 :          3 :                 bh = bh->b_this_page;
     873                 :          3 :         } while (bh);
     874                 :          3 :         tail->b_this_page = head;
     875                 :            :         attach_page_buffers(page, head);
     876                 :          3 : }
     877                 :            : 
     878                 :          3 : static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
     879                 :            : {
     880                 :            :         sector_t retval = ~((sector_t)0);
     881                 :          3 :         loff_t sz = i_size_read(bdev->bd_inode);
     882                 :            : 
     883                 :          3 :         if (sz) {
     884                 :            :                 unsigned int sizebits = blksize_bits(size);
     885                 :          3 :                 retval = (sz >> sizebits);
     886                 :            :         }
     887                 :          3 :         return retval;
     888                 :            : }
     889                 :            : 
     890                 :            : /*
     891                 :            :  * Initialise the state of a blockdev page's buffers.
     892                 :            :  */ 
     893                 :            : static sector_t
     894                 :          3 : init_page_buffers(struct page *page, struct block_device *bdev,
     895                 :            :                         sector_t block, int size)
     896                 :            : {
     897                 :          3 :         struct buffer_head *head = page_buffers(page);
     898                 :            :         struct buffer_head *bh = head;
     899                 :            :         int uptodate = PageUptodate(page);
     900                 :          3 :         sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
     901                 :            : 
     902                 :            :         do {
     903                 :          3 :                 if (!buffer_mapped(bh)) {
     904                 :          3 :                         bh->b_end_io = NULL;
     905                 :          3 :                         bh->b_private = NULL;
     906                 :          3 :                         bh->b_bdev = bdev;
     907                 :          3 :                         bh->b_blocknr = block;
     908                 :          3 :                         if (uptodate)
     909                 :            :                                 set_buffer_uptodate(bh);
     910                 :          3 :                         if (block < end_block)
     911                 :            :                                 set_buffer_mapped(bh);
     912                 :            :                 }
     913                 :          3 :                 block++;
     914                 :          3 :                 bh = bh->b_this_page;
     915                 :          3 :         } while (bh != head);
     916                 :            : 
     917                 :            :         /*
     918                 :            :          * Caller needs to validate requested block against end of device.
     919                 :            :          */
     920                 :          3 :         return end_block;
     921                 :            : }
     922                 :            : 
     923                 :            : /*
     924                 :            :  * Create the page-cache page that contains the requested block.
     925                 :            :  *
     926                 :            :  * This is used purely for blockdev mappings.
     927                 :            :  */
     928                 :            : static int
     929                 :          3 : grow_dev_page(struct block_device *bdev, sector_t block,
     930                 :            :               pgoff_t index, int size, int sizebits, gfp_t gfp)
     931                 :            : {
     932                 :          3 :         struct inode *inode = bdev->bd_inode;
     933                 :            :         struct page *page;
     934                 :            :         struct buffer_head *bh;
     935                 :            :         sector_t end_block;
     936                 :            :         int ret = 0;            /* Will call free_more_memory() */
     937                 :            :         gfp_t gfp_mask;
     938                 :            : 
     939                 :          3 :         gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
     940                 :            : 
     941                 :            :         /*
     942                 :            :          * XXX: __getblk_slow() can not really deal with failure and
     943                 :            :          * will endlessly loop on improvised global reclaim.  Prefer
     944                 :            :          * looping in the allocator rather than here, at least that
     945                 :            :          * code knows what it's doing.
     946                 :            :          */
     947                 :          3 :         gfp_mask |= __GFP_NOFAIL;
     948                 :            : 
     949                 :            :         page = find_or_create_page(inode->i_mapping, index, gfp_mask);
     950                 :            : 
     951                 :          3 :         BUG_ON(!PageLocked(page));
     952                 :            : 
     953                 :          3 :         if (page_has_buffers(page)) {
     954                 :          3 :                 bh = page_buffers(page);
     955                 :          3 :                 if (bh->b_size == size) {
     956                 :          3 :                         end_block = init_page_buffers(page, bdev,
     957                 :          3 :                                                 (sector_t)index << sizebits,
     958                 :            :                                                 size);
     959                 :          3 :                         goto done;
     960                 :            :                 }
     961                 :          0 :                 if (!try_to_free_buffers(page))
     962                 :            :                         goto failed;
     963                 :            :         }
     964                 :            : 
     965                 :            :         /*
     966                 :            :          * Allocate some buffers for this page
     967                 :            :          */
     968                 :          3 :         bh = alloc_page_buffers(page, size, true);
     969                 :            : 
     970                 :            :         /*
     971                 :            :          * Link the page to the buffers and initialise them.  Take the
     972                 :            :          * lock to be atomic wrt __find_get_block(), which does not
     973                 :            :          * run under the page lock.
     974                 :            :          */
     975                 :          3 :         spin_lock(&inode->i_mapping->private_lock);
     976                 :          3 :         link_dev_buffers(page, bh);
     977                 :          3 :         end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
     978                 :            :                         size);
     979                 :          3 :         spin_unlock(&inode->i_mapping->private_lock);
     980                 :            : done:
     981                 :          3 :         ret = (block < end_block) ? 1 : -ENXIO;
     982                 :            : failed:
     983                 :          3 :         unlock_page(page);
     984                 :          3 :         put_page(page);
     985                 :          3 :         return ret;
     986                 :            : }
     987                 :            : 
     988                 :            : /*
     989                 :            :  * Create buffers for the specified block device block's page.  If
     990                 :            :  * that page was dirty, the buffers are set dirty also.
     991                 :            :  */
     992                 :            : static int
     993                 :          3 : grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
     994                 :            : {
     995                 :            :         pgoff_t index;
     996                 :            :         int sizebits;
     997                 :            : 
     998                 :            :         sizebits = -1;
     999                 :            :         do {
    1000                 :          3 :                 sizebits++;
    1001                 :          3 :         } while ((size << sizebits) < PAGE_SIZE);
    1002                 :            : 
    1003                 :          3 :         index = block >> sizebits;
    1004                 :            : 
    1005                 :            :         /*
    1006                 :            :          * Check for a block which wants to lie outside our maximum possible
    1007                 :            :          * pagecache index.  (this comparison is done using sector_t types).
    1008                 :            :          */
    1009                 :          3 :         if (unlikely(index != block >> sizebits)) {
    1010                 :          0 :                 printk(KERN_ERR "%s: requested out-of-range block %llu for "
    1011                 :            :                         "device %pg\n",
    1012                 :            :                         __func__, (unsigned long long)block,
    1013                 :            :                         bdev);
    1014                 :          0 :                 return -EIO;
    1015                 :            :         }
    1016                 :            : 
    1017                 :            :         /* Create a page with the proper size buffers.. */
    1018                 :          3 :         return grow_dev_page(bdev, block, index, size, sizebits, gfp);
    1019                 :            : }
    1020                 :            : 
    1021                 :            : static struct buffer_head *
    1022                 :          3 : __getblk_slow(struct block_device *bdev, sector_t block,
    1023                 :            :              unsigned size, gfp_t gfp)
    1024                 :            : {
    1025                 :            :         /* Size must be multiple of hard sectorsize */
    1026                 :          3 :         if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
    1027                 :            :                         (size < 512 || size > PAGE_SIZE))) {
    1028                 :          3 :                 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
    1029                 :            :                                         size);
    1030                 :          0 :                 printk(KERN_ERR "logical block size: %d\n",
    1031                 :            :                                         bdev_logical_block_size(bdev));
    1032                 :            : 
    1033                 :          0 :                 dump_stack();
    1034                 :          0 :                 return NULL;
    1035                 :            :         }
    1036                 :            : 
    1037                 :            :         for (;;) {
    1038                 :            :                 struct buffer_head *bh;
    1039                 :            :                 int ret;
    1040                 :            : 
    1041                 :          3 :                 bh = __find_get_block(bdev, block, size);
    1042                 :          3 :                 if (bh)
    1043                 :          3 :                         return bh;
    1044                 :            : 
    1045                 :          3 :                 ret = grow_buffers(bdev, block, size, gfp);
    1046                 :          3 :                 if (ret < 0)
    1047                 :            :                         return NULL;
    1048                 :            :         }
    1049                 :            : }
    1050                 :            : 
    1051                 :            : /*
    1052                 :            :  * The relationship between dirty buffers and dirty pages:
    1053                 :            :  *
    1054                 :            :  * Whenever a page has any dirty buffers, the page's dirty bit is set, and
    1055                 :            :  * the page is tagged dirty in the page cache.
    1056                 :            :  *
    1057                 :            :  * At all times, the dirtiness of the buffers represents the dirtiness of
    1058                 :            :  * subsections of the page.  If the page has buffers, the page dirty bit is
    1059                 :            :  * merely a hint about the true dirty state.
    1060                 :            :  *
    1061                 :            :  * When a page is set dirty in its entirety, all its buffers are marked dirty
    1062                 :            :  * (if the page has buffers).
    1063                 :            :  *
    1064                 :            :  * When a buffer is marked dirty, its page is dirtied, but the page's other
    1065                 :            :  * buffers are not.
    1066                 :            :  *
    1067                 :            :  * Also.  When blockdev buffers are explicitly read with bread(), they
    1068                 :            :  * individually become uptodate.  But their backing page remains not
    1069                 :            :  * uptodate - even if all of its buffers are uptodate.  A subsequent
    1070                 :            :  * block_read_full_page() against that page will discover all the uptodate
    1071                 :            :  * buffers, will set the page uptodate and will perform no I/O.
    1072                 :            :  */
    1073                 :            : 
    1074                 :            : /**
    1075                 :            :  * mark_buffer_dirty - mark a buffer_head as needing writeout
    1076                 :            :  * @bh: the buffer_head to mark dirty
    1077                 :            :  *
    1078                 :            :  * mark_buffer_dirty() will set the dirty bit against the buffer, then set
    1079                 :            :  * its backing page dirty, then tag the page as dirty in the page cache
    1080                 :            :  * and then attach the address_space's inode to its superblock's dirty
    1081                 :            :  * inode list.
    1082                 :            :  *
    1083                 :            :  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
    1084                 :            :  * i_pages lock and mapping->host->i_lock.
    1085                 :            :  */
    1086                 :          3 : void mark_buffer_dirty(struct buffer_head *bh)
    1087                 :            : {
    1088                 :          3 :         WARN_ON_ONCE(!buffer_uptodate(bh));
    1089                 :            : 
    1090                 :          3 :         trace_block_dirty_buffer(bh);
    1091                 :            : 
    1092                 :            :         /*
    1093                 :            :          * Very *carefully* optimize the it-is-already-dirty case.
    1094                 :            :          *
    1095                 :            :          * Don't let the final "is it dirty" escape to before we
    1096                 :            :          * perhaps modified the buffer.
    1097                 :            :          */
    1098                 :          3 :         if (buffer_dirty(bh)) {
    1099                 :          3 :                 smp_mb();
    1100                 :          3 :                 if (buffer_dirty(bh))
    1101                 :          3 :                         return;
    1102                 :            :         }
    1103                 :            : 
    1104                 :          3 :         if (!test_set_buffer_dirty(bh)) {
    1105                 :          3 :                 struct page *page = bh->b_page;
    1106                 :            :                 struct address_space *mapping = NULL;
    1107                 :            : 
    1108                 :          3 :                 lock_page_memcg(page);
    1109                 :          3 :                 if (!TestSetPageDirty(page)) {
    1110                 :          3 :                         mapping = page_mapping(page);
    1111                 :          3 :                         if (mapping)
    1112                 :          3 :                                 __set_page_dirty(page, mapping, 0);
    1113                 :            :                 }
    1114                 :          3 :                 unlock_page_memcg(page);
    1115                 :          3 :                 if (mapping)
    1116                 :          3 :                         __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
    1117                 :            :         }
    1118                 :            : }
    1119                 :            : EXPORT_SYMBOL(mark_buffer_dirty);
    1120                 :            : 
    1121                 :          0 : void mark_buffer_write_io_error(struct buffer_head *bh)
    1122                 :            : {
    1123                 :            :         set_buffer_write_io_error(bh);
    1124                 :            :         /* FIXME: do we need to set this in both places? */
    1125                 :          0 :         if (bh->b_page && bh->b_page->mapping)
    1126                 :          0 :                 mapping_set_error(bh->b_page->mapping, -EIO);
    1127                 :          0 :         if (bh->b_assoc_map)
    1128                 :          0 :                 mapping_set_error(bh->b_assoc_map, -EIO);
    1129                 :          0 : }
    1130                 :            : EXPORT_SYMBOL(mark_buffer_write_io_error);
    1131                 :            : 
    1132                 :            : /*
    1133                 :            :  * Decrement a buffer_head's reference count.  If all buffers against a page
    1134                 :            :  * have zero reference count, are clean and unlocked, and if the page is clean
    1135                 :            :  * and unlocked then try_to_free_buffers() may strip the buffers from the page
    1136                 :            :  * in preparation for freeing it (sometimes, rarely, buffers are removed from
    1137                 :            :  * a page but it ends up not being freed, and buffers may later be reattached).
    1138                 :            :  */
    1139                 :          3 : void __brelse(struct buffer_head * buf)
    1140                 :            : {
    1141                 :          3 :         if (atomic_read(&buf->b_count)) {
    1142                 :          3 :                 put_bh(buf);
    1143                 :          3 :                 return;
    1144                 :            :         }
    1145                 :          0 :         WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
    1146                 :            : }
    1147                 :            : EXPORT_SYMBOL(__brelse);
    1148                 :            : 
    1149                 :            : /*
    1150                 :            :  * bforget() is like brelse(), except it discards any
    1151                 :            :  * potentially dirty data.
    1152                 :            :  */
    1153                 :          3 : void __bforget(struct buffer_head *bh)
    1154                 :            : {
    1155                 :            :         clear_buffer_dirty(bh);
    1156                 :          3 :         if (bh->b_assoc_map) {
    1157                 :          0 :                 struct address_space *buffer_mapping = bh->b_page->mapping;
    1158                 :            : 
    1159                 :            :                 spin_lock(&buffer_mapping->private_lock);
    1160                 :          0 :                 list_del_init(&bh->b_assoc_buffers);
    1161                 :          0 :                 bh->b_assoc_map = NULL;
    1162                 :            :                 spin_unlock(&buffer_mapping->private_lock);
    1163                 :            :         }
    1164                 :          3 :         __brelse(bh);
    1165                 :          3 : }
    1166                 :            : EXPORT_SYMBOL(__bforget);
    1167                 :            : 
    1168                 :          3 : static struct buffer_head *__bread_slow(struct buffer_head *bh)
    1169                 :            : {
    1170                 :          3 :         lock_buffer(bh);
    1171                 :          3 :         if (buffer_uptodate(bh)) {
    1172                 :          3 :                 unlock_buffer(bh);
    1173                 :          3 :                 return bh;
    1174                 :            :         } else {
    1175                 :            :                 get_bh(bh);
    1176                 :          3 :                 bh->b_end_io = end_buffer_read_sync;
    1177                 :            :                 submit_bh(REQ_OP_READ, 0, bh);
    1178                 :          3 :                 wait_on_buffer(bh);
    1179                 :          3 :                 if (buffer_uptodate(bh))
    1180                 :            :                         return bh;
    1181                 :            :         }
    1182                 :            :         brelse(bh);
    1183                 :            :         return NULL;
    1184                 :            : }
    1185                 :            : 
    1186                 :            : /*
    1187                 :            :  * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
    1188                 :            :  * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
    1189                 :            :  * refcount elevated by one when they're in an LRU.  A buffer can only appear
    1190                 :            :  * once in a particular CPU's LRU.  A single buffer can be present in multiple
    1191                 :            :  * CPU's LRUs at the same time.
    1192                 :            :  *
    1193                 :            :  * This is a transparent caching front-end to sb_bread(), sb_getblk() and
    1194                 :            :  * sb_find_get_block().
    1195                 :            :  *
    1196                 :            :  * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
    1197                 :            :  * a local interrupt disable for that.
    1198                 :            :  */
    1199                 :            : 
    1200                 :            : #define BH_LRU_SIZE     16
    1201                 :            : 
    1202                 :            : struct bh_lru {
    1203                 :            :         struct buffer_head *bhs[BH_LRU_SIZE];
    1204                 :            : };
    1205                 :            : 
    1206                 :            : static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
    1207                 :            : 
    1208                 :            : #ifdef CONFIG_SMP
    1209                 :            : #define bh_lru_lock()   local_irq_disable()
    1210                 :            : #define bh_lru_unlock() local_irq_enable()
    1211                 :            : #else
    1212                 :            : #define bh_lru_lock()   preempt_disable()
    1213                 :            : #define bh_lru_unlock() preempt_enable()
    1214                 :            : #endif
    1215                 :            : 
    1216                 :          3 : static inline void check_irqs_on(void)
    1217                 :            : {
    1218                 :            : #ifdef irqs_disabled
    1219                 :          3 :         BUG_ON(irqs_disabled());
    1220                 :            : #endif
    1221                 :          3 : }
    1222                 :            : 
    1223                 :            : /*
    1224                 :            :  * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
    1225                 :            :  * inserted at the front, and the buffer_head at the back if any is evicted.
    1226                 :            :  * Or, if already in the LRU it is moved to the front.
    1227                 :            :  */
    1228                 :          3 : static void bh_lru_install(struct buffer_head *bh)
    1229                 :            : {
    1230                 :            :         struct buffer_head *evictee = bh;
    1231                 :            :         struct bh_lru *b;
    1232                 :            :         int i;
    1233                 :            : 
    1234                 :          3 :         check_irqs_on();
    1235                 :          3 :         bh_lru_lock();
    1236                 :            : 
    1237                 :          3 :         b = this_cpu_ptr(&bh_lrus);
    1238                 :          3 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1239                 :          3 :                 swap(evictee, b->bhs[i]);
    1240                 :          3 :                 if (evictee == bh) {
    1241                 :          1 :                         bh_lru_unlock();
    1242                 :          3 :                         return;
    1243                 :            :                 }
    1244                 :            :         }
    1245                 :            : 
    1246                 :            :         get_bh(bh);
    1247                 :          3 :         bh_lru_unlock();
    1248                 :            :         brelse(evictee);
    1249                 :            : }
    1250                 :            : 
    1251                 :            : /*
    1252                 :            :  * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
    1253                 :            :  */
    1254                 :            : static struct buffer_head *
    1255                 :          3 : lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
    1256                 :            : {
    1257                 :            :         struct buffer_head *ret = NULL;
    1258                 :            :         unsigned int i;
    1259                 :            : 
    1260                 :          3 :         check_irqs_on();
    1261                 :          3 :         bh_lru_lock();
    1262                 :          3 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1263                 :          3 :                 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
    1264                 :            : 
    1265                 :          3 :                 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
    1266                 :          3 :                     bh->b_size == size) {
    1267                 :          3 :                         if (i) {
    1268                 :          3 :                                 while (i) {
    1269                 :          3 :                                         __this_cpu_write(bh_lrus.bhs[i],
    1270                 :            :                                                 __this_cpu_read(bh_lrus.bhs[i - 1]));
    1271                 :            :                                         i--;
    1272                 :            :                                 }
    1273                 :          3 :                                 __this_cpu_write(bh_lrus.bhs[0], bh);
    1274                 :            :                         }
    1275                 :            :                         get_bh(bh);
    1276                 :          3 :                         ret = bh;
    1277                 :          3 :                         break;
    1278                 :            :                 }
    1279                 :            :         }
    1280                 :          3 :         bh_lru_unlock();
    1281                 :          3 :         return ret;
    1282                 :            : }
    1283                 :            : 
    1284                 :            : /*
    1285                 :            :  * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
    1286                 :            :  * it in the LRU and mark it as accessed.  If it is not present then return
    1287                 :            :  * NULL
    1288                 :            :  */
    1289                 :            : struct buffer_head *
    1290                 :          3 : __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
    1291                 :            : {
    1292                 :          3 :         struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
    1293                 :            : 
    1294                 :          3 :         if (bh == NULL) {
    1295                 :            :                 /* __find_get_block_slow will mark the page accessed */
    1296                 :          3 :                 bh = __find_get_block_slow(bdev, block);
    1297                 :          3 :                 if (bh)
    1298                 :          3 :                         bh_lru_install(bh);
    1299                 :            :         } else
    1300                 :            :                 touch_buffer(bh);
    1301                 :            : 
    1302                 :          3 :         return bh;
    1303                 :            : }
    1304                 :            : EXPORT_SYMBOL(__find_get_block);
    1305                 :            : 
    1306                 :            : /*
    1307                 :            :  * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
    1308                 :            :  * which corresponds to the passed block_device, block and size. The
    1309                 :            :  * returned buffer has its reference count incremented.
    1310                 :            :  *
    1311                 :            :  * __getblk_gfp() will lock up the machine if grow_dev_page's
    1312                 :            :  * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
    1313                 :            :  */
    1314                 :            : struct buffer_head *
    1315                 :          3 : __getblk_gfp(struct block_device *bdev, sector_t block,
    1316                 :            :              unsigned size, gfp_t gfp)
    1317                 :            : {
    1318                 :          3 :         struct buffer_head *bh = __find_get_block(bdev, block, size);
    1319                 :            : 
    1320                 :          3 :         might_sleep();
    1321                 :          3 :         if (bh == NULL)
    1322                 :          3 :                 bh = __getblk_slow(bdev, block, size, gfp);
    1323                 :          3 :         return bh;
    1324                 :            : }
    1325                 :            : EXPORT_SYMBOL(__getblk_gfp);
    1326                 :            : 
    1327                 :            : /*
    1328                 :            :  * Do async read-ahead on a buffer..
    1329                 :            :  */
    1330                 :          3 : void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
    1331                 :            : {
    1332                 :          3 :         struct buffer_head *bh = __getblk(bdev, block, size);
    1333                 :          3 :         if (likely(bh)) {
    1334                 :          3 :                 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
    1335                 :          3 :                 brelse(bh);
    1336                 :            :         }
    1337                 :          3 : }
    1338                 :            : EXPORT_SYMBOL(__breadahead);
    1339                 :            : 
    1340                 :          3 : void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
    1341                 :            :                       gfp_t gfp)
    1342                 :            : {
    1343                 :          3 :         struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
    1344                 :          3 :         if (likely(bh)) {
    1345                 :          3 :                 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
    1346                 :          3 :                 brelse(bh);
    1347                 :            :         }
    1348                 :          3 : }
    1349                 :            : EXPORT_SYMBOL(__breadahead_gfp);
    1350                 :            : 
    1351                 :            : /**
    1352                 :            :  *  __bread_gfp() - reads a specified block and returns the bh
    1353                 :            :  *  @bdev: the block_device to read from
    1354                 :            :  *  @block: number of block
    1355                 :            :  *  @size: size (in bytes) to read
    1356                 :            :  *  @gfp: page allocation flag
    1357                 :            :  *
    1358                 :            :  *  Reads a specified block, and returns buffer head that contains it.
    1359                 :            :  *  The page cache can be allocated from non-movable area
    1360                 :            :  *  not to prevent page migration if you set gfp to zero.
    1361                 :            :  *  It returns NULL if the block was unreadable.
    1362                 :            :  */
    1363                 :            : struct buffer_head *
    1364                 :          3 : __bread_gfp(struct block_device *bdev, sector_t block,
    1365                 :            :                    unsigned size, gfp_t gfp)
    1366                 :            : {
    1367                 :          3 :         struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
    1368                 :            : 
    1369                 :          3 :         if (likely(bh) && !buffer_uptodate(bh))
    1370                 :          3 :                 bh = __bread_slow(bh);
    1371                 :          3 :         return bh;
    1372                 :            : }
    1373                 :            : EXPORT_SYMBOL(__bread_gfp);
    1374                 :            : 
    1375                 :            : /*
    1376                 :            :  * invalidate_bh_lrus() is called rarely - but not only at unmount.
    1377                 :            :  * This doesn't race because it runs in each cpu either in irq
    1378                 :            :  * or with preempt disabled.
    1379                 :            :  */
    1380                 :          3 : static void invalidate_bh_lru(void *arg)
    1381                 :            : {
    1382                 :          3 :         struct bh_lru *b = &get_cpu_var(bh_lrus);
    1383                 :            :         int i;
    1384                 :            : 
    1385                 :          3 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1386                 :          3 :                 brelse(b->bhs[i]);
    1387                 :          3 :                 b->bhs[i] = NULL;
    1388                 :            :         }
    1389                 :          3 :         put_cpu_var(bh_lrus);
    1390                 :          3 : }
    1391                 :            : 
    1392                 :          3 : static bool has_bh_in_lru(int cpu, void *dummy)
    1393                 :            : {
    1394                 :          3 :         struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
    1395                 :            :         int i;
    1396                 :            :         
    1397                 :          3 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    1398                 :          3 :                 if (b->bhs[i])
    1399                 :            :                         return 1;
    1400                 :            :         }
    1401                 :            : 
    1402                 :            :         return 0;
    1403                 :            : }
    1404                 :            : 
    1405                 :          3 : void invalidate_bh_lrus(void)
    1406                 :            : {
    1407                 :          3 :         on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
    1408                 :          3 : }
    1409                 :            : EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
    1410                 :            : 
    1411                 :          3 : void set_bh_page(struct buffer_head *bh,
    1412                 :            :                 struct page *page, unsigned long offset)
    1413                 :            : {
    1414                 :          3 :         bh->b_page = page;
    1415                 :          3 :         BUG_ON(offset >= PAGE_SIZE);
    1416                 :            :         if (PageHighMem(page))
    1417                 :            :                 /*
    1418                 :            :                  * This catches illegal uses and preserves the offset:
    1419                 :            :                  */
    1420                 :            :                 bh->b_data = (char *)(0 + offset);
    1421                 :            :         else
    1422                 :          3 :                 bh->b_data = page_address(page) + offset;
    1423                 :          3 : }
    1424                 :            : EXPORT_SYMBOL(set_bh_page);
    1425                 :            : 
    1426                 :            : /*
    1427                 :            :  * Called when truncating a buffer on a page completely.
    1428                 :            :  */
    1429                 :            : 
    1430                 :            : /* Bits that are cleared during an invalidate */
    1431                 :            : #define BUFFER_FLAGS_DISCARD \
    1432                 :            :         (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
    1433                 :            :          1 << BH_Delay | 1 << BH_Unwritten)
    1434                 :            : 
    1435                 :          3 : static void discard_buffer(struct buffer_head * bh)
    1436                 :            : {
    1437                 :            :         unsigned long b_state, b_state_old;
    1438                 :            : 
    1439                 :          3 :         lock_buffer(bh);
    1440                 :            :         clear_buffer_dirty(bh);
    1441                 :          3 :         bh->b_bdev = NULL;
    1442                 :          3 :         b_state = bh->b_state;
    1443                 :            :         for (;;) {
    1444                 :          3 :                 b_state_old = cmpxchg(&bh->b_state, b_state,
    1445                 :            :                                       (b_state & ~BUFFER_FLAGS_DISCARD));
    1446                 :          3 :                 if (b_state_old == b_state)
    1447                 :            :                         break;
    1448                 :            :                 b_state = b_state_old;
    1449                 :            :         }
    1450                 :          3 :         unlock_buffer(bh);
    1451                 :          3 : }
    1452                 :            : 
    1453                 :            : /**
    1454                 :            :  * block_invalidatepage - invalidate part or all of a buffer-backed page
    1455                 :            :  *
    1456                 :            :  * @page: the page which is affected
    1457                 :            :  * @offset: start of the range to invalidate
    1458                 :            :  * @length: length of the range to invalidate
    1459                 :            :  *
    1460                 :            :  * block_invalidatepage() is called when all or part of the page has become
    1461                 :            :  * invalidated by a truncate operation.
    1462                 :            :  *
    1463                 :            :  * block_invalidatepage() does not have to release all buffers, but it must
    1464                 :            :  * ensure that no dirty buffer is left outside @offset and that no I/O
    1465                 :            :  * is underway against any of the blocks which are outside the truncation
    1466                 :            :  * point.  Because the caller is about to free (and possibly reuse) those
    1467                 :            :  * blocks on-disk.
    1468                 :            :  */
    1469                 :          3 : void block_invalidatepage(struct page *page, unsigned int offset,
    1470                 :            :                           unsigned int length)
    1471                 :            : {
    1472                 :            :         struct buffer_head *head, *bh, *next;
    1473                 :            :         unsigned int curr_off = 0;
    1474                 :          3 :         unsigned int stop = length + offset;
    1475                 :            : 
    1476                 :          3 :         BUG_ON(!PageLocked(page));
    1477                 :          3 :         if (!page_has_buffers(page))
    1478                 :            :                 goto out;
    1479                 :            : 
    1480                 :            :         /*
    1481                 :            :          * Check for overflow
    1482                 :            :          */
    1483                 :          3 :         BUG_ON(stop > PAGE_SIZE || stop < length);
    1484                 :            : 
    1485                 :          3 :         head = page_buffers(page);
    1486                 :            :         bh = head;
    1487                 :            :         do {
    1488                 :          3 :                 unsigned int next_off = curr_off + bh->b_size;
    1489                 :          3 :                 next = bh->b_this_page;
    1490                 :            : 
    1491                 :            :                 /*
    1492                 :            :                  * Are we still fully in range ?
    1493                 :            :                  */
    1494                 :          3 :                 if (next_off > stop)
    1495                 :            :                         goto out;
    1496                 :            : 
    1497                 :            :                 /*
    1498                 :            :                  * is this block fully invalidated?
    1499                 :            :                  */
    1500                 :          3 :                 if (offset <= curr_off)
    1501                 :          3 :                         discard_buffer(bh);
    1502                 :            :                 curr_off = next_off;
    1503                 :            :                 bh = next;
    1504                 :          3 :         } while (bh != head);
    1505                 :            : 
    1506                 :            :         /*
    1507                 :            :          * We release buffers only if the entire page is being invalidated.
    1508                 :            :          * The get_block cached value has been unconditionally invalidated,
    1509                 :            :          * so real IO is not possible anymore.
    1510                 :            :          */
    1511                 :          3 :         if (length == PAGE_SIZE)
    1512                 :          3 :                 try_to_release_page(page, 0);
    1513                 :            : out:
    1514                 :          3 :         return;
    1515                 :            : }
    1516                 :            : EXPORT_SYMBOL(block_invalidatepage);
    1517                 :            : 
    1518                 :            : 
    1519                 :            : /*
    1520                 :            :  * We attach and possibly dirty the buffers atomically wrt
    1521                 :            :  * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
    1522                 :            :  * is already excluded via the page lock.
    1523                 :            :  */
    1524                 :          3 : void create_empty_buffers(struct page *page,
    1525                 :            :                         unsigned long blocksize, unsigned long b_state)
    1526                 :            : {
    1527                 :            :         struct buffer_head *bh, *head, *tail;
    1528                 :            : 
    1529                 :          3 :         head = alloc_page_buffers(page, blocksize, true);
    1530                 :            :         bh = head;
    1531                 :            :         do {
    1532                 :          3 :                 bh->b_state |= b_state;
    1533                 :            :                 tail = bh;
    1534                 :          3 :                 bh = bh->b_this_page;
    1535                 :          3 :         } while (bh);
    1536                 :          3 :         tail->b_this_page = head;
    1537                 :            : 
    1538                 :          3 :         spin_lock(&page->mapping->private_lock);
    1539                 :          3 :         if (PageUptodate(page) || PageDirty(page)) {
    1540                 :            :                 bh = head;
    1541                 :            :                 do {
    1542                 :          3 :                         if (PageDirty(page))
    1543                 :            :                                 set_buffer_dirty(bh);
    1544                 :          3 :                         if (PageUptodate(page))
    1545                 :            :                                 set_buffer_uptodate(bh);
    1546                 :          3 :                         bh = bh->b_this_page;
    1547                 :          3 :                 } while (bh != head);
    1548                 :            :         }
    1549                 :            :         attach_page_buffers(page, head);
    1550                 :          3 :         spin_unlock(&page->mapping->private_lock);
    1551                 :          3 : }
    1552                 :            : EXPORT_SYMBOL(create_empty_buffers);
    1553                 :            : 
    1554                 :            : /**
    1555                 :            :  * clean_bdev_aliases: clean a range of buffers in block device
    1556                 :            :  * @bdev: Block device to clean buffers in
    1557                 :            :  * @block: Start of a range of blocks to clean
    1558                 :            :  * @len: Number of blocks to clean
    1559                 :            :  *
    1560                 :            :  * We are taking a range of blocks for data and we don't want writeback of any
    1561                 :            :  * buffer-cache aliases starting from return from this function and until the
    1562                 :            :  * moment when something will explicitly mark the buffer dirty (hopefully that
    1563                 :            :  * will not happen until we will free that block ;-) We don't even need to mark
    1564                 :            :  * it not-uptodate - nobody can expect anything from a newly allocated buffer
    1565                 :            :  * anyway. We used to use unmap_buffer() for such invalidation, but that was
    1566                 :            :  * wrong. We definitely don't want to mark the alias unmapped, for example - it
    1567                 :            :  * would confuse anyone who might pick it with bread() afterwards...
    1568                 :            :  *
    1569                 :            :  * Also..  Note that bforget() doesn't lock the buffer.  So there can be
    1570                 :            :  * writeout I/O going on against recently-freed buffers.  We don't wait on that
    1571                 :            :  * I/O in bforget() - it's more efficient to wait on the I/O only if we really
    1572                 :            :  * need to.  That happens here.
    1573                 :            :  */
    1574                 :          1 : void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
    1575                 :            : {
    1576                 :          1 :         struct inode *bd_inode = bdev->bd_inode;
    1577                 :          1 :         struct address_space *bd_mapping = bd_inode->i_mapping;
    1578                 :            :         struct pagevec pvec;
    1579                 :          1 :         pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
    1580                 :            :         pgoff_t end;
    1581                 :            :         int i, count;
    1582                 :            :         struct buffer_head *bh;
    1583                 :            :         struct buffer_head *head;
    1584                 :            : 
    1585                 :          1 :         end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
    1586                 :            :         pagevec_init(&pvec);
    1587                 :          1 :         while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
    1588                 :          0 :                 count = pagevec_count(&pvec);
    1589                 :          0 :                 for (i = 0; i < count; i++) {
    1590                 :          0 :                         struct page *page = pvec.pages[i];
    1591                 :            : 
    1592                 :          0 :                         if (!page_has_buffers(page))
    1593                 :          0 :                                 continue;
    1594                 :            :                         /*
    1595                 :            :                          * We use page lock instead of bd_mapping->private_lock
    1596                 :            :                          * to pin buffers here since we can afford to sleep and
    1597                 :            :                          * it scales better than a global spinlock lock.
    1598                 :            :                          */
    1599                 :          0 :                         lock_page(page);
    1600                 :            :                         /* Recheck when the page is locked which pins bhs */
    1601                 :          0 :                         if (!page_has_buffers(page))
    1602                 :            :                                 goto unlock_page;
    1603                 :          0 :                         head = page_buffers(page);
    1604                 :            :                         bh = head;
    1605                 :            :                         do {
    1606                 :          0 :                                 if (!buffer_mapped(bh) || (bh->b_blocknr < block))
    1607                 :            :                                         goto next;
    1608                 :          0 :                                 if (bh->b_blocknr >= block + len)
    1609                 :            :                                         break;
    1610                 :            :                                 clear_buffer_dirty(bh);
    1611                 :          0 :                                 wait_on_buffer(bh);
    1612                 :            :                                 clear_buffer_req(bh);
    1613                 :            : next:
    1614                 :          0 :                                 bh = bh->b_this_page;
    1615                 :          0 :                         } while (bh != head);
    1616                 :            : unlock_page:
    1617                 :          0 :                         unlock_page(page);
    1618                 :            :                 }
    1619                 :            :                 pagevec_release(&pvec);
    1620                 :          0 :                 cond_resched();
    1621                 :            :                 /* End of range already reached? */
    1622                 :          0 :                 if (index > end || !index)
    1623                 :            :                         break;
    1624                 :            :         }
    1625                 :          1 : }
    1626                 :            : EXPORT_SYMBOL(clean_bdev_aliases);
    1627                 :            : 
    1628                 :            : /*
    1629                 :            :  * Size is a power-of-two in the range 512..PAGE_SIZE,
    1630                 :            :  * and the case we care about most is PAGE_SIZE.
    1631                 :            :  *
    1632                 :            :  * So this *could* possibly be written with those
    1633                 :            :  * constraints in mind (relevant mostly if some
    1634                 :            :  * architecture has a slow bit-scan instruction)
    1635                 :            :  */
    1636                 :          3 : static inline int block_size_bits(unsigned int blocksize)
    1637                 :            : {
    1638                 :          3 :         return ilog2(blocksize);
    1639                 :            : }
    1640                 :            : 
    1641                 :          3 : static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
    1642                 :            : {
    1643                 :          3 :         BUG_ON(!PageLocked(page));
    1644                 :            : 
    1645                 :          3 :         if (!page_has_buffers(page))
    1646                 :          3 :                 create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
    1647                 :            :                                      b_state);
    1648                 :          3 :         return page_buffers(page);
    1649                 :            : }
    1650                 :            : 
    1651                 :            : /*
    1652                 :            :  * NOTE! All mapped/uptodate combinations are valid:
    1653                 :            :  *
    1654                 :            :  *      Mapped  Uptodate        Meaning
    1655                 :            :  *
    1656                 :            :  *      No      No              "unknown" - must do get_block()
    1657                 :            :  *      No      Yes             "hole" - zero-filled
    1658                 :            :  *      Yes     No              "allocated" - allocated on disk, not read in
    1659                 :            :  *      Yes     Yes             "valid" - allocated and up-to-date in memory.
    1660                 :            :  *
    1661                 :            :  * "Dirty" is valid only with the last case (mapped+uptodate).
    1662                 :            :  */
    1663                 :            : 
    1664                 :            : /*
    1665                 :            :  * While block_write_full_page is writing back the dirty buffers under
    1666                 :            :  * the page lock, whoever dirtied the buffers may decide to clean them
    1667                 :            :  * again at any time.  We handle that by only looking at the buffer
    1668                 :            :  * state inside lock_buffer().
    1669                 :            :  *
    1670                 :            :  * If block_write_full_page() is called for regular writeback
    1671                 :            :  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
    1672                 :            :  * locked buffer.   This only can happen if someone has written the buffer
    1673                 :            :  * directly, with submit_bh().  At the address_space level PageWriteback
    1674                 :            :  * prevents this contention from occurring.
    1675                 :            :  *
    1676                 :            :  * If block_write_full_page() is called with wbc->sync_mode ==
    1677                 :            :  * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
    1678                 :            :  * causes the writes to be flagged as synchronous writes.
    1679                 :            :  */
    1680                 :          3 : int __block_write_full_page(struct inode *inode, struct page *page,
    1681                 :            :                         get_block_t *get_block, struct writeback_control *wbc,
    1682                 :            :                         bh_end_io_t *handler)
    1683                 :            : {
    1684                 :            :         int err;
    1685                 :            :         sector_t block;
    1686                 :            :         sector_t last_block;
    1687                 :            :         struct buffer_head *bh, *head;
    1688                 :            :         unsigned int blocksize, bbits;
    1689                 :            :         int nr_underway = 0;
    1690                 :            :         int write_flags = wbc_to_write_flags(wbc);
    1691                 :            : 
    1692                 :          3 :         head = create_page_buffers(page, inode,
    1693                 :            :                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
    1694                 :            : 
    1695                 :            :         /*
    1696                 :            :          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
    1697                 :            :          * here, and the (potentially unmapped) buffers may become dirty at
    1698                 :            :          * any time.  If a buffer becomes dirty here after we've inspected it
    1699                 :            :          * then we just miss that fact, and the page stays dirty.
    1700                 :            :          *
    1701                 :            :          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
    1702                 :            :          * handle that here by just cleaning them.
    1703                 :            :          */
    1704                 :            : 
    1705                 :            :         bh = head;
    1706                 :          3 :         blocksize = bh->b_size;
    1707                 :          3 :         bbits = block_size_bits(blocksize);
    1708                 :            : 
    1709                 :          3 :         block = (sector_t)page->index << (PAGE_SHIFT - bbits);
    1710                 :          3 :         last_block = (i_size_read(inode) - 1) >> bbits;
    1711                 :            : 
    1712                 :            :         /*
    1713                 :            :          * Get all the dirty buffers mapped to disk addresses and
    1714                 :            :          * handle any aliases from the underlying blockdev's mapping.
    1715                 :            :          */
    1716                 :            :         do {
    1717                 :          3 :                 if (block > last_block) {
    1718                 :            :                         /*
    1719                 :            :                          * mapped buffers outside i_size will occur, because
    1720                 :            :                          * this page can be outside i_size when there is a
    1721                 :            :                          * truncate in progress.
    1722                 :            :                          */
    1723                 :            :                         /*
    1724                 :            :                          * The buffer was zeroed by block_write_full_page()
    1725                 :            :                          */
    1726                 :            :                         clear_buffer_dirty(bh);
    1727                 :            :                         set_buffer_uptodate(bh);
    1728                 :          3 :                 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
    1729                 :            :                            buffer_dirty(bh)) {
    1730                 :          0 :                         WARN_ON(bh->b_size != blocksize);
    1731                 :          0 :                         err = get_block(inode, block, bh, 1);
    1732                 :          0 :                         if (err)
    1733                 :            :                                 goto recover;
    1734                 :            :                         clear_buffer_delay(bh);
    1735                 :          0 :                         if (buffer_new(bh)) {
    1736                 :            :                                 /* blockdev mappings never come here */
    1737                 :            :                                 clear_buffer_new(bh);
    1738                 :          0 :                                 clean_bdev_bh_alias(bh);
    1739                 :            :                         }
    1740                 :            :                 }
    1741                 :          3 :                 bh = bh->b_this_page;
    1742                 :          3 :                 block++;
    1743                 :          3 :         } while (bh != head);
    1744                 :            : 
    1745                 :            :         do {
    1746                 :          3 :                 if (!buffer_mapped(bh))
    1747                 :          0 :                         continue;
    1748                 :            :                 /*
    1749                 :            :                  * If it's a fully non-blocking write attempt and we cannot
    1750                 :            :                  * lock the buffer then redirty the page.  Note that this can
    1751                 :            :                  * potentially cause a busy-wait loop from writeback threads
    1752                 :            :                  * and kswapd activity, but those code paths have their own
    1753                 :            :                  * higher-level throttling.
    1754                 :            :                  */
    1755                 :          3 :                 if (wbc->sync_mode != WB_SYNC_NONE) {
    1756                 :          3 :                         lock_buffer(bh);
    1757                 :          3 :                 } else if (!trylock_buffer(bh)) {
    1758                 :          0 :                         redirty_page_for_writepage(wbc, page);
    1759                 :          0 :                         continue;
    1760                 :            :                 }
    1761                 :          3 :                 if (test_clear_buffer_dirty(bh)) {
    1762                 :          3 :                         mark_buffer_async_write_endio(bh, handler);
    1763                 :            :                 } else {
    1764                 :          3 :                         unlock_buffer(bh);
    1765                 :            :                 }
    1766                 :          3 :         } while ((bh = bh->b_this_page) != head);
    1767                 :            : 
    1768                 :            :         /*
    1769                 :            :          * The page and its buffers are protected by PageWriteback(), so we can
    1770                 :            :          * drop the bh refcounts early.
    1771                 :            :          */
    1772                 :          3 :         BUG_ON(PageWriteback(page));
    1773                 :            :         set_page_writeback(page);
    1774                 :            : 
    1775                 :            :         do {
    1776                 :          3 :                 struct buffer_head *next = bh->b_this_page;
    1777                 :          3 :                 if (buffer_async_write(bh)) {
    1778                 :          3 :                         submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
    1779                 :          3 :                                         inode->i_write_hint, wbc);
    1780                 :          3 :                         nr_underway++;
    1781                 :            :                 }
    1782                 :            :                 bh = next;
    1783                 :          3 :         } while (bh != head);
    1784                 :          3 :         unlock_page(page);
    1785                 :            : 
    1786                 :            :         err = 0;
    1787                 :            : done:
    1788                 :          3 :         if (nr_underway == 0) {
    1789                 :            :                 /*
    1790                 :            :                  * The page was marked dirty, but the buffers were
    1791                 :            :                  * clean.  Someone wrote them back by hand with
    1792                 :            :                  * ll_rw_block/submit_bh.  A rare case.
    1793                 :            :                  */
    1794                 :          3 :                 end_page_writeback(page);
    1795                 :            : 
    1796                 :            :                 /*
    1797                 :            :                  * The page and buffer_heads can be released at any time from
    1798                 :            :                  * here on.
    1799                 :            :                  */
    1800                 :            :         }
    1801                 :          3 :         return err;
    1802                 :            : 
    1803                 :            : recover:
    1804                 :            :         /*
    1805                 :            :          * ENOSPC, or some other error.  We may already have added some
    1806                 :            :          * blocks to the file, so we need to write these out to avoid
    1807                 :            :          * exposing stale data.
    1808                 :            :          * The page is currently locked and not marked for writeback
    1809                 :            :          */
    1810                 :            :         bh = head;
    1811                 :            :         /* Recovery: lock and submit the mapped buffers */
    1812                 :            :         do {
    1813                 :          0 :                 if (buffer_mapped(bh) && buffer_dirty(bh) &&
    1814                 :            :                     !buffer_delay(bh)) {
    1815                 :          0 :                         lock_buffer(bh);
    1816                 :          0 :                         mark_buffer_async_write_endio(bh, handler);
    1817                 :            :                 } else {
    1818                 :            :                         /*
    1819                 :            :                          * The buffer may have been set dirty during
    1820                 :            :                          * attachment to a dirty page.
    1821                 :            :                          */
    1822                 :            :                         clear_buffer_dirty(bh);
    1823                 :            :                 }
    1824                 :          0 :         } while ((bh = bh->b_this_page) != head);
    1825                 :            :         SetPageError(page);
    1826                 :          0 :         BUG_ON(PageWriteback(page));
    1827                 :          0 :         mapping_set_error(page->mapping, err);
    1828                 :            :         set_page_writeback(page);
    1829                 :            :         do {
    1830                 :          0 :                 struct buffer_head *next = bh->b_this_page;
    1831                 :          0 :                 if (buffer_async_write(bh)) {
    1832                 :            :                         clear_buffer_dirty(bh);
    1833                 :          0 :                         submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
    1834                 :          0 :                                         inode->i_write_hint, wbc);
    1835                 :          0 :                         nr_underway++;
    1836                 :            :                 }
    1837                 :            :                 bh = next;
    1838                 :          0 :         } while (bh != head);
    1839                 :          0 :         unlock_page(page);
    1840                 :          0 :         goto done;
    1841                 :            : }
    1842                 :            : EXPORT_SYMBOL(__block_write_full_page);
    1843                 :            : 
    1844                 :            : /*
    1845                 :            :  * If a page has any new buffers, zero them out here, and mark them uptodate
    1846                 :            :  * and dirty so they'll be written out (in order to prevent uninitialised
    1847                 :            :  * block data from leaking). And clear the new bit.
    1848                 :            :  */
    1849                 :          0 : void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
    1850                 :            : {
    1851                 :            :         unsigned int block_start, block_end;
    1852                 :            :         struct buffer_head *head, *bh;
    1853                 :            : 
    1854                 :          0 :         BUG_ON(!PageLocked(page));
    1855                 :          0 :         if (!page_has_buffers(page))
    1856                 :          0 :                 return;
    1857                 :            : 
    1858                 :          0 :         bh = head = page_buffers(page);
    1859                 :            :         block_start = 0;
    1860                 :            :         do {
    1861                 :          0 :                 block_end = block_start + bh->b_size;
    1862                 :            : 
    1863                 :          0 :                 if (buffer_new(bh)) {
    1864                 :          0 :                         if (block_end > from && block_start < to) {
    1865                 :          0 :                                 if (!PageUptodate(page)) {
    1866                 :            :                                         unsigned start, size;
    1867                 :            : 
    1868                 :          0 :                                         start = max(from, block_start);
    1869                 :          0 :                                         size = min(to, block_end) - start;
    1870                 :            : 
    1871                 :            :                                         zero_user(page, start, size);
    1872                 :            :                                         set_buffer_uptodate(bh);
    1873                 :            :                                 }
    1874                 :            : 
    1875                 :            :                                 clear_buffer_new(bh);
    1876                 :          0 :                                 mark_buffer_dirty(bh);
    1877                 :            :                         }
    1878                 :            :                 }
    1879                 :            : 
    1880                 :            :                 block_start = block_end;
    1881                 :          0 :                 bh = bh->b_this_page;
    1882                 :          0 :         } while (bh != head);
    1883                 :            : }
    1884                 :            : EXPORT_SYMBOL(page_zero_new_buffers);
    1885                 :            : 
    1886                 :            : static void
    1887                 :          0 : iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
    1888                 :            :                 struct iomap *iomap)
    1889                 :            : {
    1890                 :          0 :         loff_t offset = block << inode->i_blkbits;
    1891                 :            : 
    1892                 :          0 :         bh->b_bdev = iomap->bdev;
    1893                 :            : 
    1894                 :            :         /*
    1895                 :            :          * Block points to offset in file we need to map, iomap contains
    1896                 :            :          * the offset at which the map starts. If the map ends before the
    1897                 :            :          * current block, then do not map the buffer and let the caller
    1898                 :            :          * handle it.
    1899                 :            :          */
    1900                 :          0 :         BUG_ON(offset >= iomap->offset + iomap->length);
    1901                 :            : 
    1902                 :          0 :         switch (iomap->type) {
    1903                 :            :         case IOMAP_HOLE:
    1904                 :            :                 /*
    1905                 :            :                  * If the buffer is not up to date or beyond the current EOF,
    1906                 :            :                  * we need to mark it as new to ensure sub-block zeroing is
    1907                 :            :                  * executed if necessary.
    1908                 :            :                  */
    1909                 :          0 :                 if (!buffer_uptodate(bh) ||
    1910                 :            :                     (offset >= i_size_read(inode)))
    1911                 :            :                         set_buffer_new(bh);
    1912                 :            :                 break;
    1913                 :            :         case IOMAP_DELALLOC:
    1914                 :          0 :                 if (!buffer_uptodate(bh) ||
    1915                 :            :                     (offset >= i_size_read(inode)))
    1916                 :            :                         set_buffer_new(bh);
    1917                 :            :                 set_buffer_uptodate(bh);
    1918                 :            :                 set_buffer_mapped(bh);
    1919                 :            :                 set_buffer_delay(bh);
    1920                 :            :                 break;
    1921                 :            :         case IOMAP_UNWRITTEN:
    1922                 :            :                 /*
    1923                 :            :                  * For unwritten regions, we always need to ensure that regions
    1924                 :            :                  * in the block we are not writing to are zeroed. Mark the
    1925                 :            :                  * buffer as new to ensure this.
    1926                 :            :                  */
    1927                 :            :                 set_buffer_new(bh);
    1928                 :            :                 set_buffer_unwritten(bh);
    1929                 :            :                 /* FALLTHRU */
    1930                 :            :         case IOMAP_MAPPED:
    1931                 :          0 :                 if ((iomap->flags & IOMAP_F_NEW) ||
    1932                 :            :                     offset >= i_size_read(inode))
    1933                 :            :                         set_buffer_new(bh);
    1934                 :          0 :                 bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
    1935                 :          0 :                                 inode->i_blkbits;
    1936                 :            :                 set_buffer_mapped(bh);
    1937                 :            :                 break;
    1938                 :            :         }
    1939                 :          0 : }
    1940                 :            : 
    1941                 :          3 : int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
    1942                 :            :                 get_block_t *get_block, struct iomap *iomap)
    1943                 :            : {
    1944                 :          3 :         unsigned from = pos & (PAGE_SIZE - 1);
    1945                 :          3 :         unsigned to = from + len;
    1946                 :          3 :         struct inode *inode = page->mapping->host;
    1947                 :            :         unsigned block_start, block_end;
    1948                 :            :         sector_t block;
    1949                 :            :         int err = 0;
    1950                 :            :         unsigned blocksize, bbits;
    1951                 :            :         struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
    1952                 :            : 
    1953                 :          3 :         BUG_ON(!PageLocked(page));
    1954                 :            :         BUG_ON(from > PAGE_SIZE);
    1955                 :          3 :         BUG_ON(to > PAGE_SIZE);
    1956                 :          3 :         BUG_ON(from > to);
    1957                 :            : 
    1958                 :          3 :         head = create_page_buffers(page, inode, 0);
    1959                 :          3 :         blocksize = head->b_size;
    1960                 :          3 :         bbits = block_size_bits(blocksize);
    1961                 :            : 
    1962                 :          3 :         block = (sector_t)page->index << (PAGE_SHIFT - bbits);
    1963                 :            : 
    1964                 :          3 :         for(bh = head, block_start = 0; bh != head || !block_start;
    1965                 :          3 :             block++, block_start=block_end, bh = bh->b_this_page) {
    1966                 :          3 :                 block_end = block_start + blocksize;
    1967                 :          3 :                 if (block_end <= from || block_start >= to) {
    1968                 :          0 :                         if (PageUptodate(page)) {
    1969                 :          0 :                                 if (!buffer_uptodate(bh))
    1970                 :          0 :                                         set_buffer_uptodate(bh);
    1971                 :            :                         }
    1972                 :          0 :                         continue;
    1973                 :            :                 }
    1974                 :          3 :                 if (buffer_new(bh))
    1975                 :          0 :                         clear_buffer_new(bh);
    1976                 :          3 :                 if (!buffer_mapped(bh)) {
    1977                 :          1 :                         WARN_ON(bh->b_size != blocksize);
    1978                 :          1 :                         if (get_block) {
    1979                 :          1 :                                 err = get_block(inode, block, bh, 1);
    1980                 :          1 :                                 if (err)
    1981                 :            :                                         break;
    1982                 :            :                         } else {
    1983                 :          0 :                                 iomap_to_bh(inode, block, bh, iomap);
    1984                 :            :                         }
    1985                 :            : 
    1986                 :          1 :                         if (buffer_new(bh)) {
    1987                 :          1 :                                 clean_bdev_bh_alias(bh);
    1988                 :          1 :                                 if (PageUptodate(page)) {
    1989                 :          1 :                                         clear_buffer_new(bh);
    1990                 :          1 :                                         set_buffer_uptodate(bh);
    1991                 :          1 :                                         mark_buffer_dirty(bh);
    1992                 :          1 :                                         continue;
    1993                 :            :                                 }
    1994                 :          0 :                                 if (block_end > to || block_start < from)
    1995                 :          0 :                                         zero_user_segments(page,
    1996                 :            :                                                 to, block_end,
    1997                 :            :                                                 block_start, from);
    1998                 :          0 :                                 continue;
    1999                 :            :                         }
    2000                 :            :                 }
    2001                 :          2 :                 if (PageUptodate(page)) {
    2002                 :          2 :                         if (!buffer_uptodate(bh))
    2003                 :          0 :                                 set_buffer_uptodate(bh);
    2004                 :          2 :                         continue; 
    2005                 :            :                 }
    2006                 :          0 :                 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
    2007                 :          0 :                     !buffer_unwritten(bh) &&
    2008                 :          0 :                      (block_start < from || block_end > to)) {
    2009                 :          0 :                         ll_rw_block(REQ_OP_READ, 0, 1, &bh);
    2010                 :          0 :                         *wait_bh++=bh;
    2011                 :            :                 }
    2012                 :            :         }
    2013                 :            :         /*
    2014                 :            :          * If we issued read requests - let them complete.
    2015                 :            :          */
    2016                 :          3 :         while(wait_bh > wait) {
    2017                 :          0 :                 wait_on_buffer(*--wait_bh);
    2018                 :          0 :                 if (!buffer_uptodate(*wait_bh))
    2019                 :            :                         err = -EIO;
    2020                 :            :         }
    2021                 :          3 :         if (unlikely(err))
    2022                 :          0 :                 page_zero_new_buffers(page, from, to);
    2023                 :          3 :         return err;
    2024                 :            : }
    2025                 :            : 
    2026                 :          0 : int __block_write_begin(struct page *page, loff_t pos, unsigned len,
    2027                 :            :                 get_block_t *get_block)
    2028                 :            : {
    2029                 :          3 :         return __block_write_begin_int(page, pos, len, get_block, NULL);
    2030                 :            : }
    2031                 :            : EXPORT_SYMBOL(__block_write_begin);
    2032                 :            : 
    2033                 :          3 : static int __block_commit_write(struct inode *inode, struct page *page,
    2034                 :            :                 unsigned from, unsigned to)
    2035                 :            : {
    2036                 :            :         unsigned block_start, block_end;
    2037                 :            :         int partial = 0;
    2038                 :            :         unsigned blocksize;
    2039                 :            :         struct buffer_head *bh, *head;
    2040                 :            : 
    2041                 :          3 :         bh = head = page_buffers(page);
    2042                 :          3 :         blocksize = bh->b_size;
    2043                 :            : 
    2044                 :            :         block_start = 0;
    2045                 :            :         do {
    2046                 :          3 :                 block_end = block_start + blocksize;
    2047                 :          3 :                 if (block_end <= from || block_start >= to) {
    2048                 :          0 :                         if (!buffer_uptodate(bh))
    2049                 :            :                                 partial = 1;
    2050                 :            :                 } else {
    2051                 :            :                         set_buffer_uptodate(bh);
    2052                 :          3 :                         mark_buffer_dirty(bh);
    2053                 :            :                 }
    2054                 :            :                 clear_buffer_new(bh);
    2055                 :            : 
    2056                 :            :                 block_start = block_end;
    2057                 :          3 :                 bh = bh->b_this_page;
    2058                 :          3 :         } while (bh != head);
    2059                 :            : 
    2060                 :            :         /*
    2061                 :            :          * If this is a partial write which happened to make all buffers
    2062                 :            :          * uptodate then we can optimize away a bogus readpage() for
    2063                 :            :          * the next read(). Here we 'discover' whether the page went
    2064                 :            :          * uptodate as a result of this (potentially partial) write.
    2065                 :            :          */
    2066                 :          3 :         if (!partial)
    2067                 :            :                 SetPageUptodate(page);
    2068                 :          3 :         return 0;
    2069                 :            : }
    2070                 :            : 
    2071                 :            : /*
    2072                 :            :  * block_write_begin takes care of the basic task of block allocation and
    2073                 :            :  * bringing partial write blocks uptodate first.
    2074                 :            :  *
    2075                 :            :  * The filesystem needs to handle block truncation upon failure.
    2076                 :            :  */
    2077                 :          0 : int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
    2078                 :            :                 unsigned flags, struct page **pagep, get_block_t *get_block)
    2079                 :            : {
    2080                 :          0 :         pgoff_t index = pos >> PAGE_SHIFT;
    2081                 :            :         struct page *page;
    2082                 :            :         int status;
    2083                 :            : 
    2084                 :          0 :         page = grab_cache_page_write_begin(mapping, index, flags);
    2085                 :          0 :         if (!page)
    2086                 :            :                 return -ENOMEM;
    2087                 :            : 
    2088                 :            :         status = __block_write_begin(page, pos, len, get_block);
    2089                 :          0 :         if (unlikely(status)) {
    2090                 :          0 :                 unlock_page(page);
    2091                 :          0 :                 put_page(page);
    2092                 :            :                 page = NULL;
    2093                 :            :         }
    2094                 :            : 
    2095                 :          0 :         *pagep = page;
    2096                 :          0 :         return status;
    2097                 :            : }
    2098                 :            : EXPORT_SYMBOL(block_write_begin);
    2099                 :            : 
    2100                 :          3 : int block_write_end(struct file *file, struct address_space *mapping,
    2101                 :            :                         loff_t pos, unsigned len, unsigned copied,
    2102                 :            :                         struct page *page, void *fsdata)
    2103                 :            : {
    2104                 :          3 :         struct inode *inode = mapping->host;
    2105                 :            :         unsigned start;
    2106                 :            : 
    2107                 :          3 :         start = pos & (PAGE_SIZE - 1);
    2108                 :            : 
    2109                 :          3 :         if (unlikely(copied < len)) {
    2110                 :            :                 /*
    2111                 :            :                  * The buffers that were written will now be uptodate, so we
    2112                 :            :                  * don't have to worry about a readpage reading them and
    2113                 :            :                  * overwriting a partial write. However if we have encountered
    2114                 :            :                  * a short write and only partially written into a buffer, it
    2115                 :            :                  * will not be marked uptodate, so a readpage might come in and
    2116                 :            :                  * destroy our partial write.
    2117                 :            :                  *
    2118                 :            :                  * Do the simplest thing, and just treat any short write to a
    2119                 :            :                  * non uptodate page as a zero-length write, and force the
    2120                 :            :                  * caller to redo the whole thing.
    2121                 :            :                  */
    2122                 :          0 :                 if (!PageUptodate(page))
    2123                 :            :                         copied = 0;
    2124                 :            : 
    2125                 :          0 :                 page_zero_new_buffers(page, start+copied, start+len);
    2126                 :            :         }
    2127                 :          3 :         flush_dcache_page(page);
    2128                 :            : 
    2129                 :            :         /* This could be a short (even 0-length) commit */
    2130                 :          3 :         __block_commit_write(inode, page, start, start+copied);
    2131                 :            : 
    2132                 :          3 :         return copied;
    2133                 :            : }
    2134                 :            : EXPORT_SYMBOL(block_write_end);
    2135                 :            : 
    2136                 :          3 : int generic_write_end(struct file *file, struct address_space *mapping,
    2137                 :            :                         loff_t pos, unsigned len, unsigned copied,
    2138                 :            :                         struct page *page, void *fsdata)
    2139                 :            : {
    2140                 :          3 :         struct inode *inode = mapping->host;
    2141                 :          3 :         loff_t old_size = inode->i_size;
    2142                 :            :         bool i_size_changed = false;
    2143                 :            : 
    2144                 :          3 :         copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
    2145                 :            : 
    2146                 :            :         /*
    2147                 :            :          * No need to use i_size_read() here, the i_size cannot change under us
    2148                 :            :          * because we hold i_rwsem.
    2149                 :            :          *
    2150                 :            :          * But it's important to update i_size while still holding page lock:
    2151                 :            :          * page writeout could otherwise come in and zero beyond i_size.
    2152                 :            :          */
    2153                 :          3 :         if (pos + copied > inode->i_size) {
    2154                 :            :                 i_size_write(inode, pos + copied);
    2155                 :            :                 i_size_changed = true;
    2156                 :            :         }
    2157                 :            : 
    2158                 :          3 :         unlock_page(page);
    2159                 :          3 :         put_page(page);
    2160                 :            : 
    2161                 :          3 :         if (old_size < pos)
    2162                 :          0 :                 pagecache_isize_extended(inode, old_size, pos);
    2163                 :            :         /*
    2164                 :            :          * Don't mark the inode dirty under page lock. First, it unnecessarily
    2165                 :            :          * makes the holding time of page lock longer. Second, it forces lock
    2166                 :            :          * ordering of page lock and transaction start for journaling
    2167                 :            :          * filesystems.
    2168                 :            :          */
    2169                 :          3 :         if (i_size_changed)
    2170                 :            :                 mark_inode_dirty(inode);
    2171                 :          3 :         return copied;
    2172                 :            : }
    2173                 :            : EXPORT_SYMBOL(generic_write_end);
    2174                 :            : 
    2175                 :            : /*
    2176                 :            :  * block_is_partially_uptodate checks whether buffers within a page are
    2177                 :            :  * uptodate or not.
    2178                 :            :  *
    2179                 :            :  * Returns true if all buffers which correspond to a file portion
    2180                 :            :  * we want to read are uptodate.
    2181                 :            :  */
    2182                 :          0 : int block_is_partially_uptodate(struct page *page, unsigned long from,
    2183                 :            :                                         unsigned long count)
    2184                 :            : {
    2185                 :            :         unsigned block_start, block_end, blocksize;
    2186                 :            :         unsigned to;
    2187                 :            :         struct buffer_head *bh, *head;
    2188                 :            :         int ret = 1;
    2189                 :            : 
    2190                 :          0 :         if (!page_has_buffers(page))
    2191                 :            :                 return 0;
    2192                 :            : 
    2193                 :          0 :         head = page_buffers(page);
    2194                 :          0 :         blocksize = head->b_size;
    2195                 :          0 :         to = min_t(unsigned, PAGE_SIZE - from, count);
    2196                 :          0 :         to = from + to;
    2197                 :          0 :         if (from < blocksize && to > PAGE_SIZE - blocksize)
    2198                 :            :                 return 0;
    2199                 :            : 
    2200                 :            :         bh = head;
    2201                 :            :         block_start = 0;
    2202                 :            :         do {
    2203                 :          0 :                 block_end = block_start + blocksize;
    2204                 :          0 :                 if (block_end > from && block_start < to) {
    2205                 :          0 :                         if (!buffer_uptodate(bh)) {
    2206                 :            :                                 ret = 0;
    2207                 :            :                                 break;
    2208                 :            :                         }
    2209                 :          0 :                         if (block_end >= to)
    2210                 :            :                                 break;
    2211                 :            :                 }
    2212                 :            :                 block_start = block_end;
    2213                 :          0 :                 bh = bh->b_this_page;
    2214                 :          0 :         } while (bh != head);
    2215                 :            : 
    2216                 :          0 :         return ret;
    2217                 :            : }
    2218                 :            : EXPORT_SYMBOL(block_is_partially_uptodate);
    2219                 :            : 
    2220                 :            : /*
    2221                 :            :  * Generic "read page" function for block devices that have the normal
    2222                 :            :  * get_block functionality. This is most of the block device filesystems.
    2223                 :            :  * Reads the page asynchronously --- the unlock_buffer() and
    2224                 :            :  * set/clear_buffer_uptodate() functions propagate buffer state into the
    2225                 :            :  * page struct once IO has completed.
    2226                 :            :  */
    2227                 :          3 : int block_read_full_page(struct page *page, get_block_t *get_block)
    2228                 :            : {
    2229                 :          3 :         struct inode *inode = page->mapping->host;
    2230                 :            :         sector_t iblock, lblock;
    2231                 :            :         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
    2232                 :            :         unsigned int blocksize, bbits;
    2233                 :            :         int nr, i;
    2234                 :            :         int fully_mapped = 1;
    2235                 :            : 
    2236                 :          3 :         head = create_page_buffers(page, inode, 0);
    2237                 :          3 :         blocksize = head->b_size;
    2238                 :          3 :         bbits = block_size_bits(blocksize);
    2239                 :            : 
    2240                 :          3 :         iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
    2241                 :          3 :         lblock = (i_size_read(inode)+blocksize-1) >> bbits;
    2242                 :            :         bh = head;
    2243                 :            :         nr = 0;
    2244                 :            :         i = 0;
    2245                 :            : 
    2246                 :            :         do {
    2247                 :          3 :                 if (buffer_uptodate(bh))
    2248                 :          3 :                         continue;
    2249                 :            : 
    2250                 :          3 :                 if (!buffer_mapped(bh)) {
    2251                 :            :                         int err = 0;
    2252                 :            : 
    2253                 :            :                         fully_mapped = 0;
    2254                 :          3 :                         if (iblock < lblock) {
    2255                 :          3 :                                 WARN_ON(bh->b_size != blocksize);
    2256                 :          3 :                                 err = get_block(inode, iblock, bh, 0);
    2257                 :          3 :                                 if (err)
    2258                 :            :                                         SetPageError(page);
    2259                 :            :                         }
    2260                 :          3 :                         if (!buffer_mapped(bh)) {
    2261                 :          0 :                                 zero_user(page, i * blocksize, blocksize);
    2262                 :          0 :                                 if (!err)
    2263                 :            :                                         set_buffer_uptodate(bh);
    2264                 :          0 :                                 continue;
    2265                 :            :                         }
    2266                 :            :                         /*
    2267                 :            :                          * get_block() might have updated the buffer
    2268                 :            :                          * synchronously
    2269                 :            :                          */
    2270                 :          3 :                         if (buffer_uptodate(bh))
    2271                 :          0 :                                 continue;
    2272                 :            :                 }
    2273                 :          3 :                 arr[nr++] = bh;
    2274                 :          3 :         } while (i++, iblock++, (bh = bh->b_this_page) != head);
    2275                 :            : 
    2276                 :          3 :         if (fully_mapped)
    2277                 :            :                 SetPageMappedToDisk(page);
    2278                 :            : 
    2279                 :          3 :         if (!nr) {
    2280                 :            :                 /*
    2281                 :            :                  * All buffers are uptodate - we can set the page uptodate
    2282                 :            :                  * as well. But not if get_block() returned an error.
    2283                 :            :                  */
    2284                 :          3 :                 if (!PageError(page))
    2285                 :            :                         SetPageUptodate(page);
    2286                 :          3 :                 unlock_page(page);
    2287                 :          3 :                 return 0;
    2288                 :            :         }
    2289                 :            : 
    2290                 :            :         /* Stage two: lock the buffers */
    2291                 :          3 :         for (i = 0; i < nr; i++) {
    2292                 :          3 :                 bh = arr[i];
    2293                 :          3 :                 lock_buffer(bh);
    2294                 :          3 :                 mark_buffer_async_read(bh);
    2295                 :            :         }
    2296                 :            : 
    2297                 :            :         /*
    2298                 :            :          * Stage 3: start the IO.  Check for uptodateness
    2299                 :            :          * inside the buffer lock in case another process reading
    2300                 :            :          * the underlying blockdev brought it uptodate (the sct fix).
    2301                 :            :          */
    2302                 :          3 :         for (i = 0; i < nr; i++) {
    2303                 :          3 :                 bh = arr[i];
    2304                 :          3 :                 if (buffer_uptodate(bh))
    2305                 :          0 :                         end_buffer_async_read(bh, 1);
    2306                 :            :                 else
    2307                 :            :                         submit_bh(REQ_OP_READ, 0, bh);
    2308                 :            :         }
    2309                 :            :         return 0;
    2310                 :            : }
    2311                 :            : EXPORT_SYMBOL(block_read_full_page);
    2312                 :            : 
    2313                 :            : /* utility function for filesystems that need to do work on expanding
    2314                 :            :  * truncates.  Uses filesystem pagecache writes to allow the filesystem to
    2315                 :            :  * deal with the hole.  
    2316                 :            :  */
    2317                 :          0 : int generic_cont_expand_simple(struct inode *inode, loff_t size)
    2318                 :            : {
    2319                 :          0 :         struct address_space *mapping = inode->i_mapping;
    2320                 :            :         struct page *page;
    2321                 :            :         void *fsdata;
    2322                 :            :         int err;
    2323                 :            : 
    2324                 :          0 :         err = inode_newsize_ok(inode, size);
    2325                 :          0 :         if (err)
    2326                 :            :                 goto out;
    2327                 :            : 
    2328                 :          0 :         err = pagecache_write_begin(NULL, mapping, size, 0,
    2329                 :            :                                     AOP_FLAG_CONT_EXPAND, &page, &fsdata);
    2330                 :          0 :         if (err)
    2331                 :            :                 goto out;
    2332                 :            : 
    2333                 :          0 :         err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
    2334                 :          0 :         BUG_ON(err > 0);
    2335                 :            : 
    2336                 :            : out:
    2337                 :          0 :         return err;
    2338                 :            : }
    2339                 :            : EXPORT_SYMBOL(generic_cont_expand_simple);
    2340                 :            : 
    2341                 :          0 : static int cont_expand_zero(struct file *file, struct address_space *mapping,
    2342                 :            :                             loff_t pos, loff_t *bytes)
    2343                 :            : {
    2344                 :          0 :         struct inode *inode = mapping->host;
    2345                 :            :         unsigned int blocksize = i_blocksize(inode);
    2346                 :            :         struct page *page;
    2347                 :            :         void *fsdata;
    2348                 :            :         pgoff_t index, curidx;
    2349                 :            :         loff_t curpos;
    2350                 :            :         unsigned zerofrom, offset, len;
    2351                 :            :         int err = 0;
    2352                 :            : 
    2353                 :          0 :         index = pos >> PAGE_SHIFT;
    2354                 :          0 :         offset = pos & ~PAGE_MASK;
    2355                 :            : 
    2356                 :          0 :         while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
    2357                 :          0 :                 zerofrom = curpos & ~PAGE_MASK;
    2358                 :          0 :                 if (zerofrom & (blocksize-1)) {
    2359                 :          0 :                         *bytes |= (blocksize-1);
    2360                 :          0 :                         (*bytes)++;
    2361                 :            :                 }
    2362                 :          0 :                 len = PAGE_SIZE - zerofrom;
    2363                 :            : 
    2364                 :          0 :                 err = pagecache_write_begin(file, mapping, curpos, len, 0,
    2365                 :            :                                             &page, &fsdata);
    2366                 :          0 :                 if (err)
    2367                 :            :                         goto out;
    2368                 :          0 :                 zero_user(page, zerofrom, len);
    2369                 :          0 :                 err = pagecache_write_end(file, mapping, curpos, len, len,
    2370                 :            :                                                 page, fsdata);
    2371                 :          0 :                 if (err < 0)
    2372                 :            :                         goto out;
    2373                 :          0 :                 BUG_ON(err != len);
    2374                 :            :                 err = 0;
    2375                 :            : 
    2376                 :          0 :                 balance_dirty_pages_ratelimited(mapping);
    2377                 :            : 
    2378                 :          0 :                 if (fatal_signal_pending(current)) {
    2379                 :            :                         err = -EINTR;
    2380                 :            :                         goto out;
    2381                 :            :                 }
    2382                 :            :         }
    2383                 :            : 
    2384                 :            :         /* page covers the boundary, find the boundary offset */
    2385                 :          0 :         if (index == curidx) {
    2386                 :          0 :                 zerofrom = curpos & ~PAGE_MASK;
    2387                 :            :                 /* if we will expand the thing last block will be filled */
    2388                 :          0 :                 if (offset <= zerofrom) {
    2389                 :            :                         goto out;
    2390                 :            :                 }
    2391                 :          0 :                 if (zerofrom & (blocksize-1)) {
    2392                 :          0 :                         *bytes |= (blocksize-1);
    2393                 :          0 :                         (*bytes)++;
    2394                 :            :                 }
    2395                 :          0 :                 len = offset - zerofrom;
    2396                 :            : 
    2397                 :          0 :                 err = pagecache_write_begin(file, mapping, curpos, len, 0,
    2398                 :            :                                             &page, &fsdata);
    2399                 :          0 :                 if (err)
    2400                 :            :                         goto out;
    2401                 :          0 :                 zero_user(page, zerofrom, len);
    2402                 :          0 :                 err = pagecache_write_end(file, mapping, curpos, len, len,
    2403                 :            :                                                 page, fsdata);
    2404                 :          0 :                 if (err < 0)
    2405                 :            :                         goto out;
    2406                 :          0 :                 BUG_ON(err != len);
    2407                 :            :                 err = 0;
    2408                 :            :         }
    2409                 :            : out:
    2410                 :          0 :         return err;
    2411                 :            : }
    2412                 :            : 
    2413                 :            : /*
    2414                 :            :  * For moronic filesystems that do not allow holes in file.
    2415                 :            :  * We may have to extend the file.
    2416                 :            :  */
    2417                 :          0 : int cont_write_begin(struct file *file, struct address_space *mapping,
    2418                 :            :                         loff_t pos, unsigned len, unsigned flags,
    2419                 :            :                         struct page **pagep, void **fsdata,
    2420                 :            :                         get_block_t *get_block, loff_t *bytes)
    2421                 :            : {
    2422                 :          0 :         struct inode *inode = mapping->host;
    2423                 :            :         unsigned int blocksize = i_blocksize(inode);
    2424                 :            :         unsigned int zerofrom;
    2425                 :            :         int err;
    2426                 :            : 
    2427                 :          0 :         err = cont_expand_zero(file, mapping, pos, bytes);
    2428                 :          0 :         if (err)
    2429                 :            :                 return err;
    2430                 :            : 
    2431                 :          0 :         zerofrom = *bytes & ~PAGE_MASK;
    2432                 :          0 :         if (pos+len > *bytes && zerofrom & (blocksize-1)) {
    2433                 :          0 :                 *bytes |= (blocksize-1);
    2434                 :          0 :                 (*bytes)++;
    2435                 :            :         }
    2436                 :            : 
    2437                 :          0 :         return block_write_begin(mapping, pos, len, flags, pagep, get_block);
    2438                 :            : }
    2439                 :            : EXPORT_SYMBOL(cont_write_begin);
    2440                 :            : 
    2441                 :          0 : int block_commit_write(struct page *page, unsigned from, unsigned to)
    2442                 :            : {
    2443                 :          3 :         struct inode *inode = page->mapping->host;
    2444                 :          3 :         __block_commit_write(inode,page,from,to);
    2445                 :          0 :         return 0;
    2446                 :            : }
    2447                 :            : EXPORT_SYMBOL(block_commit_write);
    2448                 :            : 
    2449                 :            : /*
    2450                 :            :  * block_page_mkwrite() is not allowed to change the file size as it gets
    2451                 :            :  * called from a page fault handler when a page is first dirtied. Hence we must
    2452                 :            :  * be careful to check for EOF conditions here. We set the page up correctly
    2453                 :            :  * for a written page which means we get ENOSPC checking when writing into
    2454                 :            :  * holes and correct delalloc and unwritten extent mapping on filesystems that
    2455                 :            :  * support these features.
    2456                 :            :  *
    2457                 :            :  * We are not allowed to take the i_mutex here so we have to play games to
    2458                 :            :  * protect against truncate races as the page could now be beyond EOF.  Because
    2459                 :            :  * truncate writes the inode size before removing pages, once we have the
    2460                 :            :  * page lock we can determine safely if the page is beyond EOF. If it is not
    2461                 :            :  * beyond EOF, then the page is guaranteed safe against truncation until we
    2462                 :            :  * unlock the page.
    2463                 :            :  *
    2464                 :            :  * Direct callers of this function should protect against filesystem freezing
    2465                 :            :  * using sb_start_pagefault() - sb_end_pagefault() functions.
    2466                 :            :  */
    2467                 :          3 : int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
    2468                 :            :                          get_block_t get_block)
    2469                 :            : {
    2470                 :          3 :         struct page *page = vmf->page;
    2471                 :          3 :         struct inode *inode = file_inode(vma->vm_file);
    2472                 :            :         unsigned long end;
    2473                 :            :         loff_t size;
    2474                 :            :         int ret;
    2475                 :            : 
    2476                 :          3 :         lock_page(page);
    2477                 :            :         size = i_size_read(inode);
    2478                 :          3 :         if ((page->mapping != inode->i_mapping) ||
    2479                 :            :             (page_offset(page) > size)) {
    2480                 :            :                 /* We overload EFAULT to mean page got truncated */
    2481                 :            :                 ret = -EFAULT;
    2482                 :            :                 goto out_unlock;
    2483                 :            :         }
    2484                 :            : 
    2485                 :            :         /* page is wholly or partially inside EOF */
    2486                 :          3 :         if (((page->index + 1) << PAGE_SHIFT) > size)
    2487                 :          0 :                 end = size & ~PAGE_MASK;
    2488                 :            :         else
    2489                 :            :                 end = PAGE_SIZE;
    2490                 :            : 
    2491                 :            :         ret = __block_write_begin(page, 0, end, get_block);
    2492                 :          3 :         if (!ret)
    2493                 :            :                 ret = block_commit_write(page, 0, end);
    2494                 :            : 
    2495                 :          3 :         if (unlikely(ret < 0))
    2496                 :            :                 goto out_unlock;
    2497                 :          3 :         set_page_dirty(page);
    2498                 :          3 :         wait_for_stable_page(page);
    2499                 :          3 :         return 0;
    2500                 :            : out_unlock:
    2501                 :          0 :         unlock_page(page);
    2502                 :          0 :         return ret;
    2503                 :            : }
    2504                 :            : EXPORT_SYMBOL(block_page_mkwrite);
    2505                 :            : 
    2506                 :            : /*
    2507                 :            :  * nobh_write_begin()'s prereads are special: the buffer_heads are freed
    2508                 :            :  * immediately, while under the page lock.  So it needs a special end_io
    2509                 :            :  * handler which does not touch the bh after unlocking it.
    2510                 :            :  */
    2511                 :          0 : static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
    2512                 :            : {
    2513                 :          0 :         __end_buffer_read_notouch(bh, uptodate);
    2514                 :          0 : }
    2515                 :            : 
    2516                 :            : /*
    2517                 :            :  * Attach the singly-linked list of buffers created by nobh_write_begin, to
    2518                 :            :  * the page (converting it to circular linked list and taking care of page
    2519                 :            :  * dirty races).
    2520                 :            :  */
    2521                 :          0 : static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
    2522                 :            : {
    2523                 :            :         struct buffer_head *bh;
    2524                 :            : 
    2525                 :          0 :         BUG_ON(!PageLocked(page));
    2526                 :            : 
    2527                 :          0 :         spin_lock(&page->mapping->private_lock);
    2528                 :            :         bh = head;
    2529                 :            :         do {
    2530                 :          0 :                 if (PageDirty(page))
    2531                 :            :                         set_buffer_dirty(bh);
    2532                 :          0 :                 if (!bh->b_this_page)
    2533                 :          0 :                         bh->b_this_page = head;
    2534                 :          0 :                 bh = bh->b_this_page;
    2535                 :          0 :         } while (bh != head);
    2536                 :            :         attach_page_buffers(page, head);
    2537                 :          0 :         spin_unlock(&page->mapping->private_lock);
    2538                 :          0 : }
    2539                 :            : 
    2540                 :            : /*
    2541                 :            :  * On entry, the page is fully not uptodate.
    2542                 :            :  * On exit the page is fully uptodate in the areas outside (from,to)
    2543                 :            :  * The filesystem needs to handle block truncation upon failure.
    2544                 :            :  */
    2545                 :          0 : int nobh_write_begin(struct address_space *mapping,
    2546                 :            :                         loff_t pos, unsigned len, unsigned flags,
    2547                 :            :                         struct page **pagep, void **fsdata,
    2548                 :            :                         get_block_t *get_block)
    2549                 :            : {
    2550                 :          0 :         struct inode *inode = mapping->host;
    2551                 :          0 :         const unsigned blkbits = inode->i_blkbits;
    2552                 :          0 :         const unsigned blocksize = 1 << blkbits;
    2553                 :            :         struct buffer_head *head, *bh;
    2554                 :            :         struct page *page;
    2555                 :            :         pgoff_t index;
    2556                 :            :         unsigned from, to;
    2557                 :            :         unsigned block_in_page;
    2558                 :            :         unsigned block_start, block_end;
    2559                 :            :         sector_t block_in_file;
    2560                 :            :         int nr_reads = 0;
    2561                 :            :         int ret = 0;
    2562                 :            :         int is_mapped_to_disk = 1;
    2563                 :            : 
    2564                 :          0 :         index = pos >> PAGE_SHIFT;
    2565                 :          0 :         from = pos & (PAGE_SIZE - 1);
    2566                 :          0 :         to = from + len;
    2567                 :            : 
    2568                 :          0 :         page = grab_cache_page_write_begin(mapping, index, flags);
    2569                 :          0 :         if (!page)
    2570                 :            :                 return -ENOMEM;
    2571                 :          0 :         *pagep = page;
    2572                 :          0 :         *fsdata = NULL;
    2573                 :            : 
    2574                 :          0 :         if (page_has_buffers(page)) {
    2575                 :            :                 ret = __block_write_begin(page, pos, len, get_block);
    2576                 :          0 :                 if (unlikely(ret))
    2577                 :            :                         goto out_release;
    2578                 :            :                 return ret;
    2579                 :            :         }
    2580                 :            : 
    2581                 :          0 :         if (PageMappedToDisk(page))
    2582                 :            :                 return 0;
    2583                 :            : 
    2584                 :            :         /*
    2585                 :            :          * Allocate buffers so that we can keep track of state, and potentially
    2586                 :            :          * attach them to the page if an error occurs. In the common case of
    2587                 :            :          * no error, they will just be freed again without ever being attached
    2588                 :            :          * to the page (which is all OK, because we're under the page lock).
    2589                 :            :          *
    2590                 :            :          * Be careful: the buffer linked list is a NULL terminated one, rather
    2591                 :            :          * than the circular one we're used to.
    2592                 :            :          */
    2593                 :          0 :         head = alloc_page_buffers(page, blocksize, false);
    2594                 :          0 :         if (!head) {
    2595                 :            :                 ret = -ENOMEM;
    2596                 :            :                 goto out_release;
    2597                 :            :         }
    2598                 :            : 
    2599                 :          0 :         block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
    2600                 :            : 
    2601                 :            :         /*
    2602                 :            :          * We loop across all blocks in the page, whether or not they are
    2603                 :            :          * part of the affected region.  This is so we can discover if the
    2604                 :            :          * page is fully mapped-to-disk.
    2605                 :            :          */
    2606                 :          0 :         for (block_start = 0, block_in_page = 0, bh = head;
    2607                 :            :                   block_start < PAGE_SIZE;
    2608                 :          0 :                   block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
    2609                 :            :                 int create;
    2610                 :            : 
    2611                 :          0 :                 block_end = block_start + blocksize;
    2612                 :          0 :                 bh->b_state = 0;
    2613                 :            :                 create = 1;
    2614                 :          0 :                 if (block_start >= to)
    2615                 :            :                         create = 0;
    2616                 :          0 :                 ret = get_block(inode, block_in_file + block_in_page,
    2617                 :            :                                         bh, create);
    2618                 :          0 :                 if (ret)
    2619                 :            :                         goto failed;
    2620                 :          0 :                 if (!buffer_mapped(bh))
    2621                 :            :                         is_mapped_to_disk = 0;
    2622                 :          0 :                 if (buffer_new(bh))
    2623                 :          0 :                         clean_bdev_bh_alias(bh);
    2624                 :          0 :                 if (PageUptodate(page)) {
    2625                 :            :                         set_buffer_uptodate(bh);
    2626                 :          0 :                         continue;
    2627                 :            :                 }
    2628                 :          0 :                 if (buffer_new(bh) || !buffer_mapped(bh)) {
    2629                 :          0 :                         zero_user_segments(page, block_start, from,
    2630                 :            :                                                         to, block_end);
    2631                 :          0 :                         continue;
    2632                 :            :                 }
    2633                 :          0 :                 if (buffer_uptodate(bh))
    2634                 :          0 :                         continue;       /* reiserfs does this */
    2635                 :          0 :                 if (block_start < from || block_end > to) {
    2636                 :          0 :                         lock_buffer(bh);
    2637                 :          0 :                         bh->b_end_io = end_buffer_read_nobh;
    2638                 :            :                         submit_bh(REQ_OP_READ, 0, bh);
    2639                 :          0 :                         nr_reads++;
    2640                 :            :                 }
    2641                 :            :         }
    2642                 :            : 
    2643                 :          0 :         if (nr_reads) {
    2644                 :            :                 /*
    2645                 :            :                  * The page is locked, so these buffers are protected from
    2646                 :            :                  * any VM or truncate activity.  Hence we don't need to care
    2647                 :            :                  * for the buffer_head refcounts.
    2648                 :            :                  */
    2649                 :          0 :                 for (bh = head; bh; bh = bh->b_this_page) {
    2650                 :          0 :                         wait_on_buffer(bh);
    2651                 :          0 :                         if (!buffer_uptodate(bh))
    2652                 :            :                                 ret = -EIO;
    2653                 :            :                 }
    2654                 :          0 :                 if (ret)
    2655                 :            :                         goto failed;
    2656                 :            :         }
    2657                 :            : 
    2658                 :          0 :         if (is_mapped_to_disk)
    2659                 :            :                 SetPageMappedToDisk(page);
    2660                 :            : 
    2661                 :          0 :         *fsdata = head; /* to be released by nobh_write_end */
    2662                 :            : 
    2663                 :          0 :         return 0;
    2664                 :            : 
    2665                 :            : failed:
    2666                 :          0 :         BUG_ON(!ret);
    2667                 :            :         /*
    2668                 :            :          * Error recovery is a bit difficult. We need to zero out blocks that
    2669                 :            :          * were newly allocated, and dirty them to ensure they get written out.
    2670                 :            :          * Buffers need to be attached to the page at this point, otherwise
    2671                 :            :          * the handling of potential IO errors during writeout would be hard
    2672                 :            :          * (could try doing synchronous writeout, but what if that fails too?)
    2673                 :            :          */
    2674                 :          0 :         attach_nobh_buffers(page, head);
    2675                 :          0 :         page_zero_new_buffers(page, from, to);
    2676                 :            : 
    2677                 :            : out_release:
    2678                 :          0 :         unlock_page(page);
    2679                 :          0 :         put_page(page);
    2680                 :          0 :         *pagep = NULL;
    2681                 :            : 
    2682                 :          0 :         return ret;
    2683                 :            : }
    2684                 :            : EXPORT_SYMBOL(nobh_write_begin);
    2685                 :            : 
    2686                 :          0 : int nobh_write_end(struct file *file, struct address_space *mapping,
    2687                 :            :                         loff_t pos, unsigned len, unsigned copied,
    2688                 :            :                         struct page *page, void *fsdata)
    2689                 :            : {
    2690                 :          0 :         struct inode *inode = page->mapping->host;
    2691                 :            :         struct buffer_head *head = fsdata;
    2692                 :            :         struct buffer_head *bh;
    2693                 :          0 :         BUG_ON(fsdata != NULL && page_has_buffers(page));
    2694                 :            : 
    2695                 :          0 :         if (unlikely(copied < len) && head)
    2696                 :          0 :                 attach_nobh_buffers(page, head);
    2697                 :          0 :         if (page_has_buffers(page))
    2698                 :          0 :                 return generic_write_end(file, mapping, pos, len,
    2699                 :            :                                         copied, page, fsdata);
    2700                 :            : 
    2701                 :            :         SetPageUptodate(page);
    2702                 :          0 :         set_page_dirty(page);
    2703                 :          0 :         if (pos+copied > inode->i_size) {
    2704                 :            :                 i_size_write(inode, pos+copied);
    2705                 :            :                 mark_inode_dirty(inode);
    2706                 :            :         }
    2707                 :            : 
    2708                 :          0 :         unlock_page(page);
    2709                 :          0 :         put_page(page);
    2710                 :            : 
    2711                 :          0 :         while (head) {
    2712                 :            :                 bh = head;
    2713                 :          0 :                 head = head->b_this_page;
    2714                 :          0 :                 free_buffer_head(bh);
    2715                 :            :         }
    2716                 :            : 
    2717                 :          0 :         return copied;
    2718                 :            : }
    2719                 :            : EXPORT_SYMBOL(nobh_write_end);
    2720                 :            : 
    2721                 :            : /*
    2722                 :            :  * nobh_writepage() - based on block_full_write_page() except
    2723                 :            :  * that it tries to operate without attaching bufferheads to
    2724                 :            :  * the page.
    2725                 :            :  */
    2726                 :          0 : int nobh_writepage(struct page *page, get_block_t *get_block,
    2727                 :            :                         struct writeback_control *wbc)
    2728                 :            : {
    2729                 :          0 :         struct inode * const inode = page->mapping->host;
    2730                 :            :         loff_t i_size = i_size_read(inode);
    2731                 :          0 :         const pgoff_t end_index = i_size >> PAGE_SHIFT;
    2732                 :            :         unsigned offset;
    2733                 :            :         int ret;
    2734                 :            : 
    2735                 :            :         /* Is the page fully inside i_size? */
    2736                 :          0 :         if (page->index < end_index)
    2737                 :            :                 goto out;
    2738                 :            : 
    2739                 :            :         /* Is the page fully outside i_size? (truncate in progress) */
    2740                 :          0 :         offset = i_size & (PAGE_SIZE-1);
    2741                 :          0 :         if (page->index >= end_index+1 || !offset) {
    2742                 :            :                 /*
    2743                 :            :                  * The page may have dirty, unmapped buffers.  For example,
    2744                 :            :                  * they may have been added in ext3_writepage().  Make them
    2745                 :            :                  * freeable here, so the page does not leak.
    2746                 :            :                  */
    2747                 :            : #if 0
    2748                 :            :                 /* Not really sure about this  - do we need this ? */
    2749                 :            :                 if (page->mapping->a_ops->invalidatepage)
    2750                 :            :                         page->mapping->a_ops->invalidatepage(page, offset);
    2751                 :            : #endif
    2752                 :          0 :                 unlock_page(page);
    2753                 :          0 :                 return 0; /* don't care */
    2754                 :            :         }
    2755                 :            : 
    2756                 :            :         /*
    2757                 :            :          * The page straddles i_size.  It must be zeroed out on each and every
    2758                 :            :          * writepage invocation because it may be mmapped.  "A file is mapped
    2759                 :            :          * in multiples of the page size.  For a file that is not a multiple of
    2760                 :            :          * the  page size, the remaining memory is zeroed when mapped, and
    2761                 :            :          * writes to that region are not written out to the file."
    2762                 :            :          */
    2763                 :            :         zero_user_segment(page, offset, PAGE_SIZE);
    2764                 :            : out:
    2765                 :          0 :         ret = mpage_writepage(page, get_block, wbc);
    2766                 :          0 :         if (ret == -EAGAIN)
    2767                 :          0 :                 ret = __block_write_full_page(inode, page, get_block, wbc,
    2768                 :            :                                               end_buffer_async_write);
    2769                 :          0 :         return ret;
    2770                 :            : }
    2771                 :            : EXPORT_SYMBOL(nobh_writepage);
    2772                 :            : 
    2773                 :          0 : int nobh_truncate_page(struct address_space *mapping,
    2774                 :            :                         loff_t from, get_block_t *get_block)
    2775                 :            : {
    2776                 :          0 :         pgoff_t index = from >> PAGE_SHIFT;
    2777                 :          0 :         unsigned offset = from & (PAGE_SIZE-1);
    2778                 :            :         unsigned blocksize;
    2779                 :            :         sector_t iblock;
    2780                 :            :         unsigned length, pos;
    2781                 :          0 :         struct inode *inode = mapping->host;
    2782                 :            :         struct page *page;
    2783                 :            :         struct buffer_head map_bh;
    2784                 :            :         int err;
    2785                 :            : 
    2786                 :            :         blocksize = i_blocksize(inode);
    2787                 :          0 :         length = offset & (blocksize - 1);
    2788                 :            : 
    2789                 :            :         /* Block boundary? Nothing to do */
    2790                 :          0 :         if (!length)
    2791                 :            :                 return 0;
    2792                 :            : 
    2793                 :          0 :         length = blocksize - length;
    2794                 :          0 :         iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
    2795                 :            : 
    2796                 :            :         page = grab_cache_page(mapping, index);
    2797                 :            :         err = -ENOMEM;
    2798                 :          0 :         if (!page)
    2799                 :            :                 goto out;
    2800                 :            : 
    2801                 :          0 :         if (page_has_buffers(page)) {
    2802                 :            : has_buffers:
    2803                 :          0 :                 unlock_page(page);
    2804                 :          0 :                 put_page(page);
    2805                 :          0 :                 return block_truncate_page(mapping, from, get_block);
    2806                 :            :         }
    2807                 :            : 
    2808                 :            :         /* Find the buffer that contains "offset" */
    2809                 :            :         pos = blocksize;
    2810                 :          0 :         while (offset >= pos) {
    2811                 :          0 :                 iblock++;
    2812                 :          0 :                 pos += blocksize;
    2813                 :            :         }
    2814                 :            : 
    2815                 :          0 :         map_bh.b_size = blocksize;
    2816                 :          0 :         map_bh.b_state = 0;
    2817                 :          0 :         err = get_block(inode, iblock, &map_bh, 0);
    2818                 :          0 :         if (err)
    2819                 :            :                 goto unlock;
    2820                 :            :         /* unmapped? It's a hole - nothing to do */
    2821                 :          0 :         if (!buffer_mapped(&map_bh))
    2822                 :            :                 goto unlock;
    2823                 :            : 
    2824                 :            :         /* Ok, it's mapped. Make sure it's up-to-date */
    2825                 :          0 :         if (!PageUptodate(page)) {
    2826                 :          0 :                 err = mapping->a_ops->readpage(NULL, page);
    2827                 :          0 :                 if (err) {
    2828                 :          0 :                         put_page(page);
    2829                 :          0 :                         goto out;
    2830                 :            :                 }
    2831                 :          0 :                 lock_page(page);
    2832                 :          0 :                 if (!PageUptodate(page)) {
    2833                 :            :                         err = -EIO;
    2834                 :            :                         goto unlock;
    2835                 :            :                 }
    2836                 :          0 :                 if (page_has_buffers(page))
    2837                 :            :                         goto has_buffers;
    2838                 :            :         }
    2839                 :            :         zero_user(page, offset, length);
    2840                 :          0 :         set_page_dirty(page);
    2841                 :            :         err = 0;
    2842                 :            : 
    2843                 :            : unlock:
    2844                 :          0 :         unlock_page(page);
    2845                 :          0 :         put_page(page);
    2846                 :            : out:
    2847                 :          0 :         return err;
    2848                 :            : }
    2849                 :            : EXPORT_SYMBOL(nobh_truncate_page);
    2850                 :            : 
    2851                 :          0 : int block_truncate_page(struct address_space *mapping,
    2852                 :            :                         loff_t from, get_block_t *get_block)
    2853                 :            : {
    2854                 :          0 :         pgoff_t index = from >> PAGE_SHIFT;
    2855                 :          0 :         unsigned offset = from & (PAGE_SIZE-1);
    2856                 :            :         unsigned blocksize;
    2857                 :            :         sector_t iblock;
    2858                 :            :         unsigned length, pos;
    2859                 :          0 :         struct inode *inode = mapping->host;
    2860                 :            :         struct page *page;
    2861                 :            :         struct buffer_head *bh;
    2862                 :            :         int err;
    2863                 :            : 
    2864                 :            :         blocksize = i_blocksize(inode);
    2865                 :          0 :         length = offset & (blocksize - 1);
    2866                 :            : 
    2867                 :            :         /* Block boundary? Nothing to do */
    2868                 :          0 :         if (!length)
    2869                 :            :                 return 0;
    2870                 :            : 
    2871                 :          0 :         length = blocksize - length;
    2872                 :          0 :         iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
    2873                 :            :         
    2874                 :            :         page = grab_cache_page(mapping, index);
    2875                 :            :         err = -ENOMEM;
    2876                 :          0 :         if (!page)
    2877                 :            :                 goto out;
    2878                 :            : 
    2879                 :          0 :         if (!page_has_buffers(page))
    2880                 :          0 :                 create_empty_buffers(page, blocksize, 0);
    2881                 :            : 
    2882                 :            :         /* Find the buffer that contains "offset" */
    2883                 :          0 :         bh = page_buffers(page);
    2884                 :            :         pos = blocksize;
    2885                 :          0 :         while (offset >= pos) {
    2886                 :          0 :                 bh = bh->b_this_page;
    2887                 :          0 :                 iblock++;
    2888                 :          0 :                 pos += blocksize;
    2889                 :            :         }
    2890                 :            : 
    2891                 :            :         err = 0;
    2892                 :          0 :         if (!buffer_mapped(bh)) {
    2893                 :          0 :                 WARN_ON(bh->b_size != blocksize);
    2894                 :          0 :                 err = get_block(inode, iblock, bh, 0);
    2895                 :          0 :                 if (err)
    2896                 :            :                         goto unlock;
    2897                 :            :                 /* unmapped? It's a hole - nothing to do */
    2898                 :          0 :                 if (!buffer_mapped(bh))
    2899                 :            :                         goto unlock;
    2900                 :            :         }
    2901                 :            : 
    2902                 :            :         /* Ok, it's mapped. Make sure it's up-to-date */
    2903                 :          0 :         if (PageUptodate(page))
    2904                 :          0 :                 set_buffer_uptodate(bh);
    2905                 :            : 
    2906                 :          0 :         if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
    2907                 :            :                 err = -EIO;
    2908                 :          0 :                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
    2909                 :          0 :                 wait_on_buffer(bh);
    2910                 :            :                 /* Uhhuh. Read error. Complain and punt. */
    2911                 :          0 :                 if (!buffer_uptodate(bh))
    2912                 :            :                         goto unlock;
    2913                 :            :         }
    2914                 :            : 
    2915                 :            :         zero_user(page, offset, length);
    2916                 :          0 :         mark_buffer_dirty(bh);
    2917                 :            :         err = 0;
    2918                 :            : 
    2919                 :            : unlock:
    2920                 :          0 :         unlock_page(page);
    2921                 :          0 :         put_page(page);
    2922                 :            : out:
    2923                 :          0 :         return err;
    2924                 :            : }
    2925                 :            : EXPORT_SYMBOL(block_truncate_page);
    2926                 :            : 
    2927                 :            : /*
    2928                 :            :  * The generic ->writepage function for buffer-backed address_spaces
    2929                 :            :  */
    2930                 :          3 : int block_write_full_page(struct page *page, get_block_t *get_block,
    2931                 :            :                         struct writeback_control *wbc)
    2932                 :            : {
    2933                 :          3 :         struct inode * const inode = page->mapping->host;
    2934                 :            :         loff_t i_size = i_size_read(inode);
    2935                 :          3 :         const pgoff_t end_index = i_size >> PAGE_SHIFT;
    2936                 :            :         unsigned offset;
    2937                 :            : 
    2938                 :            :         /* Is the page fully inside i_size? */
    2939                 :          3 :         if (page->index < end_index)
    2940                 :          3 :                 return __block_write_full_page(inode, page, get_block, wbc,
    2941                 :            :                                                end_buffer_async_write);
    2942                 :            : 
    2943                 :            :         /* Is the page fully outside i_size? (truncate in progress) */
    2944                 :          0 :         offset = i_size & (PAGE_SIZE-1);
    2945                 :          0 :         if (page->index >= end_index+1 || !offset) {
    2946                 :            :                 /*
    2947                 :            :                  * The page may have dirty, unmapped buffers.  For example,
    2948                 :            :                  * they may have been added in ext3_writepage().  Make them
    2949                 :            :                  * freeable here, so the page does not leak.
    2950                 :            :                  */
    2951                 :          0 :                 do_invalidatepage(page, 0, PAGE_SIZE);
    2952                 :          0 :                 unlock_page(page);
    2953                 :          0 :                 return 0; /* don't care */
    2954                 :            :         }
    2955                 :            : 
    2956                 :            :         /*
    2957                 :            :          * The page straddles i_size.  It must be zeroed out on each and every
    2958                 :            :          * writepage invocation because it may be mmapped.  "A file is mapped
    2959                 :            :          * in multiples of the page size.  For a file that is not a multiple of
    2960                 :            :          * the  page size, the remaining memory is zeroed when mapped, and
    2961                 :            :          * writes to that region are not written out to the file."
    2962                 :            :          */
    2963                 :            :         zero_user_segment(page, offset, PAGE_SIZE);
    2964                 :          0 :         return __block_write_full_page(inode, page, get_block, wbc,
    2965                 :            :                                                         end_buffer_async_write);
    2966                 :            : }
    2967                 :            : EXPORT_SYMBOL(block_write_full_page);
    2968                 :            : 
    2969                 :          3 : sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
    2970                 :            :                             get_block_t *get_block)
    2971                 :            : {
    2972                 :          3 :         struct inode *inode = mapping->host;
    2973                 :          3 :         struct buffer_head tmp = {
    2974                 :            :                 .b_size = i_blocksize(inode),
    2975                 :            :         };
    2976                 :            : 
    2977                 :          3 :         get_block(inode, block, &tmp, 0);
    2978                 :          3 :         return tmp.b_blocknr;
    2979                 :            : }
    2980                 :            : EXPORT_SYMBOL(generic_block_bmap);
    2981                 :            : 
    2982                 :          3 : static void end_bio_bh_io_sync(struct bio *bio)
    2983                 :            : {
    2984                 :          3 :         struct buffer_head *bh = bio->bi_private;
    2985                 :            : 
    2986                 :          3 :         if (unlikely(bio_flagged(bio, BIO_QUIET)))
    2987                 :          0 :                 set_bit(BH_Quiet, &bh->b_state);
    2988                 :            : 
    2989                 :          3 :         bh->b_end_io(bh, !bio->bi_status);
    2990                 :          3 :         bio_put(bio);
    2991                 :          3 : }
    2992                 :            : 
    2993                 :            : /*
    2994                 :            :  * This allows us to do IO even on the odd last sectors
    2995                 :            :  * of a device, even if the block size is some multiple
    2996                 :            :  * of the physical sector size.
    2997                 :            :  *
    2998                 :            :  * We'll just truncate the bio to the size of the device,
    2999                 :            :  * and clear the end of the buffer head manually.
    3000                 :            :  *
    3001                 :            :  * Truly out-of-range accesses will turn into actual IO
    3002                 :            :  * errors, this only handles the "we need to be able to
    3003                 :            :  * do IO at the final sector" case.
    3004                 :            :  */
    3005                 :          3 : void guard_bio_eod(struct bio *bio)
    3006                 :            : {
    3007                 :            :         sector_t maxsector;
    3008                 :            :         struct hd_struct *part;
    3009                 :            : 
    3010                 :            :         rcu_read_lock();
    3011                 :          3 :         part = __disk_get_part(bio->bi_disk, bio->bi_partno);
    3012                 :          3 :         if (part)
    3013                 :            :                 maxsector = part_nr_sects_read(part);
    3014                 :            :         else
    3015                 :          0 :                 maxsector = get_capacity(bio->bi_disk);
    3016                 :            :         rcu_read_unlock();
    3017                 :            : 
    3018                 :          3 :         if (!maxsector)
    3019                 :            :                 return;
    3020                 :            : 
    3021                 :            :         /*
    3022                 :            :          * If the *whole* IO is past the end of the device,
    3023                 :            :          * let it through, and the IO layer will turn it into
    3024                 :            :          * an EIO.
    3025                 :            :          */
    3026                 :          3 :         if (unlikely(bio->bi_iter.bi_sector >= maxsector))
    3027                 :            :                 return;
    3028                 :            : 
    3029                 :          3 :         maxsector -= bio->bi_iter.bi_sector;
    3030                 :          3 :         if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
    3031                 :            :                 return;
    3032                 :            : 
    3033                 :          0 :         bio_truncate(bio, maxsector << 9);
    3034                 :            : }
    3035                 :            : 
    3036                 :          3 : static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
    3037                 :            :                          enum rw_hint write_hint, struct writeback_control *wbc)
    3038                 :            : {
    3039                 :            :         struct bio *bio;
    3040                 :            : 
    3041                 :          3 :         BUG_ON(!buffer_locked(bh));
    3042                 :          3 :         BUG_ON(!buffer_mapped(bh));
    3043                 :          3 :         BUG_ON(!bh->b_end_io);
    3044                 :          3 :         BUG_ON(buffer_delay(bh));
    3045                 :          3 :         BUG_ON(buffer_unwritten(bh));
    3046                 :            : 
    3047                 :            :         /*
    3048                 :            :          * Only clear out a write error when rewriting
    3049                 :            :          */
    3050                 :          3 :         if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
    3051                 :            :                 clear_buffer_write_io_error(bh);
    3052                 :            : 
    3053                 :            :         /*
    3054                 :            :          * from here on down, it's all bio -- do the initial mapping,
    3055                 :            :          * submit_bio -> generic_make_request may further map this bio around
    3056                 :            :          */
    3057                 :            :         bio = bio_alloc(GFP_NOIO, 1);
    3058                 :            : 
    3059                 :          3 :         bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
    3060                 :          3 :         bio_set_dev(bio, bh->b_bdev);
    3061                 :          3 :         bio->bi_write_hint = write_hint;
    3062                 :            : 
    3063                 :          3 :         bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
    3064                 :          3 :         BUG_ON(bio->bi_iter.bi_size != bh->b_size);
    3065                 :            : 
    3066                 :          3 :         bio->bi_end_io = end_bio_bh_io_sync;
    3067                 :          3 :         bio->bi_private = bh;
    3068                 :            : 
    3069                 :          3 :         if (buffer_meta(bh))
    3070                 :          3 :                 op_flags |= REQ_META;
    3071                 :          3 :         if (buffer_prio(bh))
    3072                 :          3 :                 op_flags |= REQ_PRIO;
    3073                 :          3 :         bio_set_op_attrs(bio, op, op_flags);
    3074                 :            : 
    3075                 :            :         /* Take care of bh's that straddle the end of the device */
    3076                 :          3 :         guard_bio_eod(bio);
    3077                 :            : 
    3078                 :          3 :         if (wbc) {
    3079                 :            :                 wbc_init_bio(wbc, bio);
    3080                 :          3 :                 wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
    3081                 :            :         }
    3082                 :            : 
    3083                 :          3 :         submit_bio(bio);
    3084                 :          3 :         return 0;
    3085                 :            : }
    3086                 :            : 
    3087                 :          3 : int submit_bh(int op, int op_flags, struct buffer_head *bh)
    3088                 :            : {
    3089                 :          3 :         return submit_bh_wbc(op, op_flags, bh, 0, NULL);
    3090                 :            : }
    3091                 :            : EXPORT_SYMBOL(submit_bh);
    3092                 :            : 
    3093                 :            : /**
    3094                 :            :  * ll_rw_block: low-level access to block devices (DEPRECATED)
    3095                 :            :  * @op: whether to %READ or %WRITE
    3096                 :            :  * @op_flags: req_flag_bits
    3097                 :            :  * @nr: number of &struct buffer_heads in the array
    3098                 :            :  * @bhs: array of pointers to &struct buffer_head
    3099                 :            :  *
    3100                 :            :  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
    3101                 :            :  * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
    3102                 :            :  * @op_flags contains flags modifying the detailed I/O behavior, most notably
    3103                 :            :  * %REQ_RAHEAD.
    3104                 :            :  *
    3105                 :            :  * This function drops any buffer that it cannot get a lock on (with the
    3106                 :            :  * BH_Lock state bit), any buffer that appears to be clean when doing a write
    3107                 :            :  * request, and any buffer that appears to be up-to-date when doing read
    3108                 :            :  * request.  Further it marks as clean buffers that are processed for
    3109                 :            :  * writing (the buffer cache won't assume that they are actually clean
    3110                 :            :  * until the buffer gets unlocked).
    3111                 :            :  *
    3112                 :            :  * ll_rw_block sets b_end_io to simple completion handler that marks
    3113                 :            :  * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
    3114                 :            :  * any waiters. 
    3115                 :            :  *
    3116                 :            :  * All of the buffers must be for the same device, and must also be a
    3117                 :            :  * multiple of the current approved size for the device.
    3118                 :            :  */
    3119                 :          3 : void ll_rw_block(int op, int op_flags,  int nr, struct buffer_head *bhs[])
    3120                 :            : {
    3121                 :            :         int i;
    3122                 :            : 
    3123                 :          3 :         for (i = 0; i < nr; i++) {
    3124                 :          3 :                 struct buffer_head *bh = bhs[i];
    3125                 :            : 
    3126                 :          3 :                 if (!trylock_buffer(bh))
    3127                 :          3 :                         continue;
    3128                 :          3 :                 if (op == WRITE) {
    3129                 :          0 :                         if (test_clear_buffer_dirty(bh)) {
    3130                 :          0 :                                 bh->b_end_io = end_buffer_write_sync;
    3131                 :            :                                 get_bh(bh);
    3132                 :            :                                 submit_bh(op, op_flags, bh);
    3133                 :          0 :                                 continue;
    3134                 :            :                         }
    3135                 :            :                 } else {
    3136                 :          3 :                         if (!buffer_uptodate(bh)) {
    3137                 :          3 :                                 bh->b_end_io = end_buffer_read_sync;
    3138                 :            :                                 get_bh(bh);
    3139                 :            :                                 submit_bh(op, op_flags, bh);
    3140                 :          3 :                                 continue;
    3141                 :            :                         }
    3142                 :            :                 }
    3143                 :          3 :                 unlock_buffer(bh);
    3144                 :            :         }
    3145                 :          3 : }
    3146                 :            : EXPORT_SYMBOL(ll_rw_block);
    3147                 :            : 
    3148                 :          3 : void write_dirty_buffer(struct buffer_head *bh, int op_flags)
    3149                 :            : {
    3150                 :          3 :         lock_buffer(bh);
    3151                 :          3 :         if (!test_clear_buffer_dirty(bh)) {
    3152                 :          0 :                 unlock_buffer(bh);
    3153                 :          3 :                 return;
    3154                 :            :         }
    3155                 :          3 :         bh->b_end_io = end_buffer_write_sync;
    3156                 :            :         get_bh(bh);
    3157                 :            :         submit_bh(REQ_OP_WRITE, op_flags, bh);
    3158                 :            : }
    3159                 :            : EXPORT_SYMBOL(write_dirty_buffer);
    3160                 :            : 
    3161                 :            : /*
    3162                 :            :  * For a data-integrity writeout, we need to wait upon any in-progress I/O
    3163                 :            :  * and then start new I/O and then wait upon it.  The caller must have a ref on
    3164                 :            :  * the buffer_head.
    3165                 :            :  */
    3166                 :          3 : int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
    3167                 :            : {
    3168                 :            :         int ret = 0;
    3169                 :            : 
    3170                 :          3 :         WARN_ON(atomic_read(&bh->b_count) < 1);
    3171                 :          3 :         lock_buffer(bh);
    3172                 :          3 :         if (test_clear_buffer_dirty(bh)) {
    3173                 :            :                 get_bh(bh);
    3174                 :          3 :                 bh->b_end_io = end_buffer_write_sync;
    3175                 :            :                 ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
    3176                 :          3 :                 wait_on_buffer(bh);
    3177                 :          3 :                 if (!ret && !buffer_uptodate(bh))
    3178                 :            :                         ret = -EIO;
    3179                 :            :         } else {
    3180                 :          0 :                 unlock_buffer(bh);
    3181                 :            :         }
    3182                 :          3 :         return ret;
    3183                 :            : }
    3184                 :            : EXPORT_SYMBOL(__sync_dirty_buffer);
    3185                 :            : 
    3186                 :          3 : int sync_dirty_buffer(struct buffer_head *bh)
    3187                 :            : {
    3188                 :          3 :         return __sync_dirty_buffer(bh, REQ_SYNC);
    3189                 :            : }
    3190                 :            : EXPORT_SYMBOL(sync_dirty_buffer);
    3191                 :            : 
    3192                 :            : /*
    3193                 :            :  * try_to_free_buffers() checks if all the buffers on this particular page
    3194                 :            :  * are unused, and releases them if so.
    3195                 :            :  *
    3196                 :            :  * Exclusion against try_to_free_buffers may be obtained by either
    3197                 :            :  * locking the page or by holding its mapping's private_lock.
    3198                 :            :  *
    3199                 :            :  * If the page is dirty but all the buffers are clean then we need to
    3200                 :            :  * be sure to mark the page clean as well.  This is because the page
    3201                 :            :  * may be against a block device, and a later reattachment of buffers
    3202                 :            :  * to a dirty page will set *all* buffers dirty.  Which would corrupt
    3203                 :            :  * filesystem data on the same device.
    3204                 :            :  *
    3205                 :            :  * The same applies to regular filesystem pages: if all the buffers are
    3206                 :            :  * clean then we set the page clean and proceed.  To do that, we require
    3207                 :            :  * total exclusion from __set_page_dirty_buffers().  That is obtained with
    3208                 :            :  * private_lock.
    3209                 :            :  *
    3210                 :            :  * try_to_free_buffers() is non-blocking.
    3211                 :            :  */
    3212                 :            : static inline int buffer_busy(struct buffer_head *bh)
    3213                 :            : {
    3214                 :          3 :         return atomic_read(&bh->b_count) |
    3215                 :          3 :                 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
    3216                 :            : }
    3217                 :            : 
    3218                 :            : static int
    3219                 :          3 : drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
    3220                 :            : {
    3221                 :          3 :         struct buffer_head *head = page_buffers(page);
    3222                 :            :         struct buffer_head *bh;
    3223                 :            : 
    3224                 :            :         bh = head;
    3225                 :            :         do {
    3226                 :          3 :                 if (buffer_busy(bh))
    3227                 :            :                         goto failed;
    3228                 :          3 :                 bh = bh->b_this_page;
    3229                 :          3 :         } while (bh != head);
    3230                 :            : 
    3231                 :            :         do {
    3232                 :          3 :                 struct buffer_head *next = bh->b_this_page;
    3233                 :            : 
    3234                 :          3 :                 if (bh->b_assoc_map)
    3235                 :          0 :                         __remove_assoc_queue(bh);
    3236                 :            :                 bh = next;
    3237                 :          3 :         } while (bh != head);
    3238                 :          3 :         *buffers_to_free = head;
    3239                 :          3 :         __clear_page_buffers(page);
    3240                 :          3 :         return 1;
    3241                 :            : failed:
    3242                 :            :         return 0;
    3243                 :            : }
    3244                 :            : 
    3245                 :          3 : int try_to_free_buffers(struct page *page)
    3246                 :            : {
    3247                 :          3 :         struct address_space * const mapping = page->mapping;
    3248                 :          3 :         struct buffer_head *buffers_to_free = NULL;
    3249                 :            :         int ret = 0;
    3250                 :            : 
    3251                 :          3 :         BUG_ON(!PageLocked(page));
    3252                 :          3 :         if (PageWriteback(page))
    3253                 :            :                 return 0;
    3254                 :            : 
    3255                 :          3 :         if (mapping == NULL) {          /* can this still happen? */
    3256                 :          0 :                 ret = drop_buffers(page, &buffers_to_free);
    3257                 :          0 :                 goto out;
    3258                 :            :         }
    3259                 :            : 
    3260                 :            :         spin_lock(&mapping->private_lock);
    3261                 :          3 :         ret = drop_buffers(page, &buffers_to_free);
    3262                 :            : 
    3263                 :            :         /*
    3264                 :            :          * If the filesystem writes its buffers by hand (eg ext3)
    3265                 :            :          * then we can have clean buffers against a dirty page.  We
    3266                 :            :          * clean the page here; otherwise the VM will never notice
    3267                 :            :          * that the filesystem did any IO at all.
    3268                 :            :          *
    3269                 :            :          * Also, during truncate, discard_buffer will have marked all
    3270                 :            :          * the page's buffers clean.  We discover that here and clean
    3271                 :            :          * the page also.
    3272                 :            :          *
    3273                 :            :          * private_lock must be held over this entire operation in order
    3274                 :            :          * to synchronise against __set_page_dirty_buffers and prevent the
    3275                 :            :          * dirty bit from being lost.
    3276                 :            :          */
    3277                 :          3 :         if (ret)
    3278                 :          3 :                 cancel_dirty_page(page);
    3279                 :            :         spin_unlock(&mapping->private_lock);
    3280                 :            : out:
    3281                 :          3 :         if (buffers_to_free) {
    3282                 :            :                 struct buffer_head *bh = buffers_to_free;
    3283                 :            : 
    3284                 :            :                 do {
    3285                 :          3 :                         struct buffer_head *next = bh->b_this_page;
    3286                 :          3 :                         free_buffer_head(bh);
    3287                 :            :                         bh = next;
    3288                 :          3 :                 } while (bh != buffers_to_free);
    3289                 :            :         }
    3290                 :          3 :         return ret;
    3291                 :            : }
    3292                 :            : EXPORT_SYMBOL(try_to_free_buffers);
    3293                 :            : 
    3294                 :            : /*
    3295                 :            :  * There are no bdflush tunables left.  But distributions are
    3296                 :            :  * still running obsolete flush daemons, so we terminate them here.
    3297                 :            :  *
    3298                 :            :  * Use of bdflush() is deprecated and will be removed in a future kernel.
    3299                 :            :  * The `flush-X' kernel threads fully replace bdflush daemons and this call.
    3300                 :            :  */
    3301                 :          0 : SYSCALL_DEFINE2(bdflush, int, func, long, data)
    3302                 :            : {
    3303                 :            :         static int msg_count;
    3304                 :            : 
    3305                 :          0 :         if (!capable(CAP_SYS_ADMIN))
    3306                 :            :                 return -EPERM;
    3307                 :            : 
    3308                 :          0 :         if (msg_count < 5) {
    3309                 :          0 :                 msg_count++;
    3310                 :          0 :                 printk(KERN_INFO
    3311                 :            :                         "warning: process `%s' used the obsolete bdflush"
    3312                 :          0 :                         " system call\n", current->comm);
    3313                 :          0 :                 printk(KERN_INFO "Fix your initscripts?\n");
    3314                 :            :         }
    3315                 :            : 
    3316                 :          0 :         if (func == 1)
    3317                 :          0 :                 do_exit(0);
    3318                 :            :         return 0;
    3319                 :            : }
    3320                 :            : 
    3321                 :            : /*
    3322                 :            :  * Buffer-head allocation
    3323                 :            :  */
    3324                 :            : static struct kmem_cache *bh_cachep __read_mostly;
    3325                 :            : 
    3326                 :            : /*
    3327                 :            :  * Once the number of bh's in the machine exceeds this level, we start
    3328                 :            :  * stripping them in writeback.
    3329                 :            :  */
    3330                 :            : static unsigned long max_buffer_heads;
    3331                 :            : 
    3332                 :            : int buffer_heads_over_limit;
    3333                 :            : 
    3334                 :            : struct bh_accounting {
    3335                 :            :         int nr;                 /* Number of live bh's */
    3336                 :            :         int ratelimit;          /* Limit cacheline bouncing */
    3337                 :            : };
    3338                 :            : 
    3339                 :            : static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
    3340                 :            : 
    3341                 :          3 : static void recalc_bh_state(void)
    3342                 :            : {
    3343                 :            :         int i;
    3344                 :            :         int tot = 0;
    3345                 :            : 
    3346                 :          3 :         if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
    3347                 :          3 :                 return;
    3348                 :          3 :         __this_cpu_write(bh_accounting.ratelimit, 0);
    3349                 :          3 :         for_each_online_cpu(i)
    3350                 :          3 :                 tot += per_cpu(bh_accounting, i).nr;
    3351                 :          3 :         buffer_heads_over_limit = (tot > max_buffer_heads);
    3352                 :            : }
    3353                 :            : 
    3354                 :          3 : struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
    3355                 :            : {
    3356                 :          3 :         struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
    3357                 :          3 :         if (ret) {
    3358                 :          3 :                 INIT_LIST_HEAD(&ret->b_assoc_buffers);
    3359                 :          3 :                 preempt_disable();
    3360                 :          3 :                 __this_cpu_inc(bh_accounting.nr);
    3361                 :          3 :                 recalc_bh_state();
    3362                 :          3 :                 preempt_enable();
    3363                 :            :         }
    3364                 :          3 :         return ret;
    3365                 :            : }
    3366                 :            : EXPORT_SYMBOL(alloc_buffer_head);
    3367                 :            : 
    3368                 :          3 : void free_buffer_head(struct buffer_head *bh)
    3369                 :            : {
    3370                 :          3 :         BUG_ON(!list_empty(&bh->b_assoc_buffers));
    3371                 :          3 :         kmem_cache_free(bh_cachep, bh);
    3372                 :          3 :         preempt_disable();
    3373                 :          3 :         __this_cpu_dec(bh_accounting.nr);
    3374                 :          3 :         recalc_bh_state();
    3375                 :          3 :         preempt_enable();
    3376                 :          3 : }
    3377                 :            : EXPORT_SYMBOL(free_buffer_head);
    3378                 :            : 
    3379                 :          0 : static int buffer_exit_cpu_dead(unsigned int cpu)
    3380                 :            : {
    3381                 :            :         int i;
    3382                 :          0 :         struct bh_lru *b = &per_cpu(bh_lrus, cpu);
    3383                 :            : 
    3384                 :          0 :         for (i = 0; i < BH_LRU_SIZE; i++) {
    3385                 :          0 :                 brelse(b->bhs[i]);
    3386                 :          0 :                 b->bhs[i] = NULL;
    3387                 :            :         }
    3388                 :          0 :         this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
    3389                 :          0 :         per_cpu(bh_accounting, cpu).nr = 0;
    3390                 :          0 :         return 0;
    3391                 :            : }
    3392                 :            : 
    3393                 :            : /**
    3394                 :            :  * bh_uptodate_or_lock - Test whether the buffer is uptodate
    3395                 :            :  * @bh: struct buffer_head
    3396                 :            :  *
    3397                 :            :  * Return true if the buffer is up-to-date and false,
    3398                 :            :  * with the buffer locked, if not.
    3399                 :            :  */
    3400                 :          3 : int bh_uptodate_or_lock(struct buffer_head *bh)
    3401                 :            : {
    3402                 :          3 :         if (!buffer_uptodate(bh)) {
    3403                 :          3 :                 lock_buffer(bh);
    3404                 :          3 :                 if (!buffer_uptodate(bh))
    3405                 :            :                         return 0;
    3406                 :          3 :                 unlock_buffer(bh);
    3407                 :            :         }
    3408                 :            :         return 1;
    3409                 :            : }
    3410                 :            : EXPORT_SYMBOL(bh_uptodate_or_lock);
    3411                 :            : 
    3412                 :            : /**
    3413                 :            :  * bh_submit_read - Submit a locked buffer for reading
    3414                 :            :  * @bh: struct buffer_head
    3415                 :            :  *
    3416                 :            :  * Returns zero on success and -EIO on error.
    3417                 :            :  */
    3418                 :          3 : int bh_submit_read(struct buffer_head *bh)
    3419                 :            : {
    3420                 :          3 :         BUG_ON(!buffer_locked(bh));
    3421                 :            : 
    3422                 :          3 :         if (buffer_uptodate(bh)) {
    3423                 :          0 :                 unlock_buffer(bh);
    3424                 :          0 :                 return 0;
    3425                 :            :         }
    3426                 :            : 
    3427                 :            :         get_bh(bh);
    3428                 :          3 :         bh->b_end_io = end_buffer_read_sync;
    3429                 :            :         submit_bh(REQ_OP_READ, 0, bh);
    3430                 :          3 :         wait_on_buffer(bh);
    3431                 :          3 :         if (buffer_uptodate(bh))
    3432                 :            :                 return 0;
    3433                 :          0 :         return -EIO;
    3434                 :            : }
    3435                 :            : EXPORT_SYMBOL(bh_submit_read);
    3436                 :            : 
    3437                 :          3 : void __init buffer_init(void)
    3438                 :            : {
    3439                 :            :         unsigned long nrpages;
    3440                 :            :         int ret;
    3441                 :            : 
    3442                 :          3 :         bh_cachep = kmem_cache_create("buffer_head",
    3443                 :            :                         sizeof(struct buffer_head), 0,
    3444                 :            :                                 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
    3445                 :            :                                 SLAB_MEM_SPREAD),
    3446                 :            :                                 NULL);
    3447                 :            : 
    3448                 :            :         /*
    3449                 :            :          * Limit the bh occupancy to 10% of ZONE_NORMAL
    3450                 :            :          */
    3451                 :          3 :         nrpages = (nr_free_buffer_pages() * 10) / 100;
    3452                 :          3 :         max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
    3453                 :            :         ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
    3454                 :            :                                         NULL, buffer_exit_cpu_dead);
    3455                 :          3 :         WARN_ON(ret < 0);
    3456                 :          3 : }
    

Generated by: LCOV version 1.14