LCOV - Real - fs/block

LCOV - code coverage report

Current view:	top level - fs - block_dev.c (source / functions)		Hit	Total	Coverage
Test:	Real	Lines:	413	784	52.7 %
Date:	2020-10-17 15:46:16	Functions:	0	78	0.0 %
Legend:	Neither, QEMU, Real, Both	Branches:	0	0	-

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: GPL-2.0-only
       2                 :            : /*
       3                 :            :  *  linux/fs/block_dev.c
       4                 :            :  *
       5                 :            :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6                 :            :  *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
       7                 :            :  */
       8                 :            : 
       9                 :            : #include <linux/init.h>
      10                 :            : #include <linux/mm.h>
      11                 :            : #include <linux/fcntl.h>
      12                 :            : #include <linux/slab.h>
      13                 :            : #include <linux/kmod.h>
      14                 :            : #include <linux/major.h>
      15                 :            : #include <linux/device_cgroup.h>
      16                 :            : #include <linux/highmem.h>
      17                 :            : #include <linux/blkdev.h>
      18                 :            : #include <linux/backing-dev.h>
      19                 :            : #include <linux/module.h>
      20                 :            : #include <linux/blkpg.h>
      21                 :            : #include <linux/magic.h>
      22                 :            : #include <linux/dax.h>
      23                 :            : #include <linux/buffer_head.h>
      24                 :            : #include <linux/swap.h>
      25                 :            : #include <linux/pagevec.h>
      26                 :            : #include <linux/writeback.h>
      27                 :            : #include <linux/mpage.h>
      28                 :            : #include <linux/mount.h>
      29                 :            : #include <linux/pseudo_fs.h>
      30                 :            : #include <linux/uio.h>
      31                 :            : #include <linux/namei.h>
      32                 :            : #include <linux/log2.h>
      33                 :            : #include <linux/cleancache.h>
      34                 :            : #include <linux/task_io_accounting_ops.h>
      35                 :            : #include <linux/falloc.h>
      36                 :            : #include <linux/uaccess.h>
      37                 :            : #include <linux/suspend.h>
      38                 :            : #include "internal.h"
      39                 :            : 
      40                 :            : struct bdev_inode {
      41                 :            :         struct block_device bdev;
      42                 :            :         struct inode vfs_inode;
      43                 :            : };
      44                 :            : 
      45                 :            : static const struct address_space_operations def_blk_aops;
      46                 :            : 
      47                 :            : static inline struct bdev_inode *BDEV_I(struct inode *inode)
      48                 :            : {
      49                 :          3 :         return container_of(inode, struct bdev_inode, vfs_inode);
      50                 :            : }
      51                 :            : 
      52                 :          3 : struct block_device *I_BDEV(struct inode *inode)
      53                 :            : {
      54                 :          3 :         return &BDEV_I(inode)->bdev;
      55                 :            : }
      56                 :            : EXPORT_SYMBOL(I_BDEV);
      57                 :            : 
      58                 :          3 : static void bdev_write_inode(struct block_device *bdev)
      59                 :            : {
      60                 :          3 :         struct inode *inode = bdev->bd_inode;
      61                 :            :         int ret;
      62                 :            : 
      63                 :            :         spin_lock(&inode->i_lock);
      64                 :          3 :         while (inode->i_state & I_DIRTY) {
      65                 :            :                 spin_unlock(&inode->i_lock);
      66                 :          0 :                 ret = write_inode_now(inode, true);
      67                 :          0 :                 if (ret) {
      68                 :            :                         char name[BDEVNAME_SIZE];
      69                 :          0 :                         pr_warn_ratelimited("VFS: Dirty inode writeback failed "
      70                 :            :                                             "for block device %s (err=%d).\n",
      71                 :            :                                             bdevname(bdev, name), ret);
      72                 :            :                 }
      73                 :            :                 spin_lock(&inode->i_lock);
      74                 :            :         }
      75                 :            :         spin_unlock(&inode->i_lock);
      76                 :          3 : }
      77                 :            : 
      78                 :            : /* Kill _all_ buffers and pagecache , dirty or not.. */
      79                 :          3 : void kill_bdev(struct block_device *bdev)
      80                 :            : {
      81                 :          3 :         struct address_space *mapping = bdev->bd_inode->i_mapping;
      82                 :            : 
      83                 :          3 :         if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
      84                 :          3 :                 return;
      85                 :            : 
      86                 :          3 :         invalidate_bh_lrus();
      87                 :          3 :         truncate_inode_pages(mapping, 0);
      88                 :            : }       
      89                 :            : EXPORT_SYMBOL(kill_bdev);
      90                 :            : 
      91                 :            : /* Invalidate clean unused buffers and pagecache. */
      92                 :          3 : void invalidate_bdev(struct block_device *bdev)
      93                 :            : {
      94                 :          3 :         struct address_space *mapping = bdev->bd_inode->i_mapping;
      95                 :            : 
      96                 :          3 :         if (mapping->nrpages) {
      97                 :          0 :                 invalidate_bh_lrus();
      98                 :          0 :                 lru_add_drain_all();    /* make sure all lru add caches are flushed */
      99                 :          0 :                 invalidate_mapping_pages(mapping, 0, -1);
     100                 :            :         }
     101                 :            :         /* 99% of the time, we don't need to flush the cleancache on the bdev.
     102                 :            :          * But, for the strange corners, lets be cautious
     103                 :            :          */
     104                 :            :         cleancache_invalidate_inode(mapping);
     105                 :          3 : }
     106                 :            : EXPORT_SYMBOL(invalidate_bdev);
     107                 :            : 
     108                 :          3 : static void set_init_blocksize(struct block_device *bdev)
     109                 :            : {
     110                 :            :         unsigned bsize = bdev_logical_block_size(bdev);
     111                 :          3 :         loff_t size = i_size_read(bdev->bd_inode);
     112                 :            : 
     113                 :          3 :         while (bsize < PAGE_SIZE) {
     114                 :          3 :                 if (size & bsize)
     115                 :            :                         break;
     116                 :          3 :                 bsize <<= 1;
     117                 :            :         }
     118                 :          3 :         bdev->bd_block_size = bsize;
     119                 :          3 :         bdev->bd_inode->i_blkbits = blksize_bits(bsize);
     120                 :          3 : }
     121                 :            : 
     122                 :          3 : int set_blocksize(struct block_device *bdev, int size)
     123                 :            : {
     124                 :            :         /* Size must be a power of two, and between 512 and PAGE_SIZE */
     125                 :          3 :         if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
     126                 :            :                 return -EINVAL;
     127                 :            : 
     128                 :            :         /* Size cannot be smaller than the size supported by the device */
     129                 :          3 :         if (size < bdev_logical_block_size(bdev))
     130                 :            :                 return -EINVAL;
     131                 :            : 
     132                 :            :         /* Don't change the size if it is same as current */
     133                 :          3 :         if (bdev->bd_block_size != size) {
     134                 :            :                 sync_blockdev(bdev);
     135                 :          3 :                 bdev->bd_block_size = size;
     136                 :          3 :                 bdev->bd_inode->i_blkbits = blksize_bits(size);
     137                 :          3 :                 kill_bdev(bdev);
     138                 :            :         }
     139                 :            :         return 0;
     140                 :            : }
     141                 :            : 
     142                 :            : EXPORT_SYMBOL(set_blocksize);
     143                 :            : 
     144                 :          3 : int sb_set_blocksize(struct super_block *sb, int size)
     145                 :            : {
     146                 :          3 :         if (set_blocksize(sb->s_bdev, size))
     147                 :            :                 return 0;
     148                 :            :         /* If we get here, we know size is power of two
     149                 :            :          * and it's value is between 512 and PAGE_SIZE */
     150                 :          3 :         sb->s_blocksize = size;
     151                 :          3 :         sb->s_blocksize_bits = blksize_bits(size);
     152                 :          3 :         return sb->s_blocksize;
     153                 :            : }
     154                 :            : 
     155                 :            : EXPORT_SYMBOL(sb_set_blocksize);
     156                 :            : 
     157                 :          3 : int sb_min_blocksize(struct super_block *sb, int size)
     158                 :            : {
     159                 :          3 :         int minsize = bdev_logical_block_size(sb->s_bdev);
     160                 :          3 :         if (size < minsize)
     161                 :            :                 size = minsize;
     162                 :          3 :         return sb_set_blocksize(sb, size);
     163                 :            : }
     164                 :            : 
     165                 :            : EXPORT_SYMBOL(sb_min_blocksize);
     166                 :            : 
     167                 :            : static int
     168                 :          3 : blkdev_get_block(struct inode *inode, sector_t iblock,
     169                 :            :                 struct buffer_head *bh, int create)
     170                 :            : {
     171                 :          3 :         bh->b_bdev = I_BDEV(inode);
     172                 :          3 :         bh->b_blocknr = iblock;
     173                 :            :         set_buffer_mapped(bh);
     174                 :          3 :         return 0;
     175                 :            : }
     176                 :            : 
     177                 :            : static struct inode *bdev_file_inode(struct file *file)
     178                 :            : {
     179                 :          3 :         return file->f_mapping->host;
     180                 :            : }
     181                 :            : 
     182                 :            : static unsigned int dio_bio_write_op(struct kiocb *iocb)
     183                 :            : {
     184                 :            :         unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
     185                 :            : 
     186                 :            :         /* avoid the need for a I/O completion work item */
     187                 :          0 :         if (iocb->ki_flags & IOCB_DSYNC)
     188                 :            :                 op |= REQ_FUA;
     189                 :            :         return op;
     190                 :            : }
     191                 :            : 
     192                 :            : #define DIO_INLINE_BIO_VECS 4
     193                 :            : 
     194                 :          0 : static void blkdev_bio_end_io_simple(struct bio *bio)
     195                 :            : {
     196                 :          0 :         struct task_struct *waiter = bio->bi_private;
     197                 :            : 
     198                 :            :         WRITE_ONCE(bio->bi_private, NULL);
     199                 :          0 :         blk_wake_io_task(waiter);
     200                 :          0 : }
     201                 :            : 
     202                 :            : static ssize_t
     203                 :          0 : __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
     204                 :            :                 int nr_pages)
     205                 :            : {
     206                 :          0 :         struct file *file = iocb->ki_filp;
     207                 :            :         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
     208                 :            :         struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
     209                 :          0 :         loff_t pos = iocb->ki_pos;
     210                 :            :         bool should_dirty = false;
     211                 :            :         struct bio bio;
     212                 :            :         ssize_t ret;
     213                 :            :         blk_qc_t qc;
     214                 :            : 
     215                 :          0 :         if ((pos | iov_iter_alignment(iter)) &
     216                 :          0 :             (bdev_logical_block_size(bdev) - 1))
     217                 :            :                 return -EINVAL;
     218                 :            : 
     219                 :          0 :         if (nr_pages <= DIO_INLINE_BIO_VECS)
     220                 :            :                 vecs = inline_vecs;
     221                 :            :         else {
     222                 :          0 :                 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
     223                 :            :                                      GFP_KERNEL);
     224                 :          0 :                 if (!vecs)
     225                 :            :                         return -ENOMEM;
     226                 :            :         }
     227                 :            : 
     228                 :          0 :         bio_init(&bio, vecs, nr_pages);
     229                 :          0 :         bio_set_dev(&bio, bdev);
     230                 :          0 :         bio.bi_iter.bi_sector = pos >> 9;
     231                 :          0 :         bio.bi_write_hint = iocb->ki_hint;
     232                 :          0 :         bio.bi_private = current;
     233                 :          0 :         bio.bi_end_io = blkdev_bio_end_io_simple;
     234                 :          0 :         bio.bi_ioprio = iocb->ki_ioprio;
     235                 :            : 
     236                 :          0 :         ret = bio_iov_iter_get_pages(&bio, iter);
     237                 :          0 :         if (unlikely(ret))
     238                 :            :                 goto out;
     239                 :          0 :         ret = bio.bi_iter.bi_size;
     240                 :            : 
     241                 :          0 :         if (iov_iter_rw(iter) == READ) {
     242                 :          0 :                 bio.bi_opf = REQ_OP_READ;
     243                 :          0 :                 if (iter_is_iovec(iter))
     244                 :            :                         should_dirty = true;
     245                 :            :         } else {
     246                 :          0 :                 bio.bi_opf = dio_bio_write_op(iocb);
     247                 :            :                 task_io_account_write(ret);
     248                 :            :         }
     249                 :          0 :         if (iocb->ki_flags & IOCB_HIPRI)
     250                 :            :                 bio_set_polled(&bio, iocb);
     251                 :            : 
     252                 :          0 :         qc = submit_bio(&bio);
     253                 :            :         for (;;) {
     254                 :          0 :                 set_current_state(TASK_UNINTERRUPTIBLE);
     255                 :          0 :                 if (!READ_ONCE(bio.bi_private))
     256                 :            :                         break;
     257                 :          0 :                 if (!(iocb->ki_flags & IOCB_HIPRI) ||
     258                 :          0 :                     !blk_poll(bdev_get_queue(bdev), qc, true))
     259                 :          0 :                         io_schedule();
     260                 :            :         }
     261                 :          0 :         __set_current_state(TASK_RUNNING);
     262                 :            : 
     263                 :          0 :         bio_release_pages(&bio, should_dirty);
     264                 :          0 :         if (unlikely(bio.bi_status))
     265                 :          0 :                 ret = blk_status_to_errno(bio.bi_status);
     266                 :            : 
     267                 :            : out:
     268                 :          0 :         if (vecs != inline_vecs)
     269                 :          0 :                 kfree(vecs);
     270                 :            : 
     271                 :          0 :         bio_uninit(&bio);
     272                 :            : 
     273                 :          0 :         return ret;
     274                 :            : }
     275                 :            : 
     276                 :            : struct blkdev_dio {
     277                 :            :         union {
     278                 :            :                 struct kiocb            *iocb;
     279                 :            :                 struct task_struct      *waiter;
     280                 :            :         };
     281                 :            :         size_t                  size;
     282                 :            :         atomic_t                ref;
     283                 :            :         bool                    multi_bio : 1;
     284                 :            :         bool                    should_dirty : 1;
     285                 :            :         bool                    is_sync : 1;
     286                 :            :         struct bio              bio;
     287                 :            : };
     288                 :            : 
     289                 :            : static struct bio_set blkdev_dio_pool;
     290                 :            : 
     291                 :          0 : static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
     292                 :            : {
     293                 :          0 :         struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
     294                 :            :         struct request_queue *q = bdev_get_queue(bdev);
     295                 :            : 
     296                 :          0 :         return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
     297                 :            : }
     298                 :            : 
     299                 :          0 : static void blkdev_bio_end_io(struct bio *bio)
     300                 :            : {
     301                 :          0 :         struct blkdev_dio *dio = bio->bi_private;
     302                 :          0 :         bool should_dirty = dio->should_dirty;
     303                 :            : 
     304                 :          0 :         if (bio->bi_status && !dio->bio.bi_status)
     305                 :          0 :                 dio->bio.bi_status = bio->bi_status;
     306                 :            : 
     307                 :          0 :         if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
     308                 :          0 :                 if (!dio->is_sync) {
     309                 :          0 :                         struct kiocb *iocb = dio->iocb;
     310                 :            :                         ssize_t ret;
     311                 :            : 
     312                 :          0 :                         if (likely(!dio->bio.bi_status)) {
     313                 :          0 :                                 ret = dio->size;
     314                 :          0 :                                 iocb->ki_pos += ret;
     315                 :            :                         } else {
     316                 :          0 :                                 ret = blk_status_to_errno(dio->bio.bi_status);
     317                 :            :                         }
     318                 :            : 
     319                 :          0 :                         dio->iocb->ki_complete(iocb, ret, 0);
     320                 :          0 :                         if (dio->multi_bio)
     321                 :          0 :                                 bio_put(&dio->bio);
     322                 :            :                 } else {
     323                 :          0 :                         struct task_struct *waiter = dio->waiter;
     324                 :            : 
     325                 :            :                         WRITE_ONCE(dio->waiter, NULL);
     326                 :          0 :                         blk_wake_io_task(waiter);
     327                 :            :                 }
     328                 :            :         }
     329                 :            : 
     330                 :          0 :         if (should_dirty) {
     331                 :          0 :                 bio_check_pages_dirty(bio);
     332                 :            :         } else {
     333                 :          0 :                 bio_release_pages(bio, false);
     334                 :          0 :                 bio_put(bio);
     335                 :            :         }
     336                 :          0 : }
     337                 :            : 
     338                 :            : static ssize_t
     339                 :          0 : __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
     340                 :            : {
     341                 :          0 :         struct file *file = iocb->ki_filp;
     342                 :            :         struct inode *inode = bdev_file_inode(file);
     343                 :            :         struct block_device *bdev = I_BDEV(inode);
     344                 :            :         struct blk_plug plug;
     345                 :            :         struct blkdev_dio *dio;
     346                 :            :         struct bio *bio;
     347                 :          0 :         bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
     348                 :          0 :         bool is_read = (iov_iter_rw(iter) == READ), is_sync;
     349                 :          0 :         loff_t pos = iocb->ki_pos;
     350                 :            :         blk_qc_t qc = BLK_QC_T_NONE;
     351                 :            :         int ret = 0;
     352                 :            : 
     353                 :          0 :         if ((pos | iov_iter_alignment(iter)) &
     354                 :          0 :             (bdev_logical_block_size(bdev) - 1))
     355                 :            :                 return -EINVAL;
     356                 :            : 
     357                 :          0 :         bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
     358                 :            : 
     359                 :          0 :         dio = container_of(bio, struct blkdev_dio, bio);
     360                 :          0 :         dio->is_sync = is_sync = is_sync_kiocb(iocb);
     361                 :          0 :         if (dio->is_sync) {
     362                 :          0 :                 dio->waiter = current;
     363                 :          0 :                 bio_get(bio);
     364                 :            :         } else {
     365                 :          0 :                 dio->iocb = iocb;
     366                 :            :         }
     367                 :            : 
     368                 :          0 :         dio->size = 0;
     369                 :          0 :         dio->multi_bio = false;
     370                 :          0 :         dio->should_dirty = is_read && iter_is_iovec(iter);
     371                 :            : 
     372                 :            :         /*
     373                 :            :          * Don't plug for HIPRI/polled IO, as those should go straight
     374                 :            :          * to issue
     375                 :            :          */
     376                 :          0 :         if (!is_poll)
     377                 :          0 :                 blk_start_plug(&plug);
     378                 :            : 
     379                 :            :         for (;;) {
     380                 :          0 :                 bio_set_dev(bio, bdev);
     381                 :          0 :                 bio->bi_iter.bi_sector = pos >> 9;
     382                 :          0 :                 bio->bi_write_hint = iocb->ki_hint;
     383                 :          0 :                 bio->bi_private = dio;
     384                 :          0 :                 bio->bi_end_io = blkdev_bio_end_io;
     385                 :          0 :                 bio->bi_ioprio = iocb->ki_ioprio;
     386                 :            : 
     387                 :          0 :                 ret = bio_iov_iter_get_pages(bio, iter);
     388                 :          0 :                 if (unlikely(ret)) {
     389                 :          0 :                         bio->bi_status = BLK_STS_IOERR;
     390                 :          0 :                         bio_endio(bio);
     391                 :          0 :                         break;
     392                 :            :                 }
     393                 :            : 
     394                 :          0 :                 if (is_read) {
     395                 :          0 :                         bio->bi_opf = REQ_OP_READ;
     396                 :          0 :                         if (dio->should_dirty)
     397                 :          0 :                                 bio_set_pages_dirty(bio);
     398                 :            :                 } else {
     399                 :          0 :                         bio->bi_opf = dio_bio_write_op(iocb);
     400                 :          0 :                         task_io_account_write(bio->bi_iter.bi_size);
     401                 :            :                 }
     402                 :            : 
     403                 :          0 :                 dio->size += bio->bi_iter.bi_size;
     404                 :          0 :                 pos += bio->bi_iter.bi_size;
     405                 :            : 
     406                 :          0 :                 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
     407                 :          0 :                 if (!nr_pages) {
     408                 :            :                         bool polled = false;
     409                 :            : 
     410                 :          0 :                         if (iocb->ki_flags & IOCB_HIPRI) {
     411                 :            :                                 bio_set_polled(bio, iocb);
     412                 :            :                                 polled = true;
     413                 :            :                         }
     414                 :            : 
     415                 :          0 :                         qc = submit_bio(bio);
     416                 :            : 
     417                 :          0 :                         if (polled)
     418                 :            :                                 WRITE_ONCE(iocb->ki_cookie, qc);
     419                 :            :                         break;
     420                 :            :                 }
     421                 :            : 
     422                 :          0 :                 if (!dio->multi_bio) {
     423                 :            :                         /*
     424                 :            :                          * AIO needs an extra reference to ensure the dio
     425                 :            :                          * structure which is embedded into the first bio
     426                 :            :                          * stays around.
     427                 :            :                          */
     428                 :          0 :                         if (!is_sync)
     429                 :          0 :                                 bio_get(bio);
     430                 :          0 :                         dio->multi_bio = true;
     431                 :            :                         atomic_set(&dio->ref, 2);
     432                 :            :                 } else {
     433                 :          0 :                         atomic_inc(&dio->ref);
     434                 :            :                 }
     435                 :            : 
     436                 :          0 :                 submit_bio(bio);
     437                 :          0 :                 bio = bio_alloc(GFP_KERNEL, nr_pages);
     438                 :            :         }
     439                 :            : 
     440                 :          0 :         if (!is_poll)
     441                 :          0 :                 blk_finish_plug(&plug);
     442                 :            : 
     443                 :          0 :         if (!is_sync)
     444                 :            :                 return -EIOCBQUEUED;
     445                 :            : 
     446                 :            :         for (;;) {
     447                 :          0 :                 set_current_state(TASK_UNINTERRUPTIBLE);
     448                 :          0 :                 if (!READ_ONCE(dio->waiter))
     449                 :            :                         break;
     450                 :            : 
     451                 :          0 :                 if (!(iocb->ki_flags & IOCB_HIPRI) ||
     452                 :          0 :                     !blk_poll(bdev_get_queue(bdev), qc, true))
     453                 :          0 :                         io_schedule();
     454                 :            :         }
     455                 :          0 :         __set_current_state(TASK_RUNNING);
     456                 :            : 
     457                 :          0 :         if (!ret)
     458                 :          0 :                 ret = blk_status_to_errno(dio->bio.bi_status);
     459                 :          0 :         if (likely(!ret))
     460                 :          0 :                 ret = dio->size;
     461                 :            : 
     462                 :          0 :         bio_put(&dio->bio);
     463                 :          0 :         return ret;
     464                 :            : }
     465                 :            : 
     466                 :            : static ssize_t
     467                 :          0 : blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
     468                 :            : {
     469                 :            :         int nr_pages;
     470                 :            : 
     471                 :          0 :         nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
     472                 :          0 :         if (!nr_pages)
     473                 :            :                 return 0;
     474                 :          0 :         if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
     475                 :          0 :                 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
     476                 :            : 
     477                 :          0 :         return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
     478                 :            : }
     479                 :            : 
     480                 :          3 : static __init int blkdev_init(void)
     481                 :            : {
     482                 :          3 :         return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
     483                 :            : }
     484                 :            : module_init(blkdev_init);
     485                 :            : 
     486                 :          3 : int __sync_blockdev(struct block_device *bdev, int wait)
     487                 :            : {
     488                 :          3 :         if (!bdev)
     489                 :            :                 return 0;
     490                 :          3 :         if (!wait)
     491                 :          0 :                 return filemap_flush(bdev->bd_inode->i_mapping);
     492                 :          3 :         return filemap_write_and_wait(bdev->bd_inode->i_mapping);
     493                 :            : }
     494                 :            : 
     495                 :            : /*
     496                 :            :  * Write out and wait upon all the dirty data associated with a block
     497                 :            :  * device via its mapping.  Does not take the superblock lock.
     498                 :            :  */
     499                 :          1 : int sync_blockdev(struct block_device *bdev)
     500                 :            : {
     501                 :          3 :         return __sync_blockdev(bdev, 1);
     502                 :            : }
     503                 :            : EXPORT_SYMBOL(sync_blockdev);
     504                 :            : 
     505                 :            : /*
     506                 :            :  * Write out and wait upon all dirty data associated with this
     507                 :            :  * device: filesystem data as well as the underlying block
     508                 :            :  * device.  Takes the superblock lock.
     509                 :            :  */
     510                 :          3 : int fsync_bdev(struct block_device *bdev)
     511                 :            : {
     512                 :          3 :         struct super_block *sb = get_super(bdev);
     513                 :          3 :         if (sb) {
     514                 :          0 :                 int res = sync_filesystem(sb);
     515                 :          0 :                 drop_super(sb);
     516                 :          0 :                 return res;
     517                 :            :         }
     518                 :          3 :         return sync_blockdev(bdev);
     519                 :            : }
     520                 :            : EXPORT_SYMBOL(fsync_bdev);
     521                 :            : 
     522                 :            : /**
     523                 :            :  * freeze_bdev  --  lock a filesystem and force it into a consistent state
     524                 :            :  * @bdev:       blockdevice to lock
     525                 :            :  *
     526                 :            :  * If a superblock is found on this device, we take the s_umount semaphore
     527                 :            :  * on it to make sure nobody unmounts until the snapshot creation is done.
     528                 :            :  * The reference counter (bd_fsfreeze_count) guarantees that only the last
     529                 :            :  * thaw request actually unfreezes the frozen filesystem when multiple
     530                 :            :  * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
     531                 :            :  * counts down in thaw_bdev(). When it reaches 0, thaw_bdev() actually
     532                 :            :  * unfreezes the filesystem.
     533                 :            :  */
     534                 :          0 : struct super_block *freeze_bdev(struct block_device *bdev)
     535                 :            : {
     536                 :            :         struct super_block *sb;
     537                 :            :         int error = 0;
     538                 :            : 
     539                 :          0 :         mutex_lock(&bdev->bd_fsfreeze_mutex);
     540                 :          0 :         if (++bdev->bd_fsfreeze_count > 1) {
     541                 :            :                 /*
     542                 :            :                  * We don't even need to grab a reference - the first call
     543                 :            :                  * to freeze_bdev grabs an active reference and only the last
     544                 :            :                  * thaw_bdev drops it.
     545                 :            :                  */
     546                 :          0 :                 sb = get_super(bdev);
     547                 :          0 :                 if (sb)
     548                 :          0 :                         drop_super(sb);
     549                 :          0 :                 mutex_unlock(&bdev->bd_fsfreeze_mutex);
     550                 :          0 :                 return sb;
     551                 :            :         }
     552                 :            : 
     553                 :          0 :         sb = get_active_super(bdev);
     554                 :          0 :         if (!sb)
     555                 :            :                 goto out;
     556                 :          0 :         if (sb->s_op->freeze_super)
     557                 :          0 :                 error = sb->s_op->freeze_super(sb);
     558                 :            :         else
     559                 :          0 :                 error = freeze_super(sb);
     560                 :          0 :         if (error) {
     561                 :          0 :                 deactivate_super(sb);
     562                 :          0 :                 bdev->bd_fsfreeze_count--;
     563                 :          0 :                 mutex_unlock(&bdev->bd_fsfreeze_mutex);
     564                 :          0 :                 return ERR_PTR(error);
     565                 :            :         }
     566                 :          0 :         deactivate_super(sb);
     567                 :            :  out:
     568                 :            :         sync_blockdev(bdev);
     569                 :          0 :         mutex_unlock(&bdev->bd_fsfreeze_mutex);
     570                 :          0 :         return sb;      /* thaw_bdev releases s->s_umount */
     571                 :            : }
     572                 :            : EXPORT_SYMBOL(freeze_bdev);
     573                 :            : 
     574                 :            : /**
     575                 :            :  * thaw_bdev  -- unlock filesystem
     576                 :            :  * @bdev:       blockdevice to unlock
     577                 :            :  * @sb:         associated superblock
     578                 :            :  *
     579                 :            :  * Unlocks the filesystem and marks it writeable again after freeze_bdev().
     580                 :            :  */
     581                 :          0 : int thaw_bdev(struct block_device *bdev, struct super_block *sb)
     582                 :            : {
     583                 :            :         int error = -EINVAL;
     584                 :            : 
     585                 :          0 :         mutex_lock(&bdev->bd_fsfreeze_mutex);
     586                 :          0 :         if (!bdev->bd_fsfreeze_count)
     587                 :            :                 goto out;
     588                 :            : 
     589                 :            :         error = 0;
     590                 :          0 :         if (--bdev->bd_fsfreeze_count > 0)
     591                 :            :                 goto out;
     592                 :            : 
     593                 :          0 :         if (!sb)
     594                 :            :                 goto out;
     595                 :            : 
     596                 :          0 :         if (sb->s_op->thaw_super)
     597                 :          0 :                 error = sb->s_op->thaw_super(sb);
     598                 :            :         else
     599                 :          0 :                 error = thaw_super(sb);
     600                 :          0 :         if (error)
     601                 :          0 :                 bdev->bd_fsfreeze_count++;
     602                 :            : out:
     603                 :          0 :         mutex_unlock(&bdev->bd_fsfreeze_mutex);
     604                 :          0 :         return error;
     605                 :            : }
     606                 :            : EXPORT_SYMBOL(thaw_bdev);
     607                 :            : 
     608                 :          3 : static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
     609                 :            : {
     610                 :          3 :         return block_write_full_page(page, blkdev_get_block, wbc);
     611                 :            : }
     612                 :            : 
     613                 :          3 : static int blkdev_readpage(struct file * file, struct page * page)
     614                 :            : {
     615                 :          3 :         return block_read_full_page(page, blkdev_get_block);
     616                 :            : }
     617                 :            : 
     618                 :          3 : static int blkdev_readpages(struct file *file, struct address_space *mapping,
     619                 :            :                         struct list_head *pages, unsigned nr_pages)
     620                 :            : {
     621                 :          3 :         return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
     622                 :            : }
     623                 :            : 
     624                 :          0 : static int blkdev_write_begin(struct file *file, struct address_space *mapping,
     625                 :            :                         loff_t pos, unsigned len, unsigned flags,
     626                 :            :                         struct page **pagep, void **fsdata)
     627                 :            : {
     628                 :          0 :         return block_write_begin(mapping, pos, len, flags, pagep,
     629                 :            :                                  blkdev_get_block);
     630                 :            : }
     631                 :            : 
     632                 :          0 : static int blkdev_write_end(struct file *file, struct address_space *mapping,
     633                 :            :                         loff_t pos, unsigned len, unsigned copied,
     634                 :            :                         struct page *page, void *fsdata)
     635                 :            : {
     636                 :            :         int ret;
     637                 :          0 :         ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
     638                 :            : 
     639                 :          0 :         unlock_page(page);
     640                 :          0 :         put_page(page);
     641                 :            : 
     642                 :          0 :         return ret;
     643                 :            : }
     644                 :            : 
     645                 :            : /*
     646                 :            :  * private llseek:
     647                 :            :  * for a block special file file_inode(file)->i_size is zero
     648                 :            :  * so we compute the size by hand (just as in block_read/write above)
     649                 :            :  */
     650                 :          3 : static loff_t block_llseek(struct file *file, loff_t offset, int whence)
     651                 :            : {
     652                 :            :         struct inode *bd_inode = bdev_file_inode(file);
     653                 :            :         loff_t retval;
     654                 :            : 
     655                 :            :         inode_lock(bd_inode);
     656                 :          3 :         retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
     657                 :            :         inode_unlock(bd_inode);
     658                 :          3 :         return retval;
     659                 :            : }
     660                 :            :         
     661                 :          3 : int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
     662                 :            : {
     663                 :            :         struct inode *bd_inode = bdev_file_inode(filp);
     664                 :            :         struct block_device *bdev = I_BDEV(bd_inode);
     665                 :            :         int error;
     666                 :            :         
     667                 :          3 :         error = file_write_and_wait_range(filp, start, end);
     668                 :          3 :         if (error)
     669                 :            :                 return error;
     670                 :            : 
     671                 :            :         /*
     672                 :            :          * There is no need to serialise calls to blkdev_issue_flush with
     673                 :            :          * i_mutex and doing so causes performance issues with concurrent
     674                 :            :          * O_SYNC writers to a block device.
     675                 :            :          */
     676                 :          3 :         error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
     677                 :          3 :         if (error == -EOPNOTSUPP)
     678                 :            :                 error = 0;
     679                 :            : 
     680                 :          3 :         return error;
     681                 :            : }
     682                 :            : EXPORT_SYMBOL(blkdev_fsync);
     683                 :            : 
     684                 :            : /**
     685                 :            :  * bdev_read_page() - Start reading a page from a block device
     686                 :            :  * @bdev: The device to read the page from
     687                 :            :  * @sector: The offset on the device to read the page from (need not be aligned)
     688                 :            :  * @page: The page to read
     689                 :            :  *
     690                 :            :  * On entry, the page should be locked.  It will be unlocked when the page
     691                 :            :  * has been read.  If the block driver implements rw_page synchronously,
     692                 :            :  * that will be true on exit from this function, but it need not be.
     693                 :            :  *
     694                 :            :  * Errors returned by this function are usually "soft", eg out of memory, or
     695                 :            :  * queue full; callers should try a different route to read this page rather
     696                 :            :  * than propagate an error back up the stack.
     697                 :            :  *
     698                 :            :  * Return: negative errno if an error occurs, 0 if submission was successful.
     699                 :            :  */
     700                 :          3 : int bdev_read_page(struct block_device *bdev, sector_t sector,
     701                 :            :                         struct page *page)
     702                 :            : {
     703                 :          3 :         const struct block_device_operations *ops = bdev->bd_disk->fops;
     704                 :            :         int result = -EOPNOTSUPP;
     705                 :            : 
     706                 :          3 :         if (!ops->rw_page || bdev_get_integrity(bdev))
     707                 :            :                 return result;
     708                 :            : 
     709                 :          3 :         result = blk_queue_enter(bdev->bd_queue, 0);
     710                 :          3 :         if (result)
     711                 :            :                 return result;
     712                 :          3 :         result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
     713                 :            :                               REQ_OP_READ);
     714                 :          3 :         blk_queue_exit(bdev->bd_queue);
     715                 :          3 :         return result;
     716                 :            : }
     717                 :            : EXPORT_SYMBOL_GPL(bdev_read_page);
     718                 :            : 
     719                 :            : /**
     720                 :            :  * bdev_write_page() - Start writing a page to a block device
     721                 :            :  * @bdev: The device to write the page to
     722                 :            :  * @sector: The offset on the device to write the page to (need not be aligned)
     723                 :            :  * @page: The page to write
     724                 :            :  * @wbc: The writeback_control for the write
     725                 :            :  *
     726                 :            :  * On entry, the page should be locked and not currently under writeback.
     727                 :            :  * On exit, if the write started successfully, the page will be unlocked and
     728                 :            :  * under writeback.  If the write failed already (eg the driver failed to
     729                 :            :  * queue the page to the device), the page will still be locked.  If the
     730                 :            :  * caller is a ->writepage implementation, it will need to unlock the page.
     731                 :            :  *
     732                 :            :  * Errors returned by this function are usually "soft", eg out of memory, or
     733                 :            :  * queue full; callers should try a different route to write this page rather
     734                 :            :  * than propagate an error back up the stack.
     735                 :            :  *
     736                 :            :  * Return: negative errno if an error occurs, 0 if submission was successful.
     737                 :            :  */
     738                 :          0 : int bdev_write_page(struct block_device *bdev, sector_t sector,
     739                 :            :                         struct page *page, struct writeback_control *wbc)
     740                 :            : {
     741                 :            :         int result;
     742                 :          0 :         const struct block_device_operations *ops = bdev->bd_disk->fops;
     743                 :            : 
     744                 :          0 :         if (!ops->rw_page || bdev_get_integrity(bdev))
     745                 :            :                 return -EOPNOTSUPP;
     746                 :          0 :         result = blk_queue_enter(bdev->bd_queue, 0);
     747                 :          0 :         if (result)
     748                 :            :                 return result;
     749                 :            : 
     750                 :            :         set_page_writeback(page);
     751                 :          0 :         result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
     752                 :            :                               REQ_OP_WRITE);
     753                 :          0 :         if (result) {
     754                 :          0 :                 end_page_writeback(page);
     755                 :            :         } else {
     756                 :          0 :                 clean_page_buffers(page);
     757                 :          0 :                 unlock_page(page);
     758                 :            :         }
     759                 :          0 :         blk_queue_exit(bdev->bd_queue);
     760                 :          0 :         return result;
     761                 :            : }
     762                 :            : EXPORT_SYMBOL_GPL(bdev_write_page);
     763                 :            : 
     764                 :            : /*
     765                 :            :  * pseudo-fs
     766                 :            :  */
     767                 :            : 
     768                 :            : static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
     769                 :            : static struct kmem_cache * bdev_cachep __read_mostly;
     770                 :            : 
     771                 :          3 : static struct inode *bdev_alloc_inode(struct super_block *sb)
     772                 :            : {
     773                 :          3 :         struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
     774                 :          3 :         if (!ei)
     775                 :            :                 return NULL;
     776                 :          3 :         return &ei->vfs_inode;
     777                 :            : }
     778                 :            : 
     779                 :          3 : static void bdev_free_inode(struct inode *inode)
     780                 :            : {
     781                 :          3 :         kmem_cache_free(bdev_cachep, BDEV_I(inode));
     782                 :          3 : }
     783                 :            : 
     784                 :          3 : static void init_once(void *foo)
     785                 :            : {
     786                 :            :         struct bdev_inode *ei = (struct bdev_inode *) foo;
     787                 :          3 :         struct block_device *bdev = &ei->bdev;
     788                 :            : 
     789                 :          3 :         memset(bdev, 0, sizeof(*bdev));
     790                 :          3 :         mutex_init(&bdev->bd_mutex);
     791                 :          3 :         INIT_LIST_HEAD(&bdev->bd_list);
     792                 :            : #ifdef CONFIG_SYSFS
     793                 :          3 :         INIT_LIST_HEAD(&bdev->bd_holder_disks);
     794                 :            : #endif
     795                 :          3 :         bdev->bd_bdi = &noop_backing_dev_info;
     796                 :          3 :         inode_init_once(&ei->vfs_inode);
     797                 :            :         /* Initialize mutex for freeze. */
     798                 :          3 :         mutex_init(&bdev->bd_fsfreeze_mutex);
     799                 :          3 : }
     800                 :            : 
     801                 :          3 : static void bdev_evict_inode(struct inode *inode)
     802                 :            : {
     803                 :            :         struct block_device *bdev = &BDEV_I(inode)->bdev;
     804                 :          3 :         truncate_inode_pages_final(&inode->i_data);
     805                 :          3 :         invalidate_inode_buffers(inode); /* is it needed here? */
     806                 :          3 :         clear_inode(inode);
     807                 :            :         spin_lock(&bdev_lock);
     808                 :          3 :         list_del_init(&bdev->bd_list);
     809                 :            :         spin_unlock(&bdev_lock);
     810                 :            :         /* Detach inode from wb early as bdi_put() may free bdi->wb */
     811                 :          3 :         inode_detach_wb(inode);
     812                 :          3 :         if (bdev->bd_bdi != &noop_backing_dev_info) {
     813                 :          3 :                 bdi_put(bdev->bd_bdi);
     814                 :          3 :                 bdev->bd_bdi = &noop_backing_dev_info;
     815                 :            :         }
     816                 :          3 : }
     817                 :            : 
     818                 :            : static const struct super_operations bdev_sops = {
     819                 :            :         .statfs = simple_statfs,
     820                 :            :         .alloc_inode = bdev_alloc_inode,
     821                 :            :         .free_inode = bdev_free_inode,
     822                 :            :         .drop_inode = generic_delete_inode,
     823                 :            :         .evict_inode = bdev_evict_inode,
     824                 :            : };
     825                 :            : 
     826                 :          3 : static int bd_init_fs_context(struct fs_context *fc)
     827                 :            : {
     828                 :          3 :         struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
     829                 :          3 :         if (!ctx)
     830                 :            :                 return -ENOMEM;
     831                 :          3 :         fc->s_iflags |= SB_I_CGROUPWB;
     832                 :          3 :         ctx->ops = &bdev_sops;
     833                 :          3 :         return 0;
     834                 :            : }
     835                 :            : 
     836                 :            : static struct file_system_type bd_type = {
     837                 :            :         .name           = "bdev",
     838                 :            :         .init_fs_context = bd_init_fs_context,
     839                 :            :         .kill_sb        = kill_anon_super,
     840                 :            : };
     841                 :            : 
     842                 :            : struct super_block *blockdev_superblock __read_mostly;
     843                 :            : EXPORT_SYMBOL_GPL(blockdev_superblock);
     844                 :            : 
     845                 :          3 : void __init bdev_cache_init(void)
     846                 :            : {
     847                 :            :         int err;
     848                 :            :         static struct vfsmount *bd_mnt;
     849                 :            : 
     850                 :          3 :         bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
     851                 :            :                         0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
     852                 :            :                                 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
     853                 :            :                         init_once);
     854                 :          3 :         err = register_filesystem(&bd_type);
     855                 :          3 :         if (err)
     856                 :          0 :                 panic("Cannot register bdev pseudo-fs");
     857                 :          3 :         bd_mnt = kern_mount(&bd_type);
     858                 :          3 :         if (IS_ERR(bd_mnt))
     859                 :          0 :                 panic("Cannot create bdev pseudo-fs");
     860                 :          3 :         blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
     861                 :          3 : }
     862                 :            : 
     863                 :            : /*
     864                 :            :  * Most likely a _very_ bad one - but then it's hardly critical for a small
     865                 :            :  * /dev and can be fixed when somebody needs a really large one.
     866                 :            :  * Keep in mind that it will be fed through icache hash function too.
     867                 :            :  */
     868                 :            : static inline unsigned long hash(dev_t dev)
     869                 :            : {
     870                 :          3 :         return MAJOR(dev)+MINOR(dev);
     871                 :            : }
     872                 :            : 
     873                 :          3 : static int bdev_test(struct inode *inode, void *data)
     874                 :            : {
     875                 :          3 :         return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
     876                 :            : }
     877                 :            : 
     878                 :          3 : static int bdev_set(struct inode *inode, void *data)
     879                 :            : {
     880                 :          3 :         BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
     881                 :          3 :         return 0;
     882                 :            : }
     883                 :            : 
     884                 :            : static LIST_HEAD(all_bdevs);
     885                 :            : 
     886                 :            : /*
     887                 :            :  * If there is a bdev inode for this device, unhash it so that it gets evicted
     888                 :            :  * as soon as the last inode reference is dropped.
     889                 :            :  */
     890                 :          0 : void bdev_unhash_inode(dev_t dev)
     891                 :            : {
     892                 :            :         struct inode *inode;
     893                 :            : 
     894                 :          0 :         inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
     895                 :          0 :         if (inode) {
     896                 :            :                 remove_inode_hash(inode);
     897                 :          0 :                 iput(inode);
     898                 :            :         }
     899                 :          0 : }
     900                 :            : 
     901                 :          3 : struct block_device *bdget(dev_t dev)
     902                 :            : {
     903                 :            :         struct block_device *bdev;
     904                 :            :         struct inode *inode;
     905                 :            : 
     906                 :          3 :         inode = iget5_locked(blockdev_superblock, hash(dev),
     907                 :            :                         bdev_test, bdev_set, &dev);
     908                 :            : 
     909                 :          3 :         if (!inode)
     910                 :            :                 return NULL;
     911                 :            : 
     912                 :          3 :         bdev = &BDEV_I(inode)->bdev;
     913                 :            : 
     914                 :          3 :         if (inode->i_state & I_NEW) {
     915                 :          3 :                 bdev->bd_contains = NULL;
     916                 :          3 :                 bdev->bd_super = NULL;
     917                 :          3 :                 bdev->bd_inode = inode;
     918                 :          3 :                 bdev->bd_block_size = i_blocksize(inode);
     919                 :          3 :                 bdev->bd_part_count = 0;
     920                 :          3 :                 bdev->bd_invalidated = 0;
     921                 :          3 :                 inode->i_mode = S_IFBLK;
     922                 :          3 :                 inode->i_rdev = dev;
     923                 :          3 :                 inode->i_bdev = bdev;
     924                 :          3 :                 inode->i_data.a_ops = &def_blk_aops;
     925                 :            :                 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
     926                 :            :                 spin_lock(&bdev_lock);
     927                 :          3 :                 list_add(&bdev->bd_list, &all_bdevs);
     928                 :            :                 spin_unlock(&bdev_lock);
     929                 :          3 :                 unlock_new_inode(inode);
     930                 :            :         }
     931                 :          3 :         return bdev;
     932                 :            : }
     933                 :            : 
     934                 :            : EXPORT_SYMBOL(bdget);
     935                 :            : 
     936                 :            : /**
     937                 :            :  * bdgrab -- Grab a reference to an already referenced block device
     938                 :            :  * @bdev:       Block device to grab a reference to.
     939                 :            :  */
     940                 :          0 : struct block_device *bdgrab(struct block_device *bdev)
     941                 :            : {
     942                 :          3 :         ihold(bdev->bd_inode);
     943                 :          0 :         return bdev;
     944                 :            : }
     945                 :            : EXPORT_SYMBOL(bdgrab);
     946                 :            : 
     947                 :          3 : long nr_blockdev_pages(void)
     948                 :            : {
     949                 :            :         struct block_device *bdev;
     950                 :            :         long ret = 0;
     951                 :            :         spin_lock(&bdev_lock);
     952                 :          3 :         list_for_each_entry(bdev, &all_bdevs, bd_list) {
     953                 :          3 :                 ret += bdev->bd_inode->i_mapping->nrpages;
     954                 :            :         }
     955                 :            :         spin_unlock(&bdev_lock);
     956                 :          3 :         return ret;
     957                 :            : }
     958                 :            : 
     959                 :          3 : void bdput(struct block_device *bdev)
     960                 :            : {
     961                 :          3 :         iput(bdev->bd_inode);
     962                 :          3 : }
     963                 :            : 
     964                 :            : EXPORT_SYMBOL(bdput);
     965                 :            :  
     966                 :          3 : static struct block_device *bd_acquire(struct inode *inode)
     967                 :            : {
     968                 :            :         struct block_device *bdev;
     969                 :            : 
     970                 :            :         spin_lock(&bdev_lock);
     971                 :          3 :         bdev = inode->i_bdev;
     972                 :          3 :         if (bdev && !inode_unhashed(bdev->bd_inode)) {
     973                 :            :                 bdgrab(bdev);
     974                 :            :                 spin_unlock(&bdev_lock);
     975                 :          3 :                 return bdev;
     976                 :            :         }
     977                 :            :         spin_unlock(&bdev_lock);
     978                 :            : 
     979                 :            :         /*
     980                 :            :          * i_bdev references block device inode that was already shut down
     981                 :            :          * (corresponding device got removed).  Remove the reference and look
     982                 :            :          * up block device inode again just in case new device got
     983                 :            :          * reestablished under the same device number.
     984                 :            :          */
     985                 :          3 :         if (bdev)
     986                 :          0 :                 bd_forget(inode);
     987                 :            : 
     988                 :          3 :         bdev = bdget(inode->i_rdev);
     989                 :          3 :         if (bdev) {
     990                 :            :                 spin_lock(&bdev_lock);
     991                 :          3 :                 if (!inode->i_bdev) {
     992                 :            :                         /*
     993                 :            :                          * We take an additional reference to bd_inode,
     994                 :            :                          * and it's released in clear_inode() of inode.
     995                 :            :                          * So, we can access it via ->i_mapping always
     996                 :            :                          * without igrab().
     997                 :            :                          */
     998                 :            :                         bdgrab(bdev);
     999                 :          3 :                         inode->i_bdev = bdev;
    1000                 :          3 :                         inode->i_mapping = bdev->bd_inode->i_mapping;
    1001                 :            :                 }
    1002                 :            :                 spin_unlock(&bdev_lock);
    1003                 :            :         }
    1004                 :          3 :         return bdev;
    1005                 :            : }
    1006                 :            : 
    1007                 :            : /* Call when you free inode */
    1008                 :            : 
    1009                 :          3 : void bd_forget(struct inode *inode)
    1010                 :            : {
    1011                 :            :         struct block_device *bdev = NULL;
    1012                 :            : 
    1013                 :            :         spin_lock(&bdev_lock);
    1014                 :          3 :         if (!sb_is_blkdev_sb(inode->i_sb))
    1015                 :          3 :                 bdev = inode->i_bdev;
    1016                 :          3 :         inode->i_bdev = NULL;
    1017                 :          3 :         inode->i_mapping = &inode->i_data;
    1018                 :            :         spin_unlock(&bdev_lock);
    1019                 :            : 
    1020                 :          3 :         if (bdev)
    1021                 :            :                 bdput(bdev);
    1022                 :          3 : }
    1023                 :            : 
    1024                 :            : /**
    1025                 :            :  * bd_may_claim - test whether a block device can be claimed
    1026                 :            :  * @bdev: block device of interest
    1027                 :            :  * @whole: whole block device containing @bdev, may equal @bdev
    1028                 :            :  * @holder: holder trying to claim @bdev
    1029                 :            :  *
    1030                 :            :  * Test whether @bdev can be claimed by @holder.
    1031                 :            :  *
    1032                 :            :  * CONTEXT:
    1033                 :            :  * spin_lock(&bdev_lock).
    1034                 :            :  *
    1035                 :            :  * RETURNS:
    1036                 :            :  * %true if @bdev can be claimed, %false otherwise.
    1037                 :            :  */
    1038                 :          0 : static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
    1039                 :            :                          void *holder)
    1040                 :            : {
    1041                 :          3 :         if (bdev->bd_holder == holder)
    1042                 :            :                 return true;     /* already a holder */
    1043                 :          3 :         else if (bdev->bd_holder != NULL)
    1044                 :            :                 return false;    /* held by someone else */
    1045                 :          3 :         else if (whole == bdev)
    1046                 :            :                 return true;     /* is a whole device which isn't held */
    1047                 :            : 
    1048                 :          3 :         else if (whole->bd_holder == bd_may_claim)
    1049                 :            :                 return true;     /* is a partition of a device that is being partitioned */
    1050                 :          3 :         else if (whole->bd_holder != NULL)
    1051                 :            :                 return false;    /* is a partition of a held device */
    1052                 :            :         else
    1053                 :          0 :                 return true;     /* is a partition of an un-held device */
    1054                 :            : }
    1055                 :            : 
    1056                 :            : /**
    1057                 :            :  * bd_prepare_to_claim - prepare to claim a block device
    1058                 :            :  * @bdev: block device of interest
    1059                 :            :  * @whole: the whole device containing @bdev, may equal @bdev
    1060                 :            :  * @holder: holder trying to claim @bdev
    1061                 :            :  *
    1062                 :            :  * Prepare to claim @bdev.  This function fails if @bdev is already
    1063                 :            :  * claimed by another holder and waits if another claiming is in
    1064                 :            :  * progress.  This function doesn't actually claim.  On successful
    1065                 :            :  * return, the caller has ownership of bd_claiming and bd_holder[s].
    1066                 :            :  *
    1067                 :            :  * CONTEXT:
    1068                 :            :  * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
    1069                 :            :  * it multiple times.
    1070                 :            :  *
    1071                 :            :  * RETURNS:
    1072                 :            :  * 0 if @bdev can be claimed, -EBUSY otherwise.
    1073                 :            :  */
    1074                 :          3 : static int bd_prepare_to_claim(struct block_device *bdev,
    1075                 :            :                                struct block_device *whole, void *holder)
    1076                 :            : {
    1077                 :            : retry:
    1078                 :            :         /* if someone else claimed, fail */
    1079                 :          3 :         if (!bd_may_claim(bdev, whole, holder))
    1080                 :            :                 return -EBUSY;
    1081                 :            : 
    1082                 :            :         /* if claiming is already in progress, wait for it to finish */
    1083                 :          3 :         if (whole->bd_claiming) {
    1084                 :          0 :                 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
    1085                 :          0 :                 DEFINE_WAIT(wait);
    1086                 :            : 
    1087                 :          0 :                 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
    1088                 :            :                 spin_unlock(&bdev_lock);
    1089                 :          0 :                 schedule();
    1090                 :          0 :                 finish_wait(wq, &wait);
    1091                 :            :                 spin_lock(&bdev_lock);
    1092                 :            :                 goto retry;
    1093                 :            :         }
    1094                 :            : 
    1095                 :            :         /* yay, all mine */
    1096                 :            :         return 0;
    1097                 :            : }
    1098                 :            : 
    1099                 :          3 : static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
    1100                 :            : {
    1101                 :          3 :         struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
    1102                 :            : 
    1103                 :          3 :         if (!disk)
    1104                 :            :                 return NULL;
    1105                 :            :         /*
    1106                 :            :          * Now that we hold gendisk reference we make sure bdev we looked up is
    1107                 :            :          * not stale. If it is, it means device got removed and created before
    1108                 :            :          * we looked up gendisk and we fail open in such case. Associating
    1109                 :            :          * unhashed bdev with newly created gendisk could lead to two bdevs
    1110                 :            :          * (and thus two independent caches) being associated with one device
    1111                 :            :          * which is bad.
    1112                 :            :          */
    1113                 :          3 :         if (inode_unhashed(bdev->bd_inode)) {
    1114                 :          0 :                 put_disk_and_module(disk);
    1115                 :          0 :                 return NULL;
    1116                 :            :         }
    1117                 :            :         return disk;
    1118                 :            : }
    1119                 :            : 
    1120                 :            : /**
    1121                 :            :  * bd_start_claiming - start claiming a block device
    1122                 :            :  * @bdev: block device of interest
    1123                 :            :  * @holder: holder trying to claim @bdev
    1124                 :            :  *
    1125                 :            :  * @bdev is about to be opened exclusively.  Check @bdev can be opened
    1126                 :            :  * exclusively and mark that an exclusive open is in progress.  Each
    1127                 :            :  * successful call to this function must be matched with a call to
    1128                 :            :  * either bd_finish_claiming() or bd_abort_claiming() (which do not
    1129                 :            :  * fail).
    1130                 :            :  *
    1131                 :            :  * This function is used to gain exclusive access to the block device
    1132                 :            :  * without actually causing other exclusive open attempts to fail. It
    1133                 :            :  * should be used when the open sequence itself requires exclusive
    1134                 :            :  * access but may subsequently fail.
    1135                 :            :  *
    1136                 :            :  * CONTEXT:
    1137                 :            :  * Might sleep.
    1138                 :            :  *
    1139                 :            :  * RETURNS:
    1140                 :            :  * Pointer to the block device containing @bdev on success, ERR_PTR()
    1141                 :            :  * value on failure.
    1142                 :            :  */
    1143                 :          3 : struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
    1144                 :            : {
    1145                 :            :         struct gendisk *disk;
    1146                 :            :         struct block_device *whole;
    1147                 :            :         int partno, err;
    1148                 :            : 
    1149                 :          3 :         might_sleep();
    1150                 :            : 
    1151                 :            :         /*
    1152                 :            :          * @bdev might not have been initialized properly yet, look up
    1153                 :            :          * and grab the outer block device the hard way.
    1154                 :            :          */
    1155                 :          3 :         disk = bdev_get_gendisk(bdev, &partno);
    1156                 :          3 :         if (!disk)
    1157                 :            :                 return ERR_PTR(-ENXIO);
    1158                 :            : 
    1159                 :            :         /*
    1160                 :            :          * Normally, @bdev should equal what's returned from bdget_disk()
    1161                 :            :          * if partno is 0; however, some drivers (floppy) use multiple
    1162                 :            :          * bdev's for the same physical device and @bdev may be one of the
    1163                 :            :          * aliases.  Keep @bdev if partno is 0.  This means claimer
    1164                 :            :          * tracking is broken for those devices but it has always been that
    1165                 :            :          * way.
    1166                 :            :          */
    1167                 :          3 :         if (partno)
    1168                 :          3 :                 whole = bdget_disk(disk, 0);
    1169                 :            :         else
    1170                 :            :                 whole = bdgrab(bdev);
    1171                 :            : 
    1172                 :          3 :         put_disk_and_module(disk);
    1173                 :          3 :         if (!whole)
    1174                 :            :                 return ERR_PTR(-ENOMEM);
    1175                 :            : 
    1176                 :            :         /* prepare to claim, if successful, mark claiming in progress */
    1177                 :            :         spin_lock(&bdev_lock);
    1178                 :            : 
    1179                 :          3 :         err = bd_prepare_to_claim(bdev, whole, holder);
    1180                 :          3 :         if (err == 0) {
    1181                 :          3 :                 whole->bd_claiming = holder;
    1182                 :            :                 spin_unlock(&bdev_lock);
    1183                 :          3 :                 return whole;
    1184                 :            :         } else {
    1185                 :            :                 spin_unlock(&bdev_lock);
    1186                 :            :                 bdput(whole);
    1187                 :          2 :                 return ERR_PTR(err);
    1188                 :            :         }
    1189                 :            : }
    1190                 :            : EXPORT_SYMBOL(bd_start_claiming);
    1191                 :            : 
    1192                 :          3 : static void bd_clear_claiming(struct block_device *whole, void *holder)
    1193                 :            : {
    1194                 :            :         lockdep_assert_held(&bdev_lock);
    1195                 :            :         /* tell others that we're done */
    1196                 :          3 :         BUG_ON(whole->bd_claiming != holder);
    1197                 :          3 :         whole->bd_claiming = NULL;
    1198                 :          3 :         wake_up_bit(&whole->bd_claiming, 0);
    1199                 :          3 : }
    1200                 :            : 
    1201                 :            : /**
    1202                 :            :  * bd_finish_claiming - finish claiming of a block device
    1203                 :            :  * @bdev: block device of interest
    1204                 :            :  * @whole: whole block device (returned from bd_start_claiming())
    1205                 :            :  * @holder: holder that has claimed @bdev
    1206                 :            :  *
    1207                 :            :  * Finish exclusive open of a block device. Mark the device as exclusively
    1208                 :            :  * open by the holder and wake up all waiters for exclusive open to finish.
    1209                 :            :  */
    1210                 :          3 : void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
    1211                 :            :                         void *holder)
    1212                 :            : {
    1213                 :            :         spin_lock(&bdev_lock);
    1214                 :          3 :         BUG_ON(!bd_may_claim(bdev, whole, holder));
    1215                 :            :         /*
    1216                 :            :          * Note that for a whole device bd_holders will be incremented twice,
    1217                 :            :          * and bd_holder will be set to bd_may_claim before being set to holder
    1218                 :            :          */
    1219                 :          3 :         whole->bd_holders++;
    1220                 :          3 :         whole->bd_holder = bd_may_claim;
    1221                 :          3 :         bdev->bd_holders++;
    1222                 :          3 :         bdev->bd_holder = holder;
    1223                 :          3 :         bd_clear_claiming(whole, holder);
    1224                 :            :         spin_unlock(&bdev_lock);
    1225                 :          3 : }
    1226                 :            : EXPORT_SYMBOL(bd_finish_claiming);
    1227                 :            : 
    1228                 :            : /**
    1229                 :            :  * bd_abort_claiming - abort claiming of a block device
    1230                 :            :  * @bdev: block device of interest
    1231                 :            :  * @whole: whole block device (returned from bd_start_claiming())
    1232                 :            :  * @holder: holder that has claimed @bdev
    1233                 :            :  *
    1234                 :            :  * Abort claiming of a block device when the exclusive open failed. This can be
    1235                 :            :  * also used when exclusive open is not actually desired and we just needed
    1236                 :            :  * to block other exclusive openers for a while.
    1237                 :            :  */
    1238                 :          0 : void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
    1239                 :            :                        void *holder)
    1240                 :            : {
    1241                 :            :         spin_lock(&bdev_lock);
    1242                 :          0 :         bd_clear_claiming(whole, holder);
    1243                 :            :         spin_unlock(&bdev_lock);
    1244                 :          0 : }
    1245                 :            : EXPORT_SYMBOL(bd_abort_claiming);
    1246                 :            : 
    1247                 :            : #ifdef CONFIG_SYSFS
    1248                 :            : struct bd_holder_disk {
    1249                 :            :         struct list_head        list;
    1250                 :            :         struct gendisk          *disk;
    1251                 :            :         int                     refcnt;
    1252                 :            : };
    1253                 :            : 
    1254                 :            : static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
    1255                 :            :                                                   struct gendisk *disk)
    1256                 :            : {
    1257                 :            :         struct bd_holder_disk *holder;
    1258                 :            : 
    1259                 :          0 :         list_for_each_entry(holder, &bdev->bd_holder_disks, list)
    1260                 :          0 :                 if (holder->disk == disk)
    1261                 :          0 :                         return holder;
    1262                 :            :         return NULL;
    1263                 :            : }
    1264                 :            : 
    1265                 :            : static int add_symlink(struct kobject *from, struct kobject *to)
    1266                 :            : {
    1267                 :          0 :         return sysfs_create_link(from, to, kobject_name(to));
    1268                 :            : }
    1269                 :            : 
    1270                 :            : static void del_symlink(struct kobject *from, struct kobject *to)
    1271                 :            : {
    1272                 :          0 :         sysfs_remove_link(from, kobject_name(to));
    1273                 :            : }
    1274                 :            : 
    1275                 :            : /**
    1276                 :            :  * bd_link_disk_holder - create symlinks between holding disk and slave bdev
    1277                 :            :  * @bdev: the claimed slave bdev
    1278                 :            :  * @disk: the holding disk
    1279                 :            :  *
    1280                 :            :  * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
    1281                 :            :  *
    1282                 :            :  * This function creates the following sysfs symlinks.
    1283                 :            :  *
    1284                 :            :  * - from "slaves" directory of the holder @disk to the claimed @bdev
    1285                 :            :  * - from "holders" directory of the @bdev to the holder @disk
    1286                 :            :  *
    1287                 :            :  * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
    1288                 :            :  * passed to bd_link_disk_holder(), then:
    1289                 :            :  *
    1290                 :            :  *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
    1291                 :            :  *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
    1292                 :            :  *
    1293                 :            :  * The caller must have claimed @bdev before calling this function and
    1294                 :            :  * ensure that both @bdev and @disk are valid during the creation and
    1295                 :            :  * lifetime of these symlinks.
    1296                 :            :  *
    1297                 :            :  * CONTEXT:
    1298                 :            :  * Might sleep.
    1299                 :            :  *
    1300                 :            :  * RETURNS:
    1301                 :            :  * 0 on success, -errno on failure.
    1302                 :            :  */
    1303                 :          0 : int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
    1304                 :            : {
    1305                 :            :         struct bd_holder_disk *holder;
    1306                 :            :         int ret = 0;
    1307                 :            : 
    1308                 :          0 :         mutex_lock(&bdev->bd_mutex);
    1309                 :            : 
    1310                 :          0 :         WARN_ON_ONCE(!bdev->bd_holder);
    1311                 :            : 
    1312                 :            :         /* FIXME: remove the following once add_disk() handles errors */
    1313                 :          0 :         if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
    1314                 :            :                 goto out_unlock;
    1315                 :            : 
    1316                 :            :         holder = bd_find_holder_disk(bdev, disk);
    1317                 :          0 :         if (holder) {
    1318                 :          0 :                 holder->refcnt++;
    1319                 :          0 :                 goto out_unlock;
    1320                 :            :         }
    1321                 :            : 
    1322                 :          0 :         holder = kzalloc(sizeof(*holder), GFP_KERNEL);
    1323                 :          0 :         if (!holder) {
    1324                 :            :                 ret = -ENOMEM;
    1325                 :            :                 goto out_unlock;
    1326                 :            :         }
    1327                 :            : 
    1328                 :          0 :         INIT_LIST_HEAD(&holder->list);
    1329                 :          0 :         holder->disk = disk;
    1330                 :          0 :         holder->refcnt = 1;
    1331                 :            : 
    1332                 :          0 :         ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
    1333                 :          0 :         if (ret)
    1334                 :            :                 goto out_free;
    1335                 :            : 
    1336                 :          0 :         ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
    1337                 :          0 :         if (ret)
    1338                 :            :                 goto out_del;
    1339                 :            :         /*
    1340                 :            :          * bdev could be deleted beneath us which would implicitly destroy
    1341                 :            :          * the holder directory.  Hold on to it.
    1342                 :            :          */
    1343                 :          0 :         kobject_get(bdev->bd_part->holder_dir);
    1344                 :            : 
    1345                 :          0 :         list_add(&holder->list, &bdev->bd_holder_disks);
    1346                 :            :         goto out_unlock;
    1347                 :            : 
    1348                 :            : out_del:
    1349                 :          0 :         del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
    1350                 :            : out_free:
    1351                 :          0 :         kfree(holder);
    1352                 :            : out_unlock:
    1353                 :          0 :         mutex_unlock(&bdev->bd_mutex);
    1354                 :          0 :         return ret;
    1355                 :            : }
    1356                 :            : EXPORT_SYMBOL_GPL(bd_link_disk_holder);
    1357                 :            : 
    1358                 :            : /**
    1359                 :            :  * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
    1360                 :            :  * @bdev: the claimed slave bdev
    1361                 :            :  * @disk: the holding disk
    1362                 :            :  *
    1363                 :            :  * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
    1364                 :            :  *
                         :            :  * Looks up the holder entry recorded for @disk on @bdev while holding
                         :            :  * bdev->bd_mutex and drops one reference on it.  Only when that count
                         :            :  * reaches zero are both symlinks deleted, the reference on the holder
                         :            :  * directory put, and the entry unlinked and freed.  If no entry exists
                         :            :  * for @disk a one-time warning is emitted and nothing else is done.
                         :            :  *
    1365                 :            :  * CONTEXT:
    1366                 :            :  * Might sleep.
    1367                 :            :  */
    1368                 :          0 : void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
    1369                 :            : {
    1370                 :            :         struct bd_holder_disk *holder;
    1371                 :            : 
    1372                 :          0 :         mutex_lock(&bdev->bd_mutex);
    1373                 :            : 
    1374                 :            :         holder = bd_find_holder_disk(bdev, disk);
    1375                 :            :         /* tear down only when the last reference on the entry goes away */
    1376                 :          0 :         if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
    1377                 :          0 :                 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
    1378                 :          0 :                 del_symlink(bdev->bd_part->holder_dir,
    1379                 :            :                             &disk_to_dev(disk)->kobj);
    1380                 :          0 :                 kobject_put(bdev->bd_part->holder_dir);
    1381                 :          0 :                 list_del_init(&holder->list);
    1382                 :          0 :                 kfree(holder);
    1383                 :            :         }
    1384                 :            : 
    1385                 :          0 :         mutex_unlock(&bdev->bd_mutex);
    1386                 :          0 : }
    1387                 :            : EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
    1388                 :            : #endif
    1389                 :            : 
    1390                 :            : /**
    1391                 :            :  * flush_disk - invalidates all buffer-cache entries on a disk
    1392                 :            :  *
    1393                 :            :  * @bdev:      struct block device to be flushed
    1394                 :            :  * @kill_dirty: flag to guide handling of dirty inodes
    1395                 :            :  *
    1396                 :            :  * Invalidates all buffer-cache entries on a disk. It should be called
    1397                 :            :  * when a disk has been changed -- either by a media change or online
    1398                 :            :  * resize.
                         :            :  *
                         :            :  * @kill_dirty is handed straight to __invalidate_device().  A warning
                         :            :  * is logged when that call returns non-zero.  In either case
                         :            :  * bdev->bd_invalidated is set to 1 before returning.
    1399                 :            :  */
    1400                 :          0 : static void flush_disk(struct block_device *bdev, bool kill_dirty)
    1401                 :            : {
    1402                 :          0 :         if (__invalidate_device(bdev, kill_dirty)) {
    1403                 :          0 :                 printk(KERN_WARNING "VFS: busy inodes on changed media or "
    1404                 :            :                        "resized disk %s\n",
    1405                 :          0 :                        bdev->bd_disk ? bdev->bd_disk->disk_name : "");
    1406                 :            :         }
    1407                 :          0 :         bdev->bd_invalidated = 1;
    1408                 :          0 : }
    1409                 :            : 
    1410                 :            : /**
    1411                 :            :  * check_disk_size_change - checks for disk size change and adjusts bdev size.
    1412                 :            :  * @disk: struct gendisk to check
    1413                 :            :  * @bdev: struct bdev to adjust.
    1414                 :            :  * @verbose: if %true log a message about a size change if there is any
    1415                 :            :  *
    1416                 :            :  * This routine checks whether the bdev size differs from the disk size
    1417                 :            :  * and, if so, updates the bdev size to match. When the bdev shrinks, all
    1418                 :            :  * of its caches are freed.
                         :            :  *
                         :            :  * The disk size is get_capacity(@disk) shifted left by 9; the bdev size
                         :            :  * is the i_size of bdev->bd_inode.  Growing the bdev only updates the
                         :            :  * inode size; flush_disk() is called (with kill_dirty == false) solely
                         :            :  * when the old size is larger than the new one.
    1419                 :            :  */
    1420                 :          3 : void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
    1421                 :            :                 bool verbose)
    1422                 :            : {
    1423                 :            :         loff_t disk_size, bdev_size;
    1424                 :            : 
    1425                 :          3 :         disk_size = (loff_t)get_capacity(disk) << 9;
    1426                 :          3 :         bdev_size = i_size_read(bdev->bd_inode);
    1427                 :          3 :         if (disk_size != bdev_size) {
    1428                 :          0 :                 if (verbose) {
    1429                 :          0 :                         printk(KERN_INFO
    1430                 :            :                                "%s: detected capacity change from %lld to %lld\n",
    1431                 :          0 :                                disk->disk_name, bdev_size, disk_size);
    1432                 :            :                 }
    1433                 :          0 :                 i_size_write(bdev->bd_inode, disk_size);
    1434                 :          0 :                 if (bdev_size > disk_size)
    1435                 :          0 :                         flush_disk(bdev, false);
    1436                 :            :         }
    1437                 :          3 : }
    1438                 :            : 
    1439                 :            : /**
    1440                 :            :  * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
    1441                 :            :  * @disk: struct gendisk to be revalidated
    1442                 :            :  *
    1443                 :            :  * This routine is a wrapper for lower-level driver's revalidate_disk
    1444                 :            :  * call-backs.  It is used to do common pre and post operations needed
    1445                 :            :  * for all revalidate_disk operations.
                         :            :  *
                         :            :  * After the optional driver call-back, and unless the disk is flagged
                         :            :  * %GENHD_FL_HIDDEN, the whole-disk bdev is looked up and, under its
                         :            :  * bd_mutex, its size is re-checked against the disk and bd_invalidated
                         :            :  * is cleared.  The size-change message is only logged when the driver
                         :            :  * call-back returned 0 (or there was none).
                         :            :  *
                         :            :  * RETURNS:
                         :            :  * The value returned by the driver call-back, or 0 if the driver does
                         :            :  * not provide one.
    1446                 :            :  */
    1447                 :          0 : int revalidate_disk(struct gendisk *disk)
    1448                 :            : {
    1449                 :            :         int ret = 0;
    1450                 :            : 
    1451                 :          0 :         if (disk->fops->revalidate_disk)
    1452                 :          0 :                 ret = disk->fops->revalidate_disk(disk);
    1453                 :            : 
    1454                 :            :         /*
    1455                 :            :          * Hidden disks don't have an associated bdev so there's no point in
    1456                 :            :          * revalidating them.
    1457                 :            :          */
    1458                 :          0 :         if (!(disk->flags & GENHD_FL_HIDDEN)) {
    1459                 :          0 :                 struct block_device *bdev = bdget_disk(disk, 0);
    1460                 :            : 
    1461                 :          0 :                 if (!bdev)
    1462                 :            :                         return ret;
    1463                 :            : 
    1464                 :          0 :                 mutex_lock(&bdev->bd_mutex);
    1465                 :          0 :                 check_disk_size_change(disk, bdev, ret == 0);
    1466                 :          0 :                 bdev->bd_invalidated = 0;
    1467                 :          0 :                 mutex_unlock(&bdev->bd_mutex);
    1468                 :            :                 bdput(bdev);
    1469                 :            :         }
    1470                 :            :         return ret;
    1471                 :            : }
    1472                 :            : EXPORT_SYMBOL(revalidate_disk);
    1473                 :            : 
    1474                 :            : /*
    1475                 :            :  * This routine checks whether a removable media has been changed,
    1476                 :            :  * and invalidates all buffer-cache-entries in that case. This
    1477                 :            :  * is a relatively slow routine, so we have to try to minimize using
    1478                 :            :  * it. Thus it is called only upon a 'mount' or 'open'. This
    1479                 :            :  * is the best way of combining speed and utility, I think.
    1480                 :            :  * People changing diskettes in the middle of an operation deserve
    1481                 :            :  * to lose :-)
                         :            :  *
                         :            :  * Both the MEDIA_CHANGE and EJECT_REQUEST disk events are cleared, but
                         :            :  * only a pending MEDIA_CHANGE triggers the flush (with kill_dirty set)
                         :            :  * and the driver's optional revalidate_disk call-back, whose return
                         :            :  * value is ignored here.
                         :            :  *
                         :            :  * Returns 0 if no media change was pending, 1 otherwise.
    1482                 :            :  */
    1483                 :          3 : int check_disk_change(struct block_device *bdev)
    1484                 :            : {
    1485                 :          3 :         struct gendisk *disk = bdev->bd_disk;
    1486                 :          3 :         const struct block_device_operations *bdops = disk->fops;
    1487                 :            :         unsigned int events;
    1488                 :            : 
    1489                 :          3 :         events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
    1490                 :            :                                    DISK_EVENT_EJECT_REQUEST);
    1491                 :          3 :         if (!(events & DISK_EVENT_MEDIA_CHANGE))
    1492                 :            :                 return 0;
    1493                 :            : 
    1494                 :          0 :         flush_disk(bdev, true);
    1495                 :          0 :         if (bdops->revalidate_disk)
    1496                 :          0 :                 bdops->revalidate_disk(bdev->bd_disk);
    1497                 :            :         return 1;
    1498                 :            : }
    1499                 :            : 
    1500                 :            : EXPORT_SYMBOL(check_disk_change);
    1501                 :            : /*
                         :            :  * bd_set_size - set the size recorded in the bdev's inode
                         :            :  * @bdev: block device whose inode size is updated
                         :            :  * @size: new size, written as-is with i_size_write()
                         :            :  *
                         :            :  * The write is performed with the inode lock of bdev->bd_inode held.
                         :            :  */
    1502                 :          3 : void bd_set_size(struct block_device *bdev, loff_t size)
    1503                 :            : {
    1504                 :          3 :         inode_lock(bdev->bd_inode);
    1505                 :          3 :         i_size_write(bdev->bd_inode, size);
    1506                 :          3 :         inode_unlock(bdev->bd_inode);
    1507                 :          3 : }
    1508                 :            : EXPORT_SYMBOL(bd_set_size);
    1509                 :            : 
    1510                 :            : static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
    1511                 :            : /*
                         :            :  * bdev_disk_changed - react to a changed disk behind @bdev
                         :            :  * @bdev: block device whose disk changed
                         :            :  * @invalidate: true to drop the partitions, false to rescan them
                         :            :  *
                         :            :  * When partition scanning is enabled for the disk, the partitions are
                         :            :  * either invalidated or rescanned depending on @invalidate.  Otherwise
                         :            :  * only the bdev size is re-checked against the disk (logging a size
                         :            :  * change only when not invalidating) and bd_invalidated is cleared here.
                         :            :  */
    1512                 :          3 : static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
    1513                 :            : {
    1514                 :          3 :         if (disk_part_scan_enabled(bdev->bd_disk)) {
    1515                 :          3 :                 if (invalidate)
    1516                 :          0 :                         invalidate_partitions(bdev->bd_disk, bdev);
    1517                 :            :                 else
    1518                 :          3 :                         rescan_partitions(bdev->bd_disk, bdev);
    1519                 :            :         } else {
    1520                 :          0 :                 check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
    1521                 :          0 :                 bdev->bd_invalidated = 0;
    1522                 :            :         }
    1523                 :          3 : }
    1524                 :            : 
    1525                 :            : /*
    1526                 :            :  * bd_mutex locking:
    1527                 :            :  *
    1528                 :            :  *  mutex_lock(part->bd_mutex)
    1529                 :            :  *    mutex_lock_nested(whole->bd_mutex, 1)
                         :            :  *
                         :            :  * __blkdev_get() opens @bdev with @mode.  @for_part is 0 for a direct
                         :            :  * open and 1 when the function calls itself to open the whole-disk bdev
                         :            :  * on behalf of a partition; it is also the value passed as the second
                         :            :  * argument of mutex_lock_nested(), giving the order shown above.  The
                         :            :  * device cgroup permission check is only done when @for_part is 0.
                         :            :  *
                         :            :  * On the first open (bd_openers == 0) the bdev is bound to its gendisk.
                         :            :  * A whole-disk bdev then calls the driver's ->open(), sets its size from
                         :            :  * the disk capacity and handles a pending bd_invalidated; a partition
                         :            :  * bdev opens the whole disk first and takes its size from the partition.
                         :            :  * On later opens only a whole-disk bdev (bd_contains == bdev) calls the
                         :            :  * driver's ->open() again.
                         :            :  *
                         :            :  * Returns 0 on success, otherwise the error of the failing step (for
                         :            :  * example -ENXIO, -ENOMEM or the result of the driver's ->open()).
    1530                 :            :  */
    1531                 :            : 
    1532                 :          3 : static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
    1533                 :            : {
    1534                 :            :         struct gendisk *disk;
    1535                 :            :         int ret;
    1536                 :            :         int partno;
    1537                 :            :         int perm = 0;
    1538                 :            :         bool first_open = false;
    1539                 :            : 
    1540                 :          3 :         if (mode & FMODE_READ)
    1541                 :            :                 perm |= MAY_READ;
    1542                 :          3 :         if (mode & FMODE_WRITE)
    1543                 :          3 :                 perm |= MAY_WRITE;
    1544                 :            :         /*
    1545                 :            :          * hooks: /n/, see "layering violations".
    1546                 :            :          */
    1547                 :          3 :         if (!for_part) {
    1548                 :          3 :                 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
    1549                 :          3 :                 if (ret != 0)
    1550                 :            :                         return ret;
    1551                 :            :         }
    1552                 :            : 
    1553                 :            :  restart:
    1554                 :            :         /* re-entered when the driver's ->open() returns -ERESTARTSYS */
    1555                 :            :         ret = -ENXIO;
    1556                 :          3 :         disk = bdev_get_gendisk(bdev, &partno);
    1557                 :          3 :         if (!disk)
    1558                 :            :                 goto out;
    1559                 :            : 
    1560                 :          3 :         disk_block_events(disk);
    1561                 :          3 :         mutex_lock_nested(&bdev->bd_mutex, for_part);
    1562                 :          3 :         if (!bdev->bd_openers) {
    1563                 :            :                 first_open = true;
    1564                 :          3 :                 bdev->bd_disk = disk;
    1565                 :          3 :                 bdev->bd_queue = disk->queue;
    1566                 :          3 :                 bdev->bd_contains = bdev;
    1567                 :          3 :                 bdev->bd_partno = partno;
    1568                 :            : 
    1569                 :          3 :                 if (!partno) {
    1570                 :            :                         ret = -ENXIO;
    1571                 :          3 :                         bdev->bd_part = disk_get_part(disk, partno);
    1572                 :          3 :                         if (!bdev->bd_part)
    1573                 :            :                                 goto out_clear;
    1574                 :            : 
    1575                 :            :                         ret = 0;
    1576                 :          3 :                         if (disk->fops->open) {
    1577                 :          3 :                                 ret = disk->fops->open(bdev, mode);
    1578                 :          3 :                                 if (ret == -ERESTARTSYS) {
    1579                 :            :                                         /* Lost a race with 'disk' being
    1580                 :            :                                          * deleted, try again.
    1581                 :            :                                          * See md.c
    1582                 :            :                                          */
    1583                 :          0 :                                         disk_put_part(bdev->bd_part);
    1584                 :          0 :                                         bdev->bd_part = NULL;
    1585                 :          0 :                                         bdev->bd_disk = NULL;
    1586                 :          0 :                                         bdev->bd_queue = NULL;
    1587                 :          0 :                                         mutex_unlock(&bdev->bd_mutex);
    1588                 :          0 :                                         disk_unblock_events(disk);
    1589                 :          0 :                                         put_disk_and_module(disk);
    1590                 :          0 :                                         goto restart;
    1591                 :            :                                 }
    1592                 :            :                         }
    1593                 :            : 
    1594                 :          3 :                         if (!ret) {
    1595                 :          3 :                                 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
    1596                 :          3 :                                 set_init_blocksize(bdev);
    1597                 :            :                         }
    1598                 :            : 
    1599                 :            :                         /*
    1600                 :            :                          * If the device is invalidated, rescan partition
    1601                 :            :                          * if open succeeded or failed with -ENOMEDIUM.
    1602                 :            :                          * The latter is necessary to prevent ghost
    1603                 :            :                          * partitions on a removed medium.
    1604                 :            :                          */
    1605                 :          3 :                         if (bdev->bd_invalidated &&
    1606                 :          3 :                             (!ret || ret == -ENOMEDIUM))
    1607                 :          3 :                                 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
    1608                 :            : 
    1609                 :          3 :                         if (ret)
    1610                 :            :                                 goto out_clear;
    1611                 :            :                 } else {
    1612                 :            :                         struct block_device *whole;
    1613                 :          3 :                         whole = bdget_disk(disk, 0);
    1614                 :            :                         ret = -ENOMEM;
    1615                 :          3 :                         if (!whole)
    1616                 :            :                                 goto out_clear;
    1617                 :          3 :                         BUG_ON(for_part);
    1618                 :          3 :                         ret = __blkdev_get(whole, mode, 1);
    1619                 :          3 :                         if (ret) {
    1620                 :            :                                 bdput(whole);
    1621                 :            :                                 goto out_clear;
    1622                 :            :                         }
    1623                 :          3 :                         bdev->bd_contains = whole;
    1624                 :          3 :                         bdev->bd_part = disk_get_part(disk, partno);
    1625                 :          3 :                         if (!(disk->flags & GENHD_FL_UP) ||
    1626                 :          3 :                             !bdev->bd_part || !bdev->bd_part->nr_sects) {
    1627                 :            :                                 ret = -ENXIO;
    1628                 :            :                                 goto out_clear;
    1629                 :            :                         }
    1630                 :          3 :                         bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
    1631                 :          3 :                         set_init_blocksize(bdev);
    1632                 :            :                 }
    1633                 :            : 
    1634                 :          3 :                 if (bdev->bd_bdi == &noop_backing_dev_info)
    1635                 :          3 :                         bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
    1636                 :            :         } else {
    1637                 :          3 :                 if (bdev->bd_contains == bdev) {
    1638                 :            :                         ret = 0;
    1639                 :          3 :                         if (bdev->bd_disk->fops->open)
    1640                 :          3 :                                 ret = bdev->bd_disk->fops->open(bdev, mode);
    1641                 :            :                         /* the same as first opener case, read comment there */
    1642                 :          3 :                         if (bdev->bd_invalidated &&
    1643                 :          0 :                             (!ret || ret == -ENOMEDIUM))
    1644                 :          0 :                                 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
    1645                 :          3 :                         if (ret)
    1646                 :            :                                 goto out_unlock_bdev;
    1647                 :            :                 }
    1648                 :            :         }
    1649                 :          3 :         bdev->bd_openers++;
    1650                 :          3 :         if (for_part)
    1651                 :          3 :                 bdev->bd_part_count++;
    1652                 :          3 :         mutex_unlock(&bdev->bd_mutex);
    1653                 :          3 :         disk_unblock_events(disk);
    1654                 :            :         /* only one opener holds refs to the module and disk */
    1655                 :          3 :         if (!first_open)
    1656                 :          3 :                 put_disk_and_module(disk);
    1657                 :            :         return 0;
    1658                 :            : 
    1659                 :            :  out_clear:
    1660                 :          0 :         disk_put_part(bdev->bd_part);
    1661                 :          0 :         bdev->bd_disk = NULL;
    1662                 :          0 :         bdev->bd_part = NULL;
    1663                 :          0 :         bdev->bd_queue = NULL;
    1664                 :          0 :         if (bdev != bdev->bd_contains)
    1665                 :          0 :                 __blkdev_put(bdev->bd_contains, mode, 1);
    1666                 :          0 :         bdev->bd_contains = NULL;
    1667                 :            :  out_unlock_bdev:
    1668                 :          0 :         mutex_unlock(&bdev->bd_mutex);
    1669                 :          0 :         disk_unblock_events(disk);
    1670                 :          0 :         put_disk_and_module(disk);
    1671                 :            :  out:
    1672                 :            : 
    1673                 :          0 :         return ret;
    1674                 :            : }
    1675                 :            : 
    1676                 :            : /**
    1677                 :            :  * blkdev_get - open a block device
    1678                 :            :  * @bdev: block_device to open
    1679                 :            :  * @mode: FMODE_* mask
    1680                 :            :  * @holder: exclusive holder identifier
    1681                 :            :  *
    1682                 :            :  * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
    1683                 :            :  * opened with exclusive access.  Specifying %FMODE_EXCL with %NULL
    1684                 :            :  * @holder is invalid.  Exclusive opens may nest for the same @holder.
                         :            :  * An invalid %FMODE_EXCL/%NULL combination triggers a one-time warning
                         :            :  * and the open then proceeds without claiming the device.
    1685                 :            :  *
    1686                 :            :  * On success, the reference count of @bdev is unchanged.  On failure,
    1687                 :            :  * @bdev is put.
    1688                 :            :  *
    1689                 :            :  * CONTEXT:
    1690                 :            :  * Might sleep.
    1691                 :            :  *
    1692                 :            :  * RETURNS:
    1693                 :            :  * 0 on success, -errno on failure.
    1694                 :            :  */
    1695                 :          3 : int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
    1696                 :            : {
    1697                 :            :         struct block_device *whole = NULL;
    1698                 :            :         int res;
    1699                 :            : 
    1700                 :          3 :         WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
    1701                 :            : 
    1702                 :          3 :         if ((mode & FMODE_EXCL) && holder) {
    1703                 :          3 :                 whole = bd_start_claiming(bdev, holder);
    1704                 :          3 :                 if (IS_ERR(whole)) {
    1705                 :            :                         bdput(bdev);
    1706                 :          2 :                         return PTR_ERR(whole);
    1707                 :            :                 }
    1708                 :            :         }
    1709                 :            : 
    1710                 :          3 :         res = __blkdev_get(bdev, mode, 0);
    1711                 :            : 
    1712                 :          3 :         if (whole) {
    1713                 :          3 :                 struct gendisk *disk = whole->bd_disk;
    1714                 :            : 
    1715                 :            :                 /* finish claiming */
    1716                 :          3 :                 mutex_lock(&bdev->bd_mutex);
    1717                 :          3 :                 if (!res)
    1718                 :          3 :                         bd_finish_claiming(bdev, whole, holder);
    1719                 :            :                 else
    1720                 :          0 :                         bd_abort_claiming(bdev, whole, holder);
    1721                 :            :                 /*
    1722                 :            :                  * Block event polling for write claims if requested.  Any
    1723                 :            :                  * write holder makes the write_holder state stick until
    1724                 :            :                  * all are released.  This is good enough and tracking
    1725                 :            :                  * individual writeable reference is too fragile given the
    1726                 :            :                  * way @mode is used in blkdev_get/put().
    1727                 :            :                  */
    1728                 :          3 :                 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
    1729                 :          3 :                     (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
    1730                 :          0 :                         bdev->bd_write_holder = true;
    1731                 :          0 :                         disk_block_events(disk);
    1732                 :            :                 }
    1733                 :            : 
    1734                 :          3 :                 mutex_unlock(&bdev->bd_mutex);
    1735                 :            :                 bdput(whole);
    1736                 :            :         }
    1737                 :            : 
    1738                 :          3 :         if (res)
    1739                 :            :                 bdput(bdev);
    1740                 :            : 
    1741                 :          3 :         return res;
    1742                 :            : }
    1743                 :            : EXPORT_SYMBOL(blkdev_get);
    1744                 :            : 
    1745                 :            : /**
    1746                 :            :  * blkdev_get_by_path - open a block device by name
    1747                 :            :  * @path: path to the block device to open
    1748                 :            :  * @mode: FMODE_* mask
    1749                 :            :  * @holder: exclusive holder identifier
    1750                 :            :  *
    1751                 :            :  * Open the blockdevice described by the device file at @path.  @mode
    1752                 :            :  * and @holder are identical to blkdev_get().
    1753                 :            :  *
                         :            :  * If @mode includes %FMODE_WRITE but the opened device turns out to be
                         :            :  * read-only, it is closed again with blkdev_put() and -EACCES is
                         :            :  * returned.
                         :            :  *
    1754                 :            :  * On success, the returned block_device has reference count of one.
    1755                 :            :  *
    1756                 :            :  * CONTEXT:
    1757                 :            :  * Might sleep.
    1758                 :            :  *
    1759                 :            :  * RETURNS:
    1760                 :            :  * Pointer to block_device on success, ERR_PTR(-errno) on failure.
    1761                 :            :  */
    1762                 :          3 : struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
    1763                 :            :                                         void *holder)
    1764                 :            : {
    1765                 :            :         struct block_device *bdev;
    1766                 :            :         int err;
    1767                 :            : 
    1768                 :          3 :         bdev = lookup_bdev(path);
    1769                 :          3 :         if (IS_ERR(bdev))
    1770                 :            :                 return bdev;
    1771                 :            : 
    1772                 :          3 :         err = blkdev_get(bdev, mode, holder);
    1773                 :          3 :         if (err)
    1774                 :          0 :                 return ERR_PTR(err);
    1775                 :            : 
    1776                 :          3 :         if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
    1777                 :          0 :                 blkdev_put(bdev, mode);
    1778                 :          0 :                 return ERR_PTR(-EACCES);
    1779                 :            :         }
    1780                 :            : 
    1781                 :          3 :         return bdev;
    1782                 :            : }
    1783                 :            : EXPORT_SYMBOL(blkdev_get_by_path);
    1784                 :            : 
    1785                 :            : /**
    1786                 :            :  * blkdev_get_by_dev - open a block device by device number
    1787                 :            :  * @dev: device number of block device to open
    1788                 :            :  * @mode: FMODE_* mask
    1789                 :            :  * @holder: exclusive holder identifier
    1790                 :            :  *
    1791                 :            :  * Open the blockdevice described by device number @dev.  @mode and
    1792                 :            :  * @holder are identical to blkdev_get().
    1793                 :            :  *
    1794                 :            :  * Use it ONLY if you really do not have anything better - i.e. when
    1795                 :            :  * you are behind a truly sucky interface and all you are given is a
    1796                 :            :  * device number.  _Never_ to be used for internal purposes.  If you
    1797                 :            :  * ever need it - reconsider your API.
    1798                 :            :  *
    1799                 :            :  * On success, the returned block_device has reference count of one.
    1800                 :            :  *
    1801                 :            :  * CONTEXT:
    1802                 :            :  * Might sleep.
    1803                 :            :  *
    1804                 :            :  * RETURNS:
    1805                 :            :  * Pointer to block_device on success, ERR_PTR(-errno) on failure.
    1806                 :            :  */
    1807                 :          0 : struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
    1808                 :            : {
    1809                 :            :         struct block_device *bdev;
    1810                 :            :         int err;
    1811                 :            : 
    1812                 :          0 :         bdev = bdget(dev);
    1813                 :          0 :         if (!bdev)
    1814                 :            :                 return ERR_PTR(-ENOMEM);        /* a NULL from bdget() is reported as -ENOMEM */
    1815                 :            : 
    1816                 :          0 :         err = blkdev_get(bdev, mode, holder);
    1817                 :          0 :         if (err)
    1818                 :          0 :                 return ERR_PTR(err);    /* NOTE(review): no bdput() here - presumably blkdev_get() drops the reference on failure; confirm */
    1819                 :            : 
    1820                 :            :         return bdev;    /* unlike blkdev_get_by_path(), no bdev_read_only() check is made here */
    1821                 :            : }
    1822                 :            : EXPORT_SYMBOL(blkdev_get_by_dev);
    1823                 :            : 
    1824                 :          3 : static int blkdev_open(struct inode * inode, struct file * filp)
    1825                 :            : {       /* ->open handler (see def_blk_fops): derive f_mode bits from f_flags, then blkdev_get() */
    1826                 :            :         struct block_device *bdev;
    1827                 :            : 
    1828                 :            :         /*
    1829                 :            :          * Preserve backwards compatibility and allow large file access
    1830                 :            :          * even if userspace doesn't ask for it explicitly. Some mkfs
    1831                 :            :          * binary needs it. We might want to drop this workaround
    1832                 :            :          * during an unstable branch.
    1833                 :            :          */
    1834                 :          3 :         filp->f_flags |= O_LARGEFILE;
    1835                 :            : 
    1836                 :          3 :         filp->f_mode |= FMODE_NOWAIT;   /* set unconditionally for every open */
    1837                 :            : 
    1838                 :          3 :         if (filp->f_flags & O_NDELAY)
    1839                 :          3 :                 filp->f_mode |= FMODE_NDELAY;
    1840                 :          3 :         if (filp->f_flags & O_EXCL)
    1841                 :          2 :                 filp->f_mode |= FMODE_EXCL;
    1842                 :          3 :         if ((filp->f_flags & O_ACCMODE) == 3)   /* NOTE(review): 3 is presumably the special "both access bits set" mode described in open(2); confirm */
    1843                 :          0 :                 filp->f_mode |= FMODE_WRITE_IOCTL;
    1844                 :            : 
    1845                 :          3 :         bdev = bd_acquire(inode);
    1846                 :          3 :         if (bdev == NULL)
    1847                 :            :                 return -ENOMEM;
    1848                 :            : 
    1849                 :          3 :         filp->f_mapping = bdev->bd_inode->i_mapping;    /* file I/O goes through the bdev inode's mapping */
    1850                 :          3 :         filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
    1851                 :            : 
    1852                 :          3 :         return blkdev_get(bdev, filp->f_mode, filp);    /* the struct file itself is passed as the holder argument */
    1853                 :            : }
    1854                 :            : 
    1855                 :          3 : static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
    1856                 :            : {       /* drop one opener of bdev; on the last one, detach it from its disk and put the containing device too */
    1857                 :          3 :         struct gendisk *disk = bdev->bd_disk;   /* cached now: bd_disk is set to NULL below */
    1858                 :            :         struct block_device *victim = NULL;
    1859                 :            : 
    1860                 :          3 :         mutex_lock_nested(&bdev->bd_mutex, for_part);   /* for_part is also handed to the lock call as its second argument */
    1861                 :          3 :         if (for_part)
    1862                 :          3 :                 bdev->bd_part_count--;
    1863                 :            : 
    1864                 :          3 :         if (!--bdev->bd_openers) {      /* that was the last opener */
    1865                 :          3 :                 WARN_ON_ONCE(bdev->bd_holders);
    1866                 :            :                 sync_blockdev(bdev);
    1867                 :          3 :                 kill_bdev(bdev);
    1868                 :            : 
    1869                 :          3 :                 bdev_write_inode(bdev);
    1870                 :            :         }
    1871                 :          3 :         if (bdev->bd_contains == bdev) {        /* bdev is its own container */
    1872                 :          3 :                 if (disk->fops->release)        /* the driver's release hook is optional */
    1873                 :          3 :                         disk->fops->release(disk, mode);
    1874                 :            :         }
    1875                 :          3 :         if (!bdev->bd_openers) {
    1876                 :          3 :                 disk_put_part(bdev->bd_part);
    1877                 :          3 :                 bdev->bd_part = NULL;
    1878                 :          3 :                 bdev->bd_disk = NULL;
    1879                 :          3 :                 if (bdev != bdev->bd_contains)
    1880                 :            :                         victim = bdev->bd_contains;     /* remember the container; it is put after bd_mutex is dropped */
    1881                 :          3 :                 bdev->bd_contains = NULL;
    1882                 :            : 
    1883                 :          3 :                 put_disk_and_module(disk);
    1884                 :            :         }
    1885                 :          3 :         mutex_unlock(&bdev->bd_mutex);
    1886                 :            :         bdput(bdev);
    1887                 :          3 :         if (victim)
    1888                 :          3 :                 __blkdev_put(victim, mode, 1);  /* release the container as a for_part put */
    1889                 :          3 : }
    1890                 :            : 
    1891                 :          3 : void blkdev_put(struct block_device *bdev, fmode_t mode)
    1892                 :            : {       /* release the exclusive claim when FMODE_EXCL is set, flush disk events, then __blkdev_put() */
    1893                 :          3 :         mutex_lock(&bdev->bd_mutex);
    1894                 :            : 
    1895                 :          3 :         if (mode & FMODE_EXCL) {
    1896                 :            :                 bool bdev_free;
    1897                 :            : 
    1898                 :            :                 /*
    1899                 :            :                  * Release a claim on the device.  The holder fields
    1900                 :            :                  * are protected with bdev_lock.  bd_mutex is to
    1901                 :            :                  * synchronize disk_holder unlinking.
    1902                 :            :                  */
    1903                 :            :                 spin_lock(&bdev_lock);
    1904                 :            : 
    1905                 :          1 :                 WARN_ON_ONCE(--bdev->bd_holders < 0);
    1906                 :          1 :                 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
    1907                 :            : 
    1908                 :            :                 /* bd_contains might point to self, check in a separate step */
    1909                 :          1 :                 if ((bdev_free = !bdev->bd_holders))    /* intentional assignment: records whether the last holder just left */
    1910                 :          1 :                         bdev->bd_holder = NULL;
    1911                 :          1 :                 if (!bdev->bd_contains->bd_holders)
    1912                 :          1 :                         bdev->bd_contains->bd_holder = NULL;
    1913                 :            : 
    1914                 :            :                 spin_unlock(&bdev_lock);
    1915                 :            : 
    1916                 :            :                 /*
    1917                 :            :                  * If this was the last claim, remove holder link and
    1918                 :            :                  * unblock disk events if it was a write holder.
    1919                 :            :                  */
    1920                 :          1 :                 if (bdev_free && bdev->bd_write_holder) {
    1921                 :          0 :                         disk_unblock_events(bdev->bd_disk);
    1922                 :          0 :                         bdev->bd_write_holder = false;
    1923                 :            :                 }
    1924                 :            :         }
    1925                 :            : 
    1926                 :            :         /*
    1927                 :            :          * Trigger event checking and tell drivers to flush MEDIA_CHANGE
    1928                 :            :          * event.  This is to ensure detection of media removal commanded
    1929                 :            :          * from userland - e.g. eject(1).
    1930                 :            :          */
    1931                 :          3 :         disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
    1932                 :            : 
    1933                 :          3 :         mutex_unlock(&bdev->bd_mutex);
    1934                 :            : 
    1935                 :          3 :         __blkdev_put(bdev, mode, 0);    /* called after unlocking: __blkdev_put() takes bd_mutex itself */
    1936                 :          3 : }
    1937                 :            : EXPORT_SYMBOL(blkdev_put);
    1938                 :            : 
    1939                 :          3 : static int blkdev_close(struct inode * inode, struct file * filp)
    1940                 :            : {       /* ->release handler (see def_blk_fops); the inode argument is not used */
    1941                 :            :         struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
    1942                 :          3 :         blkdev_put(bdev, filp->f_mode);         /* mirrors blkdev_get(bdev, filp->f_mode, filp) in blkdev_open() */
    1943                 :          3 :         return 0;       /* always reports success */
    1944                 :            : }
    1945                 :            : 
    1946                 :          3 : static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
    1947                 :            : {       /* ->unlocked_ioctl handler (see def_blk_fops): refresh FMODE_NDELAY, then call blkdev_ioctl() */
    1948                 :            :         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
    1949                 :          3 :         fmode_t mode = file->f_mode;    /* local copy; file->f_mode itself is left untouched */
    1950                 :            : 
    1951                 :            :         /*
    1952                 :            :          * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
    1953                 :            :          * to update it before every ioctl.
    1954                 :            :          */
    1955                 :          3 :         if (file->f_flags & O_NDELAY)
    1956                 :          0 :                 mode |= FMODE_NDELAY;
    1957                 :            :         else
    1958                 :          3 :                 mode &= ~FMODE_NDELAY;
    1959                 :            : 
    1960                 :          3 :         return blkdev_ioctl(bdev, mode, cmd, arg);
    1961                 :            : }
    1962                 :            : 
    1963                 :            : /*
    1964                 :            :  * Write data to the block device.  Only intended for the block device itself
    1965                 :            :  * and the raw driver which basically is a fake block device.
    1966                 :            :  *
    1967                 :            :  * Does not take i_mutex for the write and thus is not for general purpose
    1968                 :            :  * use.
    1969                 :            :  */
    1970                 :          0 : ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
    1971                 :            : {
    1972                 :          0 :         struct file *file = iocb->ki_filp;
    1973                 :            :         struct inode *bd_inode = bdev_file_inode(file);
    1974                 :            :         loff_t size = i_size_read(bd_inode);    /* sampled once; used for every bound check below */
    1975                 :            :         struct blk_plug plug;
    1976                 :            :         ssize_t ret;
    1977                 :            : 
    1978                 :          0 :         if (bdev_read_only(I_BDEV(bd_inode)))
    1979                 :            :                 return -EPERM;
    1980                 :            : 
    1981                 :            :         /* uswsusp needs write permission to the swap */
    1982                 :          0 :         if (IS_SWAPFILE(bd_inode) && !hibernation_available())
    1983                 :            :                 return -ETXTBSY;
    1984                 :            : 
    1985                 :          0 :         if (!iov_iter_count(from))
    1986                 :            :                 return 0;       /* empty write: succeed before the position check */
    1987                 :            : 
    1988                 :          0 :         if (iocb->ki_pos >= size)
    1989                 :            :                 return -ENOSPC; /* write would start at or beyond the sampled size */
    1990                 :            : 
    1991                 :          0 :         if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
    1992                 :            :                 return -EOPNOTSUPP;     /* IOCB_NOWAIT without IOCB_DIRECT is rejected */
    1993                 :            : 
    1994                 :          0 :         iov_iter_truncate(from, size - iocb->ki_pos);   /* cap the request at the bytes left before size */
    1995                 :            : 
    1996                 :          0 :         blk_start_plug(&plug);
    1997                 :          0 :         ret = __generic_file_write_iter(iocb, from);
    1998                 :          0 :         if (ret > 0)
    1999                 :          0 :                 ret = generic_write_sync(iocb, ret);    /* only when a positive byte count was written */
    2000                 :          0 :         blk_finish_plug(&plug);
    2001                 :          0 :         return ret;
    2002                 :            : }
    2003                 :            : EXPORT_SYMBOL_GPL(blkdev_write_iter);
    2004                 :            : 
    2005                 :          3 : ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
    2006                 :            : {       /* read from the block device, with the request capped at the device inode's size */
    2007                 :          3 :         struct file *file = iocb->ki_filp;
    2008                 :            :         struct inode *bd_inode = bdev_file_inode(file);
    2009                 :            :         loff_t size = i_size_read(bd_inode);
    2010                 :          3 :         loff_t pos = iocb->ki_pos;
    2011                 :            : 
    2012                 :          3 :         if (pos >= size)
    2013                 :            :                 return 0;       /* at or past the end: report zero bytes rather than an error */
    2014                 :            : 
    2015                 :          3 :         size -= pos;    /* size now holds the bytes remaining from pos */
    2016                 :          3 :         iov_iter_truncate(to, size);
    2017                 :          3 :         return generic_file_read_iter(iocb, to);
    2018                 :            : }
    2019                 :            : EXPORT_SYMBOL_GPL(blkdev_read_iter);
    2020                 :            : 
    2021                 :            : /*
    2022                 :            :  * Try to release a page associated with block device when the system
    2023                 :            :  * is under memory pressure.
    2024                 :            :  */
    2025                 :          3 : static int blkdev_releasepage(struct page *page, gfp_t wait)
    2026                 :            : {
    2027                 :          3 :         struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
    2028                 :            : 
    2029                 :          3 :         if (super && super->s_op->bdev_try_to_free_page)        /* bd_super is set and it provides its own hook */
    2030                 :          0 :                 return super->s_op->bdev_try_to_free_page(super, page, wait);
    2031                 :            : 
    2032                 :          3 :         return try_to_free_buffers(page);       /* fallback; the wait argument is not used on this path */
    2033                 :            : }
    2034                 :            : 
    2035                 :          3 : static int blkdev_writepages(struct address_space *mapping,
    2036                 :            :                              struct writeback_control *wbc)
    2037                 :            : {       /* ->writepages entry of def_blk_aops: forwards both arguments unchanged */
    2038                 :          3 :         return generic_writepages(mapping, wbc);
    2039                 :            : }
    2040                 :            : 
    2041                 :            : static const struct address_space_operations def_blk_aops = {   /* file-local table of the blkdev_* and buffer_* page-cache handlers */
    2042                 :            :         .readpage       = blkdev_readpage,
    2043                 :            :         .readpages      = blkdev_readpages,
    2044                 :            :         .writepage      = blkdev_writepage,
    2045                 :            :         .write_begin    = blkdev_write_begin,
    2046                 :            :         .write_end      = blkdev_write_end,
    2047                 :            :         .writepages     = blkdev_writepages,
    2048                 :            :         .releasepage    = blkdev_releasepage,
    2049                 :            :         .direct_IO      = blkdev_direct_IO,
    2050                 :            :         .migratepage    = buffer_migrate_page_norefs,
    2051                 :            :         .is_dirty_writeback = buffer_check_dirty_writeback,
    2052                 :            : };
    2053                 :            : 
    2054                 :            : #define BLKDEV_FALLOC_FL_SUPPORTED                                      \
    2055                 :            :                 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
    2056                 :            :                  FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)        /* any other mode bit makes blkdev_fallocate() return -EOPNOTSUPP */
    2057                 :            : 
    2058                 :          0 : static long blkdev_fallocate(struct file *file, int mode, loff_t start,
    2059                 :            :                              loff_t len)
    2060                 :            : {       /* ->fallocate handler (see def_blk_fops): zero-out or discard a block-aligned byte range of the device */
    2061                 :            :         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
    2062                 :            :         struct address_space *mapping;
    2063                 :          0 :         loff_t end = start + len - 1;   /* inclusive offset of the last byte in the range */
    2064                 :            :         loff_t isize;
    2065                 :            :         int error;
    2066                 :            : 
    2067                 :            :         /* Fail if we don't recognize the flags. */
    2068                 :          0 :         if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
    2069                 :            :                 return -EOPNOTSUPP;
    2070                 :            : 
    2071                 :            :         /* Don't go off the end of the device. */
    2072                 :          0 :         isize = i_size_read(bdev->bd_inode);
    2073                 :          0 :         if (start >= isize)
    2074                 :            :                 return -EINVAL;
    2075                 :          0 :         if (end >= isize) {
    2076                 :          0 :                 if (mode & FALLOC_FL_KEEP_SIZE) {       /* with KEEP_SIZE the range is clipped to the device size instead of failing */
    2077                 :          0 :                         len = isize - start;
    2078                 :          0 :                         end = start + len - 1;
    2079                 :            :                 } else
    2080                 :            :                         return -EINVAL;
    2081                 :            :         }
    2082                 :            : 
    2083                 :            :         /*
    2084                 :            :          * Don't allow IO that isn't aligned to logical block size.
    2085                 :            :          */
    2086                 :          0 :         if ((start | len) & (bdev_logical_block_size(bdev) - 1))        /* NOTE(review): mask test assumes the block size is a power of two; confirm */
    2087                 :            :                 return -EINVAL;
    2088                 :            : 
    2089                 :            :         /* Invalidate the page cache, including dirty pages. */
    2090                 :          0 :         mapping = bdev->bd_inode->i_mapping;
    2091                 :          0 :         truncate_inode_pages_range(mapping, start, end);
    2092                 :            : 
    2093                 :          0 :         switch (mode) {         /* exact match on the whole flag combination, not a bit test */
    2094                 :            :         case FALLOC_FL_ZERO_RANGE:
    2095                 :            :         case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
    2096                 :          0 :                 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,        /* NOTE(review): >> 9 presumably converts bytes to 512-byte sectors; confirm */
    2097                 :            :                                             GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
    2098                 :          0 :                 break;
    2099                 :            :         case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
    2100                 :          0 :                 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
    2101                 :            :                                              GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
    2102                 :          0 :                 break;
    2103                 :            :         case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
    2104                 :          0 :                 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
    2105                 :            :                                              GFP_KERNEL, 0);
    2106                 :          0 :                 break;
    2107                 :            :         default:
    2108                 :            :                 return -EOPNOTSUPP;     /* NOTE(review): supported bits in an unlisted combination; reached only after the page cache range was already truncated above */
    2109                 :            :         }
    2110                 :          0 :         if (error)
    2111                 :            :                 return error;
    2112                 :            : 
    2113                 :            :         /*
    2114                 :            :          * Invalidate again; if someone wandered in and dirtied a page,
    2115                 :            :          * the caller will be given -EBUSY.  The third argument is
    2116                 :            :          * inclusive, so the rounding here is safe.
    2117                 :            :          */
    2118                 :          0 :         return invalidate_inode_pages2_range(mapping,
    2119                 :          0 :                                              start >> PAGE_SHIFT,
    2120                 :          0 :                                              end >> PAGE_SHIFT);
    2121                 :            : }
    2122                 :            : 
    2123                 :            : const struct file_operations def_blk_fops = {   /* not static, so visible outside this file; wires up the handlers listed below */
    2124                 :            :         .open           = blkdev_open,
    2125                 :            :         .release        = blkdev_close,
    2126                 :            :         .llseek         = block_llseek,
    2127                 :            :         .read_iter      = blkdev_read_iter,
    2128                 :            :         .write_iter     = blkdev_write_iter,
    2129                 :            :         .iopoll         = blkdev_iopoll,
    2130                 :            :         .mmap           = generic_file_mmap,
    2131                 :            :         .fsync          = blkdev_fsync,
    2132                 :            :         .unlocked_ioctl = block_ioctl,
    2133                 :            : #ifdef CONFIG_COMPAT
    2134                 :            :         .compat_ioctl   = compat_blkdev_ioctl,  /* only present when CONFIG_COMPAT is defined */
    2135                 :            : #endif
    2136                 :            :         .splice_read    = generic_file_splice_read,
    2137                 :            :         .splice_write   = iter_file_splice_write,
    2138                 :            :         .fallocate      = blkdev_fallocate,
    2139                 :            : };
    2140                 :            : 
    2141                 :          0 : int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
    2142                 :            : {       /* run blkdev_ioctl() on bdev with mode 0 while the address limit is temporarily KERNEL_DS */
    2143                 :            :         int res;
    2144                 :          0 :         mm_segment_t old_fs = get_fs(); /* saved so it can be restored below */
    2145                 :            :         set_fs(KERNEL_DS);      /* NOTE(review): presumably lets blkdev_ioctl() accept a kernel-space pointer in arg; confirm */
    2146                 :          0 :         res = blkdev_ioctl(bdev, 0, cmd, arg);
    2147                 :            :         set_fs(old_fs);         /* restore the caller's setting before returning */
    2148                 :          0 :         return res;
    2149                 :            : }
    2150                 :            : 
    2151                 :            : EXPORT_SYMBOL(ioctl_by_bdev);
    2152                 :            : 
    2153                 :            : /**
    2154                 :            :  * lookup_bdev  - lookup a struct block_device by name
    2155                 :            :  * @pathname:   special file representing the block device
    2156                 :            :  *
    2157                 :            :  * Get a reference to the blockdevice at @pathname in the current
    2158                 :            :  * namespace if possible and return it.  Return ERR_PTR(error)
    2159                 :            :  * otherwise.
    2160                 :            :  */
    2161                 :          3 : struct block_device *lookup_bdev(const char *pathname)
    2162                 :            : {
    2163                 :            :         struct block_device *bdev;
    2164                 :            :         struct inode *inode;
    2165                 :            :         struct path path;
    2166                 :            :         int error;
    2167                 :            : 
    2168                 :          3 :         if (!pathname || !*pathname)
    2169                 :            :                 return ERR_PTR(-EINVAL);        /* NULL or empty path */
    2170                 :            : 
    2171                 :          3 :         error = kern_path(pathname, LOOKUP_FOLLOW, &path);
    2172                 :          3 :         if (error)
    2173                 :          0 :                 return ERR_PTR(error);  /* nothing to path_put() yet, so return directly */
    2174                 :            : 
    2175                 :          3 :         inode = d_backing_inode(path.dentry);
    2176                 :            :         error = -ENOTBLK;       /* error is pre-set before each check so "goto fail" carries the right code */
    2177                 :          3 :         if (!S_ISBLK(inode->i_mode))
    2178                 :            :                 goto fail;
    2179                 :            :         error = -EACCES;
    2180                 :          3 :         if (!may_open_dev(&path))
    2181                 :            :                 goto fail;
    2182                 :            :         error = -ENOMEM;
    2183                 :          3 :         bdev = bd_acquire(inode);
    2184                 :          3 :         if (!bdev)
    2185                 :            :                 goto fail;
    2186                 :            : out:
    2187                 :          3 :         path_put(&path);        /* shared exit: success and failure both drop the path here */
    2188                 :          3 :         return bdev;
    2189                 :            : fail:
    2190                 :            :         bdev = ERR_PTR(error);
    2191                 :          0 :         goto out;
    2192                 :            : }
    2193                 :            : EXPORT_SYMBOL(lookup_bdev);
    2194                 :            : 
    2195                 :          3 : int __invalidate_device(struct block_device *bdev, bool kill_dirty)
    2196                 :            : {       /* invalidate bdev and, if get_super() finds a superblock for it, that superblock's dentries and inodes too */
    2197                 :          3 :         struct super_block *sb = get_super(bdev);
    2198                 :            :         int res = 0;    /* stays 0 unless invalidate_inodes() below returns something else */
    2199                 :            : 
    2200                 :          3 :         if (sb) {
    2201                 :            :                 /*
    2202                 :            :                  * no need to lock the super, get_super holds the
    2203                 :            :                  * read mutex so the filesystem cannot go away
    2204                 :            :                  * under us (->put_super runs with the write lock
    2205                 :            :                  * held).
    2206                 :            :                  */
    2207                 :          0 :                 shrink_dcache_sb(sb);
    2208                 :          0 :                 res = invalidate_inodes(sb, kill_dirty);
    2209                 :          0 :                 drop_super(sb);
    2210                 :            :         }
    2211                 :          3 :         invalidate_bdev(bdev);  /* done whether or not a superblock was found */
    2212                 :          3 :         return res;
    2213                 :            : }
    2214                 :            : EXPORT_SYMBOL(__invalidate_device);
    2215                 :            : /* Walk the inodes on blockdev_superblock->s_inodes and, for each one that has cached pages and whose block device has bd_openers != 0, call func(bdev, arg) with bdev->bd_mutex held. */
    2216                 :          3 : void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
    2217                 :            : {
    2218                 :            :         struct inode *inode, *old_inode = NULL;
    2219                 :            : 
    2220                 :          3 :         spin_lock(&blockdev_superblock->s_inode_list_lock);
    2221                 :          3 :         list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
    2222                 :          3 :                 struct address_space *mapping = inode->i_mapping;
    2223                 :            :                 struct block_device *bdev;
    2224                 :            : 
    2225                 :            :                 spin_lock(&inode->i_lock);
    2226                 :          3 :                 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
    2227                 :          3 :                     mapping->nrpages == 0) {
    2228                 :            :                         spin_unlock(&inode->i_lock);
    2229                 :          3 :                         continue; /* skip: inode is in one of the above states or has no pages */
    2230                 :            :                 }
    2231                 :          3 :                 __iget(inode); /* take the reference the comment below relies on */
    2232                 :            :                 spin_unlock(&inode->i_lock);
    2233                 :          3 :                 spin_unlock(&blockdev_superblock->s_inode_list_lock);
    2234                 :            :                 /*
    2235                 :            :                  * We hold a reference to 'inode' so it couldn't have been
    2236                 :            :                  * removed from s_inodes list while we dropped the
    2237                 :            :                  * s_inode_list_lock. We cannot iput the inode now as we can
    2238                 :            :                  * be holding the last reference and we cannot iput it under
    2239                 :            :                  * s_inode_list_lock. So we keep the reference and iput it
    2240                 :            :                  * later.
    2241                 :            :                  */
    2242                 :          3 :                 iput(old_inode); /* release the previous iteration's inode; list lock is not held here */
    2243                 :            :                 old_inode = inode;
    2244                 :            :                 bdev = I_BDEV(inode);
    2245                 :            : 
    2246                 :          3 :                 mutex_lock(&bdev->bd_mutex);
    2247                 :          3 :                 if (bdev->bd_openers) /* func is only called when bd_openers is non-zero */
    2248                 :          3 :                         func(bdev, arg);
    2249                 :          3 :                 mutex_unlock(&bdev->bd_mutex);
    2250                 :            : 
    2251                 :          3 :                 spin_lock(&blockdev_superblock->s_inode_list_lock); /* retake before the list walk continues */
    2252                 :            :         }
    2253                 :          3 :         spin_unlock(&blockdev_superblock->s_inode_list_lock);
    2254                 :          3 :         iput(old_inode); /* release the reference kept from the last processed inode */
    2255                 :          3 : }

Generated by: LCOV version 1.14