LCOV - code coverage report
Current view: top level - fs/ext4 - mballoc.c (source / functions) Hit Total Coverage
Test: gcov_data_raspi2_qemu_modules_combined.info Lines: 1211 1867 64.9 %
Date: 2020-09-30 20:25:01 Functions: 62 83 74.7 %
Branches: 621 2231 27.8 %

           Branch data     Line data    Source code
       1                 :            : // SPDX-License-Identifier: GPL-2.0
       2                 :            : /*
       3                 :            :  * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
       4                 :            :  * Written by Alex Tomas <alex@clusterfs.com>
       5                 :            :  */
       6                 :            : 
       7                 :            : 
       8                 :            : /*
       9                 :            :  * mballoc.c contains the multiblocks allocation routines
      10                 :            :  */
      11                 :            : 
      12                 :            : #include "ext4_jbd2.h"
      13                 :            : #include "mballoc.h"
      14                 :            : #include <linux/log2.h>
      15                 :            : #include <linux/module.h>
      16                 :            : #include <linux/slab.h>
      17                 :            : #include <linux/nospec.h>
      18                 :            : #include <linux/backing-dev.h>
      19                 :            : #include <trace/events/ext4.h>
      20                 :            : 
      21                 :            : #ifdef CONFIG_EXT4_DEBUG
      22                 :            : ushort ext4_mballoc_debug __read_mostly;
      23                 :            : 
      24                 :            : module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
      25                 :            : MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
      26                 :            : #endif
      27                 :            : 
      28                 :            : /*
      29                 :            :  * MUSTDO:
      30                 :            :  *   - test ext4_ext_search_left() and ext4_ext_search_right()
      31                 :            :  *   - search for metadata in few groups
      32                 :            :  *
      33                 :            :  * TODO v4:
      34                 :            :  *   - normalization should take into account whether file is still open
      35                 :            :  *   - discard preallocations if no free space left (policy?)
      36                 :            :  *   - don't normalize tails
      37                 :            :  *   - quota
      38                 :            :  *   - reservation for superuser
      39                 :            :  *
      40                 :            :  * TODO v3:
      41                 :            :  *   - bitmap read-ahead (proposed by Oleg Drokin aka green)
      42                 :            :  *   - track min/max extents in each group for better group selection
      43                 :            :  *   - mb_mark_used() may allocate chunk right after splitting buddy
      44                 :            :  *   - tree of groups sorted by number of free blocks
      45                 :            :  *   - error handling
      46                 :            :  */
      47                 :            : 
      48                 :            : /*
      49                 :            :  * An allocation request involves a request for multiple blocks
      50                 :            :  * near to the goal(block) value specified.
      51                 :            :  *
      52                 :            :  * During initialization phase of the allocator we decide to use the
      53                 :            :  * group preallocation or inode preallocation depending on the size of
      54                 :            :  * the file. The size of the file could be the resulting file size we
      55                 :            :  * would have after allocation, or the current file size, which ever
      56                 :            :  * is larger. If the size is less than sbi->s_mb_stream_request we
      57                 :            :  * select to use the group preallocation. The default value of
      58                 :            :  * s_mb_stream_request is 16 blocks. This can also be tuned via
      59                 :            :  * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
      60                 :            :  * terms of number of blocks.
      61                 :            :  *
      62                 :            :  * The main motivation for having small file use group preallocation is to
      63                 :            :  * ensure that we have small files closer together on the disk.
      64                 :            :  *
      65                 :            :  * In the first stage the allocator looks at the inode prealloc list,
      66                 :            :  * ext4_inode_info->i_prealloc_list, which contains list of prealloc
      67                 :            :  * spaces for this particular inode. The inode prealloc space is
      68                 :            :  * represented as:
      69                 :            :  *
      70                 :            :  * pa_lstart -> the logical start block for this prealloc space
      71                 :            :  * pa_pstart -> the physical start block for this prealloc space
      72                 :            :  * pa_len    -> length for this prealloc space (in clusters)
      73                 :            :  * pa_free   ->  free space available in this prealloc space (in clusters)
      74                 :            :  *
      75                 :            :  * The inode preallocation space is used looking at the _logical_ start
      76                 :            :  * block. If only the logical file block falls within the range of prealloc
      77                 :            :  * space we will consume the particular prealloc space. This makes sure that
      78                 :            :  * we have contiguous physical blocks representing the file blocks
      79                 :            :  *
      80                 :            :  * The important thing to be noted in case of inode prealloc space is that
      81                 :            :  * we don't modify the values associated to inode prealloc space except
      82                 :            :  * pa_free.
      83                 :            :  *
      84                 :            :  * If we are not able to find blocks in the inode prealloc space and if we
      85                 :            :  * have the group allocation flag set then we look at the locality group
      86                 :            :  * prealloc space. These are per CPU prealloc list represented as
      87                 :            :  *
      88                 :            :  * ext4_sb_info.s_locality_groups[smp_processor_id()]
      89                 :            :  *
      90                 :            :  * The reason for having a per cpu locality group is to reduce the contention
      91                 :            :  * between CPUs. It is possible to get scheduled at this point.
      92                 :            :  *
      93                 :            :  * The locality group prealloc space is used looking at whether we have
      94                 :            :  * enough free space (pa_free) within the prealloc space.
      95                 :            :  *
      96                 :            :  * If we can't allocate blocks via inode prealloc or/and locality group
      97                 :            :  * prealloc then we look at the buddy cache. The buddy cache is represented
      98                 :            :  * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets
      99                 :            :  * mapped to the buddy and bitmap information regarding different
     100                 :            :  * groups. The buddy information is attached to buddy cache inode so that
     101                 :            :  * we can access them through the page cache. The information regarding
     102                 :            :  * each group is loaded via ext4_mb_load_buddy.  The information involves
     103                 :            :  * block bitmap and buddy information. The information is stored in the
     104                 :            :  * inode as:
     105                 :            :  *
     106                 :            :  *  {                        page                        }
     107                 :            :  *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
     108                 :            :  *
     109                 :            :  *
     110                 :            :  * one block each for bitmap and buddy information.  So for each group we
     111                 :            :  * take up 2 blocks. A page can contain blocks_per_page (PAGE_SIZE /
     112                 :            :  * blocksize) blocks.  So it can have information regarding groups_per_page
     113                 :            :  * which is blocks_per_page/2
     114                 :            :  *
     115                 :            :  * The buddy cache inode is not stored on disk. The inode is thrown
     116                 :            :  * away when the filesystem is unmounted.
     117                 :            :  *
     118                 :            :  * We look for count number of blocks in the buddy cache. If we were able
     119                 :            :  * to locate that many free blocks we return with additional information
     120                 :            :  * regarding rest of the contiguous physical block available
     121                 :            :  *
     122                 :            :  * Before allocating blocks via buddy cache we normalize the request
     123                 :            :  * blocks. This ensures we ask for more blocks than we needed. The extra
     124                 :            :  * blocks that we get after allocation are added to the respective prealloc
     125                 :            :  * list. In case of inode preallocation we follow a list of heuristics
     126                 :            :  * based on file size. This can be found in ext4_mb_normalize_request. If
     127                 :            :  * we are doing a group prealloc we try to normalize the request to
     128                 :            :  * sbi->s_mb_group_prealloc.  The default value of s_mb_group_prealloc is
     129                 :            :  * dependent on the cluster size; for non-bigalloc file systems, it is
     130                 :            :  * 512 blocks. This can be tuned via
     131                 :            :  * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
     132                 :            :  * terms of number of blocks. If we have mounted the file system with -O
     133                 :            :  * stripe=<value> option the group prealloc request is normalized to the
     134                 :            :  * smallest multiple of the stripe value (sbi->s_stripe) which is
     135                 :            :  * greater than the default mb_group_prealloc.
     136                 :            :  *
     137                 :            :  * The regular allocator (using the buddy cache) supports a few tunables.
     138                 :            :  *
     139                 :            :  * /sys/fs/ext4/<partition>/mb_min_to_scan
     140                 :            :  * /sys/fs/ext4/<partition>/mb_max_to_scan
     141                 :            :  * /sys/fs/ext4/<partition>/mb_order2_req
     142                 :            :  *
     143                 :            :  * The regular allocator uses buddy scan only if the request len is power of
     144                 :            :  * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
     145                 :            :  * value of s_mb_order2_reqs can be tuned via
     146                 :            :  * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
     147                 :            :  * stripe size (sbi->s_stripe), we try to search for contiguous block in
     148                 :            :  * stripe size. This should result in better allocation on RAID setups. If
     149                 :            :  * not, we search in the specific group using bitmap for best extents. The
     150                 :            :  * tunable min_to_scan and max_to_scan control the behaviour here.
     151                 :            :  * min_to_scan indicates how long the mballoc __must__ look for a best
     152                 :            :  * extent and max_to_scan indicates how long the mballoc __can__ look for a
     153                 :            :  * best extent in the found extents. Searching for the blocks starts with
     154                 :            :  * the group specified as the goal value in allocation context via
     155                 :            :  * ac_g_ex. Each group is first checked based on the criteria whether it
     156                 :            :  * can be used for allocation. ext4_mb_good_group explains how the groups are
     157                 :            :  * checked.
     158                 :            :  *
     159                 :            :  * Both prealloc spaces are populated as above. So for the first
     160                 :            :  * request we will hit the buddy cache which will result in this prealloc
     161                 :            :  * space getting filled. The prealloc space is then later used for the
     162                 :            :  * subsequent request.
     163                 :            :  */
     164                 :            : 
     165                 :            : /*
     166                 :            :  * mballoc operates on the following data:
     167                 :            :  *  - on-disk bitmap
     168                 :            :  *  - in-core buddy (actually includes buddy and bitmap)
     169                 :            :  *  - preallocation descriptors (PAs)
     170                 :            :  *
     171                 :            :  * there are two types of preallocations:
     172                 :            :  *  - inode
     173                 :            :  *    assigned to specific inode and can be used for this inode only.
     174                 :            :  *    it describes part of inode's space preallocated to specific
     175                 :            :  *    physical blocks. any block from that preallocated can be used
     176                 :            :  *    independent. the descriptor just tracks number of blocks left
     177                 :            :  *    unused. so, before taking some block from descriptor, one must
     178                 :            :  *    make sure corresponded logical block isn't allocated yet. this
     179                 :            :  *    also means that freeing any block within descriptor's range
     180                 :            :  *    must discard all preallocated blocks.
     181                 :            :  *  - locality group
     182                 :            :  *    assigned to specific locality group which does not translate to
     183                 :            :  *    permanent set of inodes: inode can join and leave group. space
     184                 :            :  *    from this type of preallocation can be used for any inode. thus
     185                 :            :  *    it's consumed from the beginning to the end.
     186                 :            :  *
     187                 :            :  * relation between them can be expressed as:
     188                 :            :  *    in-core buddy = on-disk bitmap + preallocation descriptors
     189                 :            :  *
     190                 :            :  * this means blocks mballoc considers used are:
     191                 :            :  *  - allocated blocks (persistent)
     192                 :            :  *  - preallocated blocks (non-persistent)
     193                 :            :  *
     194                 :            :  * consistency in mballoc world means that at any time a block is either
     195                 :            :  * free or used in ALL structures. notice: "any time" should not be read
     196                 :            :  * literally -- time is discrete and delimited by locks.
     197                 :            :  *
     198                 :            :  *  to keep it simple, we don't use block numbers, instead we count number of
     199                 :            :  *  blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA.
     200                 :            :  *
     201                 :            :  * all operations can be expressed as:
     202                 :            :  *  - init buddy:                       buddy = on-disk + PAs
     203                 :            :  *  - new PA:                           buddy += N; PA = N
     204                 :            :  *  - use inode PA:                     on-disk += N; PA -= N
     205                 :            :  *  - discard inode PA                  buddy -= on-disk - PA; PA = 0
     206                 :            :  *  - use locality group PA             on-disk += N; PA -= N
     207                 :            :  *  - discard locality group PA         buddy -= PA; PA = 0
     208                 :            :  *  note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap
     209                 :            :  *        is used in real operation because we can't know actual used
     210                 :            :  *        bits from PA, only from on-disk bitmap
     211                 :            :  *
     212                 :            :  * if we follow this strict logic, then all operations above should be atomic.
     213                 :            :  * given some of them can block, we'd have to use something like semaphores
     214                 :            :  * killing performance on high-end SMP hardware. let's try to relax it using
     215                 :            :  * the following knowledge:
     216                 :            :  *  1) if buddy is referenced, it's already initialized
     217                 :            :  *  2) while block is used in buddy and the buddy is referenced,
     218                 :            :  *     nobody can re-allocate that block
     219                 :            :  *  3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has
     220                 :            :  *     bit set and PA claims same block, it's OK. IOW, one can set bit in
     221                 :            :  *     on-disk bitmap if buddy has same bit set or/and PA covers corresponded
     222                 :            :  *     block
     223                 :            :  *
     224                 :            :  * so, now we're building a concurrency table:
     225                 :            :  *  - init buddy vs.
     226                 :            :  *    - new PA
     227                 :            :  *      blocks for PA are allocated in the buddy, buddy must be referenced
     228                 :            :  *      until PA is linked to allocation group to avoid concurrent buddy init
     229                 :            :  *    - use inode PA
     230                 :            :  *      we need to make sure that either on-disk bitmap or PA has uptodate data
     231                 :            :  *      given (3) we care that PA-=N operation doesn't interfere with init
     232                 :            :  *    - discard inode PA
     233                 :            :  *      the simplest way would be to have buddy initialized by the discard
     234                 :            :  *    - use locality group PA
     235                 :            :  *      again PA-=N must be serialized with init
     236                 :            :  *    - discard locality group PA
     237                 :            :  *      the simplest way would be to have buddy initialized by the discard
     238                 :            :  *  - new PA vs.
     239                 :            :  *    - use inode PA
     240                 :            :  *      i_data_sem serializes them
     241                 :            :  *    - discard inode PA
     242                 :            :  *      discard process must wait until PA isn't used by another process
     243                 :            :  *    - use locality group PA
     244                 :            :  *      some mutex should serialize them
     245                 :            :  *    - discard locality group PA
     246                 :            :  *      discard process must wait until PA isn't used by another process
     247                 :            :  *  - use inode PA
     248                 :            :  *    - use inode PA
     249                 :            :  *      i_data_sem or another mutex should serialize them
     250                 :            :  *    - discard inode PA
     251                 :            :  *      discard process must wait until PA isn't used by another process
     252                 :            :  *    - use locality group PA
     253                 :            :  *      nothing wrong here -- they're different PAs covering different blocks
     254                 :            :  *    - discard locality group PA
     255                 :            :  *      discard process must wait until PA isn't used by another process
     256                 :            :  *
     257                 :            :  * now we're ready to draw a few conclusions:
     258                 :            :  *  - PA is referenced and while it is no discard is possible
     259                 :            :  *  - PA is referenced until block isn't marked in on-disk bitmap
     260                 :            :  *  - PA changes only after on-disk bitmap
     261                 :            :  *  - discard must not compete with init. either init is done before
     262                 :            :  *    any discard or they're serialized somehow
     263                 :            :  *  - buddy init as sum of on-disk bitmap and PAs is done atomically
     264                 :            :  *
     265                 :            :  * a special case when we've used PA to emptiness. no need to modify buddy
     266                 :            :  * in this case, but we should care about concurrent init
     267                 :            :  *
     268                 :            :  */
     269                 :            : 
     270                 :            :  /*
     271                 :            :  * Logic in few words:
     272                 :            :  *
     273                 :            :  *  - allocation:
     274                 :            :  *    load group
     275                 :            :  *    find blocks
     276                 :            :  *    mark bits in on-disk bitmap
     277                 :            :  *    release group
     278                 :            :  *
     279                 :            :  *  - use preallocation:
     280                 :            :  *    find proper PA (per-inode or group)
     281                 :            :  *    load group
     282                 :            :  *    mark bits in on-disk bitmap
     283                 :            :  *    release group
     284                 :            :  *    release PA
     285                 :            :  *
     286                 :            :  *  - free:
     287                 :            :  *    load group
     288                 :            :  *    mark bits in on-disk bitmap
     289                 :            :  *    release group
     290                 :            :  *
     291                 :            :  *  - discard preallocations in group:
     292                 :            :  *    mark PAs deleted
     293                 :            :  *    move them onto local list
     294                 :            :  *    load on-disk bitmap
     295                 :            :  *    load group
     296                 :            :  *    remove PA from object (inode or locality group)
     297                 :            :  *    mark free blocks in-core
     298                 :            :  *
     299                 :            :  *  - discard inode's preallocations:
     300                 :            :  */
     301                 :            : 
     302                 :            : /*
     303                 :            :  * Locking rules
     304                 :            :  *
     305                 :            :  * Locks:
     306                 :            :  *  - bitlock on a group        (group)
     307                 :            :  *  - object (inode/locality)   (object)
     308                 :            :  *  - per-pa lock               (pa)
     309                 :            :  *
     310                 :            :  * Paths:
     311                 :            :  *  - new pa
     312                 :            :  *    object
     313                 :            :  *    group
     314                 :            :  *
     315                 :            :  *  - find and use pa:
     316                 :            :  *    pa
     317                 :            :  *
     318                 :            :  *  - release consumed pa:
     319                 :            :  *    pa
     320                 :            :  *    group
     321                 :            :  *    object
     322                 :            :  *
     323                 :            :  *  - generate in-core bitmap:
     324                 :            :  *    group
     325                 :            :  *        pa
     326                 :            :  *
     327                 :            :  *  - discard all for given object (inode, locality group):
     328                 :            :  *    object
     329                 :            :  *        pa
     330                 :            :  *    group
     331                 :            :  *
     332                 :            :  *  - discard all for given group:
     333                 :            :  *    group
     334                 :            :  *        pa
     335                 :            :  *    group
     336                 :            :  *        object
     337                 :            :  *
     338                 :            :  */
     339                 :            : static struct kmem_cache *ext4_pspace_cachep;
     340                 :            : static struct kmem_cache *ext4_ac_cachep;
     341                 :            : static struct kmem_cache *ext4_free_data_cachep;
     342                 :            : 
     343                 :            : /* We create slab caches for groupinfo data structures based on the
     344                 :            :  * superblock block size.  There will be one per mounted filesystem for
     345                 :            :  * each unique s_blocksize_bits */
     346                 :            : #define NR_GRPINFO_CACHES 8
     347                 :            : static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
     348                 :            : 
     349                 :            : static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
     350                 :            :         "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
     351                 :            :         "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
     352                 :            :         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
     353                 :            : };
     354                 :            : 
     355                 :            : static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
     356                 :            :                                         ext4_group_t group);
     357                 :            : static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
     358                 :            :                                                 ext4_group_t group);
     359                 :            : 
     360                 :            : static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
     361                 :            : {
     362                 :            : #if BITS_PER_LONG == 64
     363                 :            :         *bit += ((unsigned long) addr & 7UL) << 3;
     364                 :            :         addr = (void *) ((unsigned long) addr & ~7UL);
     365                 :            : #elif BITS_PER_LONG == 32
     366                 :   12213702 :         *bit += ((unsigned long) addr & 3UL) << 3;
     367                 :   12076812 :         addr = (void *) ((unsigned long) addr & ~3UL);
     368                 :            : #else
     369                 :            : #error "how many bits you are?!"
     370                 :            : #endif
     371                 :            :         return addr;
     372                 :            : }
     373                 :            : 
     374                 :            : static inline int mb_test_bit(int bit, void *addr)
     375                 :            : {
     376                 :            :         /*
     377                 :            :          * ext4_test_bit on architecture like powerpc
     378                 :            :          * needs unsigned long aligned address
     379                 :            :          */
     380                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     381                 :            :         return ext4_test_bit(bit, addr);
     382                 :            : }
     383                 :            : 
     384                 :            : static inline void mb_set_bit(int bit, void *addr)
     385                 :            : {
     386                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     387                 :            :         ext4_set_bit(bit, addr);
     388                 :            : }
     389                 :            : 
     390                 :            : static inline void mb_clear_bit(int bit, void *addr)
     391                 :            : {
     392                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     393                 :            :         ext4_clear_bit(bit, addr);
     394                 :            : }
     395                 :            : 
     396                 :            : static inline int mb_test_and_clear_bit(int bit, void *addr)
     397                 :            : {
     398                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     399                 :            :         return ext4_test_and_clear_bit(bit, addr);
     400                 :            : }
     401                 :            : 
     402                 :    1046418 : static inline int mb_find_next_zero_bit(void *addr, int max, int start)
     403                 :            : {
     404                 :            :         int fix = 0, ret, tmpmax;
     405                 :            :         addr = mb_correct_addr_and_bit(&fix, addr);
     406                 :    1046418 :         tmpmax = max + fix;
     407                 :    1046418 :         start += fix;
     408                 :            : 
     409                 :    1046418 :         ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
     410         [ +  - ]:    1046418 :         if (ret > max)
     411                 :            :                 return max;
     412                 :    1046418 :         return ret;
     413                 :            : }
     414                 :            : 
     415                 :     256116 : static inline int mb_find_next_bit(void *addr, int max, int start)
     416                 :            : {
     417                 :            :         int fix = 0, ret, tmpmax;
     418                 :            :         addr = mb_correct_addr_and_bit(&fix, addr);
     419                 :     256116 :         tmpmax = max + fix;
     420                 :     256116 :         start += fix;
     421                 :            : 
     422                 :     256116 :         ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
     423         [ +  - ]:     256116 :         if (ret > max)
     424                 :            :                 return max;
     425                 :     256116 :         return ret;
     426                 :            : }
     427                 :            : 
     428                 :    1482232 : static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
     429                 :            : {
     430                 :            :         char *bb;
     431                 :            : 
     432         [ -  + ]:    1482232 :         BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
     433         [ -  + ]:    1482232 :         BUG_ON(max == NULL);
     434                 :            : 
     435         [ +  + ]:    1482232 :         if (order > e4b->bd_blkbits + 1) {
     436                 :          6 :                 *max = 0;
     437                 :          6 :                 return NULL;
     438                 :            :         }
     439                 :            : 
     440                 :            :         /* at order 0 we see each particular block */
     441         [ +  + ]:    1482226 :         if (order == 0) {
     442                 :    1167902 :                 *max = 1 << (e4b->bd_blkbits + 3);
     443                 :    1167902 :                 return e4b->bd_bitmap;
     444                 :            :         }
     445                 :            : 
     446                 :     628648 :         bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
     447                 :     314324 :         *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
     448                 :            : 
     449                 :     314324 :         return bb;
     450                 :            : }
     451                 :            : 
     452                 :            : #ifdef DOUBLE_CHECK
     453                 :            : static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
     454                 :            :                            int first, int count)
     455                 :            : {
     456                 :            :         int i;
     457                 :            :         struct super_block *sb = e4b->bd_sb;
     458                 :            : 
     459                 :            :         if (unlikely(e4b->bd_info->bb_bitmap == NULL))
     460                 :            :                 return;
     461                 :            :         assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
     462                 :            :         for (i = 0; i < count; i++) {
     463                 :            :                 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
     464                 :            :                         ext4_fsblk_t blocknr;
     465                 :            : 
     466                 :            :                         blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
     467                 :            :                         blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
     468                 :            :                         ext4_grp_locked_error(sb, e4b->bd_group,
     469                 :            :                                               inode ? inode->i_ino : 0,
     470                 :            :                                               blocknr,
     471                 :            :                                               "freeing block already freed "
     472                 :            :                                               "(bit %u)",
     473                 :            :                                               first + i);
     474                 :            :                         ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
     475                 :            :                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
     476                 :            :                 }
     477                 :            :                 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
     478                 :            :         }
     479                 :            : }
     480                 :            : 
     481                 :            : static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
     482                 :            : {
     483                 :            :         int i;
     484                 :            : 
     485                 :            :         if (unlikely(e4b->bd_info->bb_bitmap == NULL))
     486                 :            :                 return;
     487                 :            :         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
     488                 :            :         for (i = 0; i < count; i++) {
     489                 :            :                 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
     490                 :            :                 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
     491                 :            :         }
     492                 :            : }
     493                 :            : 
     494                 :            : static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
     495                 :            : {
     496                 :            :         if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
     497                 :            :                 unsigned char *b1, *b2;
     498                 :            :                 int i;
     499                 :            :                 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
     500                 :            :                 b2 = (unsigned char *) bitmap;
     501                 :            :                 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
     502                 :            :                         if (b1[i] != b2[i]) {
     503                 :            :                                 ext4_msg(e4b->bd_sb, KERN_ERR,
     504                 :            :                                          "corruption in group %u "
     505                 :            :                                          "at byte %u(%u): %x in copy != %x "
     506                 :            :                                          "on disk/prealloc",
     507                 :            :                                          e4b->bd_group, i, i * 8, b1[i], b2[i]);
     508                 :            :                                 BUG();
     509                 :            :                         }
     510                 :            :                 }
     511                 :            :         }
     512                 :            : }
     513                 :            : 
     514                 :            : #else
     515                 :            : static inline void mb_free_blocks_double(struct inode *inode,
     516                 :            :                                 struct ext4_buddy *e4b, int first, int count)
     517                 :            : {
     518                 :            :         return;
     519                 :            : }
     520                 :            : static inline void mb_mark_used_double(struct ext4_buddy *e4b,
     521                 :            :                                                 int first, int count)
     522                 :            : {
     523                 :            :         return;
     524                 :            : }
     525                 :            : static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
     526                 :            : {
     527                 :            :         return;
     528                 :            : }
     529                 :            : #endif
     530                 :            : 
     531                 :            : #ifdef AGGRESSIVE_CHECK
     532                 :            : 
     533                 :            : #define MB_CHECK_ASSERT(assert)                                         \
     534                 :            : do {                                                                    \
     535                 :            :         if (!(assert)) {                                                \
     536                 :            :                 printk(KERN_EMERG                                       \
     537                 :            :                         "Assertion failure in %s() at %s:%d: \"%s\"\n",     \
     538                 :            :                         function, file, line, # assert);                \
     539                 :            :                 BUG();                                                  \
     540                 :            :         }                                                               \
     541                 :            : } while (0)
     542                 :            : 
     543                 :            : static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
     544                 :            :                                 const char *function, int line)
     545                 :            : {
     546                 :            :         struct super_block *sb = e4b->bd_sb;
     547                 :            :         int order = e4b->bd_blkbits + 1;
     548                 :            :         int max;
     549                 :            :         int max2;
     550                 :            :         int i;
     551                 :            :         int j;
     552                 :            :         int k;
     553                 :            :         int count;
     554                 :            :         struct ext4_group_info *grp;
     555                 :            :         int fragments = 0;
     556                 :            :         int fstart;
     557                 :            :         struct list_head *cur;
     558                 :            :         void *buddy;
     559                 :            :         void *buddy2;
     560                 :            : 
     561                 :            :         {
     562                 :            :                 static int mb_check_counter;
     563                 :            :                 if (mb_check_counter++ % 100 != 0)
     564                 :            :                         return 0;
     565                 :            :         }
     566                 :            : 
     567                 :            :         while (order > 1) {
     568                 :            :                 buddy = mb_find_buddy(e4b, order, &max);
     569                 :            :                 MB_CHECK_ASSERT(buddy);
     570                 :            :                 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
     571                 :            :                 MB_CHECK_ASSERT(buddy2);
     572                 :            :                 MB_CHECK_ASSERT(buddy != buddy2);
     573                 :            :                 MB_CHECK_ASSERT(max * 2 == max2);
     574                 :            : 
     575                 :            :                 count = 0;
     576                 :            :                 for (i = 0; i < max; i++) {
     577                 :            : 
     578                 :            :                         if (mb_test_bit(i, buddy)) {
     579                 :            :                                 /* only single bit in buddy2 may be 1 */
     580                 :            :                                 if (!mb_test_bit(i << 1, buddy2)) {
     581                 :            :                                         MB_CHECK_ASSERT(
     582                 :            :                                                 mb_test_bit((i<<1)+1, buddy2));
     583                 :            :                                 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
     584                 :            :                                         MB_CHECK_ASSERT(
     585                 :            :                                                 mb_test_bit(i << 1, buddy2));
     586                 :            :                                 }
     587                 :            :                                 continue;
     588                 :            :                         }
     589                 :            : 
     590                 :            :                         /* both bits in buddy2 must be 1 */
     591                 :            :                         MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
     592                 :            :                         MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
     593                 :            : 
     594                 :            :                         for (j = 0; j < (1 << order); j++) {
     595                 :            :                                 k = (i * (1 << order)) + j;
     596                 :            :                                 MB_CHECK_ASSERT(
     597                 :            :                                         !mb_test_bit(k, e4b->bd_bitmap));
     598                 :            :                         }
     599                 :            :                         count++;
     600                 :            :                 }
     601                 :            :                 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
     602                 :            :                 order--;
     603                 :            :         }
     604                 :            : 
     605                 :            :         fstart = -1;
     606                 :            :         buddy = mb_find_buddy(e4b, 0, &max);
     607                 :            :         for (i = 0; i < max; i++) {
     608                 :            :                 if (!mb_test_bit(i, buddy)) {
     609                 :            :                         MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
     610                 :            :                         if (fstart == -1) {
     611                 :            :                                 fragments++;
     612                 :            :                                 fstart = i;
     613                 :            :                         }
     614                 :            :                         continue;
     615                 :            :                 }
     616                 :            :                 fstart = -1;
     617                 :            :                 /* check used bits only */
     618                 :            :                 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
     619                 :            :                         buddy2 = mb_find_buddy(e4b, j, &max2);
     620                 :            :                         k = i >> j;
     621                 :            :                         MB_CHECK_ASSERT(k < max2);
     622                 :            :                         MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
     623                 :            :                 }
     624                 :            :         }
     625                 :            :         MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
     626                 :            :         MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
     627                 :            : 
     628                 :            :         grp = ext4_get_group_info(sb, e4b->bd_group);
     629                 :            :         list_for_each(cur, &grp->bb_prealloc_list) {
     630                 :            :                 ext4_group_t groupnr;
     631                 :            :                 struct ext4_prealloc_space *pa;
     632                 :            :                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
     633                 :            :                 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
     634                 :            :                 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
     635                 :            :                 for (i = 0; i < pa->pa_len; i++)
     636                 :            :                         MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
     637                 :            :         }
     638                 :            :         return 0;
     639                 :            : }
     640                 :            : #undef MB_CHECK_ASSERT
     641                 :            : #define mb_check_buddy(e4b) __mb_check_buddy(e4b,       \
     642                 :            :                                         __FILE__, __func__, __LINE__)
     643                 :            : #else
/* Without AGGRESSIVE_CHECK, mb_check_buddy() expands to nothing. */
#define mb_check_buddy(e4b)
     645                 :            : #endif
     646                 :            : 
     647                 :            : /*
     648                 :            :  * Divide blocks started from @first with length @len into
     649                 :            :  * smaller chunks with power of 2 blocks.
     650                 :            :  * Clear the bits in bitmap which the blocks of the chunk(s) covered,
     651                 :            :  * then increase bb_counters[] for corresponded chunk size.
     652                 :            :  */
     653                 :     207458 : static void ext4_mb_mark_free_simple(struct super_block *sb,
     654                 :            :                                 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
     655                 :            :                                         struct ext4_group_info *grp)
     656                 :            : {
     657                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     658                 :            :         ext4_grpblk_t min;
     659                 :            :         ext4_grpblk_t max;
     660                 :            :         ext4_grpblk_t chunk;
     661                 :            :         unsigned int border;
     662                 :            : 
     663         [ -  + ]:     207458 :         BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
     664                 :            : 
     665                 :     207458 :         border = 2 << sb->s_blocksize_bits;
     666                 :            : 
     667         [ +  + ]:    1214402 :         while (len > 0) {
     668                 :            :                 /* find how many blocks can be covered since this position */
     669                 :    1598972 :                 max = ffs(first | border) - 1;
     670                 :            : 
     671                 :            :                 /* find how many blocks of power 2 we need to mark */
     672                 :    1598972 :                 min = fls(len) - 1;
     673                 :            : 
     674         [ +  + ]:     799486 :                 if (max < min)
     675                 :            :                         min = max;
     676                 :     799486 :                 chunk = 1 << min;
     677                 :            : 
     678                 :            :                 /* mark multiblock chunks only */
     679                 :     799486 :                 grp->bb_counters[min]++;
     680         [ +  + ]:     799486 :                 if (min > 0)
     681                 :    1288872 :                         mb_clear_bit(first >> min,
     682                 :     644436 :                                      buddy + sbi->s_mb_offsets[min]);
     683                 :            : 
     684                 :     799486 :                 len -= chunk;
     685                 :     799486 :                 first += chunk;
     686                 :            :         }
     687                 :     207458 : }
     688                 :            : 
     689                 :            : /*
     690                 :            :  * Cache the order of the largest free extent we have available in this block
     691                 :            :  * group.
     692                 :            :  */
     693                 :            : static void
     694                 :            : mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
     695                 :            : {
     696                 :            :         int i;
     697                 :            :         int bits;
     698                 :            : 
     699                 :     152740 :         grp->bb_largest_free_order = -1; /* uninit */
     700                 :            : 
     701                 :     152740 :         bits = sb->s_blocksize_bits + 1;
     702   [ +  +  +  -  :     833750 :         for (i = bits; i >= 0; i--) {
                   +  + ]
     703   [ +  +  +  +  :     985580 :                 if (grp->bb_counters[i] > 0) {
                   +  + ]
     704                 :     151830 :                         grp->bb_largest_free_order = i;
     705                 :            :                         break;
     706                 :            :                 }
     707                 :            :         }
     708                 :            : }
     709                 :            : 
     710                 :            : static noinline_for_stack
     711                 :       5112 : void ext4_mb_generate_buddy(struct super_block *sb,
     712                 :            :                                 void *buddy, void *bitmap, ext4_group_t group)
     713                 :            : {
     714                 :       5112 :         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
     715                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     716                 :       5112 :         ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
     717                 :            :         ext4_grpblk_t i = 0;
     718                 :            :         ext4_grpblk_t first;
     719                 :            :         ext4_grpblk_t len;
     720                 :            :         unsigned free = 0;
     721                 :            :         unsigned fragments = 0;
     722         [ +  - ]:       5112 :         unsigned long long period = get_cycles();
     723                 :            : 
     724                 :            :         /* initialize buddy from bitmap which is aggregation
     725                 :            :          * of on-disk bitmap and preallocations */
     726                 :       5112 :         i = mb_find_next_zero_bit(bitmap, max, 0);
     727                 :       5112 :         grp->bb_first_free = i;
     728         [ +  + ]:     266098 :         while (i < max) {
     729                 :     255874 :                 fragments++;
     730                 :            :                 first = i;
     731                 :     255874 :                 i = mb_find_next_bit(bitmap, max, i);
     732                 :     255874 :                 len = i - first;
     733                 :     255874 :                 free += len;
     734         [ +  + ]:     255874 :                 if (len > 1)
     735                 :     207458 :                         ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
     736                 :            :                 else
     737                 :      48416 :                         grp->bb_counters[0]++;
     738         [ +  + ]:     255874 :                 if (i < max)
     739                 :     252966 :                         i = mb_find_next_zero_bit(bitmap, max, i);
     740                 :            :         }
     741                 :       5112 :         grp->bb_fragments = fragments;
     742                 :            : 
     743         [ -  + ]:       5112 :         if (free != grp->bb_free) {
     744                 :          0 :                 ext4_grp_locked_error(sb, group, 0, 0,
     745                 :            :                                       "block bitmap and bg descriptor "
     746                 :            :                                       "inconsistent: %u vs %u free clusters",
     747                 :            :                                       free, grp->bb_free);
     748                 :            :                 /*
     749                 :            :                  * If we intend to continue, we consider group descriptor
     750                 :            :                  * corrupt and update bb_free using bitmap value
     751                 :            :                  */
     752                 :          0 :                 grp->bb_free = free;
     753                 :          0 :                 ext4_mark_group_bitmap_corrupted(sb, group,
     754                 :            :                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
     755                 :            :         }
     756                 :            :         mb_set_largest_free_order(sb, grp);
     757                 :            : 
     758                 :       5112 :         clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
     759                 :            : 
     760         [ +  - ]:       5112 :         period = get_cycles() - period;
     761                 :            :         spin_lock(&sbi->s_bal_lock);
     762                 :       5112 :         sbi->s_mb_buddies_generated++;
     763                 :       5112 :         sbi->s_mb_generation_time += period;
     764                 :            :         spin_unlock(&sbi->s_bal_lock);
     765                 :       5112 : }
     766                 :            : 
     767                 :          0 : static void mb_regenerate_buddy(struct ext4_buddy *e4b)
     768                 :            : {
     769                 :            :         int count;
     770                 :            :         int order = 1;
     771                 :            :         void *buddy;
     772                 :            : 
     773         [ #  # ]:          0 :         while ((buddy = mb_find_buddy(e4b, order++, &count))) {
     774                 :          0 :                 ext4_set_bits(buddy, 0, count);
     775                 :            :         }
     776                 :          0 :         e4b->bd_info->bb_fragments = 0;
     777                 :          0 :         memset(e4b->bd_info->bb_counters, 0,
     778                 :            :                 sizeof(*e4b->bd_info->bb_counters) *
     779                 :          0 :                 (e4b->bd_sb->s_blocksize_bits + 2));
     780                 :            : 
     781                 :          0 :         ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
     782                 :            :                 e4b->bd_bitmap, e4b->bd_group);
     783                 :          0 : }
     784                 :            : 
     785                 :            : /* The buddy information is attached to the buddy cache inode
     786                 :            :  * for convenience. The information regarding each group
     787                 :            :  * is loaded via ext4_mb_load_buddy. The information involves
     788                 :            :  * block bitmap and buddy information. The information is
     789                 :            :  * stored in the inode as
     790                 :            :  *
     791                 :            :  * {                        page                        }
     792                 :            :  * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
     793                 :            :  *
     794                 :            :  *
     795                 :            :  * one block each for bitmap and buddy information.
     796                 :            :  * So for each group we take up 2 blocks. A page can
     797                 :            :  * contain blocks_per_page (PAGE_SIZE / blocksize)  blocks.
     798                 :            :  * So it can have information regarding groups_per_page which
     799                 :            :  * is blocks_per_page/2
     800                 :            :  *
     801                 :            :  * Locking note:  This routine takes the block group lock of all groups
     802                 :            :  * for this page; do not hold this lock when calling this routine!
     803                 :            :  */
     804                 :            : 
     805                 :      10224 : static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
     806                 :            : {       /*
                         :            :          * Fill @page of the buddy cache inode.  Every block of the page
                         :            :          * holds either a group's block bitmap (even block number) or that
                         :            :          * group's buddy (odd block number).
                         :            :          *
                         :            :          * @incore: bitmap block used to generate a buddy block; the
                         :            :          *          BUG_ON()s in the fill loop require it to be non-NULL
                         :            :          *          when a buddy block is filled and NULL when a bitmap
                         :            :          *          block is filled.
                         :            :          * @gfp:    allocation flags for the buffer_head pointer array.
                         :            :          *
                         :            :          * Returns 0 on success, -ENOMEM if the pointer array cannot be
                         :            :          * allocated, or the error reported while reading or waiting for
                         :            :          * a block bitmap.
                         :            :          */
     807                 :            :         ext4_group_t ngroups;
     808                 :            :         int blocksize;
     809                 :            :         int blocks_per_page;
     810                 :            :         int groups_per_page;
     811                 :            :         int err = 0;
     812                 :            :         int i;
     813                 :            :         ext4_group_t first_group, group;
     814                 :            :         int first_block;
     815                 :            :         struct super_block *sb;
     816                 :            :         struct buffer_head *bhs;
     817                 :            :         struct buffer_head **bh = NULL;
     818                 :            :         struct inode *inode;
     819                 :            :         char *data;
     820                 :            :         char *bitmap;
     821                 :            :         struct ext4_group_info *grinfo;
     822                 :            : 
     823                 :            :         mb_debug(1, "init page %lu\n", page->index);
     824                 :            : 
     825                 :      10224 :         inode = page->mapping->host;
     826                 :      10224 :         sb = inode->i_sb;
     827                 :            :         ngroups = ext4_get_groups_count(sb);
     828                 :            :         blocksize = i_blocksize(inode);
     829                 :      10224 :         blocks_per_page = PAGE_SIZE / blocksize;
     830                 :            : 
     831                 :      10224 :         groups_per_page = blocks_per_page >> 1;
     832         [ +  - ]:      10224 :         if (groups_per_page == 0)
     833                 :            :                 groups_per_page = 1;
     834                 :            : 
     835                 :            :         /* allocate the buffer_head pointer array used to read bitmaps */
     836         [ -  + ]:      10224 :         if (groups_per_page > 1) {
     837                 :          0 :                 i = sizeof(struct buffer_head *) * groups_per_page;
     838                 :          0 :                 bh = kzalloc(i, gfp);
     839         [ #  # ]:          0 :                 if (bh == NULL) {
     840                 :            :                         err = -ENOMEM;
     841                 :            :                         goto out;
     842                 :            :                 }
     843                 :            :         } else
     844                 :            :                 bh = &bhs;      /* single group: use the local slot */
     845                 :            : 
     846                 :      10224 :         first_group = page->index * blocks_per_page / 2;
     847                 :            : 
     848                 :            :         /*
                         :            :          * read all groups the page covers into the cache
                         :            :          *
                         :            :          * NOTE(review): when bh points at the local bhs, bh[0] is only
                         :            :          * written inside this loop; if the loop breaks on
                         :            :          * group >= ngroups before storing it, the loops below read an
                         :            :          * uninitialized pointer -- confirm callers never pass such a
                         :            :          * page.
                         :            :          */
     849         [ +  + ]:      20448 :         for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
     850         [ +  - ]:      10224 :                 if (group >= ngroups)
     851                 :            :                         break;
     852                 :            : 
     853                 :      10224 :                 grinfo = ext4_get_group_info(sb, group);
     854                 :            :                 /*
     855                 :            :                  * If page is uptodate then we came here after online resize
     856                 :            :                  * which added some new uninitialized group info structs, so
     857                 :            :                  * we must skip all initialized uptodate buddies on the page,
     858                 :            :                  * which may be currently in use by an allocating task.
     859                 :            :                  */
     860   [ -  +  #  # ]:      10224 :                 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
     861                 :          0 :                         bh[i] = NULL;
     862                 :          0 :                         continue;
     863                 :            :                 }
     864                 :      10224 :                 bh[i] = ext4_read_block_bitmap_nowait(sb, group);
     865         [ -  + ]:      10224 :                 if (IS_ERR(bh[i])) {
     866                 :          0 :                         err = PTR_ERR(bh[i]);
     867                 :          0 :                         bh[i] = NULL;
     868                 :          0 :                         goto out;
     869                 :            :                 }
     870                 :            :                 mb_debug(1, "read bitmap for group %u\n", group);
     871                 :            :         }
     872                 :            : 
     873                 :            :         /* wait for I/O completion; only the first error is kept in err */
     874         [ +  + ]:      10224 :         for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
     875                 :            :                 int err2;
     876                 :            : 
     877         [ -  + ]:      10224 :                 if (!bh[i])
     878                 :          0 :                         continue;
     879                 :      10224 :                 err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
     880         [ +  - ]:      10224 :                 if (!err)
     881                 :            :                         err = err2;
     882                 :            :         }
     883                 :            : 
     884                 :      10224 :         first_block = page->index * blocks_per_page;
     885         [ +  + ]:      20448 :         for (i = 0; i < blocks_per_page; i++) {
     886                 :      10224 :                 group = (first_block + i) >> 1;
     887         [ +  - ]:      10224 :                 if (group >= ngroups)
     888                 :            :                         break;
     889                 :            : 
     890         [ -  + ]:      10224 :                 if (!bh[group - first_group])
     891                 :            :                         /* skip initialized uptodate buddy */
     892                 :          0 :                         continue;
     893                 :            : 
     894         [ -  + ]:      10224 :                 if (!buffer_verified(bh[group - first_group]))
     895                 :            :                         /* Skip faulty bitmaps */
     896                 :          0 :                         continue;
     897                 :            :                 err = 0;        /* a verified bitmap is being used: clear any error left by the wait loop */
     898                 :            : 
     899                 :            :                 /*
     900                 :            :                  * data points at the block of the page that
     901                 :            :                  * holds this group's bitmap or buddy, in the
     902                 :            :                  * format specified above
     903                 :            :                  *
     904                 :            :                  */
     905                 :      10224 :                 data = page_address(page) + (i * blocksize);
     906                 :      10224 :                 bitmap = bh[group - first_group]->b_data;
     907                 :            : 
     908                 :            :                 /*
     909                 :            :                  * We place the buddy block and bitmap block
     910                 :            :                  * close together
     911                 :            :                  */
     912         [ +  + ]:      10224 :                 if ((first_block + i) & 1) {
     913                 :            :                         /* this is the buddy block */
     914         [ -  + ]:       5112 :                         BUG_ON(incore == NULL);
     915                 :            :                         mb_debug(1, "put buddy for group %u in page %lu/%x\n",
     916                 :            :                                 group, page->index, i * blocksize);
     917                 :       5112 :                         trace_ext4_mb_buddy_bitmap_load(sb, group);
     918                 :       5112 :                         grinfo = ext4_get_group_info(sb, group);
     919                 :       5112 :                         grinfo->bb_fragments = 0;
     920                 :      10224 :                         memset(grinfo->bb_counters, 0,
     921                 :            :                                sizeof(*grinfo->bb_counters) *
     922                 :       5112 :                                 (sb->s_blocksize_bits+2));
     923                 :            :                         /*
     924                 :            :                          * incore was set to the group block bitmap
                         :            :                          * either by the else branch below on the
                         :            :                          * previous iteration or by the caller
     925                 :            :                          */
     926                 :       5112 :                         ext4_lock_group(sb, group);
     927                 :            :                         /* init the buddy */
     928                 :       5112 :                         memset(data, 0xff, blocksize);
     929                 :       5112 :                         ext4_mb_generate_buddy(sb, data, incore, group);
     930                 :            :                         ext4_unlock_group(sb, group);
     931                 :            :                         incore = NULL;
     932                 :            :                 } else {
     933                 :            :                         /* this is the bitmap block */
     934         [ -  + ]:       5112 :                         BUG_ON(incore != NULL);
     935                 :            :                         mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
     936                 :            :                                 group, page->index, i * blocksize);
     937                 :       5112 :                         trace_ext4_mb_bitmap_load(sb, group);
     938                 :            : 
     939                 :            :                         /* see comments in ext4_mb_put_pa() */
     940                 :       5112 :                         ext4_lock_group(sb, group);
     941                 :       5112 :                         memcpy(data, bitmap, blocksize);
     942                 :            : 
     943                 :            :                         /* mark all preallocated blks used in in-core bitmap */
     944                 :       5112 :                         ext4_mb_generate_from_pa(sb, data, group);
     945                 :       5112 :                         ext4_mb_generate_from_freelist(sb, data, group);
     946                 :            :                         ext4_unlock_group(sb, group);
     947                 :            : 
     948                 :            :                         /* set incore so that the buddy information can be
     949                 :            :                          * generated using this
     950                 :            :                          */
     951                 :            :                         incore = data;
     952                 :            :                 }
     953                 :            :         }
     954                 :            :         SetPageUptodate(page);  /* reached whenever no "goto out" was taken, even if err is still set */
     955                 :            : 
     956                 :            : out:
     957         [ +  - ]:      10224 :         if (bh) {
     958         [ +  + ]:      10224 :                 for (i = 0; i < groups_per_page; i++)
     959                 :      10224 :                         brelse(bh[i]);
     960         [ -  + ]:      10224 :                 if (bh != &bhs)
     961                 :          0 :                         kfree(bh);
     962                 :            :         }
     963                 :      10224 :         return err;
     964                 :            : }
     965                 :            : 
     966                 :            : /*
     967                 :            :  * Lock the buddy and bitmap pages. This makes sure a parallel init_group
     968                 :            :  * on the same buddy page doesn't happen while holding the buddy page lock.
     969                 :            :  * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
     970                 :            :  * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
                         :            :  * Returns -ENOMEM if a page cannot be obtained; a bitmap page already
                         :            :  * stored in e4b at that point is left there for the caller to release.
     971                 :            :  */
     972                 :       5112 : static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
     973                 :            :                 ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
     974                 :            : {
     975                 :       5112 :         struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
     976                 :            :         int block, pnum, poff;
     977                 :            :         int blocks_per_page;
     978                 :            :         struct page *page;
     979                 :            : 
     980                 :       5112 :         e4b->bd_buddy_page = NULL;
     981                 :       5112 :         e4b->bd_bitmap_page = NULL;
     982                 :            : 
     983                 :       5112 :         blocks_per_page = PAGE_SIZE / sb->s_blocksize;
     984                 :            :         /*
     985                 :            :          * the buddy cache inode stores the block bitmap
     986                 :            :          * and buddy information in consecutive blocks.
     987                 :            :          * So for each group we need two blocks.
     988                 :            :          */
     989                 :       5112 :         block = group * 2;
     990                 :       5112 :         pnum = block / blocks_per_page;
     991                 :       5112 :         poff = block % blocks_per_page;
     992                 :       5112 :         page = find_or_create_page(inode->i_mapping, pnum, gfp);
     993         [ +  - ]:       5112 :         if (!page)
     994                 :            :                 return -ENOMEM;
     995         [ -  + ]:       5112 :         BUG_ON(page->mapping != inode->i_mapping);
     996                 :       5112 :         e4b->bd_bitmap_page = page;
     997                 :       5112 :         e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
     998                 :            : 
     999         [ +  - ]:       5112 :         if (blocks_per_page >= 2) {
    1000                 :            :                 /* buddy and bitmap are on the same page */
    1001                 :            :                 return 0;
    1002                 :            :         }
    1003                 :            : 
    1004                 :       5112 :         block++;        /* the buddy block follows the bitmap block */
    1005                 :       5112 :         pnum = block / blocks_per_page;
    1006                 :       5112 :         page = find_or_create_page(inode->i_mapping, pnum, gfp);
    1007         [ +  - ]:       5112 :         if (!page)
    1008                 :            :                 return -ENOMEM;
    1009         [ -  + ]:       5112 :         BUG_ON(page->mapping != inode->i_mapping);
    1010                 :       5112 :         e4b->bd_buddy_page = page;
    1011                 :       5112 :         return 0;
    1012                 :            : }
    1013                 :            : 
    1014                 :       5112 : static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
    1015                 :            : {       /*
                         :            :          * Unlock and drop the reference on each page recorded in @e4b
                         :            :          * (bitmap page first, then buddy page); NULL entries are skipped.
                         :            :          */
    1016         [ +  - ]:       5112 :         if (e4b->bd_bitmap_page) {
    1017                 :       5112 :                 unlock_page(e4b->bd_bitmap_page);
    1018                 :       5112 :                 put_page(e4b->bd_bitmap_page);
    1019                 :            :         }
    1020         [ +  - ]:       5112 :         if (e4b->bd_buddy_page) {
    1021                 :       5112 :                 unlock_page(e4b->bd_buddy_page);
    1022                 :       5112 :                 put_page(e4b->bd_buddy_page);
    1023                 :            :         }
    1024                 :       5112 : }
    1025                 :            : 
    1026                 :            : /*
                         :            :  * Initialize the buddy cache page(s) of @group: the bitmap page and,
                         :            :  * when it is a separate page, the buddy page.
                         :            :  *
                         :            :  * Returns 0 on success or when the group no longer needs init, -EIO
                         :            :  * if a page is still not uptodate after ext4_mb_init_cache(), or the
                         :            :  * error from ext4_mb_get_buddy_page_lock() / ext4_mb_init_cache().
                         :            :  *
    1027                 :            :  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
    1028                 :            :  * block group lock of all groups for this page; do not hold the BG lock when
    1029                 :            :  * calling this routine!
    1030                 :            :  */
    1031                 :            : static noinline_for_stack
    1032                 :       5112 : int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
    1033                 :            : {
    1034                 :            : 
    1035                 :            :         struct ext4_group_info *this_grp;
    1036                 :            :         struct ext4_buddy e4b;
    1037                 :            :         struct page *page;
    1038                 :            :         int ret = 0;
    1039                 :            : 
    1040                 :       5112 :         might_sleep();
    1041                 :            :         mb_debug(1, "init group %u\n", group);
    1042                 :       5112 :         this_grp = ext4_get_group_info(sb, group);
    1043                 :            :         /*
    1044                 :            :          * This ensures that we don't reinit the buddy cache
    1045                 :            :          * page which map to the group from which we are already
    1046                 :            :          * allocating. If we are looking at the buddy cache we would
    1047                 :            :          * have taken a reference using ext4_mb_load_buddy and that
    1048                 :            :          * would have pinned buddy page to page cache.
    1049                 :            :          * The call to ext4_mb_get_buddy_page_lock will mark the
    1050                 :            :          * page accessed.
    1051                 :            :          */
    1052                 :       5112 :         ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
    1053   [ +  -  +  - ]:      10224 :         if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
    1054                 :            :                 /*
    1055                 :            :                  * either getting the pages failed (ret is returned)
    1056                 :            :                  * or somebody initialized the group:
    1057                 :            :                  * return without doing anything
    1058                 :            :                  */
    1059                 :            :                 goto err;
    1060                 :            :         }
    1061                 :            : 
    1062                 :       5112 :         page = e4b.bd_bitmap_page;
    1063                 :       5112 :         ret = ext4_mb_init_cache(page, NULL, gfp);
    1064         [ +  - ]:       5112 :         if (ret)
    1065                 :            :                 goto err;
    1066         [ +  - ]:       5112 :         if (!PageUptodate(page)) {
    1067                 :            :                 ret = -EIO;
    1068                 :            :                 goto err;
    1069                 :            :         }
    1070                 :            : 
    1071         [ +  - ]:       5112 :         if (e4b.bd_buddy_page == NULL) {
    1072                 :            :                 /*
    1073                 :            :                  * If both the bitmap and buddy are in
    1074                 :            :                  * the same page we don't need to force
    1075                 :            :                  * init the buddy
    1076                 :            :                  */
    1077                 :            :                 ret = 0;
    1078                 :            :                 goto err;
    1079                 :            :         }
    1080                 :            :         /* init buddy cache, generated from the bitmap block filled in above */
    1081                 :            :         page = e4b.bd_buddy_page;
    1082                 :       5112 :         ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
    1083         [ +  - ]:       5112 :         if (ret)
    1084                 :            :                 goto err;
    1085         [ -  + ]:       5112 :         if (!PageUptodate(page)) {
    1086                 :            :                 ret = -EIO;
    1087                 :          0 :                 goto err;
    1088                 :            :         }
    1089                 :            : err:    /* also the success path: ret is 0 here when nothing failed */
    1090                 :       5112 :         ext4_mb_put_buddy_page_lock(&e4b);
    1091                 :       5112 :         return ret;
    1092                 :            : }
    1092                 :            : 
    1093                 :            : /*
                         :            :  * Load the bitmap and buddy pages of @group into @e4b, initializing the
                         :            :  * group and the pages first when needed.
                         :            :  *
                         :            :  * On success returns 0 with a reference held on both
                         :            :  * e4b->bd_bitmap_page and e4b->bd_buddy_page (ext4_mb_unload_buddy()
                         :            :  * drops them).  On failure returns the error from ext4_mb_init_group()
                         :            :  * or ext4_mb_init_cache(), -ENOMEM if a page cannot be obtained, or
                         :            :  * -EIO if a page is not uptodate; failures that go through the err
                         :            :  * label drop the page references taken here and reset bd_buddy and
                         :            :  * bd_bitmap to NULL.
                         :            :  *
    1094                 :            :  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
    1095                 :            :  * block group lock of all groups for this page; do not hold the BG lock when
    1096                 :            :  * calling this routine!
    1097                 :            :  */
    1098                 :            : static noinline_for_stack int
    1099                 :     156082 : ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
    1100                 :            :                        struct ext4_buddy *e4b, gfp_t gfp)
    1101                 :            : {
    1102                 :            :         int blocks_per_page;
    1103                 :            :         int block;
    1104                 :            :         int pnum;
    1105                 :            :         int poff;
    1106                 :            :         struct page *page;
    1107                 :            :         int ret;
    1108                 :            :         struct ext4_group_info *grp;
    1109                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1110                 :     156082 :         struct inode *inode = sbi->s_buddy_cache;
    1111                 :            : 
    1112                 :     156082 :         might_sleep();
    1113                 :            :         mb_debug(1, "load group %u\n", group);
    1114                 :            : 
    1115                 :     156082 :         blocks_per_page = PAGE_SIZE / sb->s_blocksize;
    1116                 :     156082 :         grp = ext4_get_group_info(sb, group);
    1117                 :            : 
    1118                 :     156082 :         e4b->bd_blkbits = sb->s_blocksize_bits;
    1119                 :     156082 :         e4b->bd_info = grp;
    1120                 :     156082 :         e4b->bd_sb = sb;
    1121                 :     156082 :         e4b->bd_group = group;
    1122                 :     156082 :         e4b->bd_buddy_page = NULL;
    1123                 :     156082 :         e4b->bd_bitmap_page = NULL;
    1124                 :            : 
    1125         [ +  + ]:     156082 :         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
    1126                 :            :                 /*
    1127                 :            :                  * we need full data about the group
    1128                 :            :                  * to make a good selection
    1129                 :            :                  */
    1130                 :       1810 :                 ret = ext4_mb_init_group(sb, group, gfp);
    1131         [ +  - ]:       1810 :                 if (ret)
    1132                 :            :                         return ret;     /* no page reference taken yet */
    1133                 :            :         }
    1134                 :            : 
    1135                 :            :         /*
    1136                 :            :          * the buddy cache inode stores the block bitmap
    1137                 :            :          * and buddy information in consecutive blocks.
    1138                 :            :          * So for each group we need two blocks.
    1139                 :            :          */
    1140                 :     156082 :         block = group * 2;
    1141                 :     156082 :         pnum = block / blocks_per_page;
    1142                 :     156082 :         poff = block % blocks_per_page;
    1143                 :            : 
    1144                 :            :         /* we could use find_or_create_page(), but it locks the page,
    1145                 :            :          * which we'd like to avoid in the fast path ... */
    1146                 :     156082 :         page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
    1147   [ +  -  -  + ]:     312164 :         if (page == NULL || !PageUptodate(page)) {
    1148         [ #  # ]:          0 :                 if (page)
    1149                 :            :                         /*
    1150                 :            :                          * drop the page reference and try
    1151                 :            :                          * to get the page with lock. If we
    1152                 :            :                          * are not uptodate that implies
    1153                 :            :                          * somebody just created the page but
    1154                 :            :                          * is yet to initialize the same. So
    1155                 :            :                          * wait for it to initialize.
    1156                 :            :                          */
    1157                 :          0 :                         put_page(page);
    1158                 :          0 :                 page = find_or_create_page(inode->i_mapping, pnum, gfp);
    1159         [ #  # ]:          0 :                 if (page) {
    1160         [ #  # ]:          0 :                         BUG_ON(page->mapping != inode->i_mapping);
    1161         [ #  # ]:          0 :                         if (!PageUptodate(page)) {
    1162                 :          0 :                                 ret = ext4_mb_init_cache(page, NULL, gfp);
    1163         [ #  # ]:          0 :                                 if (ret) {
    1164                 :          0 :                                         unlock_page(page);
    1165                 :          0 :                                         goto err;
    1166                 :            :                                 }
    1167                 :            :                                 mb_cmp_bitmaps(e4b, page_address(page) +
    1168                 :            :                                                (poff * sb->s_blocksize));
    1169                 :            :                         }
    1170                 :          0 :                         unlock_page(page);
    1171                 :            :                 }
    1172                 :            :         }
    1173         [ +  - ]:     156082 :         if (page == NULL) {
    1174                 :            :                 ret = -ENOMEM;
    1175                 :            :                 goto err;
    1176                 :            :         }
    1177         [ +  - ]:     156082 :         if (!PageUptodate(page)) {
    1178                 :            :                 ret = -EIO;
    1179                 :            :                 goto err;
    1180                 :            :         }
    1181                 :            : 
    1182                 :            :         /* Pages marked accessed already */
    1183                 :     156082 :         e4b->bd_bitmap_page = page;
    1184                 :     156082 :         e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
    1185                 :            : 
    1186                 :     156082 :         block++;        /* now the buddy block, which follows the bitmap block */
    1187                 :     156082 :         pnum = block / blocks_per_page;
    1188                 :     156082 :         poff = block % blocks_per_page;
    1189                 :            : 
    1190                 :     156082 :         page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
    1191   [ +  -  -  + ]:     312164 :         if (page == NULL || !PageUptodate(page)) {
    1192         [ #  # ]:          0 :                 if (page)
    1193                 :          0 :                         put_page(page);
    1194                 :          0 :                 page = find_or_create_page(inode->i_mapping, pnum, gfp);
    1195         [ #  # ]:          0 :                 if (page) {
    1196         [ #  # ]:          0 :                         BUG_ON(page->mapping != inode->i_mapping);
    1197         [ #  # ]:          0 :                         if (!PageUptodate(page)) {
    1198                 :          0 :                                 ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
    1199                 :            :                                                          gfp);
    1200         [ #  # ]:          0 :                                 if (ret) {
    1201                 :          0 :                                         unlock_page(page);
    1202                 :          0 :                                         goto err;
    1203                 :            :                                 }
    1204                 :            :                         }
    1205                 :          0 :                         unlock_page(page);
    1206                 :            :                 }
    1207                 :            :         }
    1208         [ +  - ]:     156082 :         if (page == NULL) {
    1209                 :            :                 ret = -ENOMEM;
    1210                 :            :                 goto err;
    1211                 :            :         }
    1212         [ +  - ]:     156082 :         if (!PageUptodate(page)) {
    1213                 :            :                 ret = -EIO;
    1214                 :            :                 goto err;
    1215                 :            :         }
    1216                 :            : 
    1217                 :            :         /* Pages marked accessed already */
    1218                 :     156082 :         e4b->bd_buddy_page = page;
    1219                 :     156082 :         e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
    1220                 :            : 
    1221         [ -  + ]:     156082 :         BUG_ON(e4b->bd_bitmap_page == NULL);
    1222         [ -  + ]:     156082 :         BUG_ON(e4b->bd_buddy_page == NULL);
    1223                 :            : 
    1224                 :            :         return 0;
    1225                 :            : 
    1226                 :            : err:
    1227         [ #  # ]:          0 :         if (page)       /* the page being set up when the failure happened, not yet stored in e4b */
    1228                 :          0 :                 put_page(page);
    1229         [ #  # ]:          0 :         if (e4b->bd_bitmap_page)
    1230                 :          0 :                 put_page(e4b->bd_bitmap_page);
    1231         [ #  # ]:          0 :         if (e4b->bd_buddy_page) /* NOTE(review): only assigned after the last "goto err", so apparently always NULL here */
    1232                 :          0 :                 put_page(e4b->bd_buddy_page);
    1233                 :          0 :         e4b->bd_buddy = NULL;
    1234                 :          0 :         e4b->bd_bitmap = NULL;
    1235                 :          0 :         return ret;
    1236                 :            : }
    1237                 :            : 
    1238                 :            : static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
    1239                 :            :                               struct ext4_buddy *e4b)
    1240                 :            : {       /* wrapper: ext4_mb_load_buddy_gfp() with GFP_NOFS; same return value */
    1241                 :     147924 :         return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
    1242                 :            : }
    1243                 :            : 
    1244                 :     156082 : static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
    1245                 :            : {       /* drop the references on the pages held in @e4b; NULL entries are skipped */
    1246         [ +  - ]:     156082 :         if (e4b->bd_bitmap_page)
    1247                 :     156082 :                 put_page(e4b->bd_bitmap_page);
    1248         [ +  - ]:     156082 :         if (e4b->bd_buddy_page)
    1249                 :     156082 :                 put_page(e4b->bd_buddy_page);
    1250                 :     156082 : }
    1251                 :            : 
    1252                 :            : 
    1253                 :    1178102 : static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
    1254                 :            : {
    1255                 :            :         int order = 1;
    1256                 :    1178102 :         int bb_incr = 1 << (e4b->bd_blkbits - 1);
    1257                 :            :         void *bb;
    1258                 :            : 
    1259         [ -  + ]:    1178102 :         BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
    1260         [ +  - ]:    1178102 :         BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
    1261                 :            : 
    1262                 :            :         bb = e4b->bd_buddy;
    1263         [ +  + ]:    8239856 :         while (order <= e4b->bd_blkbits + 1) {
    1264                 :    7898354 :                 block = block >> 1;
    1265         [ +  + ]:    7898354 :                 if (!mb_test_bit(block, bb)) {
    1266                 :            :                         /* this block is part of buddy of order 'order' */
    1267                 :     836600 :                         return order;
    1268                 :            :                 }
    1269                 :    7061754 :                 bb += bb_incr;
    1270                 :    7061754 :                 bb_incr >>= 1;
    1271                 :    7061754 :                 order++;
    1272                 :            :         }
    1273                 :            :         return 0;
    1274                 :            : }
    1275                 :            : 
    1276                 :       7922 : static void mb_clear_bits(void *bm, int cur, int len)
    1277                 :            : {
    1278                 :            :         __u32 *addr;
    1279                 :            : 
    1280                 :       7922 :         len = cur + len;
    1281         [ +  + ]:      58944 :         while (cur < len) {
    1282   [ +  +  +  + ]:      43100 :                 if ((cur & 31) == 0 && (len - cur) >= 32) {
    1283                 :            :                         /* fast path: clear whole word at once */
    1284                 :      28950 :                         addr = bm + (cur >> 3);
    1285                 :      28950 :                         *addr = 0;
    1286                 :      28950 :                         cur += 32;
    1287                 :      28950 :                         continue;
    1288                 :            :                 }
    1289                 :            :                 mb_clear_bit(cur, bm);
    1290                 :      14150 :                 cur++;
    1291                 :            :         }
    1292                 :       7922 : }
    1293                 :            : 
    1294                 :            : /* clear bits in given range
    1295                 :            :  * will return first found zero bit if any, -1 otherwise
    1296                 :            :  */
    1297                 :       6372 : static int mb_test_and_clear_bits(void *bm, int cur, int len)
    1298                 :            : {
    1299                 :            :         __u32 *addr;
    1300                 :            :         int zero_bit = -1;
    1301                 :            : 
    1302                 :       6372 :         len = cur + len;
    1303         [ +  + ]:      62126 :         while (cur < len) {
    1304   [ +  +  +  + ]:      49382 :                 if ((cur & 31) == 0 && (len - cur) >= 32) {
    1305                 :            :                         /* fast path: clear whole word at once */
    1306                 :      29700 :                         addr = bm + (cur >> 3);
    1307   [ -  +  #  # ]:      29700 :                         if (*addr != (__u32)(-1) && zero_bit == -1)
    1308                 :          0 :                                 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
    1309                 :      29700 :                         *addr = 0;
    1310                 :      29700 :                         cur += 32;
    1311                 :      29700 :                         continue;
    1312                 :            :                 }
    1313   [ -  +  #  # ]:      19682 :                 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
    1314                 :            :                         zero_bit = cur;
    1315                 :      19682 :                 cur++;
    1316                 :            :         }
    1317                 :            : 
    1318                 :       6372 :         return zero_bit;
    1319                 :            : }
    1320                 :            : 
    1321                 :     287216 : void ext4_set_bits(void *bm, int cur, int len)
    1322                 :            : {
    1323                 :            :         __u32 *addr;
    1324                 :            : 
    1325                 :     287216 :         len = cur + len;
    1326         [ +  + ]:    1110822 :         while (cur < len) {
    1327   [ +  +  +  + ]:     536390 :                 if ((cur & 31) == 0 && (len - cur) >= 32) {
    1328                 :            :                         /* fast path: set whole word at once */
    1329                 :     102442 :                         addr = bm + (cur >> 3);
    1330                 :     102442 :                         *addr = 0xffffffff;
    1331                 :     102442 :                         cur += 32;
    1332                 :     102442 :                         continue;
    1333                 :            :                 }
    1334                 :            :                 mb_set_bit(cur, bm);
    1335                 :     433948 :                 cur++;
    1336                 :            :         }
    1337                 :     287216 : }
    1338                 :            : 
    1339                 :            : /*
    1340                 :            :  * _________________________________________________________________ */
    1341                 :            : 
    1342                 :      13348 : static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
    1343                 :            : {
    1344         [ +  + ]:      26696 :         if (mb_test_bit(*bit + side, bitmap)) {
    1345                 :            :                 mb_clear_bit(*bit, bitmap);
    1346                 :       4978 :                 (*bit) -= side;
    1347                 :       4978 :                 return 1;
    1348                 :            :         }
    1349                 :            :         else {
    1350                 :       8370 :                 (*bit) += side;
    1351                 :            :                 mb_set_bit(*bit, bitmap);
    1352                 :       8370 :                 return -1;
    1353                 :            :         }
    1354                 :            : }
    1355                 :            : 
    1356                 :       3322 : static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
    1357                 :            : {
    1358                 :            :         int max;
    1359                 :            :         int order = 1;
    1360                 :       3322 :         void *buddy = mb_find_buddy(e4b, order, &max);
    1361                 :            : 
    1362         [ +  - ]:      17320 :         while (buddy) {
    1363                 :            :                 void *buddy2;
    1364                 :            : 
    1365                 :            :                 /* Bits in range [first; last] are known to be set since
    1366                 :            :                  * corresponding blocks were allocated. Bits in range
    1367                 :            :                  * (first; last) will stay set because they form buddies on
    1368                 :            :                  * upper layer. We just deal with borders if they don't
    1369                 :            :                  * align with upper layer and then go up.
    1370                 :            :                  * Releasing entire group is all about clearing
    1371                 :            :                  * single bit of highest order buddy.
    1372                 :            :                  */
    1373                 :            : 
    1374                 :            :                 /* Example:
    1375                 :            :                  * ---------------------------------
    1376                 :            :                  * |   1   |   1   |   1   |   1   |
    1377                 :            :                  * ---------------------------------
    1378                 :            :                  * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
    1379                 :            :                  * ---------------------------------
    1380                 :            :                  *   0   1   2   3   4   5   6   7
    1381                 :            :                  *      \_____________________/
    1382                 :            :                  *
    1383                 :            :                  * Neither [1] nor [6] is aligned to above layer.
    1384                 :            :                  * Left neighbour [0] is free, so mark it busy,
    1385                 :            :                  * decrease bb_counters and extend range to
    1386                 :            :                  * [0; 6]
    1387                 :            :                  * Right neighbour [7] is busy. It can't be coalesced with [6], so
    1388                 :            :                  * mark [6] free, increase bb_counters and shrink range to
    1389                 :            :                  * [0; 5].
    1390                 :            :                  * Then shift range to [0; 2], go up and do the same.
    1391                 :            :                  */
    1392                 :            : 
    1393                 :            : 
    1394         [ +  + ]:      13998 :                 if (first & 1)
    1395                 :       3910 :                         e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
    1396         [ +  + ]:      13998 :                 if (!(last & 1))
    1397                 :       9438 :                         e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
    1398         [ +  + ]:      13998 :                 if (first > last)
    1399                 :            :                         break;
    1400                 :      10682 :                 order++;
    1401                 :            : 
    1402   [ +  -  +  + ]:      10682 :                 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
    1403                 :          6 :                         mb_clear_bits(buddy, first, last - first + 1);
    1404                 :          6 :                         e4b->bd_info->bb_counters[order - 1] += last - first + 1;
    1405                 :          6 :                         break;
    1406                 :            :                 }
    1407                 :      10676 :                 first >>= 1;
    1408                 :      10676 :                 last >>= 1;
    1409                 :            :                 buddy = buddy2;
    1410                 :            :         }
    1411                 :       3322 : }
    1412                 :            : 
    1413                 :       6372 : static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
    1414                 :            :                            int first, int count)
    1415                 :            : {
    1416                 :            :         int left_is_free = 0;
    1417                 :            :         int right_is_free = 0;
    1418                 :            :         int block;
    1419                 :       6372 :         int last = first + count - 1;
    1420                 :       6372 :         struct super_block *sb = e4b->bd_sb;
    1421                 :            : 
    1422   [ -  +  +  - ]:       6372 :         if (WARN_ON(count == 0))
    1423                 :            :                 return;
    1424         [ -  + ]:       6372 :         BUG_ON(last >= (sb->s_blocksize << 3));
    1425         [ -  + ]:      19116 :         assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
    1426                 :            :         /* Don't bother if the block group is corrupt. */
    1427         [ +  - ]:      12744 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
    1428                 :            :                 return;
    1429                 :            : 
    1430                 :            :         mb_check_buddy(e4b);
    1431                 :            :         mb_free_blocks_double(inode, e4b, first, count);
    1432                 :            : 
    1433                 :       6372 :         e4b->bd_info->bb_free += count;
    1434         [ +  + ]:       6372 :         if (first < e4b->bd_info->bb_first_free)
    1435                 :        774 :                 e4b->bd_info->bb_first_free = first;
    1436                 :            : 
    1437                 :            :         /* access memory sequentially: check left neighbour,
    1438                 :            :          * clear range and then check right neighbour
    1439                 :            :          */
    1440         [ +  + ]:       6372 :         if (first != 0)
    1441                 :      12684 :                 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
    1442                 :       6372 :         block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
    1443         [ +  + ]:      12744 :         if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
    1444                 :      12740 :                 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
    1445                 :            : 
    1446         [ -  + ]:       6372 :         if (unlikely(block != -1)) {
    1447                 :            :                 struct ext4_sb_info *sbi = EXT4_SB(sb);
    1448                 :            :                 ext4_fsblk_t blocknr;
    1449                 :            : 
    1450                 :          0 :                 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
    1451                 :          0 :                 blocknr += EXT4_C2B(sbi, block);
    1452         [ #  # ]:          0 :                 ext4_grp_locked_error(sb, e4b->bd_group,
    1453                 :            :                                       inode ? inode->i_ino : 0,
    1454                 :            :                                       blocknr,
    1455                 :            :                                       "freeing already freed block "
    1456                 :            :                                       "(bit %u); block bitmap corrupt.",
    1457                 :            :                                       block);
    1458                 :          0 :                 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
    1459                 :            :                                 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
    1460                 :          0 :                 mb_regenerate_buddy(e4b);
    1461                 :          0 :                 goto done;
    1462                 :            :         }
    1463                 :            : 
    1464                 :            :         /* let's maintain fragments counter */
    1465         [ +  + ]:       6372 :         if (left_is_free && right_is_free)
    1466                 :       1102 :                 e4b->bd_info->bb_fragments--;
    1467         [ +  + ]:       5270 :         else if (!left_is_free && !right_is_free)
    1468                 :       2714 :                 e4b->bd_info->bb_fragments++;
    1469                 :            : 
    1470                 :            :         /* buddy[0] == bd_bitmap is a special case, so handle
    1471                 :            :          * it right away and let mb_buddy_mark_free stay free of
    1472                 :            :          * zero order checks.
    1473                 :            :          * Check if neighbours are to be coalesced,
    1474                 :            :          * adjust bitmap bb_counters and borders appropriately.
    1475                 :            :          */
    1476         [ +  + ]:       6372 :         if (first & 1) {
    1477                 :       3300 :                 first += !left_is_free;
    1478         [ +  + ]:       3300 :                 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
    1479                 :            :         }
    1480         [ +  + ]:       6372 :         if (!(last & 1)) {
    1481                 :       2350 :                 last -= !right_is_free;
    1482         [ +  + ]:       2350 :                 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
    1483                 :            :         }
    1484                 :            : 
    1485         [ +  + ]:       6372 :         if (first <= last)
    1486                 :       3322 :                 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
    1487                 :            : 
    1488                 :            : done:
    1489                 :       6372 :         mb_set_largest_free_order(sb, e4b->bd_info);
    1490                 :            :         mb_check_buddy(e4b);
    1491                 :            : }
    1492                 :            : 
    1493                 :     835876 : static int mb_find_extent(struct ext4_buddy *e4b, int block,
    1494                 :            :                                 int needed, struct ext4_free_extent *ex)
    1495                 :            : {
    1496                 :            :         int next = block;
    1497                 :            :         int max, order;
    1498                 :            :         void *buddy;
    1499                 :            : 
    1500         [ -  + ]:    2507628 :         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
    1501         [ -  + ]:     835876 :         BUG_ON(ex == NULL);
    1502                 :            : 
    1503                 :     835876 :         buddy = mb_find_buddy(e4b, 0, &max);
    1504         [ -  + ]:     835876 :         BUG_ON(buddy == NULL);
    1505         [ -  + ]:     835876 :         BUG_ON(block >= max);
    1506         [ +  + ]:     835876 :         if (mb_test_bit(block, buddy)) {
    1507                 :        128 :                 ex->fe_len = 0;
    1508                 :        128 :                 ex->fe_start = 0;
    1509                 :        128 :                 ex->fe_group = 0;
    1510                 :        128 :                 return 0;
    1511                 :            :         }
    1512                 :            : 
    1513                 :            :         /* find actual order */
    1514                 :     835748 :         order = mb_find_order_for_block(e4b, block);
    1515                 :     835748 :         block = block >> order;
    1516                 :            : 
    1517                 :     835748 :         ex->fe_len = 1 << order;
    1518                 :     835748 :         ex->fe_start = block << order;
    1519                 :     835748 :         ex->fe_group = e4b->bd_group;
    1520                 :            : 
    1521                 :            :         /* calc difference from given start */
    1522                 :     835748 :         next = next - ex->fe_start;
    1523                 :     835748 :         ex->fe_len -= next;
    1524                 :     835748 :         ex->fe_start += next;
    1525                 :            : 
    1526   [ +  +  +  - ]:    1917106 :         while (needed > ex->fe_len &&
    1527                 :     195404 :                mb_find_buddy(e4b, order, &max)) {
    1528                 :            : 
    1529         [ +  + ]:     195404 :                 if (block + 1 >= max)
    1530                 :            :                         break;
    1531                 :            : 
    1532                 :     195254 :                 next = (block + 1) * (1 << order);
    1533         [ +  + ]:     390508 :                 if (mb_test_bit(next, e4b->bd_bitmap))
    1534                 :            :                         break;
    1535                 :            : 
    1536                 :      50206 :                 order = mb_find_order_for_block(e4b, next);
    1537                 :            : 
    1538                 :      50206 :                 block = next >> order;
    1539                 :      50206 :                 ex->fe_len += 1 << order;
    1540                 :            :         }
    1541                 :            : 
    1542         [ -  + ]:    1671496 :         if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
    1543                 :            :                 /* Should never happen! (but apparently sometimes does?!?) */
    1544                 :          0 :                 WARN_ON(1);
    1545                 :          0 :                 ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
    1546                 :            :                            "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
    1547                 :            :                            block, order, needed, ex->fe_group, ex->fe_start,
    1548                 :            :                            ex->fe_len, ex->fe_logical);
    1549                 :          0 :                 ex->fe_len = 0;
    1550                 :          0 :                 ex->fe_start = 0;
    1551                 :          0 :                 ex->fe_group = 0;
    1552                 :            :         }
    1553                 :     835748 :         return ex->fe_len;
    1554                 :            : }
    1555                 :            : 
    1556                 :     141256 : static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
    1557                 :            : {
    1558                 :            :         int ord;
    1559                 :            :         int mlen = 0;
    1560                 :     141256 :         int max = 0;
    1561                 :            :         int cur;
    1562                 :     141256 :         int start = ex->fe_start;
    1563                 :     141256 :         int len = ex->fe_len;
    1564                 :            :         unsigned ret = 0;
    1565                 :            :         int len0 = len;
    1566                 :            :         void *buddy;
    1567                 :            : 
    1568         [ -  + ]:     141256 :         BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
    1569         [ -  + ]:     141256 :         BUG_ON(e4b->bd_group != ex->fe_group);
    1570         [ -  + ]:     282512 :         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
    1571                 :            :         mb_check_buddy(e4b);
    1572                 :            :         mb_mark_used_double(e4b, start, len);
    1573                 :            : 
    1574                 :     141256 :         e4b->bd_info->bb_free -= len;
    1575         [ +  + ]:     141256 :         if (e4b->bd_info->bb_first_free == start)
    1576                 :       4640 :                 e4b->bd_info->bb_first_free += len;
    1577                 :            : 
    1578                 :            :         /* let's maintain fragments counter */
    1579         [ +  + ]:     141256 :         if (start != 0)
    1580                 :     282480 :                 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
    1581         [ +  + ]:     282512 :         if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
    1582                 :     282436 :                 max = !mb_test_bit(start + len, e4b->bd_bitmap);
    1583         [ +  + ]:     141256 :         if (mlen && max)
    1584                 :       5394 :                 e4b->bd_info->bb_fragments++;
    1585         [ +  + ]:     135862 :         else if (!mlen && !max)
    1586                 :      19050 :                 e4b->bd_info->bb_fragments--;
    1587                 :            : 
    1588                 :            :         /* let's maintain buddy itself */
    1589         [ +  + ]:     433404 :         while (len) {
    1590                 :     292148 :                 ord = mb_find_order_for_block(e4b, start);
    1591                 :            : 
    1592   [ +  -  +  + ]:     292148 :                 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
    1593                 :            :                         /* the whole chunk may be allocated at once! */
    1594                 :            :                         mlen = 1 << ord;
    1595                 :     160236 :                         buddy = mb_find_buddy(e4b, ord, &max);
    1596         [ -  + ]:     160236 :                         BUG_ON((start >> ord) >= max);
    1597                 :            :                         mb_set_bit(start >> ord, buddy);
    1598                 :     160236 :                         e4b->bd_info->bb_counters[ord]--;
    1599                 :     160236 :                         start += mlen;
    1600                 :     160236 :                         len -= mlen;
    1601         [ -  + ]:     160236 :                         BUG_ON(len < 0);
    1602                 :     160236 :                         continue;
    1603                 :            :                 }
    1604                 :            : 
    1605                 :            :                 /* store for history */
    1606         [ +  + ]:     131912 :                 if (ret == 0)
    1607                 :      64926 :                         ret = len | (ord << 16);
    1608                 :            : 
    1609                 :            :                 /* we have to split large buddy */
    1610         [ -  + ]:     131912 :                 BUG_ON(ord <= 0);
    1611                 :     131912 :                 buddy = mb_find_buddy(e4b, ord, &max);
    1612                 :            :                 mb_set_bit(start >> ord, buddy);
    1613                 :     131912 :                 e4b->bd_info->bb_counters[ord]--;
    1614                 :            : 
    1615                 :     131912 :                 ord--;
    1616                 :     131912 :                 cur = (start >> ord) & ~1U;
    1617                 :     131912 :                 buddy = mb_find_buddy(e4b, ord, &max);
    1618                 :            :                 mb_clear_bit(cur, buddy);
    1619                 :     131912 :                 mb_clear_bit(cur + 1, buddy);
    1620                 :     131912 :                 e4b->bd_info->bb_counters[ord]++;
    1621                 :     131912 :                 e4b->bd_info->bb_counters[ord]++;
    1622                 :            :         }
    1623                 :     141256 :         mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
    1624                 :            : 
    1625                 :     141256 :         ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
    1626                 :            :         mb_check_buddy(e4b);
    1627                 :            : 
    1628                 :     141256 :         return ret;
    1629                 :            : }
    1630                 :            : 
    1631                 :            : /*
    1632                 :            :  * Must be called under group lock!
    1633                 :            :  */
    1634                 :     141256 : static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
    1635                 :            :                                         struct ext4_buddy *e4b)
    1636                 :            : {
    1637                 :     141256 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    1638                 :            :         int ret;
    1639                 :            : 
    1640         [ -  + ]:     141256 :         BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
    1641         [ -  + ]:     141256 :         BUG_ON(ac->ac_status == AC_STATUS_FOUND);
    1642                 :            : 
    1643                 :     141256 :         ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
    1644                 :     141256 :         ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
    1645                 :     141256 :         ret = mb_mark_used(e4b, &ac->ac_b_ex);
    1646                 :            : 
    1647                 :            :         /* preallocation can change ac_b_ex, thus we store actually
    1648                 :            :          * allocated blocks for history */
    1649                 :     141256 :         ac->ac_f_ex = ac->ac_b_ex;
    1650                 :            : 
    1651                 :     141256 :         ac->ac_status = AC_STATUS_FOUND;
    1652                 :     141256 :         ac->ac_tail = ret & 0xffff;
    1653                 :     141256 :         ac->ac_buddy = ret >> 16;
    1654                 :            : 
    1655                 :            :         /*
    1656                 :            :          * take the page reference. We want the page to be pinned
    1657                 :            :          * so that we don't get an ext4_mb_init_cache() call for this
    1658                 :            :          * group until we update the bitmap. That would mean we
    1659                 :            :          * double allocate blocks. The reference is dropped
    1660                 :            :          * in ext4_mb_release_context
    1661                 :            :          */
    1662                 :     141256 :         ac->ac_bitmap_page = e4b->bd_bitmap_page;
    1663                 :     141256 :         get_page(ac->ac_bitmap_page);
    1664                 :     141256 :         ac->ac_buddy_page = e4b->bd_buddy_page;
    1665                 :     141256 :         get_page(ac->ac_buddy_page);
    1666                 :            :         /* store last allocated for subsequent stream allocation */
    1667         [ +  + ]:     141256 :         if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
    1668                 :            :                 spin_lock(&sbi->s_md_lock);
    1669                 :       2230 :                 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
    1670                 :       2230 :                 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
    1671                 :            :                 spin_unlock(&sbi->s_md_lock);
    1672                 :            :         }
    1673                 :     141256 : }
    1674                 :            : 
    1675                 :            : /*
    1676                 :            :  * regular allocator, for general purposes allocation
    1677                 :            :  */
    1678                 :            : 
    1679                 :     738320 : static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
    1680                 :            :                                         struct ext4_buddy *e4b,
    1681                 :            :                                         int finish_group)
    1682                 :            : {
    1683                 :     738320 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    1684                 :            :         struct ext4_free_extent *bex = &ac->ac_b_ex;
    1685                 :            :         struct ext4_free_extent *gex = &ac->ac_g_ex;
    1686                 :            :         struct ext4_free_extent ex;
    1687                 :            :         int max;
    1688                 :            : 
    1689         [ +  + ]:     738320 :         if (ac->ac_status == AC_STATUS_FOUND)
    1690                 :     283142 :                 return;
    1691                 :            :         /*
    1692                 :            :          * We don't want to scan for a whole year
    1693                 :            :          */
    1694   [ +  +  +  - ]:     655092 :         if (ac->ac_found > sbi->s_mb_max_to_scan &&
    1695                 :         72 :                         !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
    1696                 :         72 :                 ac->ac_status = AC_STATUS_BREAK;
    1697                 :         72 :                 return;
    1698                 :            :         }
    1699                 :            : 
    1700                 :            :         /*
    1701                 :            :          * Haven't found good chunk so far, let's continue
    1702                 :            :          */
    1703         [ +  + ]:     654948 :         if (bex->fe_len < gex->fe_len)
    1704                 :            :                 return;
    1705                 :            : 
    1706   [ +  +  +  + ]:     513662 :         if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
    1707         [ +  - ]:      58484 :                         && bex->fe_group == e4b->bd_group) {
    1708                 :            :                 /* recheck chunk's availability - we don't know
    1709                 :            :                  * when it was found (within this lock-unlock
    1710                 :            :                  * period or not) */
    1711                 :      58484 :                 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
    1712         [ +  - ]:      58484 :                 if (max >= gex->fe_len) {
    1713                 :      58484 :                         ext4_mb_use_best_found(ac, e4b);
    1714                 :      58484 :                         return;
    1715                 :            :                 }
    1716                 :            :         }
    1717                 :            : }
    1718                 :            : 
    1719                 :            : /*
    1720                 :            :  * The routine checks whether found extent is good enough. If it is,
    1721                 :            :  * then the extent gets marked used and flag is set to the context
    1722                 :            :  * to stop scanning. Otherwise, the extent is compared with the
    1723                 :            :  * previous found extent and if new one is better, then it's stored
    1724                 :            :  * in the context. Later, the best found extent will be used, if
    1725                 :            :  * mballoc can't find good enough extent.
    1726                 :            :  *
    1727                 :            :  * FIXME: real allocation policy is to be designed yet!
    1728                 :            :  */
    1729                 :     775210 : static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
    1730                 :            :                                         struct ext4_free_extent *ex,
    1731                 :            :                                         struct ext4_buddy *e4b)
    1732                 :            : {
    1733                 :            :         struct ext4_free_extent *bex = &ac->ac_b_ex;
    1734                 :            :         struct ext4_free_extent *gex = &ac->ac_g_ex;
    1735                 :            : 
    1736         [ -  + ]:     775210 :         BUG_ON(ex->fe_len <= 0);
    1737         [ -  + ]:    1550420 :         BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
    1738         [ -  + ]:     775210 :         BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
    1739         [ -  + ]:     775210 :         BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
    1740                 :            : 
    1741                 :     775210 :         ac->ac_found++;
    1742                 :            : 
    1743                 :            :         /*
    1744                 :            :          * The special case - take what you catch first
    1745                 :            :          */
    1746         [ -  + ]:     775210 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
    1747                 :          0 :                 *bex = *ex;
    1748                 :          0 :                 ext4_mb_use_best_found(ac, e4b);
    1749                 :          0 :                 return;
    1750                 :            :         }
    1751                 :            : 
    1752                 :            :         /*
    1753                 :            :          * Let's check whether the chunk is good enough
    1754                 :            :          */
    1755         [ +  + ]:     775210 :         if (ex->fe_len == gex->fe_len) {
    1756                 :      67902 :                 *bex = *ex;
    1757                 :      67902 :                 ext4_mb_use_best_found(ac, e4b);
    1758                 :      67902 :                 return;
    1759                 :            :         }
    1760                 :            : 
    1761                 :            :         /*
    1762                 :            :          * If this is first found extent, just store it in the context
    1763                 :            :          */
    1764         [ +  + ]:     707308 :         if (bex->fe_len == 0) {
    1765                 :      95700 :                 *bex = *ex;
    1766                 :      95700 :                 return;
    1767                 :            :         }
    1768                 :            : 
    1769                 :            :         /*
    1770                 :            :          * If new found extent is better, store it in the context
    1771                 :            :          */
    1772         [ +  + ]:     611608 :         if (bex->fe_len < gex->fe_len) {
    1773                 :            :                 /* if the request isn't satisfied, any found extent
    1774                 :            :                  * larger than previous best one is better */
    1775         [ +  + ]:     142590 :                 if (ex->fe_len > bex->fe_len)
    1776                 :       3120 :                         *bex = *ex;
    1777         [ +  + ]:     469018 :         } else if (ex->fe_len > gex->fe_len) {
    1778                 :            :                 /* if the request is satisfied, then we try to find
    1779                 :            :                  * an extent that still satisfies the request, but is
    1780                 :            :                  * smaller than previous one */
    1781         [ +  + ]:     466808 :                 if (ex->fe_len < bex->fe_len)
    1782                 :      38818 :                         *bex = *ex;
    1783                 :            :         }
    1784                 :            : 
    1785                 :     611608 :         ext4_mb_check_limits(ac, e4b, 0);
    1786                 :            : }
    1787                 :            : 
    1788                 :            : static noinline_for_stack
    1789                 :         36 : int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
    1790                 :            :                                         struct ext4_buddy *e4b)
    1791                 :            : {
    1792                 :         36 :         struct ext4_free_extent ex = ac->ac_b_ex;
    1793                 :         36 :         ext4_group_t group = ex.fe_group;
    1794                 :            :         int max;
    1795                 :            :         int err;
    1796                 :            : 
    1797         [ -  + ]:         36 :         BUG_ON(ex.fe_len <= 0);
    1798                 :         36 :         err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
    1799         [ +  - ]:         36 :         if (err)
    1800                 :            :                 return err;
    1801                 :            : 
    1802                 :         36 :         ext4_lock_group(ac->ac_sb, group);
    1803                 :         36 :         max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
    1804                 :            : 
    1805         [ +  - ]:         36 :         if (max > 0) {
    1806                 :         36 :                 ac->ac_b_ex = ex;
    1807                 :         36 :                 ext4_mb_use_best_found(ac, e4b);
    1808                 :            :         }
    1809                 :            : 
    1810                 :         36 :         ext4_unlock_group(ac->ac_sb, group);
    1811                 :         36 :         ext4_mb_unload_buddy(e4b);
    1812                 :            : 
    1813                 :         36 :         return 0;
    1814                 :            : }
    1815                 :            : 
    1816                 :            : static noinline_for_stack
    1817                 :     141256 : int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
    1818                 :            :                                 struct ext4_buddy *e4b)
    1819                 :            : {
    1820                 :     141256 :         ext4_group_t group = ac->ac_g_ex.fe_group;
    1821                 :            :         int max;
    1822                 :            :         int err;
    1823                 :     141256 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    1824                 :     141256 :         struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
    1825                 :            :         struct ext4_free_extent ex;
    1826                 :            : 
    1827         [ +  + ]:     141256 :         if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
    1828                 :            :                 return 0;
    1829         [ +  + ]:       2148 :         if (grp->bb_free == 0)
    1830                 :            :                 return 0;
    1831                 :            : 
    1832                 :       2146 :         err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
    1833         [ +  - ]:       2146 :         if (err)
    1834                 :            :                 return err;
    1835                 :            : 
    1836         [ -  + ]:       4292 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
    1837                 :          0 :                 ext4_mb_unload_buddy(e4b);
    1838                 :          0 :                 return 0;
    1839                 :            :         }
    1840                 :            : 
    1841                 :       2146 :         ext4_lock_group(ac->ac_sb, group);
    1842                 :       2146 :         max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
    1843                 :            :                              ac->ac_g_ex.fe_len, &ex);
    1844                 :       2146 :         ex.fe_logical = 0xDEADFA11; /* debug value */
    1845                 :            : 
    1846   [ +  +  -  + ]:       2146 :         if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
    1847                 :            :                 ext4_fsblk_t start;
    1848                 :            : 
    1849                 :          0 :                 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
    1850                 :          0 :                         ex.fe_start;
    1851                 :            :                 /* use do_div to get remainder (would be 64-bit modulo) */
    1852   [ #  #  #  #  :          0 :                 if (do_div(start, sbi->s_stripe) == 0) {
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
             #  #  #  # ]
    1853                 :          0 :                         ac->ac_found++;
    1854                 :          0 :                         ac->ac_b_ex = ex;
    1855                 :          0 :                         ext4_mb_use_best_found(ac, e4b);
    1856                 :            :                 }
    1857         [ +  + ]:       2146 :         } else if (max >= ac->ac_g_ex.fe_len) {
    1858         [ -  + ]:       1946 :                 BUG_ON(ex.fe_len <= 0);
    1859         [ -  + ]:       1946 :                 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
    1860         [ -  + ]:       1946 :                 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
    1861                 :       1946 :                 ac->ac_found++;
    1862                 :       1946 :                 ac->ac_b_ex = ex;
    1863                 :       1946 :                 ext4_mb_use_best_found(ac, e4b);
    1864   [ +  +  -  + ]:        200 :         } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
    1865                 :            :                 /* Sometimes, caller may want to merge even small
    1866                 :            :                  * number of blocks to an existing extent */
    1867         [ #  # ]:          0 :                 BUG_ON(ex.fe_len <= 0);
    1868         [ #  # ]:          0 :                 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
    1869         [ #  # ]:          0 :                 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
    1870                 :          0 :                 ac->ac_found++;
    1871                 :          0 :                 ac->ac_b_ex = ex;
    1872                 :          0 :                 ext4_mb_use_best_found(ac, e4b);
    1873                 :            :         }
    1874                 :       2146 :         ext4_unlock_group(ac->ac_sb, group);
    1875                 :       2146 :         ext4_mb_unload_buddy(e4b);
    1876                 :            : 
    1877                 :       2146 :         return 0;
    1878                 :            : }
    1879                 :            : 
    1880                 :            : /*
    1881                 :            :  * The routine scans buddy structures (not bitmap!) from given order
    1882                 :            :  * to max order and tries to find big enough chunk to satisfy the req
    1883                 :            :  */
    1884                 :            : static noinline_for_stack
    1885                 :      12900 : void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
    1886                 :            :                                         struct ext4_buddy *e4b)
    1887                 :            : {
    1888                 :      12900 :         struct super_block *sb = ac->ac_sb;
    1889                 :      12900 :         struct ext4_group_info *grp = e4b->bd_info;
    1890                 :            :         void *buddy;
    1891                 :            :         int i;
    1892                 :            :         int k;
    1893                 :            :         int max;
    1894                 :            : 
    1895         [ -  + ]:      12900 :         BUG_ON(ac->ac_2order <= 0);
    1896         [ +  + ]:      48228 :         for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
    1897         [ +  + ]:      24102 :                 if (grp->bb_counters[i] == 0)
    1898                 :      11214 :                         continue;
    1899                 :            : 
    1900                 :      12888 :                 buddy = mb_find_buddy(e4b, i, &max);
    1901         [ -  + ]:      12888 :                 BUG_ON(buddy == NULL);
    1902                 :            : 
    1903                 :      12888 :                 k = mb_find_next_zero_bit(buddy, max, 0);
    1904         [ -  + ]:      12888 :                 BUG_ON(k >= max);
    1905                 :            : 
    1906                 :      12888 :                 ac->ac_found++;
    1907                 :            : 
    1908                 :      12888 :                 ac->ac_b_ex.fe_len = 1 << i;
    1909                 :      12888 :                 ac->ac_b_ex.fe_start = k << i;
    1910                 :      12888 :                 ac->ac_b_ex.fe_group = e4b->bd_group;
    1911                 :            : 
    1912                 :      12888 :                 ext4_mb_use_best_found(ac, e4b);
    1913                 :            : 
    1914         [ -  + ]:      12888 :                 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
    1915                 :            : 
    1916         [ -  + ]:      12888 :                 if (EXT4_SB(sb)->s_mb_stats)
    1917                 :          0 :                         atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
    1918                 :            : 
    1919                 :            :                 break;
    1920                 :            :         }
    1921                 :      12900 : }
    1922                 :            : 
    1923                 :            : /*
    1924                 :            :  * The routine scans the group and measures all found extents.
    1925                 :            :  * To bound the scan, it takes the number of free clusters from
    1926                 :            :  * the group info (bb_free) and stops once they are all accounted for.
    1927                 :            :  */
    1928                 :            : static noinline_for_stack
    1929                 :     126712 : void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
    1930                 :            :                                         struct ext4_buddy *e4b)
    1931                 :            : {
    1932                 :     126712 :         struct super_block *sb = ac->ac_sb;
    1933                 :     126712 :         void *bitmap = e4b->bd_bitmap;
    1934                 :            :         struct ext4_free_extent ex;
    1935                 :            :         int i;
    1936                 :            :         int free;
    1937                 :            : 
    1938                 :     126712 :         free = e4b->bd_info->bb_free;
    1939   [ -  +  +  - ]:     126712 :         if (WARN_ON(free <= 0))
    1940                 :          0 :                 return;
    1941                 :            : 
    1942                 :     126712 :         i = e4b->bd_info->bb_first_free;
    1943                 :            : 
    1944   [ +  +  +  + ]:    1028634 :         while (free && ac->ac_status == AC_STATUS_CONTINUE) {
    1945                 :     775210 :                 i = mb_find_next_zero_bit(bitmap,
    1946                 :     775210 :                                                 EXT4_CLUSTERS_PER_GROUP(sb), i);
    1947         [ -  + ]:    1550420 :                 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
    1948                 :            :                         /*
    1949                 :            :                          * If we have a corrupt bitmap, we won't find any
    1950                 :            :                          * free blocks even though group info says we
    1951                 :            :                          * have free blocks
    1952                 :            :                          */
    1953                 :          0 :                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
    1954                 :            :                                         "%d free clusters as per "
    1955                 :            :                                         "group info. But bitmap says 0",
    1956                 :            :                                         free);
    1957                 :          0 :                         ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
    1958                 :            :                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
    1959                 :          0 :                         break;
    1960                 :            :                 }
    1961                 :            : 
    1962                 :     775210 :                 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
    1963   [ -  +  +  - ]:     775210 :                 if (WARN_ON(ex.fe_len <= 0))
    1964                 :            :                         break;
    1965         [ -  + ]:     775210 :                 if (free < ex.fe_len) {
    1966                 :          0 :                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
    1967                 :            :                                         "%d free clusters as per "
    1968                 :            :                                         "group info. But got %d blocks",
    1969                 :            :                                         free, ex.fe_len);
    1970                 :          0 :                         ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
    1971                 :            :                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
    1972                 :            :                         /*
    1973                 :            :                          * The number of free blocks differs. This most likely
    1974                 :            :                          * indicates that the bitmap is corrupt. So exit
    1975                 :            :                          * without claiming the space.
    1976                 :            :                          */
    1977                 :          0 :                         break;
    1978                 :            :                 }
    1979                 :     775210 :                 ex.fe_logical = 0xDEADC0DE; /* debug value */
    1980                 :     775210 :                 ext4_mb_measure_extent(ac, &ex, e4b);
    1981                 :            : 
    1982                 :     775210 :                 i += ex.fe_len;
    1983                 :     775210 :                 free -= ex.fe_len;
    1984                 :            :         }
    1985                 :            : 
    1986                 :     126712 :         ext4_mb_check_limits(ac, e4b, 1);
    1987                 :            : }
    1988                 :            : 
    1989                 :            : /*
    1990                 :            :  * This is a special case for storage such as RAID5:
    1991                 :            :  * we try to find stripe-aligned chunks for stripe-size-multiple requests
    1992                 :            :  */
    1993                 :            : static noinline_for_stack
    1994                 :          0 : void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
    1995                 :            :                                  struct ext4_buddy *e4b)
    1996                 :            : {
    1997                 :          0 :         struct super_block *sb = ac->ac_sb;
    1998                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1999                 :          0 :         void *bitmap = e4b->bd_bitmap;
    2000                 :            :         struct ext4_free_extent ex;
    2001                 :            :         ext4_fsblk_t first_group_block;
    2002                 :            :         ext4_fsblk_t a;
    2003                 :            :         ext4_grpblk_t i;
    2004                 :            :         int max;
    2005                 :            : 
    2006         [ #  # ]:          0 :         BUG_ON(sbi->s_stripe == 0);
    2007                 :            : 
    2008                 :            :         /* find first stripe-aligned block in group */
    2009                 :          0 :         first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
    2010                 :            : 
    2011                 :          0 :         a = first_group_block + sbi->s_stripe - 1;
    2012   [ #  #  #  #  :          0 :         do_div(a, sbi->s_stripe);
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
                   #  # ]
    2013                 :          0 :         i = (a * sbi->s_stripe) - first_group_block;
    2014                 :            : 
    2015         [ #  # ]:          0 :         while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
    2016         [ #  # ]:          0 :                 if (!mb_test_bit(i, bitmap)) {
    2017                 :          0 :                         max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
    2018         [ #  # ]:          0 :                         if (max >= sbi->s_stripe) {
    2019                 :          0 :                                 ac->ac_found++;
    2020                 :          0 :                                 ex.fe_logical = 0xDEADF00D; /* debug value */
    2021                 :          0 :                                 ac->ac_b_ex = ex;
    2022                 :          0 :                                 ext4_mb_use_best_found(ac, e4b);
    2023                 :          0 :                                 break;
    2024                 :            :                         }
    2025                 :            :                 }
    2026                 :          0 :                 i += sbi->s_stripe;
    2027                 :            :         }
    2028                 :          0 : }
    2029                 :            : 
    2030                 :            : /*
    2031                 :            :  * This is now called BEFORE we load the buddy bitmap.
    2032                 :            :  * Returns either 1 or 0 indicating that the group is either suitable
    2033                 :            :  * for the allocation or not. In addition it can also return negative
    2034                 :            :  * error code when something goes wrong.
    2035                 :            :  */
    2036                 :    1517148 : static int ext4_mb_good_group(struct ext4_allocation_context *ac,
    2037                 :            :                                 ext4_group_t group, int cr)
    2038                 :            : {
    2039                 :            :         unsigned free, fragments;
    2040                 :    1517148 :         int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
    2041                 :    1517148 :         struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
    2042                 :            : 
    2043         [ -  + ]:    1517148 :         BUG_ON(cr < 0 || cr >= 4);
    2044                 :            : 
    2045                 :    1517148 :         free = grp->bb_free;
    2046         [ +  + ]:    1517148 :         if (free == 0)
    2047                 :            :                 return 0;
    2048   [ +  +  +  + ]:     290372 :         if (cr <= 2 && free < ac->ac_g_ex.fe_len)
    2049                 :            :                 return 0;
    2050                 :            : 
    2051         [ +  - ]:     285512 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
    2052                 :            :                 return 0;
    2053                 :            : 
    2054                 :            :         /* We only do this if the grp has never been initialized */
    2055         [ +  + ]:     285512 :         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
    2056                 :       3302 :                 int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
    2057         [ +  - ]:       3302 :                 if (ret)
    2058                 :            :                         return ret;
    2059                 :            :         }
    2060                 :            : 
    2061                 :     285512 :         fragments = grp->bb_fragments;
    2062         [ +  - ]:     285512 :         if (fragments == 0)
    2063                 :            :                 return 0;
    2064                 :            : 
    2065   [ +  +  +  -  :     285512 :         switch (cr) {
                      + ]
    2066                 :            :         case 0:
    2067         [ -  + ]:      31622 :                 BUG_ON(ac->ac_2order == 0);
    2068                 :            : 
    2069                 :            :                 /* Avoid using the first bg of a flexgroup for data files */
    2070   [ +  -  +  - ]:      31622 :                 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
    2071         [ +  + ]:      31622 :                     (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
    2072                 :      31622 :                     ((group % flex_size) == 0))
    2073                 :            :                         return 0;
    2074                 :            : 
    2075   [ +  -  +  + ]:      61544 :                 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
    2076                 :      30772 :                     (free / fragments) >= ac->ac_g_ex.fe_len)
    2077                 :            :                         return 1;
    2078                 :            : 
    2079         [ +  + ]:       5824 :                 if (grp->bb_largest_free_order < ac->ac_2order)
    2080                 :            :                         return 0;
    2081                 :            : 
    2082                 :        852 :                 return 1;
    2083                 :            :         case 1:
    2084         [ +  + ]:     253198 :                 if ((free / fragments) >= ac->ac_g_ex.fe_len)
    2085                 :            :                         return 1;
    2086                 :            :                 break;
    2087                 :            :         case 2:
    2088         [ -  + ]:        296 :                 if (free >= ac->ac_g_ex.fe_len)
    2089                 :            :                         return 1;
    2090                 :            :                 break;
    2091                 :            :         case 3:
    2092                 :            :                 return 1;
    2093                 :            :         default:
    2094                 :          0 :                 BUG();
    2095                 :            :         }
    2096                 :            : 
    2097                 :        466 :         return 0;
    2098                 :            : }
    2099                 :            : 
    2100                 :            : static noinline_for_stack int
    2101                 :     141256 : ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
    2102                 :            : {
    2103                 :            :         ext4_group_t ngroups, group, i;
    2104                 :            :         int cr;
    2105                 :            :         int err = 0, first_err = 0;
    2106                 :            :         struct ext4_sb_info *sbi;
    2107                 :            :         struct super_block *sb;
    2108                 :            :         struct ext4_buddy e4b;
    2109                 :            : 
    2110                 :     141256 :         sb = ac->ac_sb;
    2111                 :            :         sbi = EXT4_SB(sb);
    2112                 :            :         ngroups = ext4_get_groups_count(sb);
    2113                 :            :         /* non-extent files are limited to low blocks/groups */
    2114         [ -  + ]:     282512 :         if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
    2115                 :          0 :                 ngroups = sbi->s_blockfile_groups;
    2116                 :            : 
    2117         [ -  + ]:     141256 :         BUG_ON(ac->ac_status == AC_STATUS_FOUND);
    2118                 :            : 
    2119                 :            :         /* first, try the goal */
    2120                 :     141256 :         err = ext4_mb_find_by_goal(ac, &e4b);
    2121   [ +  -  +  + ]:     141256 :         if (err || ac->ac_status == AC_STATUS_FOUND)
    2122                 :            :                 goto out;
    2123                 :            : 
    2124         [ +  - ]:     139310 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
    2125                 :            :                 goto out;
    2126                 :            : 
    2127                 :            :         /*
    2128                 :            :          * ac->ac_2order is set only if the fe_len is a power of 2;
    2129                 :            :          * if ac_2order is set we also set criteria to 0 so that we
    2130                 :            :          * try exact allocation using buddy.
    2131                 :            :          */
    2132                 :     278620 :         i = fls(ac->ac_g_ex.fe_len);
    2133                 :     139310 :         ac->ac_2order = 0;
    2134                 :            :         /*
    2135                 :            :          * We search using buddy data only if the order of the request
    2136                 :            :          * is greater than or equal to sbi->s_mb_order2_reqs.
    2137                 :            :          * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req.
    2138                 :            :          * We also support searching for power-of-two requests only for
    2139                 :            :          * requests up to the maximum buddy size we have constructed.
    2140                 :            :          */
    2141   [ +  +  +  + ]:     139310 :         if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
    2142                 :            :                 /*
    2143                 :            :                  * This should tell if fe_len is exactly a power of 2
    2144                 :            :                  */
    2145         [ +  + ]:      19212 :                 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
    2146                 :      12888 :                         ac->ac_2order = array_index_nospec(i - 1,
    2147                 :            :                                                            sb->s_blocksize_bits + 2);
    2148                 :            :         }
    2149                 :            : 
    2150                 :            :         /* if stream allocation is enabled, use global goal */
    2151         [ +  + ]:     139310 :         if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
    2152                 :            :                 /* TBD: may be hot point */
    2153                 :            :                 spin_lock(&sbi->s_md_lock);
    2154                 :        284 :                 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
    2155                 :        284 :                 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
    2156                 :            :                 spin_unlock(&sbi->s_md_lock);
    2157                 :            :         }
    2158                 :            : 
    2159                 :            :         /* Let's just scan groups to find more or less suitable blocks */
    2160                 :     139310 :         cr = ac->ac_2order ? 0 : 1;
    2161                 :            :         /*
    2162                 :            :          * cr == 0 try to get exact allocation,
    2163                 :            :          * cr == 3  try to get anything
    2164                 :            :          */
    2165                 :            : repeat:
    2166   [ +  +  +  + ]:     139402 :         for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
    2167                 :     139402 :                 ac->ac_criteria = cr;
    2168                 :            :                 /*
    2169                 :            :                  * searching for the right group start
    2170                 :            :                  * from the goal value specified
    2171                 :            :                  */
    2172                 :     139402 :                 group = ac->ac_g_ex.fe_group;
    2173                 :            : 
    2174         [ +  + ]:    1377628 :                 for (i = 0; i < ngroups; group++, i++) {
    2175                 :            :                         int ret = 0;
    2176                 :    1377536 :                         cond_resched();
    2177                 :            :                         /*
    2178                 :            :                          * Artificially restricted ngroups for non-extent
    2179                 :            :                          * files makes group >= ngroups possible on first loop.
    2180                 :            :                          */
    2181         [ +  + ]:    1377536 :                         if (group >= ngroups)
    2182                 :            :                                 group = 0;
    2183                 :            : 
    2184                 :            :                         /* This now checks without needing the buddy page */
    2185                 :    1377536 :                         ret = ext4_mb_good_group(ac, group, cr);
    2186         [ +  + ]:    1377536 :                         if (ret <= 0) {
    2187         [ +  - ]:    1237924 :                                 if (!first_err)
    2188                 :            :                                         first_err = ret;
    2189                 :    1237924 :                                 continue;
    2190                 :            :                         }
    2191                 :            : 
    2192                 :            :                         err = ext4_mb_load_buddy(sb, group, &e4b);
    2193         [ +  - ]:     139612 :                         if (err)
    2194                 :            :                                 goto out;
    2195                 :            : 
    2196                 :     139612 :                         ext4_lock_group(sb, group);
    2197                 :            : 
    2198                 :            :                         /*
    2199                 :            :                          * We need to check again after locking the
    2200                 :            :                          * block group
    2201                 :            :                          */
    2202                 :     139612 :                         ret = ext4_mb_good_group(ac, group, cr);
    2203         [ -  + ]:     139612 :                         if (ret <= 0) {
    2204                 :            :                                 ext4_unlock_group(sb, group);
    2205                 :          0 :                                 ext4_mb_unload_buddy(&e4b);
    2206         [ #  # ]:          0 :                                 if (!first_err)
    2207                 :            :                                         first_err = ret;
    2208                 :          0 :                                 continue;
    2209                 :            :                         }
    2210                 :            : 
    2211                 :     139612 :                         ac->ac_groups_scanned++;
    2212         [ +  + ]:     139612 :                         if (cr == 0)
    2213                 :      12900 :                                 ext4_mb_simple_scan_group(ac, &e4b);
    2214   [ +  +  -  +  :     126712 :                         else if (cr == 1 && sbi->s_stripe &&
                   #  # ]
    2215                 :          0 :                                         !(ac->ac_g_ex.fe_len % sbi->s_stripe))
    2216                 :          0 :                                 ext4_mb_scan_aligned(ac, &e4b);
    2217                 :            :                         else
    2218                 :     126712 :                                 ext4_mb_complex_scan_group(ac, &e4b);
    2219                 :            : 
    2220                 :            :                         ext4_unlock_group(sb, group);
    2221                 :     139612 :                         ext4_mb_unload_buddy(&e4b);
    2222                 :            : 
    2223         [ +  + ]:     139612 :                         if (ac->ac_status != AC_STATUS_CONTINUE)
    2224                 :            :                                 break;
    2225                 :            :                 }
    2226                 :            :         }
    2227                 :            : 
    2228   [ +  -  +  +  :     139346 :         if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
                   +  - ]
    2229                 :         36 :             !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
    2230                 :            :                 /*
    2231                 :            :                  * We've been searching too long. Let's try to allocate
    2232                 :            :                  * the best chunk we've found so far
    2233                 :            :                  */
    2234                 :            : 
    2235                 :         36 :                 ext4_mb_try_best_found(ac, &e4b);
    2236         [ -  + ]:         36 :                 if (ac->ac_status != AC_STATUS_FOUND) {
    2237                 :            :                         /*
    2238                 :            :                          * Someone luckier has already allocated it.
    2239                 :            :                          * The only thing we can do is just take the first
    2240                 :            :                          * found block(s)
    2241                 :            :                         printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n");
    2242                 :            :                          */
    2243                 :          0 :                         ac->ac_b_ex.fe_group = 0;
    2244                 :          0 :                         ac->ac_b_ex.fe_start = 0;
    2245                 :          0 :                         ac->ac_b_ex.fe_len = 0;
    2246                 :          0 :                         ac->ac_status = AC_STATUS_CONTINUE;
    2247                 :          0 :                         ac->ac_flags |= EXT4_MB_HINT_FIRST;
    2248                 :            :                         cr = 3;
    2249                 :          0 :                         atomic_inc(&sbi->s_mb_lost_chunks);
    2250                 :            :                         goto repeat;
    2251                 :            :                 }
    2252                 :            :         }
    2253                 :            : out:
    2254   [ +  -  -  +  :     141256 :         if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
                   #  # ]
    2255                 :            :                 err = first_err;
    2256                 :     141256 :         return err;
    2257                 :            : }
    2258                 :            : 
    2259                 :          0 : static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
    2260                 :            : {
    2261                 :          0 :         struct super_block *sb = PDE_DATA(file_inode(seq->file));
    2262                 :            :         ext4_group_t group;
    2263                 :            : 
    2264   [ #  #  #  # ]:          0 :         if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
    2265                 :            :                 return NULL;
    2266                 :          0 :         group = *pos + 1;
    2267                 :          0 :         return (void *) ((unsigned long) group);
    2268                 :            : }
    2269                 :            : 
    2270                 :          0 : static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
    2271                 :            : {
    2272                 :          0 :         struct super_block *sb = PDE_DATA(file_inode(seq->file));
    2273                 :            :         ext4_group_t group;
    2274                 :            : 
    2275                 :          0 :         ++*pos;
    2276   [ #  #  #  # ]:          0 :         if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
    2277                 :            :                 return NULL;
    2278                 :          0 :         group = *pos + 1;
    2279                 :          0 :         return (void *) ((unsigned long) group);
    2280                 :            : }
    2281                 :            : 
    2282                 :          0 : static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
    2283                 :            : {
    2284                 :          0 :         struct super_block *sb = PDE_DATA(file_inode(seq->file));
    2285                 :          0 :         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
    2286                 :            :         int i;
    2287                 :            :         int err, buddy_loaded = 0;
    2288                 :            :         struct ext4_buddy e4b;
    2289                 :            :         struct ext4_group_info *grinfo;
    2290         [ #  # ]:          0 :         unsigned char blocksize_bits = min_t(unsigned char,
    2291                 :            :                                              sb->s_blocksize_bits,
    2292                 :            :                                              EXT4_MAX_BLOCK_LOG_SIZE);
    2293                 :            :         struct sg {
    2294                 :            :                 struct ext4_group_info info;
    2295                 :            :                 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
    2296                 :            :         } sg;
    2297                 :            : 
    2298                 :          0 :         group--;
    2299         [ #  # ]:          0 :         if (group == 0)
    2300                 :          0 :                 seq_puts(seq, "#group: free  frags first ["
    2301                 :            :                               " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
    2302                 :            :                               " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
    2303                 :            : 
    2304                 :          0 :         i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
    2305                 :            :                 sizeof(struct ext4_group_info);
    2306                 :            : 
    2307                 :          0 :         grinfo = ext4_get_group_info(sb, group);
    2308                 :            :         /* Load the group info in memory only if not already loaded. */
    2309         [ #  # ]:          0 :         if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
    2310                 :            :                 err = ext4_mb_load_buddy(sb, group, &e4b);
    2311         [ #  # ]:          0 :                 if (err) {
    2312                 :          0 :                         seq_printf(seq, "#%-5u: I/O error\n", group);
    2313                 :          0 :                         return 0;
    2314                 :            :                 }
    2315                 :            :                 buddy_loaded = 1;
    2316                 :            :         }
    2317                 :            : 
    2318                 :          0 :         memcpy(&sg, ext4_get_group_info(sb, group), i);
    2319                 :            : 
    2320         [ #  # ]:          0 :         if (buddy_loaded)
    2321                 :          0 :                 ext4_mb_unload_buddy(&e4b);
    2322                 :            : 
    2323                 :          0 :         seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
    2324                 :            :                         sg.info.bb_fragments, sg.info.bb_first_free);
    2325         [ #  # ]:          0 :         for (i = 0; i <= 13; i++)
    2326         [ #  # ]:          0 :                 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
    2327                 :            :                                 sg.info.bb_counters[i] : 0);
    2328                 :          0 :         seq_printf(seq, " ]\n");
    2329                 :            : 
    2330                 :          0 :         return 0;
    2331                 :            : }
    2332                 :            : 
    2333                 :          0 : static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
    2334                 :            : {
    2335                 :          0 : }
    2336                 :            : 
    2337                 :            : const struct seq_operations ext4_mb_seq_groups_ops = {
    2338                 :            :         .start  = ext4_mb_seq_groups_start,
    2339                 :            :         .next   = ext4_mb_seq_groups_next,
    2340                 :            :         .stop   = ext4_mb_seq_groups_stop,
    2341                 :            :         .show   = ext4_mb_seq_groups_show,
    2342                 :            : };
    2343                 :            : 
    2344                 :            : static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
    2345                 :            : {
    2346                 :      21412 :         int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
    2347                 :      21412 :         struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
    2348                 :            : 
    2349   [ #  #  #  #  :      21412 :         BUG_ON(!cachep);
                   -  + ]
    2350                 :            :         return cachep;
    2351                 :            : }
    2352                 :            : 
    2353                 :            : /*
    2354                 :            :  * Allocate the top-level s_group_info array for the specified number
    2355                 :            :  * of groups
    2356                 :            :  */
    2357                 :        404 : int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
    2358                 :            : {
    2359                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2360                 :            :         unsigned size;
    2361                 :            :         struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
    2362                 :            : 
    2363                 :        808 :         size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
    2364                 :        404 :                 EXT4_DESC_PER_BLOCK_BITS(sb);
    2365         [ +  - ]:        404 :         if (size <= sbi->s_group_info_size)
    2366                 :            :                 return 0;
    2367                 :            : 
    2368   [ -  +  #  #  :        404 :         size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
                   #  # ]
    2369                 :            :         new_groupinfo = kvzalloc(size, GFP_KERNEL);
    2370         [ -  + ]:        404 :         if (!new_groupinfo) {
    2371                 :          0 :                 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
    2372                 :          0 :                 return -ENOMEM;
    2373                 :            :         }
    2374                 :            :         rcu_read_lock();
    2375                 :        404 :         old_groupinfo = rcu_dereference(sbi->s_group_info);
    2376         [ -  + ]:        404 :         if (old_groupinfo)
    2377                 :          0 :                 memcpy(new_groupinfo, old_groupinfo,
    2378                 :          0 :                        sbi->s_group_info_size * sizeof(*sbi->s_group_info));
    2379                 :            :         rcu_read_unlock();
    2380                 :        404 :         rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
    2381                 :        404 :         sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
    2382         [ -  + ]:        404 :         if (old_groupinfo)
    2383                 :          0 :                 ext4_kvfree_array_rcu(old_groupinfo);
    2384                 :            :         ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", 
    2385                 :            :                    sbi->s_group_info_size);
    2386                 :            :         return 0;
    2387                 :            : }
    2388                 :            : 
    2389                 :            : /* Create and initialize ext4_group_info data for the given group. */
    2390                 :      21412 : int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
    2391                 :            :                           struct ext4_group_desc *desc)
    2392                 :            : {
    2393                 :            :         int i;
    2394                 :            :         int metalen = 0;
    2395                 :      21412 :         int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
    2396                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2397                 :            :         struct ext4_group_info **meta_group_info;
    2398                 :      21412 :         struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
    2399                 :            : 
    2400                 :            :         /*
    2401                 :            :          * First check if this group is the first group of a descriptor
    2402                 :            :          * block. If so, we have to allocate a new table of pointers
    2403                 :            :          * to ext4_group_info structures
    2404                 :            :          */
    2405         [ +  + ]:      21412 :         if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
    2406                 :        404 :                 metalen = sizeof(*meta_group_info) <<
    2407                 :            :                         EXT4_DESC_PER_BLOCK_BITS(sb);
    2408                 :            :                 meta_group_info = kmalloc(metalen, GFP_NOFS);
    2409         [ -  + ]:        404 :                 if (meta_group_info == NULL) {
    2410                 :          0 :                         ext4_msg(sb, KERN_ERR, "can't allocate mem "
    2411                 :            :                                  "for a buddy group");
    2412                 :          0 :                         goto exit_meta_group_info;
    2413                 :            :                 }
    2414                 :            :                 rcu_read_lock();
    2415                 :        404 :                 rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
    2416                 :            :                 rcu_read_unlock();
    2417                 :            :         }
    2418                 :            : 
    2419                 :      21412 :         meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
    2420                 :      21412 :         i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
    2421                 :            : 
    2422                 :      42824 :         meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
    2423         [ -  + ]:      21412 :         if (meta_group_info[i] == NULL) {
    2424                 :          0 :                 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
    2425                 :            :                 goto exit_group_info;
    2426                 :            :         }
    2427                 :      21412 :         set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
    2428                 :            :                 &(meta_group_info[i]->bb_state));
    2429                 :            : 
    2430                 :            :         /*
    2431                 :            :          * initialize bb_free to be able to skip
    2432                 :            :          * empty groups without initialization
    2433                 :            :          */
    2434   [ -  +  #  # ]:      21412 :         if (ext4_has_group_desc_csum(sb) &&
    2435                 :          0 :             (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
    2436                 :          0 :                 meta_group_info[i]->bb_free =
    2437                 :          0 :                         ext4_free_clusters_after_init(sb, group, desc);
    2438                 :            :         } else {
    2439                 :      42824 :                 meta_group_info[i]->bb_free =
    2440                 :      21412 :                         ext4_free_group_clusters(sb, desc);
    2441                 :            :         }
    2442                 :            : 
    2443                 :      21412 :         INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
    2444                 :      21412 :         init_rwsem(&meta_group_info[i]->alloc_sem);
    2445                 :      21412 :         meta_group_info[i]->bb_free_root = RB_ROOT;
    2446                 :      21412 :         meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
    2447                 :            : 
    2448                 :            : #ifdef DOUBLE_CHECK
    2449                 :            :         {
    2450                 :            :                 struct buffer_head *bh;
    2451                 :            :                 meta_group_info[i]->bb_bitmap =
    2452                 :            :                         kmalloc(sb->s_blocksize, GFP_NOFS);
    2453                 :            :                 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
    2454                 :            :                 bh = ext4_read_block_bitmap(sb, group);
    2455                 :            :                 BUG_ON(IS_ERR_OR_NULL(bh));
    2456                 :            :                 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
    2457                 :            :                         sb->s_blocksize);
    2458                 :            :                 put_bh(bh);
    2459                 :            :         }
    2460                 :            : #endif
    2461                 :            : 
    2462                 :      21412 :         return 0;
    2463                 :            : 
    2464                 :            : exit_group_info:
    2465                 :            :         /* If a meta_group_info table has been allocated, release it now */
    2466         [ #  # ]:          0 :         if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
    2467                 :            :                 struct ext4_group_info ***group_info;
    2468                 :            : 
    2469                 :            :                 rcu_read_lock();
    2470                 :          0 :                 group_info = rcu_dereference(sbi->s_group_info);
    2471                 :          0 :                 kfree(group_info[idx]);
    2472                 :          0 :                 group_info[idx] = NULL;
    2473                 :            :                 rcu_read_unlock();
    2474                 :            :         }
    2475                 :            : exit_meta_group_info:
    2476                 :            :         return -ENOMEM;
    2477                 :            : } /* ext4_mb_add_groupinfo */
    2478                 :            : 
    2479                 :        404 : static int ext4_mb_init_backend(struct super_block *sb)
    2480                 :            : {
    2481                 :            :         ext4_group_t ngroups = ext4_get_groups_count(sb);
    2482                 :            :         ext4_group_t i;
    2483                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2484                 :            :         int err;
    2485                 :            :         struct ext4_group_desc *desc;
    2486                 :            :         struct ext4_group_info ***group_info;
    2487                 :            :         struct kmem_cache *cachep;
    2488                 :            : 
    2489                 :        404 :         err = ext4_mb_alloc_groupinfo(sb, ngroups);
    2490         [ +  - ]:        404 :         if (err)
    2491                 :            :                 return err;
    2492                 :            : 
    2493                 :        404 :         sbi->s_buddy_cache = new_inode(sb);
    2494         [ -  + ]:        404 :         if (sbi->s_buddy_cache == NULL) {
    2495                 :          0 :                 ext4_msg(sb, KERN_ERR, "can't get new inode");
    2496                 :          0 :                 goto err_freesgi;
    2497                 :            :         }
    2498                 :            :         /* To avoid potentially colliding with an valid on-disk inode number,
    2499                 :            :          * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
    2500                 :            :          * not in the inode hash, so it should never be found by iget(), but
    2501                 :            :          * this will avoid confusion if it ever shows up during debugging. */
    2502                 :        404 :         sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
    2503                 :        404 :         EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
    2504         [ +  + ]:      21816 :         for (i = 0; i < ngroups; i++) {
    2505                 :      21412 :                 cond_resched();
    2506                 :      21412 :                 desc = ext4_get_group_desc(sb, i, NULL);
    2507         [ -  + ]:      21412 :                 if (desc == NULL) {
    2508                 :          0 :                         ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
    2509                 :          0 :                         goto err_freebuddy;
    2510                 :            :                 }
    2511         [ +  - ]:      21412 :                 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
    2512                 :            :                         goto err_freebuddy;
    2513                 :            :         }
    2514                 :            : 
    2515                 :            :         return 0;
    2516                 :            : 
    2517                 :            : err_freebuddy:
    2518                 :          0 :         cachep = get_groupinfo_cache(sb->s_blocksize_bits);
    2519         [ #  # ]:          0 :         while (i-- > 0)
    2520                 :          0 :                 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
    2521                 :          0 :         i = sbi->s_group_info_size;
    2522                 :            :         rcu_read_lock();
    2523                 :          0 :         group_info = rcu_dereference(sbi->s_group_info);
    2524         [ #  # ]:          0 :         while (i-- > 0)
    2525                 :          0 :                 kfree(group_info[i]);
    2526                 :            :         rcu_read_unlock();
    2527                 :          0 :         iput(sbi->s_buddy_cache);
    2528                 :            : err_freesgi:
    2529                 :            :         rcu_read_lock();
    2530                 :          0 :         kvfree(rcu_dereference(sbi->s_group_info));
    2531                 :            :         rcu_read_unlock();
    2532                 :          0 :         return -ENOMEM;
    2533                 :            : }
    2534                 :            : 
    2535                 :          0 : static void ext4_groupinfo_destroy_slabs(void)
    2536                 :            : {
    2537                 :            :         int i;
    2538                 :            : 
    2539         [ #  # ]:          0 :         for (i = 0; i < NR_GRPINFO_CACHES; i++) {
    2540                 :          0 :                 kmem_cache_destroy(ext4_groupinfo_caches[i]);
    2541                 :          0 :                 ext4_groupinfo_caches[i] = NULL;
    2542                 :            :         }
    2543                 :          0 : }
    2544                 :            : 
    2545                 :        404 : static int ext4_groupinfo_create_slab(size_t size)
    2546                 :            : {
    2547                 :            :         static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
    2548                 :            :         int slab_size;
    2549   [ -  +  #  #  :        808 :         int blocksize_bits = order_base_2(size);
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
                      # ]
    2550                 :        404 :         int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
    2551                 :            :         struct kmem_cache *cachep;
    2552                 :            : 
    2553         [ +  - ]:        404 :         if (cache_index >= NR_GRPINFO_CACHES)
    2554                 :            :                 return -EINVAL;
    2555                 :            : 
    2556         [ -  + ]:        404 :         if (unlikely(cache_index < 0))
    2557                 :            :                 cache_index = 0;
    2558                 :            : 
    2559                 :        404 :         mutex_lock(&ext4_grpinfo_slab_create_mutex);
    2560         [ -  + ]:        404 :         if (ext4_groupinfo_caches[cache_index]) {
    2561                 :          0 :                 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
    2562                 :          0 :                 return 0;       /* Already created */
    2563                 :            :         }
    2564                 :            : 
    2565                 :        404 :         slab_size = offsetof(struct ext4_group_info,
    2566                 :            :                                 bb_counters[blocksize_bits + 2]);
    2567                 :            : 
    2568                 :        404 :         cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
    2569                 :            :                                         slab_size, 0, SLAB_RECLAIM_ACCOUNT,
    2570                 :            :                                         NULL);
    2571                 :            : 
    2572                 :        404 :         ext4_groupinfo_caches[cache_index] = cachep;
    2573                 :            : 
    2574                 :        404 :         mutex_unlock(&ext4_grpinfo_slab_create_mutex);
    2575         [ -  + ]:        404 :         if (!cachep) {
    2576                 :          0 :                 printk(KERN_EMERG
    2577                 :            :                        "EXT4-fs: no memory for groupinfo slab cache\n");
    2578                 :          0 :                 return -ENOMEM;
    2579                 :            :         }
    2580                 :            : 
    2581                 :            :         return 0;
    2582                 :            : }
    2583                 :            : 
    2584                 :        404 : int ext4_mb_init(struct super_block *sb)
    2585                 :            : {
    2586                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2587                 :            :         unsigned i, j;
    2588                 :            :         unsigned offset, offset_incr;
    2589                 :            :         unsigned max;
    2590                 :            :         int ret;
    2591                 :            : 
    2592                 :        404 :         i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
    2593                 :            : 
    2594                 :        404 :         sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
    2595         [ +  - ]:        404 :         if (sbi->s_mb_offsets == NULL) {
    2596                 :            :                 ret = -ENOMEM;
    2597                 :            :                 goto out;
    2598                 :            :         }
    2599                 :            : 
    2600                 :        404 :         i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
    2601                 :        404 :         sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
    2602         [ +  - ]:        404 :         if (sbi->s_mb_maxs == NULL) {
    2603                 :            :                 ret = -ENOMEM;
    2604                 :            :                 goto out;
    2605                 :            :         }
    2606                 :            : 
    2607                 :        404 :         ret = ext4_groupinfo_create_slab(sb->s_blocksize);
    2608         [ +  - ]:        404 :         if (ret < 0)
    2609                 :            :                 goto out;
    2610                 :            : 
    2611                 :            :         /* order 0 is regular bitmap */
    2612                 :        404 :         sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
    2613                 :        404 :         sbi->s_mb_offsets[0] = 0;
    2614                 :            : 
    2615                 :            :         i = 1;
    2616                 :            :         offset = 0;
    2617                 :        404 :         offset_incr = 1 << (sb->s_blocksize_bits - 1);
    2618                 :        404 :         max = sb->s_blocksize << 2;
    2619                 :            :         do {
    2620                 :       5252 :                 sbi->s_mb_offsets[i] = offset;
    2621                 :       5252 :                 sbi->s_mb_maxs[i] = max;
    2622                 :       5252 :                 offset += offset_incr;
    2623                 :       5252 :                 offset_incr = offset_incr >> 1;
    2624                 :       5252 :                 max = max >> 1;
    2625                 :       5252 :                 i++;
    2626         [ +  + ]:       5252 :         } while (i <= sb->s_blocksize_bits + 1);
    2627                 :            : 
    2628                 :        404 :         spin_lock_init(&sbi->s_md_lock);
    2629                 :        404 :         spin_lock_init(&sbi->s_bal_lock);
    2630                 :        404 :         sbi->s_mb_free_pending = 0;
    2631                 :        404 :         INIT_LIST_HEAD(&sbi->s_freed_data_list);
    2632                 :            : 
    2633                 :        404 :         sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
    2634                 :        404 :         sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
    2635                 :        404 :         sbi->s_mb_stats = MB_DEFAULT_STATS;
    2636                 :        404 :         sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
    2637                 :        404 :         sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
    2638                 :            :         /*
    2639                 :            :          * The default group preallocation is 512, which for 4k block
    2640                 :            :          * sizes translates to 2 megabytes.  However for bigalloc file
    2641                 :            :          * systems, this is probably too big (i.e, if the cluster size
    2642                 :            :          * is 1 megabyte, then group preallocation size becomes half a
    2643                 :            :          * gigabyte!).  As a default, we will keep a two megabyte
    2644                 :            :          * group pralloc size for cluster sizes up to 64k, and after
    2645                 :            :          * that, we will force a minimum group preallocation size of
    2646                 :            :          * 32 clusters.  This translates to 8 megs when the cluster
    2647                 :            :          * size is 256k, and 32 megs when the cluster size is 1 meg,
    2648                 :            :          * which seems reasonable as a default.
    2649                 :            :          */
    2650                 :        404 :         sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
    2651                 :            :                                        sbi->s_cluster_bits, 32);
    2652                 :            :         /*
    2653                 :            :          * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
    2654                 :            :          * to the lowest multiple of s_stripe which is bigger than
    2655                 :            :          * the s_mb_group_prealloc as determined above. We want
    2656                 :            :          * the preallocation size to be an exact multiple of the
    2657                 :            :          * RAID stripe size so that preallocations don't fragment
    2658                 :            :          * the stripes.
    2659                 :            :          */
    2660         [ -  + ]:        404 :         if (sbi->s_stripe > 1) {
    2661                 :          0 :                 sbi->s_mb_group_prealloc = roundup(
    2662                 :            :                         sbi->s_mb_group_prealloc, sbi->s_stripe);
    2663                 :            :         }
    2664                 :            : 
    2665                 :        404 :         sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
    2666         [ +  - ]:        404 :         if (sbi->s_locality_groups == NULL) {
    2667                 :            :                 ret = -ENOMEM;
    2668                 :            :                 goto out;
    2669                 :            :         }
    2670         [ +  + ]:       2020 :         for_each_possible_cpu(i) {
    2671                 :            :                 struct ext4_locality_group *lg;
    2672                 :       1616 :                 lg = per_cpu_ptr(sbi->s_locality_groups, i);
    2673                 :       1616 :                 mutex_init(&lg->lg_mutex);
    2674         [ +  + ]:      17776 :                 for (j = 0; j < PREALLOC_TB_SIZE; j++)
    2675                 :      16160 :                         INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
    2676                 :       1616 :                 spin_lock_init(&lg->lg_prealloc_lock);
    2677                 :            :         }
    2678                 :            : 
    2679                 :            :         /* init file for buddy data */
    2680                 :        404 :         ret = ext4_mb_init_backend(sb);
    2681         [ -  + ]:        404 :         if (ret != 0)
    2682                 :            :                 goto out_free_locality_groups;
    2683                 :            : 
    2684                 :            :         return 0;
    2685                 :            : 
    2686                 :            : out_free_locality_groups:
    2687                 :          0 :         free_percpu(sbi->s_locality_groups);
    2688                 :          0 :         sbi->s_locality_groups = NULL;
    2689                 :            : out:
    2690                 :          0 :         kfree(sbi->s_mb_offsets);
    2691                 :          0 :         sbi->s_mb_offsets = NULL;
    2692                 :          0 :         kfree(sbi->s_mb_maxs);
    2693                 :          0 :         sbi->s_mb_maxs = NULL;
    2694                 :          0 :         return ret;
    2695                 :            : }
    2696                 :            : 
    2697                 :            : /* need to called with the ext4 group lock held */
    2698                 :          0 : static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
    2699                 :            : {
    2700                 :            :         struct ext4_prealloc_space *pa;
    2701                 :            :         struct list_head *cur, *tmp;
    2702                 :            :         int count = 0;
    2703                 :            : 
    2704         [ #  # ]:          0 :         list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
    2705                 :          0 :                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
    2706                 :            :                 list_del(&pa->pa_group_list);
    2707                 :            :                 count++;
    2708                 :          0 :                 kmem_cache_free(ext4_pspace_cachep, pa);
    2709                 :            :         }
    2710                 :            :         if (count)
    2711                 :            :                 mb_debug(1, "mballoc: %u PAs left\n", count);
    2712                 :            : 
    2713                 :          0 : }
    2714                 :            : 
    2715                 :          0 : int ext4_mb_release(struct super_block *sb)
    2716                 :            : {
    2717                 :            :         ext4_group_t ngroups = ext4_get_groups_count(sb);
    2718                 :            :         ext4_group_t i;
    2719                 :            :         int num_meta_group_infos;
    2720                 :            :         struct ext4_group_info *grinfo, ***group_info;
    2721                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2722                 :          0 :         struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
    2723                 :            : 
    2724         [ #  # ]:          0 :         if (sbi->s_group_info) {
    2725         [ #  # ]:          0 :                 for (i = 0; i < ngroups; i++) {
    2726                 :          0 :                         cond_resched();
    2727                 :          0 :                         grinfo = ext4_get_group_info(sb, i);
    2728                 :            : #ifdef DOUBLE_CHECK
    2729                 :            :                         kfree(grinfo->bb_bitmap);
    2730                 :            : #endif
    2731                 :          0 :                         ext4_lock_group(sb, i);
    2732                 :          0 :                         ext4_mb_cleanup_pa(grinfo);
    2733                 :            :                         ext4_unlock_group(sb, i);
    2734                 :          0 :                         kmem_cache_free(cachep, grinfo);
    2735                 :            :                 }
    2736                 :          0 :                 num_meta_group_infos = (ngroups +
    2737                 :          0 :                                 EXT4_DESC_PER_BLOCK(sb) - 1) >>
    2738                 :          0 :                         EXT4_DESC_PER_BLOCK_BITS(sb);
    2739                 :            :                 rcu_read_lock();
    2740                 :          0 :                 group_info = rcu_dereference(sbi->s_group_info);
    2741         [ #  # ]:          0 :                 for (i = 0; i < num_meta_group_infos; i++)
    2742                 :          0 :                         kfree(group_info[i]);
    2743                 :          0 :                 kvfree(group_info);
    2744                 :            :                 rcu_read_unlock();
    2745                 :            :         }
    2746                 :          0 :         kfree(sbi->s_mb_offsets);
    2747                 :          0 :         kfree(sbi->s_mb_maxs);
    2748                 :          0 :         iput(sbi->s_buddy_cache);
    2749         [ #  # ]:          0 :         if (sbi->s_mb_stats) {
    2750                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2751                 :            :                        "mballoc: %u blocks %u reqs (%u success)",
    2752                 :            :                                 atomic_read(&sbi->s_bal_allocated),
    2753                 :            :                                 atomic_read(&sbi->s_bal_reqs),
    2754                 :            :                                 atomic_read(&sbi->s_bal_success));
    2755                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2756                 :            :                       "mballoc: %u extents scanned, %u goal hits, "
    2757                 :            :                                 "%u 2^N hits, %u breaks, %u lost",
    2758                 :            :                                 atomic_read(&sbi->s_bal_ex_scanned),
    2759                 :            :                                 atomic_read(&sbi->s_bal_goals),
    2760                 :            :                                 atomic_read(&sbi->s_bal_2orders),
    2761                 :            :                                 atomic_read(&sbi->s_bal_breaks),
    2762                 :            :                                 atomic_read(&sbi->s_mb_lost_chunks));
    2763                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2764                 :            :                        "mballoc: %lu generated and it took %Lu",
    2765                 :            :                                 sbi->s_mb_buddies_generated,
    2766                 :            :                                 sbi->s_mb_generation_time);
    2767                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2768                 :            :                        "mballoc: %u preallocated, %u discarded",
    2769                 :            :                                 atomic_read(&sbi->s_mb_preallocated),
    2770                 :            :                                 atomic_read(&sbi->s_mb_discarded));
    2771                 :            :         }
    2772                 :            : 
    2773                 :          0 :         free_percpu(sbi->s_locality_groups);
    2774                 :            : 
    2775                 :          0 :         return 0;
    2776                 :            : }
    2777                 :            : 
    2778                 :          0 : static inline int ext4_issue_discard(struct super_block *sb,
    2779                 :            :                 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
    2780                 :            :                 struct bio **biop)
    2781                 :            : {
    2782                 :            :         ext4_fsblk_t discard_block;
    2783                 :            : 
    2784                 :          0 :         discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
    2785                 :            :                          ext4_group_first_block_no(sb, block_group));
    2786                 :          0 :         count = EXT4_C2B(EXT4_SB(sb), count);
    2787                 :          0 :         trace_ext4_discard_blocks(sb,
    2788                 :            :                         (unsigned long long) discard_block, count);
    2789         [ #  # ]:          0 :         if (biop) {
    2790                 :          0 :                 return __blkdev_issue_discard(sb->s_bdev,
    2791                 :          0 :                         (sector_t)discard_block << (sb->s_blocksize_bits - 9),
    2792                 :            :                         (sector_t)count << (sb->s_blocksize_bits - 9),
    2793                 :            :                         GFP_NOFS, 0, biop);
    2794                 :            :         } else
    2795                 :          0 :                 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
    2796                 :            : }
    2797                 :            : 
    2798                 :       6130 : static void ext4_free_data_in_buddy(struct super_block *sb,
    2799                 :            :                                     struct ext4_free_data *entry)
    2800                 :            : {
    2801                 :            :         struct ext4_buddy e4b;
    2802                 :            :         struct ext4_group_info *db;
    2803                 :            :         int err, count = 0, count2 = 0;
    2804                 :            : 
    2805                 :            :         mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
    2806                 :            :                  entry->efd_count, entry->efd_group, entry);
    2807                 :            : 
    2808                 :       6130 :         err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
    2809                 :            :         /* we expect to find existing buddy because it's pinned */
    2810         [ -  + ]:       6130 :         BUG_ON(err != 0);
    2811                 :            : 
    2812                 :            :         spin_lock(&EXT4_SB(sb)->s_md_lock);
    2813                 :       6130 :         EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
    2814                 :            :         spin_unlock(&EXT4_SB(sb)->s_md_lock);
    2815                 :            : 
    2816                 :       6130 :         db = e4b.bd_info;
    2817                 :            :         /* there are blocks to put in buddy to make them really free */
    2818                 :            :         count += entry->efd_count;
    2819                 :            :         count2++;
    2820                 :       6130 :         ext4_lock_group(sb, entry->efd_group);
    2821                 :            :         /* Take it out of per group rb tree */
    2822                 :       6130 :         rb_erase(&entry->efd_node, &(db->bb_free_root));
    2823                 :       6130 :         mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
    2824                 :            : 
    2825                 :            :         /*
    2826                 :            :          * Clear the trimmed flag for the group so that the next
    2827                 :            :          * ext4_trim_fs can trim it.
    2828                 :            :          * If the volume is mounted with -o discard, online discard
    2829                 :            :          * is supported and the free blocks will be trimmed online.
    2830                 :            :          */
    2831         [ +  - ]:       6130 :         if (!test_opt(sb, DISCARD))
    2832                 :       6130 :                 EXT4_MB_GRP_CLEAR_TRIMMED(db);
    2833                 :            : 
    2834         [ +  + ]:       6130 :         if (!db->bb_free_root.rb_node) {
    2835                 :            :                 /* No more items in the per group rb tree
    2836                 :            :                  * balance refcounts from ext4_mb_free_metadata()
    2837                 :            :                  */
    2838                 :       3512 :                 put_page(e4b.bd_buddy_page);
    2839                 :       3512 :                 put_page(e4b.bd_bitmap_page);
    2840                 :            :         }
    2841                 :       6130 :         ext4_unlock_group(sb, entry->efd_group);
    2842                 :       6130 :         kmem_cache_free(ext4_free_data_cachep, entry);
    2843                 :       6130 :         ext4_mb_unload_buddy(&e4b);
    2844                 :            : 
    2845                 :            :         mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
    2846                 :       6130 : }
    2847                 :            : 
    2848                 :            : /*
    2849                 :            :  * This function is called by the jbd2 layer once the commit has finished,
    2850                 :            :  * so we know we can free the blocks that were released with that commit.
                         :            :  *
                         :            :  * @sb:         superblock whose s_freed_data_list is processed
                         :            :  * @commit_tid: id of the transaction that has just committed
                         :            :  *
                         :            :  * The leading run of s_freed_data_list entries whose efd_tid equals
                         :            :  * @commit_tid is detached onto a private list while s_md_lock is held.
                         :            :  * When the DISCARD mount option is set, a discard is issued for every
                         :            :  * detached extent; a failure other than -EOPNOTSUPP is only logged, while
                         :            :  * -EOPNOTSUPP stops issuing further discards.  Finally each detached
                         :            :  * entry is handed to ext4_free_data_in_buddy().
                         :            :  *
                         :            :  * NOTE(review): the _safe list iterator is presumably needed because
                         :            :  * ext4_free_data_in_buddy() frees the entry - confirm against its body.
    2851                 :            :  */
    2852                 :       9462 : void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
    2853                 :            : {
    2854                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2855                 :            :         struct ext4_free_data *entry, *tmp;
    2856                 :       9462 :         struct bio *discard_bio = NULL;
    2857                 :            :         struct list_head freed_data_list;
    2858                 :            :         struct list_head *cut_pos = NULL;
    2859                 :            :         int err;
    2860                 :            : 
    2861                 :            :         INIT_LIST_HEAD(&freed_data_list);
    2862                 :            : 
    2863                 :            :         spin_lock(&sbi->s_md_lock);
    2864         [ +  + ]:      15592 :         list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
    2865         [ +  + ]:       6180 :                 if (entry->efd_tid != commit_tid)
    2866                 :            :                         break; /* first entry of another transaction ends the run */
    2867                 :            :                 cut_pos = &entry->efd_list; /* last matching entry seen so far */
    2868                 :            :         }
    2869         [ +  + ]:       9462 :         if (cut_pos)
    2870                 :       3038 :                 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
    2871                 :            :                                   cut_pos);
    2872                 :            :         spin_unlock(&sbi->s_md_lock);
    2873                 :            : 
    2874         [ -  + ]:       9462 :         if (test_opt(sb, DISCARD)) {
    2875         [ #  # ]:          0 :                 list_for_each_entry(entry, &freed_data_list, efd_list) {
    2876                 :          0 :                         err = ext4_issue_discard(sb, entry->efd_group,
    2877                 :            :                                                  entry->efd_start_cluster,
    2878                 :            :                                                  entry->efd_count,
    2879                 :            :                                                  &discard_bio);
    2880         [ #  # ]:          0 :                         if (err && err != -EOPNOTSUPP) {
    2881                 :          0 :                                 ext4_msg(sb, KERN_WARNING, "discard request in"
    2882                 :            :                                          " group:%d block:%d count:%d failed"
    2883                 :            :                                          " with %d", entry->efd_group,
    2884                 :            :                                          entry->efd_start_cluster,
    2885                 :            :                                          entry->efd_count, err);
    2886         [ #  # ]:          0 :                         } else if (err == -EOPNOTSUPP)
    2887                 :            :                                 break; /* discard not supported: stop issuing */
    2888                 :            :                 }
    2889                 :            : 
    2890         [ #  # ]:          0 :                 if (discard_bio) {
    2891                 :          0 :                         submit_bio_wait(discard_bio); /* wait before the blocks are released below */
    2892                 :          0 :                         bio_put(discard_bio);
    2893                 :            :                 }
    2894                 :            :         }
    2895                 :            : 
    2896         [ +  + ]:      15592 :         list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
    2897                 :       6130 :                 ext4_free_data_in_buddy(sb, entry);
    2898                 :       9462 : }
    2899                 :            : 
    2900                 :        404 : int __init ext4_init_mballoc(void)
    2901                 :            : { /* Create the three mballoc slab caches: 0 on success, -ENOMEM on failure. */
    2902                 :        404 :         ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
    2903                 :            :                                         SLAB_RECLAIM_ACCOUNT);
    2904         [ +  - ]:        404 :         if (ext4_pspace_cachep == NULL)
    2905                 :            :                 return -ENOMEM; /* nothing created yet, nothing to unwind */
    2906                 :            : 
    2907                 :        404 :         ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
    2908                 :            :                                     SLAB_RECLAIM_ACCOUNT);
    2909         [ -  + ]:        404 :         if (ext4_ac_cachep == NULL) {
    2910                 :          0 :                 kmem_cache_destroy(ext4_pspace_cachep); /* unwind the first cache */
    2911                 :          0 :                 return -ENOMEM;
    2912                 :            :         }
    2913                 :            : 
    2914                 :        404 :         ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
    2915                 :            :                                            SLAB_RECLAIM_ACCOUNT);
    2916         [ -  + ]:        404 :         if (ext4_free_data_cachep == NULL) {
    2917                 :          0 :                 kmem_cache_destroy(ext4_pspace_cachep); /* unwind both earlier caches */
    2918                 :          0 :                 kmem_cache_destroy(ext4_ac_cachep);
    2919                 :          0 :                 return -ENOMEM;
    2920                 :            :         }
    2921                 :            :         return 0;
    2922                 :            : }
    2923                 :            : 
    2924                 :          0 : void ext4_exit_mballoc(void)
    2925                 :            : { /* Destroy the mballoc slab caches and the groupinfo slabs. */
    2926                 :            :         /*
    2927                 :            :          * Wait for completion of call_rcu()'s on ext4_pspace_cachep
    2928                 :            :          * before destroying the slab cache.
                         :            :          * The barrier must therefore stay ahead of the
                         :            :          * kmem_cache_destroy() calls below.
    2929                 :            :          */
    2930                 :          0 :         rcu_barrier();
    2931                 :          0 :         kmem_cache_destroy(ext4_pspace_cachep);
    2932                 :          0 :         kmem_cache_destroy(ext4_ac_cachep);
    2933                 :          0 :         kmem_cache_destroy(ext4_free_data_cachep);
    2934                 :          0 :         ext4_groupinfo_destroy_slabs();
    2935                 :          0 : }
    2936                 :            : 
    2937                 :            : 
    2938                 :            : /*
    2939                 :            :  * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
    2940                 :            :  * Returns 0 if success or error code
                         :            :  *
                         :            :  * @ac:            allocation context; must be in AC_STATUS_FOUND state
                         :            :  *                 with a positive ac_b_ex.fe_len (both enforced by BUG_ON)
                         :            :  * @handle:        journal handle used to get write access to, and to
                         :            :  *                 dirty, the block bitmap and group descriptor buffers
                         :            :  * @reserv_clstrs: subtracted from s_dirtyclusters_counter unless
                         :            :  *                 EXT4_MB_DELALLOC_RESERVED is set in ac->ac_flags
                         :            :  *
                         :            :  * NOTE(review): no quota check is visible in this function body despite
                         :            :  * the wording above - confirm where quota is actually charged.
                         :            :  *
                         :            :  * If the chosen range fails ext4_data_block_valid(), the bits are still
                         :            :  * set in the bitmap (the blocks are deliberately leaked) and
                         :            :  * -EFSCORRUPTED is returned, unless dirtying the bitmap buffer fails
                         :            :  * first, in which case that error is returned.
    2941                 :            :  */
    2942                 :            : static noinline_for_stack int
    2943                 :     145960 : ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
    2944                 :            :                                 handle_t *handle, unsigned int reserv_clstrs)
    2945                 :            : {
    2946                 :            :         struct buffer_head *bitmap_bh = NULL;
    2947                 :            :         struct ext4_group_desc *gdp;
    2948                 :            :         struct buffer_head *gdp_bh;
    2949                 :            :         struct ext4_sb_info *sbi;
    2950                 :            :         struct super_block *sb;
    2951                 :            :         ext4_fsblk_t block;
    2952                 :            :         int err, len;
    2953                 :            : 
    2954         [ -  + ]:     145960 :         BUG_ON(ac->ac_status != AC_STATUS_FOUND);
    2955         [ -  + ]:     145960 :         BUG_ON(ac->ac_b_ex.fe_len <= 0);
    2956                 :            : 
    2957                 :     145960 :         sb = ac->ac_sb;
    2958                 :            :         sbi = EXT4_SB(sb);
    2959                 :            : 
    2960                 :     145960 :         bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
    2961         [ -  + ]:     145960 :         if (IS_ERR(bitmap_bh)) {
    2962                 :            :                 err = PTR_ERR(bitmap_bh);
    2963                 :            :                 bitmap_bh = NULL; /* so that brelse() at out_err gets NULL, not an ERR_PTR */
    2964                 :          0 :                 goto out_err;
    2965                 :            :         }
    2966                 :            : 
    2967                 :            :         BUFFER_TRACE(bitmap_bh, "getting write access");
    2968                 :     145960 :         err = ext4_journal_get_write_access(handle, bitmap_bh);
    2969         [ +  - ]:     145960 :         if (err)
    2970                 :            :                 goto out_err;
    2971                 :            : 
    2972                 :            :         err = -EIO; /* returned if the group descriptor lookup below fails */
    2973                 :     145960 :         gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
    2974         [ +  - ]:     145960 :         if (!gdp)
    2975                 :            :                 goto out_err;
    2976                 :            : 
    2977                 :            :         ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
    2978                 :            :                         ext4_free_group_clusters(sb, gdp));
    2979                 :            : 
    2980                 :            :         BUFFER_TRACE(gdp_bh, "get_write_access");
    2981                 :     145960 :         err = ext4_journal_get_write_access(handle, gdp_bh);
    2982         [ +  - ]:     145960 :         if (err)
    2983                 :            :                 goto out_err;
    2984                 :            : 
    2985                 :            :         block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    2986                 :            : 
    2987                 :     145960 :         len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); /* extent length converted from clusters to blocks */
    2988         [ -  + ]:     145960 :         if (!ext4_data_block_valid(sbi, block, len)) {
    2989                 :          0 :                 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
    2990                 :            :                            "fs metadata", block, block+len);
    2991                 :            :                 /* File system mounted not to panic on error
    2992                 :            :                  * Fix the bitmap and return EFSCORRUPTED
    2993                 :            :                  * We leak some of the blocks here.
    2994                 :            :                  */
    2995                 :          0 :                 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    2996                 :          0 :                 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
    2997                 :            :                               ac->ac_b_ex.fe_len);
    2998                 :          0 :                 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    2999                 :          0 :                 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    3000         [ #  # ]:          0 :                 if (!err)
    3001                 :            :                         err = -EFSCORRUPTED;
    3002                 :            :                 goto out_err;
    3003                 :            :         }
    3004                 :            : 
    3005                 :     145960 :         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    3006                 :            : #ifdef AGGRESSIVE_CHECK
    3007                 :            :         {
    3008                 :            :                 int i;
    3009                 :            :                 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
    3010                 :            :                         BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
    3011                 :            :                                                 bitmap_bh->b_data));
    3012                 :            :                 }
    3013                 :            :         }
    3014                 :            : #endif
    3015                 :     145960 :         ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
    3016                 :            :                       ac->ac_b_ex.fe_len);
    3017   [ -  +  #  # ]:     145960 :         if (ext4_has_group_desc_csum(sb) &&
    3018                 :          0 :             (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
    3019                 :          0 :                 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
    3020                 :          0 :                 ext4_free_group_clusters_set(sb, gdp,
    3021                 :            :                                              ext4_free_clusters_after_init(sb,
    3022                 :            :                                                 ac->ac_b_ex.fe_group, gdp));
    3023                 :            :         }
    3024                 :     145960 :         len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; /* len reused: new free-cluster count of the group */
    3025                 :     145960 :         ext4_free_group_clusters_set(sb, gdp, len);
    3026                 :     145960 :         ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
    3027                 :     145960 :         ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
    3028                 :            : 
    3029                 :     145960 :         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    3030                 :     145960 :         percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
    3031                 :            :         /*
    3032                 :            :          * Now reduce the dirty block count also. Should not go negative
    3033                 :            :          */
    3034         [ +  + ]:     145960 :         if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
    3035                 :            :                 /* release all the reserved blocks if non delalloc */
    3036                 :      84984 :                 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
    3037                 :            :                                    reserv_clstrs);
    3038                 :            : 
    3039         [ +  - ]:     145960 :         if (sbi->s_log_groups_per_flex) {
    3040                 :     145960 :                 ext4_group_t flex_group = ext4_flex_group(sbi,
    3041                 :            :                                                           ac->ac_b_ex.fe_group);
    3042                 :     291920 :                 atomic64_sub(ac->ac_b_ex.fe_len,
    3043                 :     145960 :                              &sbi_array_rcu_deref(sbi, s_flex_groups,
    3044                 :            :                                                   flex_group)->free_clusters);
    3045                 :            :         }
    3046                 :            : 
    3047                 :     145960 :         err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    3048         [ +  - ]:     145960 :         if (err)
    3049                 :            :                 goto out_err;
    3050                 :     145960 :         err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
    3051                 :            : 
    3052                 :            : out_err:
    3053                 :            :         brelse(bitmap_bh); /* gdp_bh is not released in this function */
    3054                 :     145960 :         return err;
    3055                 :            : }
    3056                 :            : 
    3057                 :            : /*
    3058                 :            :  * Here we normalize a request for a locality group.
    3059                 :            :  * Group requests are normalized to s_mb_group_prealloc, which goes to
    3060                 :            :  * s_strip if we set the same via mount option.
    3061                 :            :  * s_mb_group_prealloc can be configured via
    3062                 :            :  * /sys/fs/ext4/<partition>/mb_group_prealloc
    3063                 :            :  *
                         :            :  * @ac: allocation context; ac->ac_lg must be non-NULL (BUG_ON otherwise).
                         :            :  *      Only ac->ac_g_ex.fe_len is written here.
                         :            :  *
    3064                 :            :  * XXX: should we try to preallocate more than the group has now?
    3065                 :            :  */
    3066                 :       1150 : static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
    3067                 :            : {
    3068                 :       1150 :         struct super_block *sb = ac->ac_sb;
    3069                 :       1150 :         struct ext4_locality_group *lg = ac->ac_lg;
    3070                 :            : 
    3071         [ -  + ]:       1150 :         BUG_ON(lg == NULL);
    3072                 :       1150 :         ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
    3073                 :            :         mb_debug(1, "#%u: goal %u blocks for locality group\n",
    3074                 :            :                 current->pid, ac->ac_g_ex.fe_len);
    3075                 :       1150 : }
    3076                 :            : 
    3077                 :            : /*
    3078                 :            :  * Normalization means making request better in terms of
    3079                 :            :  * size and alignment
                         :            :  *
                         :            :  * @ac: allocation context.  On return ac->ac_g_ex.fe_logical/fe_len hold
                         :            :  *      the normalized goal; ac->ac_f_ex and EXT4_MB_HINT_TRY_GOAL are set
                         :            :  *      when the goal can be merged with the left or right neighbour.
                         :            :  * @ar: original allocation request; its lleft/pleft and lright/pright
                         :            :  *      neighbours bound the normalized range.
                         :            :  *
                         :            :  * Nothing is changed for non-data requests or when
                         :            :  * EXT4_MB_HINT_GOAL_ONLY or EXT4_MB_HINT_NOPREALLOC is set.  Requests
                         :            :  * flagged EXT4_MB_HINT_GROUP_ALLOC are delegated to
                         :            :  * ext4_mb_normalize_group_request().
                         :            :  *
                         :            :  * Otherwise the predicted file size is rounded up using a fixed table,
                         :            :  * the start is aligned accordingly, and the range is trimmed so that it
                         :            :  * does not cover already allocated neighbours, does not exceed one block
                         :            :  * group and does not overlap existing preallocations of the inode.  The
                         :            :  * resulting range must still contain the originally requested logical
                         :            :  * block; this is enforced with BUG().
    3080                 :            :  */
    3081                 :            : static noinline_for_stack void
    3082                 :     141256 : ext4_mb_normalize_request(struct ext4_allocation_context *ac,
    3083                 :            :                                 struct ext4_allocation_request *ar)
    3084                 :            : {
    3085                 :     141256 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3086                 :            :         int bsbits, max;
    3087                 :            :         ext4_lblk_t end;
    3088                 :            :         loff_t size, start_off;
    3089                 :            :         loff_t orig_size __maybe_unused;
    3090                 :            :         ext4_lblk_t start;
    3091                 :     141256 :         struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
    3092                 :            :         struct ext4_prealloc_space *pa;
    3093                 :            : 
    3094                 :            :         /* do normalize only data requests, metadata requests
    3095                 :            :            do not need preallocation */
    3096         [ +  + ]:     141256 :         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
    3097                 :            :                 return;
    3098                 :            : 
    3099                 :            :         /* sometimes the caller may want exact blocks */
    3100         [ +  - ]:      56336 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
    3101                 :            :                 return;
    3102                 :            : 
    3103                 :            :         /* caller may indicate that preallocation isn't
    3104                 :            :          * required (it's a tail, for example) */
    3105         [ +  + ]:      56336 :         if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
    3106                 :            :                 return;
    3107                 :            : 
    3108         [ +  + ]:       3302 :         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
    3109                 :       1150 :                 ext4_mb_normalize_group_request(ac);
    3110                 :       1150 :                 return ;
    3111                 :            :         }
    3112                 :            : 
    3113                 :       2152 :         bsbits = ac->ac_sb->s_blocksize_bits;
    3114                 :            : 
    3115                 :            :         /* first, let's learn actual file size
    3116                 :            :          * given current request is allocated */
    3117                 :       2152 :         size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
    3118                 :       2152 :         size = size << bsbits;
    3119         [ -  + ]:       2152 :         if (size < i_size_read(ac->ac_inode))
    3120                 :          0 :                 size = i_size_read(ac->ac_inode);
    3121                 :            :         orig_size = size;
    3122                 :            : 
    3123                 :            :         /* max size of free chunks */
    3124                 :       2152 :         max = 2 << bsbits;
    3125                 :            : 
    3126                 :            : #define NRL_CHECK_SIZE(req, size, max, chunk_size)      \
    3127                 :            :                 (req <= (size) || max <= (chunk_size))
    3128                 :            : 
    3129                 :            :         /* first, try to predict filesize */
    3130                 :            :         /* XXX: should this table be tunable? */
    3131                 :            :         start_off = 0;
    3132         [ +  - ]:       2152 :         if (size <= 16 * 1024) {
    3133                 :            :                 size = 16 * 1024;
    3134         [ +  - ]:       2152 :         } else if (size <= 32 * 1024) {
    3135                 :            :                 size = 32 * 1024;
    3136         [ +  - ]:       2152 :         } else if (size <= 64 * 1024) {
    3137                 :            :                 size = 64 * 1024;
    3138         [ +  + ]:       2152 :         } else if (size <= 128 * 1024) {
    3139                 :            :                 size = 128 * 1024;
    3140         [ +  + ]:       2026 :         } else if (size <= 256 * 1024) {
    3141                 :            :                 size = 256 * 1024;
    3142         [ +  + ]:       1858 :         } else if (size <= 512 * 1024) {
    3143                 :            :                 size = 512 * 1024;
    3144         [ +  + ]:       1744 :         } else if (size <= 1024 * 1024) {
    3145                 :            :                 size = 1024 * 1024;
    3146         [ +  + ]:       1512 :         } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
    3147                 :       1584 :                 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
    3148                 :        792 :                                                 (21 - bsbits)) << 21;
    3149                 :            :                 size = 2 * 1024 * 1024;
    3150         [ +  + ]:        720 :         } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
    3151                 :        692 :                 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
    3152                 :        346 :                                                         (22 - bsbits)) << 22;
    3153                 :            :                 size = 4 * 1024 * 1024;
    3154   [ -  +  #  # ]:        374 :         } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
    3155                 :            :                                         (8<<20)>>bsbits, max, 8 * 1024)) {
    3156                 :        748 :                 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
    3157                 :        374 :                                                         (23 - bsbits)) << 23;
    3158                 :        374 :                 size = 8 * 1024 * 1024;
    3159                 :            :         } else {
    3160                 :          0 :                 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
    3161                 :          0 :                 size      = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
    3162                 :            :                                               ac->ac_o_ex.fe_len) << bsbits;
    3163                 :            :         }
    3164                 :       2152 :         size = size >> bsbits;
    3165                 :       2152 :         start = start_off >> bsbits;
                         :            : 
                         :            :         /*
                         :            :          * For tiny groups the alignment chosen above may be larger than
                         :            :          * the group size.  Because size is trimmed to one group below,
                         :            :          * an aligned start further than one group before the request
                         :            :          * would produce a range that no longer covers the requested
                         :            :          * block.  Never start before the group-sized chunk that
                         :            :          * contains the original logical block.
                         :            :          */
                         :            :         start = max(start, rounddown(ac->ac_o_ex.fe_logical,
                         :            :                         (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
    3166                 :            : 
    3167                 :            :         /* don't cover already allocated blocks in selected range */
    3168   [ +  +  +  + ]:       2152 :         if (ar->pleft && start <= ar->lleft) {
    3169                 :       2124 :                 size -= ar->lleft + 1 - start;
    3170                 :       2124 :                 start = ar->lleft + 1;
    3171                 :            :         }
    3172   [ -  +  #  # ]:       2152 :         if (ar->pright && start + size - 1 >= ar->lright)
    3173                 :          0 :                 size -= start + size - ar->lright;
    3174                 :            : 
    3175                 :            :         /*
    3176                 :            :          * Trim allocation request for filesystems with artificially small
    3177                 :            :          * groups.
    3178                 :            :          */
    3179         [ -  + ]:       4304 :         if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
    3180                 :            :                 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
    3181                 :            : 
    3182                 :       2152 :         end = start + size;
    3183                 :            : 
    3184                 :            :         /* check we don't cross already preallocated blocks */
    3185                 :            :         rcu_read_lock();
    3186         [ -  + ]:       2152 :         list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
    3187                 :            :                 ext4_lblk_t pa_end;
    3188                 :            : 
    3189         [ #  # ]:          0 :                 if (pa->pa_deleted)
    3190                 :          0 :                         continue;
    3191                 :            :                 spin_lock(&pa->pa_lock);
    3192         [ #  # ]:          0 :                 if (pa->pa_deleted) {
    3193                 :            :                         spin_unlock(&pa->pa_lock);
    3194                 :          0 :                         continue;
    3195                 :            :                 }
    3196                 :            : 
    3197                 :          0 :                 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
    3198                 :            :                                                   pa->pa_len);
    3199                 :            : 
    3200                 :            :                 /* PA must not overlap original request */
    3201   [ #  #  #  # ]:          0 :                 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
    3202                 :            :                         ac->ac_o_ex.fe_logical < pa->pa_lstart));
    3203                 :            : 
    3204                 :            :                 /* skip PAs this normalized request doesn't overlap with */
    3205   [ #  #  #  # ]:          0 :                 if (pa->pa_lstart >= end || pa_end <= start) {
    3206                 :            :                         spin_unlock(&pa->pa_lock);
    3207                 :          0 :                         continue;
    3208                 :            :                 }
    3209   [ #  #  #  # ]:          0 :                 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
    3210                 :            : 
    3211                 :            :                 /* adjust start or end to be adjacent to this pa */
    3212         [ #  # ]:          0 :                 if (pa_end <= ac->ac_o_ex.fe_logical) {
    3213         [ #  # ]:          0 :                         BUG_ON(pa_end < start);
    3214                 :            :                         start = pa_end;
    3215         [ #  # ]:          0 :                 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
    3216         [ #  # ]:          0 :                         BUG_ON(pa->pa_lstart > end);
    3217                 :            :                         end = pa->pa_lstart;
    3218                 :            :                 }
    3219                 :            :                 spin_unlock(&pa->pa_lock);
    3220                 :            :         }
    3221                 :            :         rcu_read_unlock();
    3222                 :       2152 :         size = end - start;
    3223                 :            : 
    3224                 :            :         /* XXX: extra loop to check we really don't overlap preallocations */
    3225                 :            :         rcu_read_lock();
    3226         [ -  + ]:       2152 :         list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
    3227                 :            :                 ext4_lblk_t pa_end;
    3228                 :            : 
    3229                 :            :                 spin_lock(&pa->pa_lock);
    3230         [ #  # ]:          0 :                 if (pa->pa_deleted == 0) {
    3231                 :          0 :                         pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
    3232                 :            :                                                           pa->pa_len);
    3233   [ #  #  #  # ]:          0 :                         BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
    3234                 :            :                 }
    3235                 :            :                 spin_unlock(&pa->pa_lock);
    3236                 :            :         }
    3237                 :            :         rcu_read_unlock();
    3238                 :            : 
                         :            :         /*
                         :            :          * The originally requested logical block must lie inside
                         :            :          * [start, start + size).  Violating either bound is fatal, so
                         :            :          * the two conditions are OR-ed: with && the test could never
                         :            :          * fire for a positive size.
                         :            :          */
    3239   [ -  +  #  # ]:       2152 :         if (start + size <= ac->ac_o_ex.fe_logical ||
    3240                 :            :                         start > ac->ac_o_ex.fe_logical) {
    3241                 :          0 :                 ext4_msg(ac->ac_sb, KERN_ERR,
    3242                 :            :                          "start %lu, size %lu, fe_logical %lu",
    3243                 :            :                          (unsigned long) start, (unsigned long) size,
    3244                 :            :                          (unsigned long) ac->ac_o_ex.fe_logical);
    3245                 :          0 :                 BUG();
    3246                 :            :         }
    3247   [ +  -  -  + ]:       4304 :         BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
    3248                 :            : 
    3249                 :            :         /* now prepare goal request */
    3250                 :            : 
    3251                 :            :         /* XXX: is it better to align blocks WRT to logical
    3252                 :            :          * placement or satisfy big request as is */
    3253                 :       2152 :         ac->ac_g_ex.fe_logical = start;
    3254                 :       2152 :         ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
    3255                 :            : 
    3256                 :            :         /* define goal start in order to merge */
    3257   [ -  +  #  # ]:       2152 :         if (ar->pright && (ar->lright == (start + size))) {
    3258                 :            :                 /* merge to the right */
    3259                 :          0 :                 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
    3260                 :            :                                                 &ac->ac_f_ex.fe_group,
    3261                 :            :                                                 &ac->ac_f_ex.fe_start);
    3262                 :          0 :                 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
    3263                 :            :         }
    3264   [ +  +  +  - ]:       2152 :         if (ar->pleft && (ar->lleft + 1 == start)) {
    3265                 :            :                 /* merge to the left */
    3266                 :       2148 :                 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
    3267                 :            :                                                 &ac->ac_f_ex.fe_group,
    3268                 :            :                                                 &ac->ac_f_ex.fe_start);
    3269                 :       2148 :                 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
    3270                 :            :         }
    3271                 :            : 
    3272                 :            :         mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
    3273                 :            :                 (unsigned) orig_size, (unsigned) start);
    3274                 :            : }
    3275                 :            : 
    3276                 :     145960 : static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
    3277                 :            : {
    3278                 :     145960 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3279                 :            : 
    3280   [ -  +  #  # ]:     145960 :         if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
    3281                 :          0 :                 atomic_inc(&sbi->s_bal_reqs);
    3282                 :          0 :                 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
    3283         [ #  # ]:          0 :                 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
    3284                 :          0 :                         atomic_inc(&sbi->s_bal_success);
    3285                 :          0 :                 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
    3286   [ #  #  #  # ]:          0 :                 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
    3287                 :          0 :                                 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
    3288                 :          0 :                         atomic_inc(&sbi->s_bal_goals);
    3289         [ #  # ]:          0 :                 if (ac->ac_found > sbi->s_mb_max_to_scan)
    3290                 :          0 :                         atomic_inc(&sbi->s_bal_breaks);
    3291                 :            :         }
    3292                 :            : 
    3293         [ +  + ]:     145960 :         if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
    3294                 :     141256 :                 trace_ext4_mballoc_alloc(ac);
    3295                 :            :         else
    3296                 :       4704 :                 trace_ext4_mballoc_prealloc(ac);
    3297                 :     145960 : }
    3298                 :            : 
    3299                 :            : /*
    3300                 :            :  * Called on failure; free up any blocks from the inode PA for this
    3301                 :            :  * context.  We don't need this for MB_GROUP_PA because we only change
    3302                 :            :  * pa_free in ext4_mb_release_context(), but on failure, we've already
    3303                 :            :  * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
    3304                 :            :  */
    3305                 :          0 : static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
    3306                 :            : {
    3307                 :          0 :         struct ext4_prealloc_space *pa = ac->ac_pa;
    3308                 :            :         struct ext4_buddy e4b;
    3309                 :            :         int err;
    3310                 :            : 
    3311         [ #  # ]:          0 :         if (pa == NULL) {
    3312         [ #  # ]:          0 :                 if (ac->ac_f_ex.fe_len == 0)
    3313                 :          0 :                         return;
    3314                 :          0 :                 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
    3315         [ #  # ]:          0 :                 if (err) {
    3316                 :            :                         /*
    3317                 :            :                          * This should never happen since we pin the
    3318                 :            :                          * pages in the ext4_allocation_context so
    3319                 :            :                          * ext4_mb_load_buddy() should never fail.
    3320                 :            :                          */
    3321                 :          0 :                         WARN(1, "mb_load_buddy failed (%d)", err);
    3322                 :          0 :                         return;
    3323                 :            :                 }
    3324                 :          0 :                 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
    3325                 :          0 :                 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
    3326                 :            :                                ac->ac_f_ex.fe_len);
    3327                 :          0 :                 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
    3328                 :          0 :                 ext4_mb_unload_buddy(&e4b);
    3329                 :          0 :                 return;
    3330                 :            :         }
    3331         [ #  # ]:          0 :         if (pa->pa_type == MB_INODE_PA)
    3332                 :          0 :                 pa->pa_free += ac->ac_b_ex.fe_len;
    3333                 :            : }
    3334                 :            : 
    3335                 :            : /*
    3336                 :            :  * use blocks preallocated to inode
    3337                 :            :  */
    3338                 :       4432 : static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
    3339                 :            :                                 struct ext4_prealloc_space *pa)
    3340                 :            : {
    3341                 :       4432 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3342                 :            :         ext4_fsblk_t start;
    3343                 :            :         ext4_fsblk_t end;
    3344                 :            :         int len;
    3345                 :            : 
    3346                 :            :         /* found preallocated blocks, use them */
    3347                 :       4432 :         start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
    3348                 :       4432 :         end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
    3349                 :            :                   start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
    3350                 :       4432 :         len = EXT4_NUM_B2C(sbi, end - start);
    3351                 :       4432 :         ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
    3352                 :            :                                         &ac->ac_b_ex.fe_start);
    3353                 :       4432 :         ac->ac_b_ex.fe_len = len;
    3354                 :       4432 :         ac->ac_status = AC_STATUS_FOUND;
    3355                 :       4432 :         ac->ac_pa = pa;
    3356                 :            : 
    3357         [ -  + ]:       4432 :         BUG_ON(start < pa->pa_pstart);
    3358         [ -  + ]:       4432 :         BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
    3359         [ -  + ]:       4432 :         BUG_ON(pa->pa_free < len);
    3360                 :       4432 :         pa->pa_free -= len;
    3361                 :            : 
    3362                 :            :         mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
    3363                 :       4432 : }
    3364                 :            : 
    3365                 :            : /*
    3366                 :            :  * use blocks preallocated to locality group
    3367                 :            :  */
    3368                 :       3544 : static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
    3369                 :            :                                 struct ext4_prealloc_space *pa)
    3370                 :            : {
    3371                 :       3544 :         unsigned int len = ac->ac_o_ex.fe_len;
    3372                 :            : 
    3373                 :       3544 :         ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
    3374                 :            :                                         &ac->ac_b_ex.fe_group,
    3375                 :            :                                         &ac->ac_b_ex.fe_start);
    3376                 :       3544 :         ac->ac_b_ex.fe_len = len;
    3377                 :       3544 :         ac->ac_status = AC_STATUS_FOUND;
    3378                 :       3544 :         ac->ac_pa = pa;
    3379                 :            : 
    3380                 :            :         /* we don't correct pa_pstart or pa_plen here to avoid
    3381                 :            :          * possible race when the group is being loaded concurrently
    3382                 :            :          * instead we correct pa later, after blocks are marked
    3383                 :            :          * in on-disk bitmap -- see ext4_mb_release_context()
    3384                 :            :          * Other CPUs are prevented from allocating from this pa by lg_mutex
    3385                 :            :          */
    3386                 :            :         mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
    3387                 :       3544 : }
    3388                 :            : 
    3389                 :            : /*
    3390                 :            :  * Return the prealloc space that have minimal distance
    3391                 :            :  * from the goal block. @cpa is the prealloc
    3392                 :            :  * space that is having currently known minimal distance
    3393                 :            :  * from the goal block.
    3394                 :            :  */
    3395                 :            : static struct ext4_prealloc_space *
    3396                 :       2394 : ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
    3397                 :            :                         struct ext4_prealloc_space *pa,
    3398                 :            :                         struct ext4_prealloc_space *cpa)
    3399                 :            : {
    3400                 :            :         ext4_fsblk_t cur_distance, new_distance;
    3401                 :            : 
    3402         [ +  - ]:       2394 :         if (cpa == NULL) {
    3403                 :       2394 :                 atomic_inc(&pa->pa_count);
    3404                 :       2394 :                 return pa;
    3405                 :            :         }
    3406                 :          0 :         cur_distance = abs(goal_block - cpa->pa_pstart);
    3407                 :          0 :         new_distance = abs(goal_block - pa->pa_pstart);
    3408                 :            : 
    3409         [ #  # ]:          0 :         if (cur_distance <= new_distance)
    3410                 :            :                 return cpa;
    3411                 :            : 
    3412                 :            :         /* drop the previous reference */
    3413                 :          0 :         atomic_dec(&cpa->pa_count);
    3414                 :          0 :         atomic_inc(&pa->pa_count);
    3415                 :          0 :         return pa;
    3416                 :            : }
    3417                 :            : 
    3418                 :            : /*
    3419                 :            :  * search goal blocks in preallocated space
    3420                 :            :  */
    3421                 :            : static noinline_for_stack int
    3422                 :     145960 : ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
    3423                 :            : {
    3424                 :     145960 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3425                 :            :         int order, i;
    3426                 :     145960 :         struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
    3427                 :            :         struct ext4_locality_group *lg;
    3428                 :            :         struct ext4_prealloc_space *pa, *cpa = NULL;
    3429                 :            :         ext4_fsblk_t goal_block;
    3430                 :            : 
    3431                 :            :         /* only data can be preallocated */
    3432         [ +  + ]:     145960 :         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
    3433                 :            :                 return 0;
    3434                 :            : 
    3435                 :            :         /* first, try per-file preallocation */
    3436                 :            :         rcu_read_lock();
    3437         [ +  + ]:      61040 :         list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
    3438                 :            : 
    3439                 :            :                 /* all fields in this condition don't change,
    3440                 :            :                  * so we can skip locking for them */
    3441   [ +  -  -  + ]:       4620 :                 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
    3442                 :       2310 :                     ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
    3443                 :       2310 :                                                EXT4_C2B(sbi, pa->pa_len)))
    3444                 :          0 :                         continue;
    3445                 :            : 
    3446                 :            :                 /* non-extent files can't have physical blocks past 2^32 */
    3447   [ -  +  #  # ]:       4620 :                 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
    3448                 :          0 :                     (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
    3449                 :            :                      EXT4_MAX_BLOCK_FILE_PHYS))
    3450                 :          0 :                         continue;
    3451                 :            : 
    3452                 :            :                 /* found preallocated blocks, use them */
    3453                 :            :                 spin_lock(&pa->pa_lock);
    3454   [ +  -  +  - ]:       2310 :                 if (pa->pa_deleted == 0 && pa->pa_free) {
    3455                 :       2310 :                         atomic_inc(&pa->pa_count);
    3456                 :       2310 :                         ext4_mb_use_inode_pa(ac, pa);
    3457                 :            :                         spin_unlock(&pa->pa_lock);
    3458                 :       2310 :                         ac->ac_criteria = 10;
    3459                 :            :                         rcu_read_unlock();
    3460                 :       2310 :                         return 1;
    3461                 :            :                 }
    3462                 :            :                 spin_unlock(&pa->pa_lock);
    3463                 :            :         }
    3464                 :            :         rcu_read_unlock();
    3465                 :            : 
    3466                 :            :         /* can we use group allocation? */
    3467         [ +  + ]:      58730 :         if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
    3468                 :            :                 return 0;
    3469                 :            : 
    3470                 :            :         /* inode may have no locality group for some reason */
    3471                 :       3544 :         lg = ac->ac_lg;
    3472         [ +  - ]:       3544 :         if (lg == NULL)
    3473                 :            :                 return 0;
    3474                 :       7088 :         order  = fls(ac->ac_o_ex.fe_len) - 1;
    3475         [ -  + ]:       3544 :         if (order > PREALLOC_TB_SIZE - 1)
    3476                 :            :                 /* The max size of hash table is PREALLOC_TB_SIZE */
    3477                 :            :                 order = PREALLOC_TB_SIZE - 1;
    3478                 :            : 
    3479                 :       3544 :         goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
    3480                 :            :         /*
    3481                 :            :          * search for the prealloc space that is having
    3482                 :            :          * minimal distance from the goal block.
    3483                 :            :          */
    3484         [ +  + ]:      37776 :         for (i = order; i < PREALLOC_TB_SIZE; i++) {
    3485                 :            :                 rcu_read_lock();
    3486         [ +  + ]:      70858 :                 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
    3487                 :            :                                         pa_inode_list) {
    3488                 :            :                         spin_lock(&pa->pa_lock);
    3489   [ +  -  +  - ]:       4788 :                         if (pa->pa_deleted == 0 &&
    3490                 :       2394 :                                         pa->pa_free >= ac->ac_o_ex.fe_len) {
    3491                 :            : 
    3492                 :       2394 :                                 cpa = ext4_mb_check_group_pa(goal_block,
    3493                 :            :                                                                 pa, cpa);
    3494                 :            :                         }
    3495                 :            :                         spin_unlock(&pa->pa_lock);
    3496                 :            :                 }
    3497                 :            :                 rcu_read_unlock();
    3498                 :            :         }
    3499         [ +  + ]:       3544 :         if (cpa) {
    3500                 :       2394 :                 ext4_mb_use_group_pa(ac, cpa);
    3501                 :       2394 :                 ac->ac_criteria = 20;
    3502                 :       2394 :                 return 1;
    3503                 :            :         }
    3504                 :            :         return 0;
    3505                 :            : }
    3506                 :            : 
    3507                 :            : /*
    3508                 :            :  * the function goes through all block freed in the group
    3509                 :            :  * but not yet committed and marks them used in in-core bitmap.
    3510                 :            :  * buddy must be generated from this bitmap
    3511                 :            :  * Need to be called with the ext4 group lock held
    3512                 :            :  */
    3513                 :       5112 : static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
    3514                 :            :                                                 ext4_group_t group)
    3515                 :            : {
    3516                 :            :         struct rb_node *n;
    3517                 :            :         struct ext4_group_info *grp;
    3518                 :            :         struct ext4_free_data *entry;
    3519                 :            : 
    3520                 :       5112 :         grp = ext4_get_group_info(sb, group);
    3521                 :       5112 :         n = rb_first(&(grp->bb_free_root));
    3522                 :            : 
    3523         [ -  + ]:      10224 :         while (n) {
    3524                 :            :                 entry = rb_entry(n, struct ext4_free_data, efd_node);
    3525                 :          0 :                 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
    3526                 :          0 :                 n = rb_next(n);
    3527                 :            :         }
    3528                 :       5112 :         return;
    3529                 :            : }
    3530                 :            : 
    3531                 :            : /*
    3532                 :            :  * the function goes through all preallocation in this group and marks them
    3533                 :            :  * used in in-core bitmap. buddy must be generated from this bitmap
    3534                 :            :  * Need to be called with ext4 group lock held
    3535                 :            :  */
    3536                 :            : static noinline_for_stack
    3537                 :       5112 : void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
    3538                 :            :                                         ext4_group_t group)
    3539                 :            : {
    3540                 :       5112 :         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
    3541                 :            :         struct ext4_prealloc_space *pa;
    3542                 :            :         struct list_head *cur;
    3543                 :            :         ext4_group_t groupnr;
    3544                 :            :         ext4_grpblk_t start;
    3545                 :            :         int preallocated = 0;
    3546                 :            :         int len;
    3547                 :            : 
    3548                 :            :         /* all form of preallocation discards first load group,
    3549                 :            :          * so the only competing code is preallocation use.
    3550                 :            :          * we don't need any locking here
    3551                 :            :          * notice we do NOT ignore preallocations with pa_deleted
    3552                 :            :          * otherwise we could leave used blocks available for
    3553                 :            :          * allocation in buddy when concurrent ext4_mb_put_pa()
    3554                 :            :          * is dropping preallocation
    3555                 :            :          */
    3556         [ -  + ]:       5112 :         list_for_each(cur, &grp->bb_prealloc_list) {
    3557                 :            :                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
    3558                 :            :                 spin_lock(&pa->pa_lock);
    3559                 :          0 :                 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
    3560                 :            :                                              &groupnr, &start);
    3561                 :          0 :                 len = pa->pa_len;
    3562                 :            :                 spin_unlock(&pa->pa_lock);
    3563         [ #  # ]:          0 :                 if (unlikely(len == 0))
    3564                 :          0 :                         continue;
    3565         [ #  # ]:          0 :                 BUG_ON(groupnr != group);
    3566                 :          0 :                 ext4_set_bits(bitmap, start, len);
    3567                 :            :                 preallocated += len;
    3568                 :            :         }
    3569                 :            :         mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
    3570                 :       5112 : }
    3571                 :            : 
    3572                 :        266 : static void ext4_mb_pa_callback(struct rcu_head *head)
    3573                 :            : {
    3574                 :            :         struct ext4_prealloc_space *pa;
    3575                 :        266 :         pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
    3576                 :            : 
    3577         [ -  + ]:        266 :         BUG_ON(atomic_read(&pa->pa_count));
    3578         [ -  + ]:        266 :         BUG_ON(pa->pa_deleted == 0);
    3579                 :        266 :         kmem_cache_free(ext4_pspace_cachep, pa);
    3580                 :        266 : }
    3581                 :            : 
    3582                 :            : /*
    3583                 :            :  * drops a reference to preallocated space descriptor
    3584                 :            :  * if this was the last reference and the space is consumed
    3585                 :            :  */
    3586                 :       7976 : static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
    3587                 :            :                         struct super_block *sb, struct ext4_prealloc_space *pa)
    3588                 :            : {
    3589                 :            :         ext4_group_t grp;
    3590                 :            :         ext4_fsblk_t grp_blk;
    3591                 :            : 
    3592                 :            :         /* in this short window concurrent discard can set pa_deleted */
    3593                 :            :         spin_lock(&pa->pa_lock);
    3594   [ +  -  +  + ]:      15952 :         if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
    3595                 :            :                 spin_unlock(&pa->pa_lock);
    3596                 :            :                 return;
    3597                 :            :         }
    3598                 :            : 
    3599         [ -  + ]:         24 :         if (pa->pa_deleted == 1) {
    3600                 :            :                 spin_unlock(&pa->pa_lock);
    3601                 :            :                 return;
    3602                 :            :         }
    3603                 :            : 
    3604                 :         24 :         pa->pa_deleted = 1;
    3605                 :            :         spin_unlock(&pa->pa_lock);
    3606                 :            : 
    3607                 :         24 :         grp_blk = pa->pa_pstart;
    3608                 :            :         /*
    3609                 :            :          * If doing group-based preallocation, pa_pstart may be in the
    3610                 :            :          * next group when pa is used up
    3611                 :            :          */
    3612         [ -  + ]:         24 :         if (pa->pa_type == MB_GROUP_PA)
    3613                 :          0 :                 grp_blk--;
    3614                 :            : 
    3615                 :         24 :         grp = ext4_get_group_number(sb, grp_blk);
    3616                 :            : 
    3617                 :            :         /*
    3618                 :            :          * possible race:
    3619                 :            :          *
    3620                 :            :          *  P1 (buddy init)                     P2 (regular allocation)
    3621                 :            :          *                                      find block B in PA
    3622                 :            :          *  copy on-disk bitmap to buddy
    3623                 :            :          *                                      mark B in on-disk bitmap
    3624                 :            :          *                                      drop PA from group
    3625                 :            :          *  mark all PAs in buddy
    3626                 :            :          *
    3627                 :            :          * thus, P1 initializes buddy with B available. to prevent this
    3628                 :            :          * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
    3629                 :            :          * against that pair
    3630                 :            :          */
    3631                 :         24 :         ext4_lock_group(sb, grp);
    3632                 :            :         list_del(&pa->pa_group_list);
    3633                 :            :         ext4_unlock_group(sb, grp);
    3634                 :            : 
    3635                 :         24 :         spin_lock(pa->pa_obj_lock);
    3636                 :            :         list_del_rcu(&pa->pa_inode_list);
    3637                 :         24 :         spin_unlock(pa->pa_obj_lock);
    3638                 :            : 
    3639                 :         24 :         call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
    3640                 :            : }
    3641                 :            : 
    3642                 :            : /*
    3643                 :            :  * creates new preallocated space for given inode
    3644                 :            :  */
    3645                 :            : static noinline_for_stack int
    3646                 :       2122 : ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
    3647                 :            : {
    3648                 :       2122 :         struct super_block *sb = ac->ac_sb;
    3649                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    3650                 :            :         struct ext4_prealloc_space *pa;
    3651                 :            :         struct ext4_group_info *grp;
    3652                 :            :         struct ext4_inode_info *ei;
    3653                 :            : 
    3654                 :            :         /* preallocate only when found space is larger then requested */
    3655         [ -  + ]:       2122 :         BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
    3656         [ -  + ]:       2122 :         BUG_ON(ac->ac_status != AC_STATUS_FOUND);
    3657         [ -  + ]:       2122 :         BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
    3658                 :            : 
    3659                 :       2122 :         pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
    3660         [ +  - ]:       2122 :         if (pa == NULL)
    3661                 :            :                 return -ENOMEM;
    3662                 :            : 
    3663         [ -  + ]:       2122 :         if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
    3664                 :            :                 int winl;
    3665                 :            :                 int wins;
    3666                 :            :                 int win;
    3667                 :            :                 int offs;
    3668                 :            : 
    3669                 :            :                 /* we can't allocate as much as normalizer wants.
    3670                 :            :                  * so, found space must get proper lstart
    3671                 :            :                  * to cover original request */
    3672         [ #  # ]:          0 :                 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
    3673         [ #  # ]:          0 :                 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
    3674                 :            : 
    3675                 :            :                 /* we're limited by original request in that
    3676                 :            :                  * logical block must be covered any way
    3677                 :            :                  * winl is window we can move our chunk within */
    3678                 :          0 :                 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
    3679                 :            : 
    3680                 :            :                 /* also, we should cover whole original request */
    3681                 :          0 :                 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
    3682                 :            : 
    3683                 :            :                 /* the smallest one defines real window */
    3684                 :          0 :                 win = min(winl, wins);
    3685                 :            : 
    3686                 :          0 :                 offs = ac->ac_o_ex.fe_logical %
    3687                 :          0 :                         EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    3688         [ #  # ]:          0 :                 if (offs && offs < win)
    3689                 :            :                         win = offs;
    3690                 :            : 
    3691                 :          0 :                 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
    3692                 :          0 :                         EXT4_NUM_B2C(sbi, win);
    3693         [ #  # ]:          0 :                 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
    3694         [ #  # ]:          0 :                 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
    3695                 :            :         }
    3696                 :            : 
    3697                 :            :         /* preallocation can change ac_b_ex, thus we store actually
    3698                 :            :          * allocated blocks for history */
    3699                 :       2122 :         ac->ac_f_ex = ac->ac_b_ex;
    3700                 :            : 
    3701                 :       2122 :         pa->pa_lstart = ac->ac_b_ex.fe_logical;
    3702                 :       2122 :         pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    3703                 :       2122 :         pa->pa_len = ac->ac_b_ex.fe_len;
    3704                 :       2122 :         pa->pa_free = pa->pa_len;
    3705                 :            :         atomic_set(&pa->pa_count, 1);
    3706                 :       2122 :         spin_lock_init(&pa->pa_lock);
    3707                 :       2122 :         INIT_LIST_HEAD(&pa->pa_inode_list);
    3708                 :       2122 :         INIT_LIST_HEAD(&pa->pa_group_list);
    3709                 :       2122 :         pa->pa_deleted = 0;
    3710                 :       2122 :         pa->pa_type = MB_INODE_PA;
    3711                 :            : 
    3712                 :            :         mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
    3713                 :            :                         pa->pa_pstart, pa->pa_len, pa->pa_lstart);
    3714                 :       2122 :         trace_ext4_mb_new_inode_pa(ac, pa);
    3715                 :            : 
    3716                 :       2122 :         ext4_mb_use_inode_pa(ac, pa);
    3717                 :       2122 :         atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
    3718                 :            : 
    3719                 :       2122 :         ei = EXT4_I(ac->ac_inode);
    3720                 :       2122 :         grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
    3721                 :            : 
    3722                 :       2122 :         pa->pa_obj_lock = &ei->i_prealloc_lock;
    3723                 :       2122 :         pa->pa_inode = ac->ac_inode;
    3724                 :            : 
    3725                 :       2122 :         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    3726                 :       2122 :         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
    3727                 :       2122 :         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    3728                 :            : 
    3729                 :       2122 :         spin_lock(pa->pa_obj_lock);
    3730                 :       2122 :         list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
    3731                 :       2122 :         spin_unlock(pa->pa_obj_lock);
    3732                 :            : 
    3733                 :       2122 :         return 0;
    3734                 :            : }
    3735                 :            : 
    3736                 :            : /*
    3737                 :            :  * Creates a new preallocated space (MB_GROUP_PA) for the locality group
                         :            :  * that the allocation context @ac is attached to (ac->ac_lg).
                         :            :  *
                         :            :  * The pa covers the best found extent ac->ac_b_ex and is linked onto the
                         :            :  * block group's bb_prealloc_list under the group lock. It is not put on
                         :            :  * any locality-group list here (see the note before the final return).
                         :            :  *
                         :            :  * Returns 0 on success, or -ENOMEM if the pa cannot be allocated from
                         :            :  * ext4_pspace_cachep.
    3738                 :            :  */
    3739                 :            : static noinline_for_stack int
    3740                 :       1150 : ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
    3741                 :            : {
    3742                 :       1150 :         struct super_block *sb = ac->ac_sb;
    3743                 :            :         struct ext4_locality_group *lg;
    3744                 :            :         struct ext4_prealloc_space *pa;
    3745                 :            :         struct ext4_group_info *grp;
    3746                 :            : 
    3747                 :            :         /* preallocate only when found space is strictly larger than requested */
    3748         [ -  + ]:       1150 :         BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
    3749         [ -  + ]:       1150 :         BUG_ON(ac->ac_status != AC_STATUS_FOUND);
    3750         [ -  + ]:       1150 :         BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
    3751                 :            : 
    3752         [ -  + ]:       1150 :         BUG_ON(ext4_pspace_cachep == NULL);
    3753                 :       1150 :         pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
    3754         [ +  - ]:       1150 :         if (pa == NULL)
    3755                 :            :                 return -ENOMEM;
    3756                 :            : 
    3757                 :            :         /* preallocation can change ac_b_ex, thus we store actually
    3758                 :            :          * allocated blocks for history */
    3759                 :       1150 :         ac->ac_f_ex = ac->ac_b_ex;
    3760                 :            : 
    3761                 :       1150 :         pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    3762                 :       1150 :         pa->pa_lstart = pa->pa_pstart; /* group pa: logical start is set to the physical start */
    3763                 :       1150 :         pa->pa_len = ac->ac_b_ex.fe_len;
    3764                 :       1150 :         pa->pa_free = pa->pa_len; /* nothing consumed from the pa yet */
    3765                 :            :         atomic_set(&pa->pa_count, 1);
    3766                 :       1150 :         spin_lock_init(&pa->pa_lock);
    3767                 :       1150 :         INIT_LIST_HEAD(&pa->pa_inode_list);
    3768                 :       1150 :         INIT_LIST_HEAD(&pa->pa_group_list);
    3769                 :       1150 :         pa->pa_deleted = 0;
    3770                 :       1150 :         pa->pa_type = MB_GROUP_PA;
    3771                 :            : 
    3772                 :            :         mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
    3773                 :            :                         pa->pa_pstart, pa->pa_len, pa->pa_lstart);
    3774                 :       1150 :         trace_ext4_mb_new_group_pa(ac, pa);
    3775                 :            : 
    3776                 :       1150 :         ext4_mb_use_group_pa(ac, pa);
    3777                 :       2300 :         atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); /* accounted after ext4_mb_use_group_pa(), so uses the pa_free value left by it */
    3778                 :            : 
    3779                 :       1150 :         grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
    3780                 :       1150 :         lg = ac->ac_lg;
    3781         [ -  + ]:       1150 :         BUG_ON(lg == NULL);
    3782                 :            : 
    3783                 :       1150 :         pa->pa_obj_lock = &lg->lg_prealloc_lock; /* lock taken by the discard paths around list_del_rcu(&pa->pa_inode_list) */
    3784                 :       1150 :         pa->pa_inode = NULL; /* a group pa is not tied to a single inode */
    3785                 :            : 
    3786                 :       1150 :         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    3787                 :       1150 :         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
    3788                 :       1150 :         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    3789                 :            : 
    3790                 :            :         /*
    3791                 :            :          * We will later add the new pa to the right bucket
    3792                 :            :          * after updating the pa_free in ext4_mb_release_context
    3793                 :            :          */
    3794                 :       1150 :         return 0;
    3795                 :            : }
    3796                 :            : /*
                         :            :  * Creates a new preallocation for @ac: a locality-group pa when
                         :            :  * EXT4_MB_HINT_GROUP_ALLOC is set in ac->ac_flags, otherwise a per-inode
                         :            :  * pa. Returns whatever the selected helper returns.
                         :            :  */
    3797                 :       3272 : static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
    3798                 :            : {
    3799                 :            :         int err;
    3800                 :            : 
    3801         [ +  + ]:       3272 :         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
    3802                 :       1150 :                 err = ext4_mb_new_group_pa(ac);
    3803                 :            :         else
    3804                 :       2122 :                 err = ext4_mb_new_inode_pa(ac);
    3805                 :       3272 :         return err;
    3806                 :            : }
    3807                 :            : 
    3808                 :            : /*
    3809                 :            :  * Finds all unused blocks in the on-disk bitmap and frees them in the
    3810                 :            :  * in-core bitmap and buddy.
    3811                 :            :  * @pa must be unlinked from inode and group lists, so that
    3812                 :            :  * nobody else can find/use it.
    3813                 :            :  * The caller MUST hold group/inode locks.
                         :            :  *
                         :            :  * @e4b:       buddy to free into; its bd_group must match the group of
                         :            :  *             pa->pa_pstart unless pa->pa_len is 0 (BUG_ON below)
                         :            :  * @bitmap_bh: buffer whose b_data is scanned as the block bitmap
                         :            :  * @pa:        preallocation to release; pa_deleted must already be set
                         :            :  *
                         :            :  * The number of freed bits is added to sbi->s_mb_discarded. If it differs
                         :            :  * from pa->pa_free an error is reported, but the bitmap-derived count is
                         :            :  * still the one accounted. Always returns 0.
                         :            :  *
    3814                 :            :  * TODO: optimize the case when there are no in-core structures yet
    3815                 :            :  */
    3816                 :            : static noinline_for_stack int
    3817                 :        242 : ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
    3818                 :            :                         struct ext4_prealloc_space *pa)
    3819                 :            : {
    3820                 :        242 :         struct super_block *sb = e4b->bd_sb;
    3821                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    3822                 :            :         unsigned int end;
    3823                 :            :         unsigned int next;
    3824                 :            :         ext4_group_t group;
    3825                 :            :         ext4_grpblk_t bit;
    3826                 :            :         unsigned long long grp_blk_start;
    3827                 :            :         int free = 0;
    3828                 :            : 
    3829         [ -  + ]:        242 :         BUG_ON(pa->pa_deleted == 0);
    3830                 :        242 :         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
    3831                 :        242 :         grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit); /* only used for the release tracepoint below */
    3832   [ -  +  #  # ]:        242 :         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
    3833                 :        242 :         end = bit + pa->pa_len; /* scan window is [bit, end) */
    3834                 :            : 
    3835         [ +  + ]:        726 :         while (bit < end) {
    3836                 :        242 :                 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
    3837         [ +  - ]:        242 :                 if (bit >= end)
    3838                 :            :                         break;
    3839                 :        242 :                 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
    3840                 :            :                 mb_debug(1, "    free preallocated %u/%u in group %u\n",
    3841                 :            :                          (unsigned) ext4_group_first_block_no(sb, group) + bit,
    3842                 :            :                          (unsigned) next - bit, (unsigned) group);
    3843                 :        242 :                 free += next - bit;
    3844                 :            : 
    3845                 :        242 :                 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
    3846                 :        242 :                 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
    3847                 :        242 :                                                     EXT4_C2B(sbi, bit)),
    3848                 :            :                                                next - bit);
    3849                 :        242 :                 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
    3850                 :        242 :                 bit = next + 1; /* presumably `next` is a set bit or == end, so it can be skipped - TODO confirm against mb_find_next_bit() */
    3851                 :            :         }
    3852         [ -  + ]:        242 :         if (free != pa->pa_free) {
    3853                 :          0 :                 ext4_msg(e4b->bd_sb, KERN_CRIT,
    3854                 :            :                          "pa %p: logic %lu, phys. %lu, len %lu",
    3855                 :            :                          pa, (unsigned long) pa->pa_lstart,
    3856                 :            :                          (unsigned long) pa->pa_pstart,
    3857                 :            :                          (unsigned long) pa->pa_len);
    3858                 :          0 :                 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
    3859                 :            :                                         free, pa->pa_free);
    3860                 :            :                 /*
    3861                 :            :                  * pa is already deleted so we use the value obtained
    3862                 :            :                  * from the bitmap and continue.
    3863                 :            :                  */
    3864                 :            :         }
    3865                 :        242 :         atomic_add(free, &sbi->s_mb_discarded);
    3866                 :            : 
    3867                 :        242 :         return 0;
    3868                 :            : }
    3869                 :            : /*
                         :            :  * Releases a preallocation as one contiguous range: pa->pa_len units
                         :            :  * starting at the group offset of pa->pa_pstart are handed to
                         :            :  * mb_free_blocks() without scanning any bitmap, and pa->pa_len is added
                         :            :  * to s_mb_discarded. @pa must already have pa_deleted set, and @e4b must
                         :            :  * be the buddy of the pa's group unless pa->pa_len is 0.
                         :            :  * Always returns 0.
                         :            :  */
    3870                 :            : static noinline_for_stack int
    3871                 :          0 : ext4_mb_release_group_pa(struct ext4_buddy *e4b,
    3872                 :            :                                 struct ext4_prealloc_space *pa)
    3873                 :            : {
    3874                 :          0 :         struct super_block *sb = e4b->bd_sb;
    3875                 :            :         ext4_group_t group;
    3876                 :            :         ext4_grpblk_t bit;
    3877                 :            : 
    3878                 :          0 :         trace_ext4_mb_release_group_pa(sb, pa);
    3879         [ #  # ]:          0 :         BUG_ON(pa->pa_deleted == 0);
    3880                 :          0 :         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
    3881   [ #  #  #  # ]:          0 :         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
    3882                 :          0 :         mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
    3883                 :          0 :         atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
    3884                 :          0 :         trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
    3885                 :            : 
    3886                 :          0 :         return 0;
    3887                 :            : }
    3888                 :            : 
    3889                 :            : /*
    3890                 :            :  * Releases all preallocations in the given group that are not in use.
    3891                 :            :  *
    3892                 :            :  * first, we need to decide discard policy:
    3893                 :            :  * - when do we discard
    3894                 :            :  *   1) ENOSPC
    3895                 :            :  * - how many do we discard
    3896                 :            :  *   1) how many requested
                         :            :  *
                         :            :  * @sb:     superblock of the filesystem
                         :            :  * @group:  block group whose bb_prealloc_list is scanned
                         :            :  * @needed: amount the caller wants freed; 0 is replaced by
                         :            :  *          EXT4_CLUSTERS_PER_GROUP(sb) + 1
                         :            :  *
                         :            :  * Returns the sum of pa_free over the preallocations selected for
                         :            :  * discard. Returns 0 if the group has no preallocations, or if the block
                         :            :  * bitmap or the buddy cannot be loaded (those errors are logged here and
                         :            :  * not propagated).
                         :            :  *
                         :            :  * NOTE(review): the scan is repeated for as long as free < needed and at
                         :            :  * least one pa had a non-zero pa_count; there is no bound on the retries.
    3897                 :            :  */
    3898                 :            : static noinline_for_stack int
    3899                 :          0 : ext4_mb_discard_group_preallocations(struct super_block *sb,
    3900                 :            :                                         ext4_group_t group, int needed)
    3901                 :            : {
    3902                 :          0 :         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
    3903                 :            :         struct buffer_head *bitmap_bh = NULL;
    3904                 :            :         struct ext4_prealloc_space *pa, *tmp;
    3905                 :            :         struct list_head list;
    3906                 :            :         struct ext4_buddy e4b;
    3907                 :            :         int err;
    3908                 :            :         int busy = 0;
    3909                 :            :         int free = 0;
    3910                 :            : 
    3911                 :            :         mb_debug(1, "discard preallocation for group %u\n", group);
    3912                 :            : 
    3913         [ #  # ]:          0 :         if (list_empty(&grp->bb_prealloc_list)) /* checked without the group lock */
    3914                 :            :                 return 0;
    3915                 :            : 
    3916                 :          0 :         bitmap_bh = ext4_read_block_bitmap(sb, group);
    3917         [ #  # ]:          0 :         if (IS_ERR(bitmap_bh)) {
    3918                 :            :                 err = PTR_ERR(bitmap_bh);
    3919                 :          0 :                 ext4_error(sb, "Error %d reading block bitmap for %u",
    3920                 :            :                            err, group);
    3921                 :          0 :                 return 0; /* error is reported above; caller only sees "nothing freed" */
    3922                 :            :         }
    3923                 :            : 
    3924                 :            :         err = ext4_mb_load_buddy(sb, group, &e4b);
    3925         [ #  # ]:          0 :         if (err) {
    3926                 :          0 :                 ext4_warning(sb, "Error %d loading buddy information for %u",
    3927                 :            :                              err, group);
    3928                 :          0 :                 put_bh(bitmap_bh);
    3929                 :          0 :                 return 0; /* likewise: logged, not propagated */
    3930                 :            :         }
    3931                 :            : 
    3932         [ #  # ]:          0 :         if (needed == 0)
    3933                 :          0 :                 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
    3934                 :            : 
    3935                 :            :         INIT_LIST_HEAD(&list);
    3936                 :            : repeat:
    3937                 :          0 :         ext4_lock_group(sb, group);
    3938         [ #  # ]:          0 :         list_for_each_entry_safe(pa, tmp,
    3939                 :            :                                 &grp->bb_prealloc_list, pa_group_list) {
    3940                 :            :                 spin_lock(&pa->pa_lock);
    3941         [ #  # ]:          0 :                 if (atomic_read(&pa->pa_count)) { /* pa is in use: leave it, remember to retry */
    3942                 :            :                         spin_unlock(&pa->pa_lock);
    3943                 :            :                         busy = 1;
    3944                 :          0 :                         continue;
    3945                 :            :                 }
    3946         [ #  # ]:          0 :                 if (pa->pa_deleted) { /* already marked deleted elsewhere: not ours to free */
    3947                 :            :                         spin_unlock(&pa->pa_lock);
    3948                 :          0 :                         continue;
    3949                 :            :                 }
    3950                 :            : 
    3951                 :            :                 /* seems this one can be freed ... */
    3952                 :          0 :                 pa->pa_deleted = 1;
    3953                 :            : 
    3954                 :            :                 /* we can trust pa_free ... */
    3955                 :          0 :                 free += pa->pa_free;
    3956                 :            : 
    3957                 :            :                 spin_unlock(&pa->pa_lock);
    3958                 :            : 
    3959                 :            :                 list_del(&pa->pa_group_list);
    3960                 :          0 :                 list_add(&pa->u.pa_tmp_list, &list); /* collected on the local list, released below */
    3961                 :            :         }
    3962                 :            : 
    3963                 :            :         /* if we still need more blocks and some PAs were used, try again */
    3964         [ #  # ]:          0 :         if (free < needed && busy) {
    3965                 :            :                 busy = 0;
    3966                 :            :                 ext4_unlock_group(sb, group);
    3967                 :          0 :                 cond_resched();
    3968                 :          0 :                 goto repeat;
    3969                 :            :         }
    3970                 :            : 
    3971                 :            :         /* found anything to free? */
    3972         [ #  # ]:          0 :         if (list_empty(&list)) {
    3973         [ #  # ]:          0 :                 BUG_ON(free != 0);
    3974                 :            :                 goto out;
    3975                 :            :         }
    3976                 :            : 
    3977                 :            :         /* now free all selected PAs (the group lock is still held here) */
    3978         [ #  # ]:          0 :         list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
    3979                 :            : 
    3980                 :            :                 /* remove from object (inode or locality group) */
    3981                 :          0 :                 spin_lock(pa->pa_obj_lock);
    3982                 :            :                 list_del_rcu(&pa->pa_inode_list);
    3983                 :          0 :                 spin_unlock(pa->pa_obj_lock);
    3984                 :            : 
    3985         [ #  # ]:          0 :                 if (pa->pa_type == MB_GROUP_PA)
    3986                 :          0 :                         ext4_mb_release_group_pa(&e4b, pa);
    3987                 :            :                 else
    3988                 :          0 :                         ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
    3989                 :            : 
    3990                 :            :                 list_del(&pa->u.pa_tmp_list);
    3991                 :          0 :                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); /* pa is handed to ext4_mb_pa_callback() after an RCU grace period */
    3992                 :            :         }
    3993                 :            : 
    3994                 :            : out:
    3995                 :            :         ext4_unlock_group(sb, group);
    3996                 :          0 :         ext4_mb_unload_buddy(&e4b);
    3997                 :          0 :         put_bh(bitmap_bh);
    3998                 :          0 :         return free;
    3999                 :            : }
    4000                 :            : 
    4001                 :            : /*
    4002                 :            :  * Releases all non-used preallocated blocks for the given inode.
    4003                 :            :  *
    4004                 :            :  * It's important to discard preallocations under i_data_sem.
    4005                 :            :  * We don't want another block to be served from the prealloc
    4006                 :            :  * space when we are discarding the inode prealloc space.
    4007                 :            :  *
    4008                 :            :  * FIXME!! Make sure it is valid at all the call sites
                         :            :  *
                         :            :  * Does nothing for inodes that are not regular files. Works in two
                         :            :  * phases: first every pa on ei->i_prealloc_list is marked deleted and
                         :            :  * moved to a local list under i_prealloc_lock, then each collected pa is
                         :            :  * unlinked from its group list and released under the group lock.
                         :            :  *
                         :            :  * NOTE(review): when loading the buddy or reading the block bitmap fails
                         :            :  * in the second phase, the loop continues without list_del()/call_rcu()
                         :            :  * for that pa, so it stays on the on-stack list - confirm this is
                         :            :  * intended.
    4009                 :            :  */
    4010                 :      74326 : void ext4_discard_preallocations(struct inode *inode)
    4011                 :            : {
    4012                 :            :         struct ext4_inode_info *ei = EXT4_I(inode);
    4013                 :      74326 :         struct super_block *sb = inode->i_sb;
    4014                 :            :         struct buffer_head *bitmap_bh = NULL;
    4015                 :            :         struct ext4_prealloc_space *pa, *tmp;
    4016                 :            :         ext4_group_t group = 0;
    4017                 :            :         struct list_head list;
    4018                 :            :         struct ext4_buddy e4b;
    4019                 :            :         int err;
    4020                 :            : 
    4021         [ +  + ]:      74326 :         if (!S_ISREG(inode->i_mode)) {
    4022                 :            :                 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
    4023                 :       6516 :                 return;
    4024                 :            :         }
    4025                 :            : 
    4026                 :            :         mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
    4027                 :      67810 :         trace_ext4_discard_preallocations(inode);
    4028                 :            : 
    4029                 :            :         INIT_LIST_HEAD(&list);
    4030                 :            : 
    4031                 :            : repeat:
    4032                 :            :         /* first, collect all pa's in the inode */
    4033                 :            :         spin_lock(&ei->i_prealloc_lock);
    4034         [ +  + ]:     136104 :         while (!list_empty(&ei->i_prealloc_list)) {
    4035                 :        242 :                 pa = list_entry(ei->i_prealloc_list.next,
    4036                 :            :                                 struct ext4_prealloc_space, pa_inode_list);
    4037         [ -  + ]:        242 :                 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
    4038                 :            :                 spin_lock(&pa->pa_lock);
    4039         [ -  + ]:        242 :                 if (atomic_read(&pa->pa_count)) {
    4040                 :            :                         /* this shouldn't happen often - nobody should
    4041                 :            :                          * use preallocation while we're discarding it */
    4042                 :            :                         spin_unlock(&pa->pa_lock);
    4043                 :            :                         spin_unlock(&ei->i_prealloc_lock);
    4044                 :          0 :                         ext4_msg(sb, KERN_ERR,
    4045                 :            :                                  "uh-oh! used pa while discarding");
    4046                 :          0 :                         WARN_ON(1);
    4047                 :          0 :                         schedule_timeout_uninterruptible(HZ); /* back off with both locks dropped, then rescan from the list head */
    4048                 :          0 :                         goto repeat;
    4049                 :            : 
    4050                 :            :                 }
    4051         [ +  - ]:        242 :                 if (pa->pa_deleted == 0) {
    4052                 :        242 :                         pa->pa_deleted = 1;
    4053                 :            :                         spin_unlock(&pa->pa_lock);
    4054                 :            :                         list_del_rcu(&pa->pa_inode_list);
    4055                 :        242 :                         list_add(&pa->u.pa_tmp_list, &list); /* collected here, released in the second loop */
    4056                 :        242 :                         continue;
    4057                 :            :                 }
    4058                 :            : 
    4059                 :            :                 /* someone is deleting pa right now */
    4060                 :            :                 spin_unlock(&pa->pa_lock);
    4061                 :            :                 spin_unlock(&ei->i_prealloc_lock);
    4062                 :            : 
    4063                 :            :                 /* we have to wait here because pa_deleted
    4064                 :            :                  * doesn't mean pa is already unlinked from
    4065                 :            :                  * the list. as we might be called from
    4066                 :            :                  * ->clear_inode() the inode will get freed
    4067                 :            :                  * and concurrent thread which is unlinking
    4068                 :            :                  * pa from inode's list may access already
    4069                 :            :                  * freed memory, bad-bad-bad */
    4070                 :            : 
    4071                 :            :                 /* XXX: if this happens too often, we can
    4072                 :            :                  * add a flag to force wait only in case
    4073                 :            :                  * of ->clear_inode(), but not in case of
    4074                 :            :                  * regular truncate */
    4075                 :          0 :                 schedule_timeout_uninterruptible(HZ);
    4076                 :          0 :                 goto repeat;
    4077                 :            :         }
    4078                 :            :         spin_unlock(&ei->i_prealloc_lock);
    4079                 :            : 
    4080         [ +  + ]:      68052 :         list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
    4081         [ -  + ]:        242 :                 BUG_ON(pa->pa_type != MB_INODE_PA);
    4082                 :        242 :                 group = ext4_get_group_number(sb, pa->pa_pstart);
    4083                 :            : 
    4084                 :        242 :                 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
    4085                 :            :                                              GFP_NOFS|__GFP_NOFAIL);
    4086         [ -  + ]:        242 :                 if (err) {
    4087                 :          0 :                         ext4_error(sb, "Error %d loading buddy information for %u",
    4088                 :            :                                    err, group);
    4089                 :          0 :                         continue; /* NOTE(review): pa is neither unlinked nor passed to call_rcu() on this path */
    4090                 :            :                 }
    4091                 :            : 
    4092                 :        242 :                 bitmap_bh = ext4_read_block_bitmap(sb, group);
    4093         [ -  + ]:        242 :                 if (IS_ERR(bitmap_bh)) {
    4094                 :            :                         err = PTR_ERR(bitmap_bh);
    4095                 :          0 :                         ext4_error(sb, "Error %d reading block bitmap for %u",
    4096                 :            :                                         err, group);
    4097                 :          0 :                         ext4_mb_unload_buddy(&e4b);
    4098                 :          0 :                         continue; /* NOTE(review): same as above - pa is skipped without being released */
    4099                 :            :                 }
    4100                 :            : 
    4101                 :        242 :                 ext4_lock_group(sb, group);
    4102                 :            :                 list_del(&pa->pa_group_list);
    4103                 :        242 :                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
    4104                 :            :                 ext4_unlock_group(sb, group);
    4105                 :            : 
    4106                 :        242 :                 ext4_mb_unload_buddy(&e4b);
    4107                 :        242 :                 put_bh(bitmap_bh);
    4108                 :            : 
    4109                 :            :                 list_del(&pa->u.pa_tmp_list);
    4110                 :        242 :                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); /* pa is handed to ext4_mb_pa_callback() after an RCU grace period */
    4111                 :            :         }
    4112                 :            : }
    4113                 :            : 
    4114                 :            : #ifdef CONFIG_EXT4_DEBUG
    4115                 :            : static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
    4116                 :            : {
    4117                 :            :         struct super_block *sb = ac->ac_sb;
    4118                 :            :         ext4_group_t ngroups, i;
    4119                 :            : 
    4120                 :            :         if (!ext4_mballoc_debug ||
    4121                 :            :             (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
    4122                 :            :                 return;
    4123                 :            : 
    4124                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
    4125                 :            :                         " Allocation context details:");
    4126                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
    4127                 :            :                         ac->ac_status, ac->ac_flags);
    4128                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
    4129                 :            :                         "goal %lu/%lu/%lu@%lu, "
    4130                 :            :                         "best %lu/%lu/%lu@%lu cr %d",
    4131                 :            :                         (unsigned long)ac->ac_o_ex.fe_group,
    4132                 :            :                         (unsigned long)ac->ac_o_ex.fe_start,
    4133                 :            :                         (unsigned long)ac->ac_o_ex.fe_len,
    4134                 :            :                         (unsigned long)ac->ac_o_ex.fe_logical,
    4135                 :            :                         (unsigned long)ac->ac_g_ex.fe_group,
    4136                 :            :                         (unsigned long)ac->ac_g_ex.fe_start,
    4137                 :            :                         (unsigned long)ac->ac_g_ex.fe_len,
    4138                 :            :                         (unsigned long)ac->ac_g_ex.fe_logical,
    4139                 :            :                         (unsigned long)ac->ac_b_ex.fe_group,
    4140                 :            :                         (unsigned long)ac->ac_b_ex.fe_start,
    4141                 :            :                         (unsigned long)ac->ac_b_ex.fe_len,
    4142                 :            :                         (unsigned long)ac->ac_b_ex.fe_logical,
    4143                 :            :                         (int)ac->ac_criteria);
    4144                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
    4145                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
    4146                 :            :         ngroups = ext4_get_groups_count(sb);
    4147                 :            :         for (i = 0; i < ngroups; i++) {
    4148                 :            :                 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
    4149                 :            :                 struct ext4_prealloc_space *pa;
    4150                 :            :                 ext4_grpblk_t start;
    4151                 :            :                 struct list_head *cur;
    4152                 :            :                 ext4_lock_group(sb, i);
    4153                 :            :                 list_for_each(cur, &grp->bb_prealloc_list) {
    4154                 :            :                         pa = list_entry(cur, struct ext4_prealloc_space,
    4155                 :            :                                         pa_group_list);
    4156                 :            :                         spin_lock(&pa->pa_lock);
    4157                 :            :                         ext4_get_group_no_and_offset(sb, pa->pa_pstart,
    4158                 :            :                                                      NULL, &start);
    4159                 :            :                         spin_unlock(&pa->pa_lock);
    4160                 :            :                         printk(KERN_ERR "PA:%u:%d:%u \n", i,
    4161                 :            :                                start, pa->pa_len);
    4162                 :            :                 }
    4163                 :            :                 ext4_unlock_group(sb, i);
    4164                 :            : 
    4165                 :            :                 if (grp->bb_free == 0)
    4166                 :            :                         continue;
    4167                 :            :                 printk(KERN_ERR "%u: %d/%d \n",
    4168                 :            :                        i, grp->bb_free, grp->bb_fragments);
    4169                 :            :         }
    4170                 :            :         printk(KERN_ERR "\n");
    4171                 :            : }
    4172                 :            : #else
    4173                 :            : static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
    4174                 :            : {
    4175                 :            :         return;
    4176                 :            : }
    4177                 :            : #endif
    4178                 :            : 
    4179                 :            : /*
    4180                 :            :  * We use locality group preallocation for small files. The size of the
    4181                 :            :  * file is determined by the current size or the resulting size after
    4182                 :            :  * allocation, whichever is larger.
    4183                 :            :  *
    4184                 :            :  * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
    4185                 :            :  */
    4186                 :     145960 : static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
    4187                 :            : {
    4188                 :     145960 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    4189                 :     145960 :         int bsbits = ac->ac_sb->s_blocksize_bits;
    4190                 :            :         loff_t size, isize;
    4191                 :            : 
    4192         [ +  + ]:     145960 :         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
    4193                 :            :                 return;
    4194                 :            : 
    4195         [ +  - ]:      61040 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
    4196                 :            :                 return;
    4197                 :            : 
    4198                 :      61040 :         size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
    4199                 :     122080 :         isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
    4200                 :            :                 >> bsbits;
    4201                 :            : 
    4202   [ +  +  +  -  :     182964 :         if ((size == isize) && !ext4_fs_is_busy(sbi) &&
                   +  + ]
    4203                 :      60962 :             !inode_is_open_for_write(ac->ac_inode)) {
    4204                 :      52998 :                 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
    4205                 :      52998 :                 return;
    4206                 :            :         }
    4207                 :            : 
    4208         [ -  + ]:       8042 :         if (sbi->s_mb_group_prealloc <= 0) {
    4209                 :          0 :                 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
    4210                 :          0 :                 return;
    4211                 :            :         }
    4212                 :            : 
    4213                 :            :         /* don't use group allocation for large files */
    4214                 :       8042 :         size = max(size, isize);
    4215         [ +  + ]:       8042 :         if (size > sbi->s_mb_stream_request) {
    4216                 :       4498 :                 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
    4217                 :       4498 :                 return;
    4218                 :            :         }
    4219                 :            : 
    4220         [ -  + ]:       3544 :         BUG_ON(ac->ac_lg != NULL);
    4221                 :            :         /*
    4222                 :            :          * locality group prealloc space are per cpu. The reason for having
    4223                 :            :          * per cpu locality group is to reduce the contention between block
    4224                 :            :          * request from multiple CPUs.
    4225                 :            :          */
    4226                 :       7088 :         ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
    4227                 :            : 
    4228                 :            :         /* we're going to use group allocation */
    4229                 :       3544 :         ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
    4230                 :            : 
    4231                 :            :         /* serialize all allocations in the group */
    4232                 :       3544 :         mutex_lock(&ac->ac_lg->lg_mutex);
    4233                 :            : }
    4234                 :            : 
    4235                 :            : static noinline_for_stack int
    4236                 :     145960 : ext4_mb_initialize_context(struct ext4_allocation_context *ac,
    4237                 :            :                                 struct ext4_allocation_request *ar)
    4238                 :            : {
    4239                 :     145960 :         struct super_block *sb = ar->inode->i_sb;
    4240                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4241                 :     145960 :         struct ext4_super_block *es = sbi->s_es;
    4242                 :            :         ext4_group_t group;
    4243                 :            :         unsigned int len;
    4244                 :            :         ext4_fsblk_t goal;
    4245                 :            :         ext4_grpblk_t block;
    4246                 :            : 
    4247                 :            :         /* we can't allocate > group size */
    4248                 :     145960 :         len = ar->len;
    4249                 :            : 
    4250                 :            :         /* just a dirty hack to filter too big requests  */
    4251         [ -  + ]:     145960 :         if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
    4252                 :            :                 len = EXT4_CLUSTERS_PER_GROUP(sb);
    4253                 :            : 
    4254                 :            :         /* start searching from the goal */
    4255                 :     145960 :         goal = ar->goal;
    4256   [ +  -  +  + ]:     291920 :         if (goal < le32_to_cpu(es->s_first_data_block) ||
    4257                 :            :                         goal >= ext4_blocks_count(es))
    4258                 :            :                 goal = le32_to_cpu(es->s_first_data_block);
    4259                 :     145960 :         ext4_get_group_no_and_offset(sb, goal, &group, &block);
    4260                 :            : 
    4261                 :            :         /* set up allocation goals */
    4262                 :     145960 :         ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
    4263                 :     145960 :         ac->ac_status = AC_STATUS_CONTINUE;
    4264                 :     145960 :         ac->ac_sb = sb;
    4265                 :     145960 :         ac->ac_inode = ar->inode;
    4266                 :     145960 :         ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
    4267                 :     145960 :         ac->ac_o_ex.fe_group = group;
    4268                 :     145960 :         ac->ac_o_ex.fe_start = block;
    4269                 :     145960 :         ac->ac_o_ex.fe_len = len;
    4270                 :     145960 :         ac->ac_g_ex = ac->ac_o_ex;
    4271                 :     145960 :         ac->ac_flags = ar->flags;
    4272                 :            : 
    4273                 :            :         /* we have to define the context: whether we'll work with a file
    4274                 :            :          * or a locality group. this is a policy, actually */
    4275                 :     145960 :         ext4_mb_group_or_file(ac);
    4276                 :            : 
    4277                 :            :         mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
    4278                 :            :                         "left: %u/%u, right %u/%u to %swritable\n",
    4279                 :            :                         (unsigned) ar->len, (unsigned) ar->logical,
    4280                 :            :                         (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
    4281                 :            :                         (unsigned) ar->lleft, (unsigned) ar->pleft,
    4282                 :            :                         (unsigned) ar->lright, (unsigned) ar->pright,
    4283                 :            :                         inode_is_open_for_write(ar->inode) ? "" : "non-");
    4284                 :     145960 :         return 0;
    4285                 :            : 
    4286                 :            : }
    4287                 :            : 
    4288                 :            : static noinline_for_stack void
    4289                 :          0 : ext4_mb_discard_lg_preallocations(struct super_block *sb,
    4290                 :            :                                         struct ext4_locality_group *lg,
    4291                 :            :                                         int order, int total_entries)
    4292                 :            : {
    4293                 :            :         ext4_group_t group = 0;
    4294                 :            :         struct ext4_buddy e4b;
    4295                 :            :         struct list_head discard_list;
    4296                 :            :         struct ext4_prealloc_space *pa, *tmp;
    4297                 :            : 
    4298                 :            :         mb_debug(1, "discard locality group preallocation\n");
    4299                 :            : 
    4300                 :            :         INIT_LIST_HEAD(&discard_list);
    4301                 :            : 
    4302                 :            :         spin_lock(&lg->lg_prealloc_lock);
    4303         [ #  # ]:          0 :         list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
    4304                 :            :                                                 pa_inode_list) {
    4305                 :            :                 spin_lock(&pa->pa_lock);
    4306         [ #  # ]:          0 :                 if (atomic_read(&pa->pa_count)) {
    4307                 :            :                         /*
    4308                 :            :                          * This is the pa that we just used
    4309                 :            :                          * for block allocation. So don't
    4310                 :            :                          * free that
    4311                 :            :                          */
    4312                 :            :                         spin_unlock(&pa->pa_lock);
    4313                 :          0 :                         continue;
    4314                 :            :                 }
    4315         [ #  # ]:          0 :                 if (pa->pa_deleted) {
    4316                 :            :                         spin_unlock(&pa->pa_lock);
    4317                 :          0 :                         continue;
    4318                 :            :                 }
    4319                 :            :                 /* only lg prealloc space */
    4320         [ #  # ]:          0 :                 BUG_ON(pa->pa_type != MB_GROUP_PA);
    4321                 :            : 
    4322                 :            :                 /* seems this one can be freed ... */
    4323                 :          0 :                 pa->pa_deleted = 1;
    4324                 :            :                 spin_unlock(&pa->pa_lock);
    4325                 :            : 
    4326                 :            :                 list_del_rcu(&pa->pa_inode_list);
    4327                 :          0 :                 list_add(&pa->u.pa_tmp_list, &discard_list);
    4328                 :            : 
    4329                 :          0 :                 total_entries--;
    4330         [ #  # ]:          0 :                 if (total_entries <= 5) {
    4331                 :            :                         /*
    4332                 :            :                          * we want to keep only 5 entries
    4333                 :            :                          * allowing it to grow to 8. This
    4334                 :            :                          * makes sure we don't call discard
    4335                 :            :                          * soon for this list.
    4336                 :            :                          */
    4337                 :            :                         break;
    4338                 :            :                 }
    4339                 :            :         }
    4340                 :            :         spin_unlock(&lg->lg_prealloc_lock);
    4341                 :            : 
    4342         [ #  # ]:          0 :         list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
    4343                 :            :                 int err;
    4344                 :            : 
    4345                 :          0 :                 group = ext4_get_group_number(sb, pa->pa_pstart);
    4346                 :          0 :                 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
    4347                 :            :                                              GFP_NOFS|__GFP_NOFAIL);
    4348         [ #  # ]:          0 :                 if (err) {
    4349                 :          0 :                         ext4_error(sb, "Error %d loading buddy information for %u",
    4350                 :            :                                    err, group);
    4351                 :          0 :                         continue;
    4352                 :            :                 }
    4353                 :          0 :                 ext4_lock_group(sb, group);
    4354                 :            :                 list_del(&pa->pa_group_list);
    4355                 :          0 :                 ext4_mb_release_group_pa(&e4b, pa);
    4356                 :            :                 ext4_unlock_group(sb, group);
    4357                 :            : 
    4358                 :          0 :                 ext4_mb_unload_buddy(&e4b);
    4359                 :            :                 list_del(&pa->u.pa_tmp_list);
    4360                 :          0 :                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
    4361                 :            :         }
    4362                 :          0 : }
    4363                 :            : 
    4364                 :            : /*
    4365                 :            :  * We have incremented pa_count. So it cannot be freed at this
    4366                 :            :  * point. Also we hold lg_mutex. So no parallel allocation is
    4367                 :            :  * possible from this lg. That means pa_free cannot be updated.
    4368                 :            :  *
    4369                 :            :  * A parallel ext4_mb_discard_group_preallocations is possible,
    4370                 :            :  * which can cause the lg_prealloc_list to be updated.
    4371                 :            :  */
    4372                 :            : 
    4373                 :       3544 : static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
    4374                 :            : {
    4375                 :            :         int order, added = 0, lg_prealloc_count = 1;
    4376                 :       3544 :         struct super_block *sb = ac->ac_sb;
    4377                 :       3544 :         struct ext4_locality_group *lg = ac->ac_lg;
    4378                 :       3544 :         struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
    4379                 :            : 
    4380                 :       7088 :         order = fls(pa->pa_free) - 1;
    4381         [ -  + ]:       3544 :         if (order > PREALLOC_TB_SIZE - 1)
    4382                 :            :                 /* The max size of hash table is PREALLOC_TB_SIZE */
    4383                 :            :                 order = PREALLOC_TB_SIZE - 1;
    4384                 :            :         /* Add the prealloc space to lg */
    4385                 :            :         spin_lock(&lg->lg_prealloc_lock);
    4386         [ -  + ]:       7088 :         list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
    4387                 :            :                                                 pa_inode_list) {
    4388                 :            :                 spin_lock(&tmp_pa->pa_lock);
    4389         [ #  # ]:          0 :                 if (tmp_pa->pa_deleted) {
    4390                 :            :                         spin_unlock(&tmp_pa->pa_lock);
    4391                 :          0 :                         continue;
    4392                 :            :                 }
    4393   [ #  #  #  # ]:          0 :                 if (!added && pa->pa_free < tmp_pa->pa_free) {
    4394                 :            :                         /* Add to the tail of the previous entry */
    4395                 :          0 :                         list_add_tail_rcu(&pa->pa_inode_list,
    4396                 :            :                                                 &tmp_pa->pa_inode_list);
    4397                 :            :                         added = 1;
    4398                 :            :                         /*
    4399                 :            :                          * we want to count the total
    4400                 :            :                          * number of entries in the list
    4401                 :            :                          */
    4402                 :            :                 }
    4403                 :            :                 spin_unlock(&tmp_pa->pa_lock);
    4404                 :          0 :                 lg_prealloc_count++;
    4405                 :            :         }
    4406         [ +  - ]:       3544 :         if (!added)
    4407                 :       3544 :                 list_add_tail_rcu(&pa->pa_inode_list,
    4408                 :            :                                         &lg->lg_prealloc_list[order]);
    4409                 :            :         spin_unlock(&lg->lg_prealloc_lock);
    4410                 :            : 
    4411                 :            :         /* Now trim the list to be not more than 8 elements */
    4412         [ -  + ]:       3544 :         if (lg_prealloc_count > 8) {
    4413                 :          0 :                 ext4_mb_discard_lg_preallocations(sb, lg,
    4414                 :            :                                                   order, lg_prealloc_count);
    4415                 :          0 :                 return;
    4416                 :            :         }
    4417                 :            :         return ;
    4418                 :            : }
    4419                 :            : 
    4420                 :            : /*
    4421                 :            :  * release all resources we used in allocation
    4422                 :            :  */
    4423                 :     145960 : static int ext4_mb_release_context(struct ext4_allocation_context *ac)
    4424                 :            : {
    4425                 :     145960 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    4426                 :     145960 :         struct ext4_prealloc_space *pa = ac->ac_pa;
    4427         [ +  + ]:     145960 :         if (pa) {
    4428         [ +  + ]:       7976 :                 if (pa->pa_type == MB_GROUP_PA) {
    4429                 :            :                         /* see comment in ext4_mb_use_group_pa() */
    4430                 :            :                         spin_lock(&pa->pa_lock);
    4431                 :       3544 :                         pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    4432                 :       3544 :                         pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    4433                 :       3544 :                         pa->pa_free -= ac->ac_b_ex.fe_len;
    4434                 :       3544 :                         pa->pa_len -= ac->ac_b_ex.fe_len;
    4435                 :            :                         spin_unlock(&pa->pa_lock);
    4436                 :            :                 }
    4437                 :            :         }
    4438         [ +  + ]:     145960 :         if (pa) {
    4439                 :            :                 /*
    4440                 :            :                  * We want to add the pa to the right bucket.
    4441                 :            :                  * Remove it from the list and while adding
    4442                 :            :                  * make sure the list to which we are adding
    4443                 :            :                  * doesn't grow big.
    4444                 :            :                  */
    4445   [ +  +  +  - ]:       7976 :                 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
    4446                 :       3544 :                         spin_lock(pa->pa_obj_lock);
    4447                 :            :                         list_del_rcu(&pa->pa_inode_list);
    4448                 :       3544 :                         spin_unlock(pa->pa_obj_lock);
    4449                 :       3544 :                         ext4_mb_add_n_trim(ac);
    4450                 :            :                 }
    4451                 :       7976 :                 ext4_mb_put_pa(ac, ac->ac_sb, pa);
    4452                 :            :         }
    4453         [ +  + ]:     145960 :         if (ac->ac_bitmap_page)
    4454                 :     141256 :                 put_page(ac->ac_bitmap_page);
    4455         [ +  + ]:     145960 :         if (ac->ac_buddy_page)
    4456                 :     141256 :                 put_page(ac->ac_buddy_page);
    4457         [ +  + ]:     145960 :         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
    4458                 :       3544 :                 mutex_unlock(&ac->ac_lg->lg_mutex);
    4459                 :     145960 :         ext4_mb_collect_stats(ac);
    4460                 :     145960 :         return 0;
    4461                 :            : }
    4462                 :            : 
    4463                 :          0 : static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
    4464                 :            : {
    4465                 :            :         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
    4466                 :            :         int ret;
    4467                 :            :         int freed = 0;
    4468                 :            : 
    4469                 :          0 :         trace_ext4_mb_discard_preallocations(sb, needed);
    4470         [ #  # ]:          0 :         for (i = 0; i < ngroups && needed > 0; i++) {
    4471                 :          0 :                 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
    4472                 :          0 :                 freed += ret;
    4473                 :          0 :                 needed -= ret;
    4474                 :            :         }
    4475                 :            : 
    4476                 :          0 :         return freed;
    4477                 :            : }
    4478                 :            : 
    4479                 :            : /*
    4480                 :            :  * Main entry point into mballoc to allocate blocks
    4481                 :            :  * it tries to use preallocation first, then falls back
    4482                 :            :  * to usual allocation
    4483                 :            :  */
    4484                 :     145960 : ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
    4485                 :            :                                 struct ext4_allocation_request *ar, int *errp)
    4486                 :            : {
    4487                 :            :         int freed;
    4488                 :            :         struct ext4_allocation_context *ac = NULL;
    4489                 :            :         struct ext4_sb_info *sbi;
    4490                 :            :         struct super_block *sb;
    4491                 :            :         ext4_fsblk_t block = 0;
    4492                 :            :         unsigned int inquota = 0;
    4493                 :            :         unsigned int reserv_clstrs = 0;
    4494                 :            : 
    4495                 :     145960 :         might_sleep();
    4496                 :     145960 :         sb = ar->inode->i_sb;
    4497                 :            :         sbi = EXT4_SB(sb);
    4498                 :            : 
    4499                 :     145960 :         trace_ext4_request_blocks(ar);
    4500                 :            : 
    4501                 :            :         /* Allow to use superuser reservation for quota file */
    4502         [ -  + ]:     291920 :         if (ext4_is_quota_file(ar->inode))
    4503                 :          0 :                 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
    4504                 :            : 
    4505         [ +  + ]:     145960 :         if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
    4506                 :            :                 /* Without delayed allocation we need to verify
    4507                 :            :                  * there are enough free blocks to do block allocation
    4508                 :            :                  * and verify allocation doesn't exceed the quota limits.
    4509                 :            :                  */
    4510   [ +  -  -  + ]:     169968 :                 while (ar->len &&
    4511                 :      84984 :                         ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
    4512                 :            : 
    4513                 :            :                         /* let others free the space */
    4514                 :          0 :                         cond_resched();
    4515                 :          0 :                         ar->len = ar->len >> 1;
    4516                 :            :                 }
    4517         [ -  + ]:      84984 :                 if (!ar->len) {
    4518                 :          0 :                         *errp = -ENOSPC;
    4519                 :          0 :                         return 0;
    4520                 :            :                 }
    4521                 :            :                 reserv_clstrs = ar->len;
    4522         [ +  - ]:      84984 :                 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
    4523                 :          0 :                         dquot_alloc_block_nofail(ar->inode,
    4524                 :          0 :                                                  EXT4_C2B(sbi, ar->len));
    4525                 :            :                 } else {
    4526   [ +  -  -  + ]:     169968 :                         while (ar->len &&
    4527                 :     169968 :                                 dquot_alloc_block(ar->inode,
    4528                 :      84984 :                                                   EXT4_C2B(sbi, ar->len))) {
    4529                 :            : 
    4530                 :          0 :                                 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
    4531                 :          0 :                                 ar->len--;
    4532                 :            :                         }
    4533                 :            :                 }
    4534                 :      84984 :                 inquota = ar->len;
    4535         [ -  + ]:      84984 :                 if (ar->len == 0) {
    4536                 :          0 :                         *errp = -EDQUOT;
    4537                 :          0 :                         goto out;
    4538                 :            :                 }
    4539                 :            :         }
    4540                 :            : 
    4541                 :     145960 :         ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
    4542         [ -  + ]:     145960 :         if (!ac) {
    4543                 :          0 :                 ar->len = 0;
    4544                 :          0 :                 *errp = -ENOMEM;
    4545                 :          0 :                 goto out;
    4546                 :            :         }
    4547                 :            : 
    4548                 :     145960 :         *errp = ext4_mb_initialize_context(ac, ar);
    4549         [ -  + ]:     145960 :         if (*errp) {
    4550                 :          0 :                 ar->len = 0;
    4551                 :          0 :                 goto out;
    4552                 :            :         }
    4553                 :            : 
    4554                 :     145960 :         ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
    4555         [ +  + ]:     145960 :         if (!ext4_mb_use_preallocated(ac)) {
    4556                 :     141256 :                 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
    4557                 :     141256 :                 ext4_mb_normalize_request(ac, ar);
    4558                 :            : repeat:
    4559                 :            :                 /* allocate space in core */
    4560                 :     141256 :                 *errp = ext4_mb_regular_allocator(ac);
    4561         [ +  - ]:     141256 :                 if (*errp)
    4562                 :            :                         goto discard_and_exit;
    4563                 :            : 
    4564                 :            :                 /* as we've just preallocated more space than
    4565                 :            :                  * user requested originally, we store allocated
    4566                 :            :                  * space in a special descriptor */
    4567   [ +  -  +  + ]:     282512 :                 if (ac->ac_status == AC_STATUS_FOUND &&
    4568                 :     141256 :                     ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
    4569                 :       3272 :                         *errp = ext4_mb_new_preallocation(ac);
    4570         [ -  + ]:     141256 :                 if (*errp) {
    4571                 :            :                 discard_and_exit:
    4572                 :          0 :                         ext4_discard_allocated_blocks(ac);
    4573                 :          0 :                         goto errout;
    4574                 :            :                 }
    4575                 :            :         }
    4576         [ +  - ]:     145960 :         if (likely(ac->ac_status == AC_STATUS_FOUND)) {
    4577                 :     145960 :                 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
    4578         [ -  + ]:     145960 :                 if (*errp) {
    4579                 :          0 :                         ext4_discard_allocated_blocks(ac);
    4580                 :          0 :                         goto errout;
    4581                 :            :                 } else {
    4582                 :            :                         block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    4583                 :     145960 :                         ar->len = ac->ac_b_ex.fe_len;
    4584                 :            :                 }
    4585                 :            :         } else {
    4586                 :          0 :                 freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
    4587         [ #  # ]:          0 :                 if (freed)
    4588                 :            :                         goto repeat;
    4589                 :          0 :                 *errp = -ENOSPC;
    4590                 :            :         }
    4591                 :            : 
    4592                 :            : errout:
    4593         [ -  + ]:     145960 :         if (*errp) {
    4594                 :          0 :                 ac->ac_b_ex.fe_len = 0;
    4595                 :          0 :                 ar->len = 0;
    4596                 :            :                 ext4_mb_show_ac(ac);
    4597                 :            :         }
    4598                 :     145960 :         ext4_mb_release_context(ac);
    4599                 :            : out:
    4600         [ +  - ]:     145960 :         if (ac)
    4601                 :     145960 :                 kmem_cache_free(ext4_ac_cachep, ac);
    4602   [ +  +  +  + ]:     145960 :         if (inquota && ar->len < inquota)
    4603                 :         36 :                 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
    4604         [ -  + ]:     145960 :         if (!ar->len) {
    4605         [ #  # ]:          0 :                 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
    4606                 :            :                         /* release all the reserved blocks if non delalloc */
    4607                 :          0 :                         percpu_counter_sub(&sbi->s_dirtyclusters_counter,
    4608                 :            :                                                 reserv_clstrs);
    4609                 :            :         }
    4610                 :            : 
    4611                 :     145960 :         trace_ext4_allocate_blocks(ar, (unsigned long long)block);
    4612                 :            : 
    4613                 :     145960 :         return block;
    4614                 :            : }
    4615                 :            : 
    4616                 :            : /*
    4617                 :            :  * We can merge two free data extents only if the physical blocks
    4618                 :            :  * are contiguous, AND the extents were freed by the same transaction,
    4619                 :            :  * AND the blocks are associated with the same group.
    4620                 :            :  */
    4621                 :       6284 : static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
    4622                 :            :                                         struct ext4_free_data *entry,
    4623                 :            :                                         struct ext4_free_data *new_entry,
    4624                 :            :                                         struct rb_root *entry_rb_root)
    4625                 :            : {
    4626   [ +  +  +  - ]:      12536 :         if ((entry->efd_tid != new_entry->efd_tid) ||
    4627                 :       6252 :             (entry->efd_group != new_entry->efd_group))
    4628                 :            :                 return;
    4629         [ +  + ]:      12504 :         if (entry->efd_start_cluster + entry->efd_count ==
    4630                 :       6252 :             new_entry->efd_start_cluster) {
    4631                 :       1022 :                 new_entry->efd_start_cluster = entry->efd_start_cluster;
    4632                 :       1022 :                 new_entry->efd_count += entry->efd_count;
    4633         [ +  + ]:       5230 :         } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
    4634                 :            :                    entry->efd_start_cluster) {
    4635                 :        758 :                 new_entry->efd_count += entry->efd_count;
    4636                 :            :         } else
    4637                 :            :                 return;
    4638                 :            :         spin_lock(&sbi->s_md_lock);
    4639                 :            :         list_del(&entry->efd_list);
    4640                 :            :         spin_unlock(&sbi->s_md_lock);
    4641                 :       1780 :         rb_erase(&entry->efd_node, entry_rb_root);
    4642                 :       1780 :         kmem_cache_free(ext4_free_data_cachep, entry);
    4643                 :            : }
    4644                 :            : 
    4645                 :            : static noinline_for_stack int
    4646                 :       7916 : ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
    4647                 :            :                       struct ext4_free_data *new_entry)
    4648                 :            : {
    4649                 :       7916 :         ext4_group_t group = e4b->bd_group;
    4650                 :            :         ext4_grpblk_t cluster;
    4651                 :       7916 :         ext4_grpblk_t clusters = new_entry->efd_count;
    4652                 :            :         struct ext4_free_data *entry;
    4653                 :       7916 :         struct ext4_group_info *db = e4b->bd_info;
    4654                 :       7916 :         struct super_block *sb = e4b->bd_sb;
    4655                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4656                 :       7916 :         struct rb_node **n = &db->bb_free_root.rb_node, *node;
    4657                 :            :         struct rb_node *parent = NULL, *new_node;
    4658                 :            : 
    4659         [ -  + ]:       7916 :         BUG_ON(!ext4_handle_valid(handle));
    4660         [ -  + ]:       7916 :         BUG_ON(e4b->bd_bitmap_page == NULL);
    4661         [ -  + ]:       7916 :         BUG_ON(e4b->bd_buddy_page == NULL);
    4662                 :            : 
    4663                 :       7916 :         new_node = &new_entry->efd_node;
    4664                 :       7916 :         cluster = new_entry->efd_start_cluster;
    4665                 :            : 
    4666         [ +  + ]:       7916 :         if (!*n) {
    4667                 :            :                 /* first free block exent. We need to
    4668                 :            :                    protect buddy cache from being freed,
    4669                 :            :                  * otherwise we'll refresh it from
    4670                 :            :                  * on-disk bitmap and lose not-yet-available
    4671                 :            :                  * blocks */
    4672                 :       3516 :                 get_page(e4b->bd_buddy_page);
    4673                 :       3516 :                 get_page(e4b->bd_bitmap_page);
    4674                 :            :         }
    4675         [ +  + ]:      17052 :         while (*n) {
    4676                 :            :                 parent = *n;
    4677                 :            :                 entry = rb_entry(parent, struct ext4_free_data, efd_node);
    4678         [ +  + ]:       9136 :                 if (cluster < entry->efd_start_cluster)
    4679                 :       3802 :                         n = &(*n)->rb_left;
    4680         [ +  - ]:       5334 :                 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
    4681                 :       5334 :                         n = &(*n)->rb_right;
    4682                 :            :                 else {
    4683                 :          0 :                         ext4_grp_locked_error(sb, group, 0,
    4684                 :            :                                 ext4_group_first_block_no(sb, group) +
    4685                 :            :                                 EXT4_C2B(sbi, cluster),
    4686                 :            :                                 "Block already on to-be-freed list");
    4687                 :          0 :                         return 0;
    4688                 :            :                 }
    4689                 :            :         }
    4690                 :            : 
    4691                 :            :         rb_link_node(new_node, parent, n);
    4692                 :       7916 :         rb_insert_color(new_node, &db->bb_free_root);
    4693                 :            : 
    4694                 :            :         /* Now try to see the extent can be merged to left and right */
    4695                 :       7916 :         node = rb_prev(new_node);
    4696         [ +  + ]:       7916 :         if (node) {
    4697                 :       3440 :                 entry = rb_entry(node, struct ext4_free_data, efd_node);
    4698                 :       3440 :                 ext4_try_merge_freed_extent(sbi, entry, new_entry,
    4699                 :            :                                             &(db->bb_free_root));
    4700                 :            :         }
    4701                 :            : 
    4702                 :       7916 :         node = rb_next(new_node);
    4703         [ +  + ]:       7916 :         if (node) {
    4704                 :       2844 :                 entry = rb_entry(node, struct ext4_free_data, efd_node);
    4705                 :       2844 :                 ext4_try_merge_freed_extent(sbi, entry, new_entry,
    4706                 :            :                                             &(db->bb_free_root));
    4707                 :            :         }
    4708                 :            : 
    4709                 :            :         spin_lock(&sbi->s_md_lock);
    4710                 :       7916 :         list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
    4711                 :       7916 :         sbi->s_mb_free_pending += clusters;
    4712                 :            :         spin_unlock(&sbi->s_md_lock);
    4713                 :       7916 :         return 0;
    4714                 :            : }
    4715                 :            : 
    4716                 :            : /**
    4717                 :            :  * ext4_free_blocks() -- Free given blocks and update quota
    4718                 :            :  * @handle:             handle for this transaction
    4719                 :            :  * @inode:              inode
    4720                 :            :  * @bh:                 optional buffer of the block to be freed
    4721                 :            :  * @block:              starting physical block to be freed
    4722                 :            :  * @count:              number of blocks to be freed
    4723                 :            :  * @flags:              flags used by ext4_free_blocks
    4724                 :            :  */
    4725                 :       7916 : void ext4_free_blocks(handle_t *handle, struct inode *inode,
    4726                 :            :                       struct buffer_head *bh, ext4_fsblk_t block,
    4727                 :            :                       unsigned long count, int flags)
    4728                 :            : {
    4729                 :            :         struct buffer_head *bitmap_bh = NULL;
    4730                 :       7916 :         struct super_block *sb = inode->i_sb;
    4731                 :            :         struct ext4_group_desc *gdp;
    4732                 :            :         unsigned int overflow;
    4733                 :            :         ext4_grpblk_t bit;
    4734                 :            :         struct buffer_head *gd_bh;
    4735                 :            :         ext4_group_t block_group;
    4736                 :            :         struct ext4_sb_info *sbi;
    4737                 :            :         struct ext4_buddy e4b;
    4738                 :            :         unsigned int count_clusters;
    4739                 :            :         int err = 0;
    4740                 :            :         int ret;
    4741                 :            : 
    4742                 :       7916 :         might_sleep();
    4743         [ -  + ]:       7916 :         if (bh) {
    4744         [ #  # ]:          0 :                 if (block)
    4745         [ #  # ]:          0 :                         BUG_ON(block != bh->b_blocknr);
    4746                 :            :                 else
    4747                 :          0 :                         block = bh->b_blocknr;
    4748                 :            :         }
    4749                 :            : 
    4750                 :            :         sbi = EXT4_SB(sb);
    4751   [ +  -  -  + ]:      15832 :         if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
    4752                 :       7916 :             !ext4_data_block_valid(sbi, block, count)) {
    4753                 :          0 :                 ext4_error(sb, "Freeing blocks not in datazone - "
    4754                 :            :                            "block = %llu, count = %lu", block, count);
    4755                 :          0 :                 goto error_return;
    4756                 :            :         }
    4757                 :            : 
    4758                 :            :         ext4_debug("freeing block %llu\n", block);
    4759                 :       7916 :         trace_ext4_free_blocks(inode, block, count, flags);
    4760                 :            : 
    4761   [ -  +  #  # ]:       7916 :         if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
    4762         [ #  # ]:          0 :                 BUG_ON(count > 1);
    4763                 :            : 
    4764                 :          0 :                 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
    4765                 :            :                             inode, bh, block);
    4766                 :            :         }
    4767                 :            : 
    4768                 :            :         /*
    4769                 :            :          * If the extent to be freed does not begin on a cluster
    4770                 :            :          * boundary, we need to deal with partial clusters at the
    4771                 :            :          * beginning and end of the extent.  Normally we will free
    4772                 :            :          * blocks at the beginning or the end unless we are explicitly
    4773                 :            :          * requested to avoid doing so.
    4774                 :            :          */
    4775                 :       7916 :         overflow = EXT4_PBLK_COFF(sbi, block);
    4776         [ -  + ]:       7916 :         if (overflow) {
    4777         [ #  # ]:          0 :                 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
    4778                 :          0 :                         overflow = sbi->s_cluster_ratio - overflow;
    4779                 :          0 :                         block += overflow;
    4780         [ #  # ]:          0 :                         if (count > overflow)
    4781                 :          0 :                                 count -= overflow;
    4782                 :            :                         else
    4783                 :            :                                 return;
    4784                 :            :                 } else {
    4785                 :          0 :                         block -= overflow;
    4786                 :          0 :                         count += overflow;
    4787                 :            :                 }
    4788                 :            :         }
    4789                 :       7916 :         overflow = EXT4_LBLK_COFF(sbi, count);
    4790         [ -  + ]:       7916 :         if (overflow) {
    4791         [ #  # ]:          0 :                 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
    4792         [ #  # ]:          0 :                         if (count > overflow)
    4793                 :          0 :                                 count -= overflow;
    4794                 :            :                         else
    4795                 :            :                                 return;
    4796                 :            :                 } else
    4797                 :          0 :                         count += sbi->s_cluster_ratio - overflow;
    4798                 :            :         }
    4799                 :            : 
    4800   [ -  +  +  + ]:       7916 :         if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
    4801                 :            :                 int i;
    4802                 :       3260 :                 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
    4803                 :            : 
    4804         [ +  + ]:       6520 :                 for (i = 0; i < count; i++) {
    4805                 :       3260 :                         cond_resched();
    4806         [ +  - ]:       3260 :                         if (is_metadata)
    4807                 :       3260 :                                 bh = sb_find_get_block(inode->i_sb, block + i);
    4808                 :       3260 :                         ext4_forget(handle, is_metadata, inode, bh, block + i);
    4809                 :            :                 }
    4810                 :            :         }
    4811                 :            : 
    4812                 :            : do_more:
    4813                 :            :         overflow = 0;
    4814                 :       7916 :         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
    4815                 :            : 
    4816         [ +  - ]:      15832 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
    4817                 :            :                         ext4_get_group_info(sb, block_group))))
    4818                 :            :                 return;
    4819                 :            : 
    4820                 :            :         /*
    4821                 :            :          * Check to see if we are freeing blocks across a group
    4822                 :            :          * boundary.
    4823                 :            :          */
    4824         [ -  + ]:      15832 :         if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
    4825                 :          0 :                 overflow = EXT4_C2B(sbi, bit) + count -
    4826                 :            :                         EXT4_BLOCKS_PER_GROUP(sb);
    4827                 :          0 :                 count -= overflow;
    4828                 :            :         }
    4829                 :       7916 :         count_clusters = EXT4_NUM_B2C(sbi, count);
    4830                 :       7916 :         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
    4831         [ -  + ]:       7916 :         if (IS_ERR(bitmap_bh)) {
    4832                 :            :                 err = PTR_ERR(bitmap_bh);
    4833                 :            :                 bitmap_bh = NULL;
    4834                 :          0 :                 goto error_return;
    4835                 :            :         }
    4836                 :       7916 :         gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
    4837         [ +  - ]:       7916 :         if (!gdp) {
    4838                 :            :                 err = -EIO;
    4839                 :            :                 goto error_return;
    4840                 :            :         }
    4841                 :            : 
    4842   [ -  +  #  #  :      15832 :         if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
                   -  + ]
    4843   [ #  #  +  - ]:      15832 :             in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
    4844         [ +  - ]:      15832 :             in_range(block, ext4_inode_table(sb, gdp),
    4845         [ +  - ]:       7916 :                      sbi->s_itb_per_group) ||
    4846         [ -  + ]:      15832 :             in_range(block + count - 1, ext4_inode_table(sb, gdp),
    4847                 :            :                      sbi->s_itb_per_group)) {
    4848                 :            : 
    4849                 :          0 :                 ext4_error(sb, "Freeing blocks in system zone - "
    4850                 :            :                            "Block = %llu, count = %lu", block, count);
    4851                 :            :                 /* err = 0. ext4_std_error should be a no op */
    4852                 :          0 :                 goto error_return;
    4853                 :            :         }
    4854                 :            : 
    4855                 :            :         BUFFER_TRACE(bitmap_bh, "getting write access");
    4856                 :       7916 :         err = ext4_journal_get_write_access(handle, bitmap_bh);
    4857         [ +  - ]:       7916 :         if (err)
    4858                 :            :                 goto error_return;
    4859                 :            : 
    4860                 :            :         /*
    4861                 :            :          * We are about to modify some metadata.  Call the journal APIs
    4862                 :            :          * to unshare ->b_data if a currently-committing transaction is
    4863                 :            :          * using it
    4864                 :            :          */
    4865                 :            :         BUFFER_TRACE(gd_bh, "get_write_access");
    4866                 :       7916 :         err = ext4_journal_get_write_access(handle, gd_bh);
    4867         [ +  - ]:       7916 :         if (err)
    4868                 :            :                 goto error_return;
    4869                 :            : #ifdef AGGRESSIVE_CHECK
    4870                 :            :         {
    4871                 :            :                 int i;
    4872                 :            :                 for (i = 0; i < count_clusters; i++)
    4873                 :            :                         BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
    4874                 :            :         }
    4875                 :            : #endif
    4876                 :       7916 :         trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
    4877                 :            : 
    4878                 :            :         /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
    4879                 :       7916 :         err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
    4880                 :            :                                      GFP_NOFS|__GFP_NOFAIL);
    4881         [ +  - ]:       7916 :         if (err)
    4882                 :            :                 goto error_return;
    4883                 :            : 
    4884                 :            :         /*
    4885                 :            :          * We need to make sure we don't reuse the freed block until after the
    4886                 :            :          * transaction is committed. We make an exception if the inode is to be
    4887                 :            :          * written in writeback mode since writeback mode has weak data
    4888                 :            :          * consistency guarantees.
    4889                 :            :          */
    4890   [ +  -  +  + ]:      15832 :         if (ext4_handle_valid(handle) &&
    4891         [ +  - ]:      12572 :             ((flags & EXT4_FREE_BLOCKS_METADATA) ||
    4892                 :       7916 :              !ext4_should_writeback_data(inode))) {
    4893                 :            :                 struct ext4_free_data *new_entry;
    4894                 :            :                 /*
    4895                 :            :                  * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
    4896                 :            :                  * to fail.
    4897                 :            :                  */
    4898                 :       7916 :                 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
    4899                 :            :                                 GFP_NOFS|__GFP_NOFAIL);
    4900                 :       7916 :                 new_entry->efd_start_cluster = bit;
    4901                 :       7916 :                 new_entry->efd_group = block_group;
    4902                 :       7916 :                 new_entry->efd_count = count_clusters;
    4903                 :       7916 :                 new_entry->efd_tid = handle->h_transaction->t_tid;
    4904                 :            : 
    4905                 :       7916 :                 ext4_lock_group(sb, block_group);
    4906                 :       7916 :                 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
    4907                 :       7916 :                 ext4_mb_free_metadata(handle, &e4b, new_entry);
    4908                 :            :         } else {
    4909                 :            :                 /* need to update group_info->bb_free and bitmap
    4910                 :            :                  * with group lock held. generate_buddy look at
    4911                 :            :                  * them with group lock_held
    4912                 :            :                  */
    4913         [ #  # ]:          0 :                 if (test_opt(sb, DISCARD)) {
    4914                 :          0 :                         err = ext4_issue_discard(sb, block_group, bit, count,
    4915                 :            :                                                  NULL);
    4916         [ #  # ]:          0 :                         if (err && err != -EOPNOTSUPP)
    4917                 :          0 :                                 ext4_msg(sb, KERN_WARNING, "discard request in"
    4918                 :            :                                          " group:%d block:%d count:%lu failed"
    4919                 :            :                                          " with %d", block_group, bit, count,
    4920                 :            :                                          err);
    4921                 :            :                 } else
    4922                 :          0 :                         EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
    4923                 :            : 
    4924                 :          0 :                 ext4_lock_group(sb, block_group);
    4925                 :          0 :                 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
    4926                 :          0 :                 mb_free_blocks(inode, &e4b, bit, count_clusters);
    4927                 :            :         }
    4928                 :            : 
    4929                 :       7916 :         ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
    4930                 :       7916 :         ext4_free_group_clusters_set(sb, gdp, ret);
    4931                 :       7916 :         ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
    4932                 :       7916 :         ext4_group_desc_csum_set(sb, block_group, gdp);
    4933                 :       7916 :         ext4_unlock_group(sb, block_group);
    4934                 :            : 
    4935         [ +  - ]:       7916 :         if (sbi->s_log_groups_per_flex) {
    4936                 :       7916 :                 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
    4937                 :      15832 :                 atomic64_add(count_clusters,
    4938                 :       7916 :                              &sbi_array_rcu_deref(sbi, s_flex_groups,
    4939                 :            :                                                   flex_group)->free_clusters);
    4940                 :            :         }
    4941                 :            : 
    4942                 :            :         /*
    4943                 :            :          * on a bigalloc file system, defer the s_freeclusters_counter
    4944                 :            :          * update to the caller (ext4_remove_space and friends) so they
    4945                 :            :          * can determine if a cluster freed here should be rereserved
    4946                 :            :          */
    4947         [ +  - ]:       7916 :         if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
    4948         [ +  - ]:       7916 :                 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
    4949                 :       7916 :                         dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
    4950                 :       7916 :                 percpu_counter_add(&sbi->s_freeclusters_counter,
    4951                 :            :                                    count_clusters);
    4952                 :            :         }
    4953                 :            : 
    4954                 :       7916 :         ext4_mb_unload_buddy(&e4b);
    4955                 :            : 
    4956                 :            :         /* We dirtied the bitmap block */
    4957                 :            :         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
    4958                 :       7916 :         err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    4959                 :            : 
    4960                 :            :         /* And the group descriptor block */
    4961                 :            :         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
    4962                 :       7916 :         ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
    4963         [ +  - ]:       7916 :         if (!err)
    4964                 :            :                 err = ret;
    4965                 :            : 
    4966         [ -  + ]:       7916 :         if (overflow && !err) {
    4967                 :            :                 block += count;
    4968                 :            :                 count = overflow;
    4969                 :          0 :                 put_bh(bitmap_bh);
    4970                 :          0 :                 goto do_more;
    4971                 :            :         }
    4972                 :            : error_return:
    4973                 :            :         brelse(bitmap_bh);
    4974         [ -  + ]:       7916 :         ext4_std_error(sb, err);
    4975                 :            :         return;
    4976                 :            : }
    4977                 :            : 
    4978                 :            : /**
    4979                 :            :  * ext4_group_add_blocks() -- Add given blocks to an existing group
    4980                 :            :  * @handle:                     handle to this transaction
    4981                 :            :  * @sb:                         super block
    4982                 :            :  * @block:                      start physical block to add to the block group
    4983                 :            :  * @count:                      number of blocks to free
    4984                 :            :  *
    4985                 :            :  * This marks the blocks as free in the bitmap and buddy.
    4986                 :            :  */
    4987                 :          0 : int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
    4988                 :            :                          ext4_fsblk_t block, unsigned long count)
    4989                 :            : {
    4990                 :            :         struct buffer_head *bitmap_bh = NULL;
    4991                 :            :         struct buffer_head *gd_bh;
    4992                 :            :         ext4_group_t block_group;
    4993                 :            :         ext4_grpblk_t bit;
    4994                 :            :         unsigned int i;
    4995                 :            :         struct ext4_group_desc *desc;
    4996                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4997                 :            :         struct ext4_buddy e4b;
    4998                 :            :         int err = 0, ret, free_clusters_count;
    4999                 :            :         ext4_grpblk_t clusters_freed;
    5000                 :          0 :         ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
    5001                 :          0 :         ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
    5002                 :          0 :         unsigned long cluster_count = last_cluster - first_cluster + 1;
    5003                 :            : 
    5004                 :            :         ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
    5005                 :            : 
    5006         [ #  # ]:          0 :         if (count == 0)
    5007                 :            :                 return 0;
    5008                 :            : 
    5009                 :          0 :         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
    5010                 :            :         /*
    5011                 :            :          * Check to see if we are freeing blocks across a group
    5012                 :            :          * boundary.
    5013                 :            :          */
    5014         [ #  # ]:          0 :         if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
    5015                 :          0 :                 ext4_warning(sb, "too many blocks added to group %u",
    5016                 :            :                              block_group);
    5017                 :            :                 err = -EINVAL;
    5018                 :          0 :                 goto error_return;
    5019                 :            :         }
    5020                 :            : 
    5021                 :          0 :         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
    5022         [ #  # ]:          0 :         if (IS_ERR(bitmap_bh)) {
    5023                 :            :                 err = PTR_ERR(bitmap_bh);
    5024                 :            :                 bitmap_bh = NULL;
    5025                 :          0 :                 goto error_return;
    5026                 :            :         }
    5027                 :            : 
    5028                 :          0 :         desc = ext4_get_group_desc(sb, block_group, &gd_bh);
    5029         [ #  # ]:          0 :         if (!desc) {
    5030                 :            :                 err = -EIO;
    5031                 :            :                 goto error_return;
    5032                 :            :         }
    5033                 :            : 
    5034   [ #  #  #  #  :          0 :         if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
                   #  # ]
    5035   [ #  #  #  # ]:          0 :             in_range(ext4_inode_bitmap(sb, desc), block, count) ||
    5036   [ #  #  #  # ]:          0 :             in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
    5037         [ #  # ]:          0 :             in_range(block + count - 1, ext4_inode_table(sb, desc),
    5038                 :            :                      sbi->s_itb_per_group)) {
    5039                 :          0 :                 ext4_error(sb, "Adding blocks in system zones - "
    5040                 :            :                            "Block = %llu, count = %lu",
    5041                 :            :                            block, count);
    5042                 :            :                 err = -EINVAL;
    5043                 :          0 :                 goto error_return;
    5044                 :            :         }
    5045                 :            : 
    5046                 :            :         BUFFER_TRACE(bitmap_bh, "getting write access");
    5047                 :          0 :         err = ext4_journal_get_write_access(handle, bitmap_bh);
    5048         [ #  # ]:          0 :         if (err)
    5049                 :            :                 goto error_return;
    5050                 :            : 
    5051                 :            :         /*
    5052                 :            :          * We are about to modify some metadata.  Call the journal APIs
    5053                 :            :          * to unshare ->b_data if a currently-committing transaction is
    5054                 :            :          * using it
    5055                 :            :          */
    5056                 :            :         BUFFER_TRACE(gd_bh, "get_write_access");
    5057                 :          0 :         err = ext4_journal_get_write_access(handle, gd_bh);
    5058         [ #  # ]:          0 :         if (err)
    5059                 :            :                 goto error_return;
    5060                 :            : 
    5061         [ #  # ]:          0 :         for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
    5062                 :            :                 BUFFER_TRACE(bitmap_bh, "clear bit");
    5063         [ #  # ]:          0 :                 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
    5064                 :          0 :                         ext4_error(sb, "bit already cleared for block %llu",
    5065                 :            :                                    (ext4_fsblk_t)(block + i));
    5066                 :            :                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
    5067                 :            :                 } else {
    5068                 :          0 :                         clusters_freed++;
    5069                 :            :                 }
    5070                 :            :         }
    5071                 :            : 
    5072                 :          0 :         err = ext4_mb_load_buddy(sb, block_group, &e4b);
    5073         [ #  # ]:          0 :         if (err)
    5074                 :            :                 goto error_return;
    5075                 :            : 
    5076                 :            :         /*
    5077                 :            :          * need to update group_info->bb_free and bitmap
    5078                 :            :          * with group lock held. generate_buddy looks at
    5079                 :            :          * them with the group lock held
    5080                 :            :          */
    5081                 :          0 :         ext4_lock_group(sb, block_group);
    5082                 :          0 :         mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
    5083                 :          0 :         mb_free_blocks(NULL, &e4b, bit, cluster_count);
    5084                 :          0 :         free_clusters_count = clusters_freed +
    5085                 :          0 :                 ext4_free_group_clusters(sb, desc);
    5086                 :          0 :         ext4_free_group_clusters_set(sb, desc, free_clusters_count);
    5087                 :          0 :         ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
    5088                 :          0 :         ext4_group_desc_csum_set(sb, block_group, desc);
    5089                 :          0 :         ext4_unlock_group(sb, block_group);
    5090                 :          0 :         percpu_counter_add(&sbi->s_freeclusters_counter,
    5091                 :            :                            clusters_freed);
    5092                 :            : 
    5093         [ #  # ]:          0 :         if (sbi->s_log_groups_per_flex) {
    5094                 :          0 :                 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
    5095                 :          0 :                 atomic64_add(clusters_freed,
    5096                 :          0 :                              &sbi_array_rcu_deref(sbi, s_flex_groups,
    5097                 :            :                                                   flex_group)->free_clusters);
    5098                 :            :         }
    5099                 :            : 
    5100                 :          0 :         ext4_mb_unload_buddy(&e4b);
    5101                 :            : 
    5102                 :            :         /* We dirtied the bitmap block */
    5103                 :            :         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
    5104                 :          0 :         err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    5105                 :            : 
    5106                 :            :         /* And the group descriptor block */
    5107                 :            :         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
    5108                 :          0 :         ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
    5109         [ #  # ]:          0 :         if (!err)
    5110                 :            :                 err = ret;
    5111                 :            : 
    5112                 :            : error_return:
    5113                 :            :         brelse(bitmap_bh);
    5114         [ #  # ]:          0 :         ext4_std_error(sb, err);
    5115                 :          0 :         return err;
    5116                 :            : }
    5117                 :            : 
    5118                 :            : /**
    5119                 :            :  * ext4_trim_extent -- function to TRIM one single free extent in the group
    5120                 :            :  * @sb:         super block for the file system
    5121                 :            :  * @start:      starting block of the free extent in the alloc. group
    5122                 :            :  * @count:      number of blocks to TRIM
    5123                 :            :  * @group:      alloc. group we are working with
    5124                 :            :  * @e4b:        ext4 buddy for the group
    5125                 :            :  *
    5126                 :            :  * Trim "count" blocks starting at "start" in the "group". To ensure that no
    5127                 :            :  * one will allocate those blocks, mark them as used in buddy bitmap. This must
    5128                 :            :  * be called under the group lock.
    5129                 :            :  */
    5130                 :          0 : static int ext4_trim_extent(struct super_block *sb, int start, int count,
    5131                 :            :                              ext4_group_t group, struct ext4_buddy *e4b)
    5132                 :            : __releases(bitlock)
    5133                 :            : __acquires(bitlock)
    5134                 :            : {
    5135                 :            :         struct ext4_free_extent ex;
    5136                 :            :         int ret = 0;
    5137                 :            : 
    5138                 :          0 :         trace_ext4_trim_extent(sb, group, start, count);
    5139                 :            : 
    5140         [ #  # ]:          0 :         assert_spin_locked(ext4_group_lock_ptr(sb, group));
    5141                 :            : 
    5142                 :          0 :         ex.fe_start = start;
    5143                 :          0 :         ex.fe_group = group;
    5144                 :          0 :         ex.fe_len = count;
    5145                 :            : 
    5146                 :            :         /*
    5147                 :            :          * Mark blocks used, so no one can reuse them while
    5148                 :            :          * being trimmed.
    5149                 :            :          */
    5150                 :          0 :         mb_mark_used(e4b, &ex);
    5151                 :            :         ext4_unlock_group(sb, group);
    5152                 :          0 :         ret = ext4_issue_discard(sb, group, start, count, NULL);
    5153                 :          0 :         ext4_lock_group(sb, group);
    5154                 :          0 :         mb_free_blocks(NULL, e4b, start, ex.fe_len);
    5155                 :          0 :         return ret;
    5156                 :            : }
    5157                 :            : 
    5158                 :            : /**
    5159                 :            :  * ext4_trim_all_free -- function to trim all free space in alloc. group
    5160                 :            :  * @sb:                 super block for file system
    5161                 :            :  * @group:              group to be trimmed
    5162                 :            :  * @start:              first group block to examine
    5163                 :            :  * @max:                last group block to examine
    5164                 :            :  * @minblocks:          minimum extent block count
    5165                 :            :  *
    5166                 :            :  * ext4_trim_all_free walks through the group's buddy bitmap searching for free
    5167                 :            :  * extents. When a free extent is found, ext4_trim_extent is called to TRIM
    5168                 :            :  * the extent.
    5169                 :            :  *
    5170                 :            :  *
    5171                 :            :  * ext4_trim_all_free walks through the group's block bitmap searching for
    5172                 :            :  * free extents. When a free extent is found, mark it as used in the group
    5173                 :            :  * buddy bitmap. Then issue a TRIM command on this extent and free the extent
    5174                 :            :  * in the group buddy bitmap. This is done until the whole group is scanned.
    5175                 :            :  */
    5176                 :            : static ext4_grpblk_t
    5177                 :          0 : ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
    5178                 :            :                    ext4_grpblk_t start, ext4_grpblk_t max,
    5179                 :            :                    ext4_grpblk_t minblocks)
    5180                 :            : {
    5181                 :            :         void *bitmap;
    5182                 :            :         ext4_grpblk_t next, count = 0, free_count = 0;
    5183                 :            :         struct ext4_buddy e4b;
    5184                 :            :         int ret = 0;
    5185                 :            : 
    5186                 :          0 :         trace_ext4_trim_all_free(sb, group, start, max);
    5187                 :            : 
    5188                 :            :         ret = ext4_mb_load_buddy(sb, group, &e4b);
    5189         [ #  # ]:          0 :         if (ret) {
    5190                 :          0 :                 ext4_warning(sb, "Error %d loading buddy information for %u",
    5191                 :            :                              ret, group);
    5192                 :          0 :                 return ret;
    5193                 :            :         }
    5194                 :          0 :         bitmap = e4b.bd_bitmap;
    5195                 :            : 
    5196                 :          0 :         ext4_lock_group(sb, group);
    5197   [ #  #  #  # ]:          0 :         if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
    5198                 :          0 :             minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
    5199                 :            :                 goto out;
    5200                 :            : 
    5201                 :          0 :         start = (e4b.bd_info->bb_first_free > start) ?
    5202                 :          0 :                 e4b.bd_info->bb_first_free : start;
    5203                 :            : 
    5204         [ #  # ]:          0 :         while (start <= max) {
    5205                 :          0 :                 start = mb_find_next_zero_bit(bitmap, max + 1, start);
    5206         [ #  # ]:          0 :                 if (start > max)
    5207                 :            :                         break;
    5208                 :          0 :                 next = mb_find_next_bit(bitmap, max + 1, start);
    5209                 :            : 
    5210         [ #  # ]:          0 :                 if ((next - start) >= minblocks) {
    5211                 :          0 :                         ret = ext4_trim_extent(sb, start,
    5212                 :            :                                                next - start, group, &e4b);
    5213         [ #  # ]:          0 :                         if (ret && ret != -EOPNOTSUPP)
    5214                 :            :                                 break;
    5215                 :            :                         ret = 0;
    5216                 :          0 :                         count += next - start;
    5217                 :            :                 }
    5218                 :          0 :                 free_count += next - start;
    5219                 :          0 :                 start = next + 1;
    5220                 :            : 
    5221         [ #  # ]:          0 :                 if (fatal_signal_pending(current)) {
    5222                 :            :                         count = -ERESTARTSYS;
    5223                 :            :                         break;
    5224                 :            :                 }
    5225                 :            : 
    5226         [ #  # ]:          0 :                 if (need_resched()) {
    5227                 :            :                         ext4_unlock_group(sb, group);
    5228                 :          0 :                         cond_resched();
    5229                 :          0 :                         ext4_lock_group(sb, group);
    5230                 :            :                 }
    5231                 :            : 
    5232         [ #  # ]:          0 :                 if ((e4b.bd_info->bb_free - free_count) < minblocks)
    5233                 :            :                         break;
    5234                 :            :         }
    5235                 :            : 
    5236         [ #  # ]:          0 :         if (!ret) {
    5237                 :            :                 ret = count;
    5238                 :          0 :                 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
    5239                 :            :         }
    5240                 :            : out:
    5241                 :            :         ext4_unlock_group(sb, group);
    5242                 :          0 :         ext4_mb_unload_buddy(&e4b);
    5243                 :            : 
    5244                 :            :         ext4_debug("trimmed %d blocks in the group %d\n",
    5245                 :            :                 count, group);
    5246                 :            : 
    5247                 :          0 :         return ret;
    5248                 :            : }
    5249                 :            : 
    5250                 :            : /**
    5251                 :            :  * ext4_trim_fs() -- trim ioctl handle function
    5252                 :            :  * @sb:                 superblock for filesystem
    5253                 :            :  * @range:              fstrim_range structure
    5254                 :            :  *
    5255                 :            :  * start:       First Byte to trim
    5256                 :            :  * len:         number of Bytes to trim from start
    5257                 :            :  * minlen:      minimum extent length in Bytes
    5258                 :            :  * ext4_trim_fs goes through all allocation groups containing Bytes from
    5259                 :            :  * start to start+len. For each such group, the ext4_trim_all_free function
    5260                 :            :  * is invoked to trim all free space.
    5261                 :            :  */
    5262                 :          0 : int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
    5263                 :            : {
    5264                 :            :         struct ext4_group_info *grp;
    5265                 :            :         ext4_group_t group, first_group, last_group;
    5266                 :            :         ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
    5267                 :            :         uint64_t start, end, minlen, trimmed = 0;
    5268                 :          0 :         ext4_fsblk_t first_data_blk =
    5269                 :          0 :                         le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
    5270                 :            :         ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
    5271                 :            :         int ret = 0;
    5272                 :            : 
    5273                 :          0 :         start = range->start >> sb->s_blocksize_bits;
    5274                 :          0 :         end = start + (range->len >> sb->s_blocksize_bits) - 1;
    5275                 :          0 :         minlen = EXT4_NUM_B2C(EXT4_SB(sb),
    5276                 :            :                               range->minlen >> sb->s_blocksize_bits);
    5277                 :            : 
    5278   [ #  #  #  # ]:          0 :         if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
    5279         [ #  # ]:          0 :             start >= max_blks ||
    5280                 :          0 :             range->len < sb->s_blocksize)
    5281                 :            :                 return -EINVAL;
    5282         [ #  # ]:          0 :         if (end >= max_blks)
    5283                 :          0 :                 end = max_blks - 1;
    5284         [ #  # ]:          0 :         if (end <= first_data_blk)
    5285                 :            :                 goto out;
    5286         [ #  # ]:          0 :         if (start < first_data_blk)
    5287                 :            :                 start = first_data_blk;
    5288                 :            : 
    5289                 :            :         /* Determine first and last group to examine based on start and end */
    5290                 :          0 :         ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
    5291                 :            :                                      &first_group, &first_cluster);
    5292                 :          0 :         ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
    5293                 :            :                                      &last_group, &last_cluster);
    5294                 :            : 
    5295                 :            :         /* end now represents the last cluster to discard in this group */
    5296                 :          0 :         end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
    5297                 :            : 
    5298         [ #  # ]:          0 :         for (group = first_group; group <= last_group; group++) {
    5299                 :          0 :                 grp = ext4_get_group_info(sb, group);
    5300                 :            :                 /* We only do this if the grp has never been initialized */
    5301         [ #  # ]:          0 :                 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
    5302                 :          0 :                         ret = ext4_mb_init_group(sb, group, GFP_NOFS);
    5303         [ #  # ]:          0 :                         if (ret)
    5304                 :            :                                 break;
    5305                 :            :                 }
    5306                 :            : 
    5307                 :            :                 /*
    5308                 :            :                  * For all the groups except the last one, the last cluster will
    5309                 :            :                  * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
    5310                 :            :                  * change it for the last group. Note that last_cluster is
    5311                 :            :                  * already computed earlier by ext4_get_group_no_and_offset()
    5312                 :            :                  */
    5313         [ #  # ]:          0 :                 if (group == last_group)
    5314                 :          0 :                         end = last_cluster;
    5315                 :            : 
    5316         [ #  # ]:          0 :                 if (grp->bb_free >= minlen) {
    5317                 :          0 :                         cnt = ext4_trim_all_free(sb, group, first_cluster,
    5318                 :            :                                                 end, minlen);
    5319         [ #  # ]:          0 :                         if (cnt < 0) {
    5320                 :          0 :                                 ret = cnt;
    5321                 :          0 :                                 break;
    5322                 :            :                         }
    5323                 :          0 :                         trimmed += cnt;
    5324                 :            :                 }
    5325                 :            : 
    5326                 :            :                 /*
    5327                 :            :                  * For every group except the first one, we are sure
    5328                 :            :                  * that the first cluster to discard will be cluster #0.
    5329                 :            :                  */
    5330                 :          0 :                 first_cluster = 0;
    5331                 :            :         }
    5332                 :            : 
    5333         [ #  # ]:          0 :         if (!ret)
    5334                 :          0 :                 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
    5335                 :            : 
    5336                 :            : out:
    5337                 :          0 :         range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
    5338                 :          0 :         return ret;
    5339                 :            : }
    5340                 :            : 
    5341                 :            : /* Iterate all the free extents in the group. */
    5342                 :            : int
    5343                 :          0 : ext4_mballoc_query_range(
    5344                 :            :         struct super_block              *sb,
    5345                 :            :         ext4_group_t                    group,
    5346                 :            :         ext4_grpblk_t                   start,
    5347                 :            :         ext4_grpblk_t                   end,
    5348                 :            :         ext4_mballoc_query_range_fn     formatter,
    5349                 :            :         void                            *priv)
    5350                 :            : {
    5351                 :            :         void                            *bitmap;
    5352                 :            :         ext4_grpblk_t                   next;
    5353                 :            :         struct ext4_buddy               e4b;
    5354                 :            :         int                             error;
    5355                 :            : 
    5356                 :            :         error = ext4_mb_load_buddy(sb, group, &e4b);
    5357         [ #  # ]:          0 :         if (error)
    5358                 :            :                 return error;
    5359                 :          0 :         bitmap = e4b.bd_bitmap;
    5360                 :            : 
    5361                 :          0 :         ext4_lock_group(sb, group);
    5362                 :            : 
    5363                 :          0 :         start = (e4b.bd_info->bb_first_free > start) ?
    5364                 :          0 :                 e4b.bd_info->bb_first_free : start;
    5365         [ #  # ]:          0 :         if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
    5366                 :          0 :                 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
    5367                 :            : 
    5368         [ #  # ]:          0 :         while (start <= end) {
    5369                 :          0 :                 start = mb_find_next_zero_bit(bitmap, end + 1, start);
    5370         [ #  # ]:          0 :                 if (start > end)
    5371                 :            :                         break;
    5372                 :          0 :                 next = mb_find_next_bit(bitmap, end + 1, start);
    5373                 :            : 
    5374                 :            :                 ext4_unlock_group(sb, group);
    5375                 :          0 :                 error = formatter(sb, group, start, next - start, priv);
    5376         [ #  # ]:          0 :                 if (error)
    5377                 :            :                         goto out_unload;
    5378                 :          0 :                 ext4_lock_group(sb, group);
    5379                 :            : 
    5380                 :          0 :                 start = next + 1;
    5381                 :            :         }
    5382                 :            : 
    5383                 :            :         ext4_unlock_group(sb, group);
    5384                 :            : out_unload:
    5385                 :          0 :         ext4_mb_unload_buddy(&e4b);
    5386                 :            : 
    5387                 :          0 :         return error;
    5388                 :            : }

Generated by: LCOV version 1.14