LCOV - Real - mm/mremap.c

LCOV - code coverage report

Current view:	top level - mm - mremap.c (source / functions)		Hit	Total	Coverage
Test:	Real	Lines:	123	192	64.1 %
Date:	2020-10-17 15:46:43	Functions:	0	10	0.0 %
Legend:	Neither, QEMU, Real, Both	Branches:	0	0	-

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: GPL-2.0
       2                 :            : /*
       3                 :            :  *      mm/mremap.c
       4                 :            :  *
       5                 :            :  *      (C) Copyright 1996 Linus Torvalds
       6                 :            :  *
       7                 :            :  *      Address space accounting code   <alan@lxorguk.ukuu.org.uk>
       8                 :            :  *      (C) Copyright 2002 Red Hat Inc, All Rights Reserved
       9                 :            :  */
      10                 :            : 
      11                 :            : #include <linux/mm.h>
      12                 :            : #include <linux/hugetlb.h>
      13                 :            : #include <linux/shm.h>
      14                 :            : #include <linux/ksm.h>
      15                 :            : #include <linux/mman.h>
      16                 :            : #include <linux/swap.h>
      17                 :            : #include <linux/capability.h>
      18                 :            : #include <linux/fs.h>
      19                 :            : #include <linux/swapops.h>
      20                 :            : #include <linux/highmem.h>
      21                 :            : #include <linux/security.h>
      22                 :            : #include <linux/syscalls.h>
      23                 :            : #include <linux/mmu_notifier.h>
      24                 :            : #include <linux/uaccess.h>
      25                 :            : #include <linux/mm-arch-hooks.h>
      26                 :            : #include <linux/userfaultfd_k.h>
      27                 :            : 
      28                 :            : #include <asm/cacheflush.h>
      29                 :            : #include <asm/tlbflush.h>
      30                 :            : 
      31                 :            : #include "internal.h"
      32                 :            : 
      33                 :            : static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
      34                 :            : {       /*
                         :            :          * Look up the existing pmd entry covering @addr in @mm by
                         :            :          * walking pgd -> p4d -> pud -> pmd.  Nothing is allocated here.
                         :            :          *
                         :            :          * Returns the pmd pointer, or NULL when an upper level fails its
                         :            :          * *_none_or_clear_bad() check or the pmd itself is pmd_none().
                         :            :          */
      35                 :            :         pgd_t *pgd;
      36                 :            :         p4d_t *p4d;
      37                 :            :         pud_t *pud;
      38                 :            :         pmd_t *pmd;
      39                 :            : 
      40                 :          3 :         pgd = pgd_offset(mm, addr);
      41                 :            :         if (pgd_none_or_clear_bad(pgd))
      42                 :            :                 return NULL;
      43                 :            : 
      44                 :            :         p4d = p4d_offset(pgd, addr);
      45                 :            :         if (p4d_none_or_clear_bad(p4d))
      46                 :            :                 return NULL;
      47                 :            : 
      48                 :            :         pud = pud_offset(p4d, addr);
      49                 :            :         if (pud_none_or_clear_bad(pud))
      50                 :            :                 return NULL;
      51                 :            : 
      52                 :            :         pmd = pmd_offset(pud, addr);
      53                 :          3 :         if (pmd_none(*pmd))     /* only "none" is tested at the pmd level */
      54                 :            :                 return NULL;
      55                 :            : 
      56                 :            :         return pmd;
      57                 :            : }
      58                 :            : 
      59                 :            : static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
      60                 :            :                             unsigned long addr)
      61                 :            : {       /*
                         :            :          * Return the pmd entry covering @addr in @mm, obtaining the
                         :            :          * p4d, pud and pmd levels through p4d_alloc(), pud_alloc() and
                         :            :          * pmd_alloc().
                         :            :          *
                         :            :          * Returns NULL as soon as one of those calls returns NULL.
                         :            :          * @vma is not referenced anywhere in this function body.
                         :            :          */
      62                 :            :         pgd_t *pgd;
      63                 :            :         p4d_t *p4d;
      64                 :            :         pud_t *pud;
      65                 :            :         pmd_t *pmd;
      66                 :            : 
      67                 :          3 :         pgd = pgd_offset(mm, addr);
      68                 :            :         p4d = p4d_alloc(mm, pgd, addr);
      69                 :          3 :         if (!p4d)
      70                 :            :                 return NULL;
      71                 :            :         pud = pud_alloc(mm, p4d, addr);
      72                 :          3 :         if (!pud)
      73                 :            :                 return NULL;
      74                 :            : 
      75                 :            :         pmd = pmd_alloc(mm, pud, addr);
      76                 :          3 :         if (!pmd)
      77                 :            :                 return NULL;
      78                 :            : 
      79                 :            :         VM_BUG_ON(pmd_trans_huge(*pmd));        /* destination must not be a huge pmd */
      80                 :            : 
      81                 :            :         return pmd;
      82                 :            : }
      83                 :            : 
      84                 :          0 : static void take_rmap_locks(struct vm_area_struct *vma)
      85                 :            : {       /*
                         :            :          * Take the rmap-related locks of @vma for writing: first the
                         :            :          * i_mmap lock of the backing file's mapping (if vm_file is set),
                         :            :          * then the anon_vma lock (if anon_vma is set).
                         :            :          * drop_rmap_locks() releases them in the opposite order.
                         :            :          */
      86                 :          0 :         if (vma->vm_file)
      87                 :          0 :                 i_mmap_lock_write(vma->vm_file->f_mapping);
      88                 :          0 :         if (vma->anon_vma)
      89                 :            :                 anon_vma_lock_write(vma->anon_vma);
      90                 :          0 : }
      91                 :            : 
      92                 :          0 : static void drop_rmap_locks(struct vm_area_struct *vma)
      93                 :            : {       /*
                         :            :          * Release the locks acquired by take_rmap_locks(), in reverse
                         :            :          * acquisition order: the anon_vma lock first (if anon_vma is
                         :            :          * set), then the i_mmap lock of the file mapping (if vm_file is
                         :            :          * set).
                         :            :          */
      94                 :          0 :         if (vma->anon_vma)
      95                 :            :                 anon_vma_unlock_write(vma->anon_vma);
      96                 :          0 :         if (vma->vm_file)
      97                 :          0 :                 i_mmap_unlock_write(vma->vm_file->f_mapping);
      98                 :          0 : }
      99                 :            : 
     100                 :            : static pte_t move_soft_dirty_pte(pte_t pte)
     101                 :            : {
     102                 :            :         /*
     103                 :            :          * Set soft dirty bit so we can notice
     104                 :            :          * in userspace the ptes were moved.
                         :            :          *
                         :            :          * Present ptes and swap ptes are marked with different helpers.
                         :            :          * Any other pte, and every pte when CONFIG_MEM_SOFT_DIRTY is not
                         :            :          * defined, is returned unchanged.
     105                 :            :          */
     106                 :            : #ifdef CONFIG_MEM_SOFT_DIRTY
     107                 :            :         if (pte_present(pte))
     108                 :            :                 pte = pte_mksoft_dirty(pte);
     109                 :            :         else if (is_swap_pte(pte))
     110                 :            :                 pte = pte_swp_mksoft_dirty(pte);
     111                 :            : #endif
     112                 :            :         return pte;
     113                 :            : }
     114                 :            : 
     115                 :          3 : static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
     116                 :            :                 unsigned long old_addr, unsigned long old_end,
     117                 :            :                 struct vm_area_struct *new_vma, pmd_t *new_pmd,
     118                 :            :                 unsigned long new_addr, bool need_rmap_locks)
     119                 :            : {       /*
                         :            :          * Move the ptes for [old_addr, old_end) found under @old_pmd to
                         :            :          * the slots starting at @new_addr under @new_pmd, one PAGE_SIZE
                         :            :          * step at a time.  Each non-empty old pte is cleared, passed
                         :            :          * through move_pte() and move_soft_dirty_pte(), and installed at
                         :            :          * the new address.  Empty old ptes are skipped.
                         :            :          *
                         :            :          * @need_rmap_locks: when true, the work is bracketed by
                         :            :          * take_rmap_locks(vma) / drop_rmap_locks(vma).
                         :            :          */
     120                 :          3 :         struct mm_struct *mm = vma->vm_mm;
     121                 :            :         pte_t *old_pte, *new_pte, pte;
     122                 :            :         spinlock_t *old_ptl, *new_ptl;
     123                 :            :         bool force_flush = false;
     124                 :            :         unsigned long len = old_end - old_addr;         /* saved now: the loop below advances old_addr */
     125                 :            : 
     126                 :            :         /*
     127                 :            :          * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
     128                 :            :          * locks to ensure that rmap will always observe either the old or the
     129                 :            :          * new ptes. This is the easiest way to avoid races with
     130                 :            :          * truncate_pagecache(), page migration, etc...
     131                 :            :          *
     132                 :            :          * When need_rmap_locks is false, we use other ways to avoid
     133                 :            :          * such races:
     134                 :            :          *
     135                 :            :          * - During exec() shift_arg_pages(), we use a specially tagged vma
     136                 :            :          *   which rmap call sites look for using is_vma_temporary_stack().
     137                 :            :          *
     138                 :            :          * - During mremap(), new_vma is often known to be placed after vma
     139                 :            :          *   in rmap traversal order. This ensures rmap will always observe
     140                 :            :          *   either the old pte, or the new pte, or both (the page table locks
     141                 :            :          *   serialize access to individual ptes, but only rmap traversal
     142                 :            :          *   order guarantees that we won't miss both the old and new ptes).
     143                 :            :          */
     144                 :          3 :         if (need_rmap_locks)
     145                 :          0 :                 take_rmap_locks(vma);
     146                 :            : 
     147                 :            :         /*
     148                 :            :          * We don't have to worry about the ordering of src and dst
     149                 :            :          * pte locks because exclusive mmap_sem prevents deadlock.
     150                 :            :          */
     151                 :          3 :         old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
     152                 :          3 :         new_pte = pte_offset_map(new_pmd, new_addr);
     153                 :            :         new_ptl = pte_lockptr(mm, new_pmd);
     154                 :          3 :         if (new_ptl != old_ptl)         /* same lock for both sides: already held, do not take it twice */
     155                 :          3 :                 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
     156                 :            :         flush_tlb_batched_pending(vma->vm_mm);
     157                 :            :         arch_enter_lazy_mmu_mode();
     158                 :            : 
     159                 :          3 :         for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
     160                 :          3 :                                    new_pte++, new_addr += PAGE_SIZE) {
     161                 :          3 :                 if (pte_none(*old_pte))
     162                 :          3 :                         continue;
     163                 :            : 
     164                 :            :                 pte = ptep_get_and_clear(mm, old_addr, old_pte);
     165                 :            :                 /*
     166                 :            :                  * If we are remapping a valid PTE, make sure
     167                 :            :                  * to flush TLB before we drop the PTL for the
     168                 :            :                  * PTE.
     169                 :            :                  *
     170                 :            :                  * NOTE! Both old and new PTL matter: the old one
     171                 :            :                  * for racing with page_mkclean(), the new one to
     172                 :            :                  * make sure the physical page stays valid until
     173                 :            :                  * the TLB entry for the old mapping has been
     174                 :            :                  * flushed.
     175                 :            :                  */
     176                 :          3 :                 if (pte_present(pte))
     177                 :            :                         force_flush = true;
     178                 :            :                 pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
     179                 :            :                 pte = move_soft_dirty_pte(pte);
     180                 :          3 :                 set_pte_at(mm, new_addr, new_pte, pte);
     181                 :            :         }
     182                 :            : 
     183                 :            :         arch_leave_lazy_mmu_mode();
     184                 :          3 :         if (force_flush)
     185                 :          3 :                 flush_tlb_range(vma, old_end - len, old_end);   /* old_end - len is the original old_addr */
     186                 :          3 :         if (new_ptl != old_ptl)
     187                 :            :                 spin_unlock(new_ptl);
     188                 :            :         pte_unmap(new_pte - 1);         /* the loop advanced both pte pointers past the last entry visited */
     189                 :            :         pte_unmap_unlock(old_pte - 1, old_ptl);
     190                 :          3 :         if (need_rmap_locks)
     191                 :          0 :                 drop_rmap_locks(vma);
     192                 :          3 : }
     193                 :            : 
     194                 :            : #ifdef CONFIG_HAVE_MOVE_PMD
     195                 :            : static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
     196                 :            :                   unsigned long new_addr, unsigned long old_end,
     197                 :            :                   pmd_t *old_pmd, pmd_t *new_pmd)
     198                 :            : {       /*
                         :            :          * Move a whole pmd entry from @old_pmd to @new_pmd instead of
                         :            :          * moving the ptes beneath it one by one.
                         :            :          *
                         :            :          * Returns false without touching anything when either address
                         :            :          * is not PMD aligned, when less than PMD_SIZE remains before
                         :            :          * @old_end, or when the destination pmd is not empty (which
                         :            :          * also triggers a WARN_ON).  Returns true once the entry has
                         :            :          * been moved and the old range flushed from the TLB.
                         :            :          */
     199                 :            :         spinlock_t *old_ptl, *new_ptl;
     200                 :            :         struct mm_struct *mm = vma->vm_mm;
     201                 :            :         pmd_t pmd;
     202                 :            : 
     203                 :            :         if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
     204                 :            :             || old_end - old_addr < PMD_SIZE)
     205                 :            :                 return false;
     206                 :            : 
     207                 :            :         /*
     208                 :            :          * The destination pmd shouldn't be established, free_pgtables()
     209                 :            :          * should have released it.
     210                 :            :          */
     211                 :            :         if (WARN_ON(!pmd_none(*new_pmd)))
     212                 :            :                 return false;
     213                 :            : 
     214                 :            :         /*
     215                 :            :          * We don't have to worry about the ordering of src and dst
     216                 :            :          * ptlocks because exclusive mmap_sem prevents deadlock.
     217                 :            :          */
     218                 :            :         old_ptl = pmd_lock(vma->vm_mm, old_pmd);
     219                 :            :         new_ptl = pmd_lockptr(mm, new_pmd);
     220                 :            :         if (new_ptl != old_ptl)         /* same lock for both sides: already held, do not take it twice */
     221                 :            :                 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
     222                 :            : 
     223                 :            :         /* Clear the pmd */
     224                 :            :         pmd = *old_pmd;
     225                 :            :         pmd_clear(old_pmd);
     226                 :            : 
     227                 :            :         VM_BUG_ON(!pmd_none(*new_pmd));         /* re-checked now that the locks are held */
     228                 :            : 
     229                 :            :         /* Set the new pmd */
     230                 :            :         set_pmd_at(mm, new_addr, new_pmd, pmd);
     231                 :            :         flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
     232                 :            :         if (new_ptl != old_ptl)
     233                 :            :                 spin_unlock(new_ptl);
     234                 :            :         spin_unlock(old_ptl);
     235                 :            : 
     236                 :            :         return true;
     237                 :            : }
     238                 :            : #endif
     239                 :            : 
     240                 :          3 : unsigned long move_page_tables(struct vm_area_struct *vma,
     241                 :            :                 unsigned long old_addr, struct vm_area_struct *new_vma,
     242                 :            :                 unsigned long new_addr, unsigned long len,
     243                 :            :                 bool need_rmap_locks)
     244                 :            : {       /*
                         :            :          * Move the page table entries covering
                         :            :          * [old_addr, old_addr + len) in @vma to @new_addr.
                         :            :          *
                         :            :          * The range is processed in extents that never cross a PMD
                         :            :          * boundary on the old side.  Before falling back to move_ptes()
                         :            :          * the extent is clipped again so that it does not cross a PMD
                         :            :          * boundary on the new side either.
                         :            :          *
                         :            :          * Swap, transparent-huge and devmap pmds are handed to
                         :            :          * move_huge_pmd() when the extent equals HPAGE_PMD_SIZE and are
                         :            :          * split otherwise.  With CONFIG_HAVE_MOVE_PMD, an extent of
                         :            :          * exactly PMD_SIZE is first tried with move_normal_pmd().
                         :            :          *
                         :            :          * The whole operation is bracketed by an MMU_NOTIFY_UNMAP
                         :            :          * mmu-notifier range over the old addresses.
                         :            :          *
                         :            :          * Returns the number of bytes handled: @len, unless the loop
                         :            :          * stopped early because alloc_new_pmd() or pte_alloc() failed.
                         :            :          * Extents with no old pmd are skipped and still count as done.
                         :            :          */
     245                 :            :         unsigned long extent, next, old_end;
     246                 :            :         struct mmu_notifier_range range;
     247                 :            :         pmd_t *old_pmd, *new_pmd;
     248                 :            : 
     249                 :          3 :         old_end = old_addr + len;
     250                 :          3 :         flush_cache_range(vma, old_addr, old_end);
     251                 :            : 
     252                 :            :         mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
     253                 :            :                                 old_addr, old_end);
     254                 :            :         mmu_notifier_invalidate_range_start(&range);
     255                 :            : 
     256                 :          3 :         for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
     257                 :          3 :                 cond_resched();
     258                 :          3 :                 next = (old_addr + PMD_SIZE) & PMD_MASK;
     259                 :            :                 /* even if next overflowed, extent below will be ok */
     260                 :          3 :                 extent = next - old_addr;
     261                 :          3 :                 if (extent > old_end - old_addr)
     262                 :            :                         extent = old_end - old_addr;
     263                 :          3 :                 old_pmd = get_old_pmd(vma->vm_mm, old_addr);
     264                 :          3 :                 if (!old_pmd)           /* nothing mapped under this pmd: skip the extent */
     265                 :          0 :                         continue;
     266                 :            :                 new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
     267                 :          3 :                 if (!new_pmd)           /* allocation failed: stop and report a short move */
     268                 :            :                         break;
     269                 :            :                 if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) {
     270                 :            :                         if (extent == HPAGE_PMD_SIZE) {
     271                 :            :                                 bool moved;
     272                 :            :                                 /* See comment in move_ptes() */
     273                 :            :                                 if (need_rmap_locks)
     274                 :            :                                         take_rmap_locks(vma);
     275                 :            :                                 moved = move_huge_pmd(vma, old_addr, new_addr,
     276                 :            :                                                     old_end, old_pmd, new_pmd);
     277                 :            :                                 if (need_rmap_locks)
     278                 :            :                                         drop_rmap_locks(vma);
     279                 :            :                                 if (moved)
     280                 :            :                                         continue;
     281                 :            :                         }
     282                 :            :                         split_huge_pmd(vma, old_pmd, old_addr);         /* not moved as a whole: split, then go pte by pte */
     283                 :            :                         if (pmd_trans_unstable(old_pmd))
     284                 :            :                                 continue;
     285                 :            :                 } else if (extent == PMD_SIZE) {
     286                 :            : #ifdef CONFIG_HAVE_MOVE_PMD
     287                 :            :                         /*
     288                 :            :                          * If the extent is PMD-sized, try to speed the move by
     289                 :            :                          * moving at the PMD level if possible.
     290                 :            :                          */
     291                 :            :                         bool moved;
     292                 :            : 
     293                 :            :                         if (need_rmap_locks)
     294                 :            :                                 take_rmap_locks(vma);
     295                 :            :                         moved = move_normal_pmd(vma, old_addr, new_addr,
     296                 :            :                                         old_end, old_pmd, new_pmd);
     297                 :            :                         if (need_rmap_locks)
     298                 :            :                                 drop_rmap_locks(vma);
     299                 :            :                         if (moved)
     300                 :            :                                 continue;
     301                 :            : #endif
     302                 :            :                 }
     303                 :            : 
     304                 :          3 :                 if (pte_alloc(new_vma->vm_mm, new_pmd))         /* non-zero return: stop and report a short move */
     305                 :            :                         break;
     306                 :          3 :                 next = (new_addr + PMD_SIZE) & PMD_MASK;
     307                 :          3 :                 if (extent > next - new_addr)           /* also stay within one pmd on the destination side */
     308                 :            :                         extent = next - new_addr;
     309                 :          3 :                 move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
     310                 :            :                           new_pmd, new_addr, need_rmap_locks);
     311                 :            :         }
     312                 :            : 
     313                 :            :         mmu_notifier_invalidate_range_end(&range);
     314                 :            : 
     315                 :          3 :         return len + old_addr - old_end;        /* how much done */
     316                 :            : }
     317                 :            : 
     318                 :          3 : static unsigned long move_vma(struct vm_area_struct *vma,
     319                 :            :                 unsigned long old_addr, unsigned long old_len,
     320                 :            :                 unsigned long new_len, unsigned long new_addr,
     321                 :            :                 bool *locked, struct vm_userfaultfd_ctx *uf,
     322                 :            :                 struct list_head *uf_unmap)
     323                 :            : {       /*
                         :            :          * Relocate the mapping [old_addr, old_addr + old_len) of @vma
                         :            :          * to @new_addr with length @new_len: create the destination vma
                         :            :          * with copy_vma(), move the page tables across, then
                         :            :          * do_munmap() the old range.
                         :            :          *
                         :            :          * If moving the page tables falls short, or the vma's
                         :            :          * vm_ops->mremap() hook returns an error, the entries are moved
                         :            :          * back and the new area is unmapped instead of the old one.
                         :            :          *
                         :            :          * *@locked is set to true when the local vm_flags copy has
                         :            :          * VM_LOCKED.  @uf is passed to mremap_userfaultfd_prep() on
                         :            :          * success and @uf_unmap is passed to do_munmap().
                         :            :          *
                         :            :          * Returns @new_addr on success, otherwise a negative errno
                         :            :          * value converted to unsigned long.
                         :            :          */
     324                 :          3 :         struct mm_struct *mm = vma->vm_mm;
     325                 :            :         struct vm_area_struct *new_vma;
     326                 :          3 :         unsigned long vm_flags = vma->vm_flags;
     327                 :            :         unsigned long new_pgoff;
     328                 :            :         unsigned long moved_len;
     329                 :            :         unsigned long excess = 0;
     330                 :            :         unsigned long hiwater_vm;
     331                 :            :         int split = 0;
     332                 :            :         int err;
     333                 :            :         bool need_rmap_locks;
     334                 :            : 
     335                 :            :         /*
     336                 :            :          * We'd prefer to avoid failure later on in do_munmap:
     337                 :            :          * which may split one vma into three before unmapping.
     338                 :            :          */
     339                 :          3 :         if (mm->map_count >= sysctl_max_map_count - 3)
     340                 :            :                 return -ENOMEM;
     341                 :            : 
     342                 :            :         /*
     343                 :            :          * Advise KSM to break any KSM pages in the area to be moved:
     344                 :            :          * it would be confusing if they were to turn up at the new
     345                 :            :          * location, where they happen to coincide with different KSM
     346                 :            :          * pages recently unmapped.  But leave vma->vm_flags as it was,
     347                 :            :          * so KSM can come around to merge on vma and new_vma afterwards.
     348                 :            :          */
     349                 :            :         err = ksm_madvise(vma, old_addr, old_addr + old_len,
     350                 :            :                                                 MADV_UNMERGEABLE, &vm_flags);
     351                 :            :         if (err)
     352                 :            :                 return err;
     353                 :            : 
     354                 :          3 :         new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
     355                 :          3 :         new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
     356                 :            :                            &need_rmap_locks);
     357                 :          3 :         if (!new_vma)
     358                 :            :                 return -ENOMEM;
     359                 :            : 
     360                 :          3 :         moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
     361                 :            :                                      need_rmap_locks);
     362                 :          3 :         if (moved_len < old_len) {
     363                 :            :                 err = -ENOMEM;
     364                 :          3 :         } else if (vma->vm_ops && vma->vm_ops->mremap) {
     365                 :          0 :                 err = vma->vm_ops->mremap(new_vma);
     366                 :            :         }
     367                 :            : 
     368                 :          3 :         if (unlikely(err)) {
     369                 :            :                 /*
     370                 :            :                  * On error, move entries back from new area to old,
     371                 :            :                  * which will succeed since page tables still there,
     372                 :            :                  * and then proceed to unmap new area instead of old.
     373                 :            :                  */
     374                 :          0 :                 move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
     375                 :            :                                  true);
     376                 :          0 :                 vma = new_vma;
     377                 :            :                 old_len = new_len;
     378                 :            :                 old_addr = new_addr;
     379                 :          0 :                 new_addr = err;         /* the error code becomes the return value */
     380                 :            :         } else {
     381                 :            :                 mremap_userfaultfd_prep(new_vma, uf);
     382                 :            :                 arch_remap(mm, old_addr, old_addr + old_len,
     383                 :            :                            new_addr, new_addr + new_len);
     384                 :            :         }
     385                 :            : 
     386                 :            :         /* Conceal VM_ACCOUNT so old reservation is not undone */
     387                 :          3 :         if (vm_flags & VM_ACCOUNT) {
     388                 :          3 :                 vma->vm_flags &= ~VM_ACCOUNT;
     389                 :          3 :                 excess = vma->vm_end - vma->vm_start - old_len;
     390                 :          3 :                 if (old_addr > vma->vm_start &&
     391                 :          0 :                     old_addr + old_len < vma->vm_end)
     392                 :            :                         split = 1;      /* range is strictly inside the vma: a piece remains on each side */
     393                 :            :         }
     394                 :            : 
     395                 :            :         /*
     396                 :            :          * If we failed to move page tables we still do total_vm increment
     397                 :            :          * since do_munmap() will decrement it by old_len == new_len.
     398                 :            :          *
     399                 :            :          * Since total_vm is about to be raised artificially high for a
     400                 :            :          * moment, we need to restore high watermark afterwards: if stats
     401                 :            :          * are taken meanwhile, total_vm and hiwater_vm appear too high.
     402                 :            :          * If this were a serious issue, we'd add a flag to do_munmap().
     403                 :            :          */
     404                 :          3 :         hiwater_vm = mm->hiwater_vm;
     405                 :          3 :         vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
     406                 :            : 
     407                 :            :         /* Tell pfnmap has moved from this vma */
     408                 :            :         if (unlikely(vma->vm_flags & VM_PFNMAP))
     409                 :            :                 untrack_pfn_moved(vma);
     410                 :            : 
     411                 :          3 :         if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
     412                 :            :                 /* OOM: unable to split vma, just get accounts right */
     413                 :          0 :                 vm_unacct_memory(excess >> PAGE_SHIFT);
     414                 :            :                 excess = 0;
     415                 :            :         }
     416                 :          3 :         mm->hiwater_vm = hiwater_vm;    /* put back the value saved before vm_stat_account() */
     417                 :            : 
     418                 :            :         /* Restore VM_ACCOUNT if one or two pieces of vma left */
     419                 :          3 :         if (excess) {
     420                 :          3 :                 vma->vm_flags |= VM_ACCOUNT;
     421                 :          3 :                 if (split)
     422                 :          0 :                         vma->vm_next->vm_flags |= VM_ACCOUNT;
     423                 :            :         }
     424                 :            : 
     425                 :          3 :         if (vm_flags & VM_LOCKED) {
     426                 :          0 :                 mm->locked_vm += new_len >> PAGE_SHIFT;
     427                 :          0 :                 *locked = true;
     428                 :            :         }
     429                 :            : 
     430                 :          3 :         return new_addr;
     431                 :            : }
     432                 :            : 
                                        /*
                                         * vma_to_resize() - look up and validate the mapping that an mremap()
                                         * request wants to resize.
                                         *
                                         * @addr:    start of the range being remapped
                                         * @old_len: current length of the range
                                         * @new_len: requested length; the callers visible in this file only call
                                         *           this with new_len >= old_len
                                         * @p:       out-parameter, written only when the vma has VM_ACCOUNT set
                                         *           and security_vm_enough_memory_mm() accepted the growth; it
                                         *           then holds the number of additional pages.  Callers in this
                                         *           file pass it to vm_unacct_memory() if the remap later fails.
                                         *
                                         * Returns the vma of current->mm that contains @addr, or an ERR_PTR():
                                         *   -EFAULT  no vma contains @addr, the old range crosses the end of the
                                         *            vma, or the vma is VM_DONTEXPAND / VM_PFNMAP
                                         *   -EINVAL  old_len == 0 on a non-shared mapping, a hugetlb vma, or the
                                         *            grown mapping's page offset would wrap around
                                         *   -EAGAIN  a VM_LOCKED vma would exceed RLIMIT_MEMLOCK and the caller
                                         *            lacks CAP_IPC_LOCK
                                         *   -ENOMEM  may_expand_vm() or security_vm_enough_memory_mm() refused
                                         */
     433                 :          3 : static struct vm_area_struct *vma_to_resize(unsigned long addr,
     434                 :            :         unsigned long old_len, unsigned long new_len, unsigned long *p)
     435                 :            : {
     436                 :          3 :         struct mm_struct *mm = current->mm;
     437                 :          3 :         struct vm_area_struct *vma = find_vma(mm, addr);
     438                 :            :         unsigned long pgoff;
     439                 :            : 
                                                /* The vma returned by find_vma() must actually contain addr */
     440                 :          3 :         if (!vma || vma->vm_start > addr)
     441                 :            :                 return ERR_PTR(-EFAULT);
     442                 :            : 
     443                 :            :         /*
     444                 :            :          * !old_len is a special case where an attempt is made to 'duplicate'
     445                 :            :          * a mapping.  This makes no sense for private mappings as it will
     446                 :            :          * instead create a fresh/new mapping unrelated to the original.  This
     447                 :            :          * is contrary to the basic idea of mremap which creates new mappings
     448                 :            :          * based on the original.  There are no known use cases for this
     449                 :            :          * behavior.  As a result, fail such attempts.
     450                 :            :          */
     451                 :          3 :         if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
     452                 :          0 :                 pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap.  This is not supported.\n", current->comm, current->pid);
     453                 :            :                 return ERR_PTR(-EINVAL);
     454                 :            :         }
     455                 :            : 
                                                /* hugetlb mappings are never resized through this path */
     456                 :            :         if (is_vm_hugetlb_page(vma))
     457                 :            :                 return ERR_PTR(-EINVAL);
     458                 :            : 
     459                 :            :         /* We can't remap across vm area boundaries */
     460                 :          3 :         if (old_len > vma->vm_end - addr)
     461                 :            :                 return ERR_PTR(-EFAULT);
     462                 :            : 
                                                /* No growth requested: the expansion checks below do not apply */
     463                 :          3 :         if (new_len == old_len)
     464                 :            :                 return vma;
     465                 :            : 
     466                 :            :         /* Need to be careful about a growing mapping: the page offset of its end must not wrap */
     467                 :          3 :         pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
     468                 :          3 :         pgoff += vma->vm_pgoff;
     469                 :          3 :         if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
     470                 :            :                 return ERR_PTR(-EINVAL);
     471                 :            : 
     472                 :          3 :         if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
     473                 :            :                 return ERR_PTR(-EFAULT);
     474                 :            : 
                                                /*
                                                 * For a locked vma the extra bytes count against RLIMIT_MEMLOCK,
                                                 * unless the caller has CAP_IPC_LOCK.
                                                 */
     475                 :          3 :         if (vma->vm_flags & VM_LOCKED) {
     476                 :            :                 unsigned long locked, lock_limit;
     477                 :          0 :                 locked = mm->locked_vm << PAGE_SHIFT;
     478                 :            :                 lock_limit = rlimit(RLIMIT_MEMLOCK);
     479                 :          0 :                 locked += new_len - old_len;
     480                 :          0 :                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
     481                 :            :                         return ERR_PTR(-EAGAIN);
     482                 :            :         }
     483                 :            : 
     484                 :          3 :         if (!may_expand_vm(mm, vma->vm_flags,
     485                 :          3 :                                 (new_len - old_len) >> PAGE_SHIFT))
     486                 :            :                 return ERR_PTR(-ENOMEM);
     487                 :            : 
                                                /*
                                                 * Accounted mapping: check the additional pages and report the
                                                 * count to the caller through *p.
                                                 * NOTE(review): presumably security_vm_enough_memory_mm() also
                                                 * charges these pages on success - confirm against its definition.
                                                 */
     488                 :          3 :         if (vma->vm_flags & VM_ACCOUNT) {
     489                 :            :                 unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
     490                 :          3 :                 if (security_vm_enough_memory_mm(mm, charged))
     491                 :            :                         return ERR_PTR(-ENOMEM);
     492                 :          3 :                 *p = charged;
     493                 :            :         }
     494                 :            : 
     495                 :          3 :         return vma;
     496                 :            : }
     497                 :            : 
                                        /*
                                         * mremap_to() - MREMAP_FIXED half of mremap(): move the mapping at @addr
                                         * to the caller-chosen address @new_addr.
                                         *
                                         * @addr, @old_len:     the existing range
                                         * @new_addr, @new_len: the requested destination range
                                         * @locked:             passed through to move_vma()
                                         * @uf:                 passed through to move_vma()
                                         * @uf_unmap_early:     list given to the do_munmap() of the destination
                                         * @uf_unmap:           list given to the do_munmap() of the old tail and
                                         *                      to move_vma()
                                         *
                                         * The mremap() syscall in this file calls this with mmap_sem held for
                                         * write.  Whatever is mapped in [new_addr, new_addr + new_len) is unmapped
                                         * first; when the request shrinks the mapping, the excess tail of the old
                                         * range is unmapped as well before the move.
                                         *
                                         * Returns the address returned by move_vma() on success.  On failure
                                         * returns a negative errno as unsigned long: -EINVAL for a misaligned,
                                         * out-of-range or overlapping destination, -ENOMEM when the map count is
                                         * too close to sysctl_max_map_count, or the error reported by do_munmap(),
                                         * vma_to_resize(), get_unmapped_area() or move_vma().
                                         */
     498                 :          0 : static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
     499                 :            :                 unsigned long new_addr, unsigned long new_len, bool *locked,
     500                 :            :                 struct vm_userfaultfd_ctx *uf,
     501                 :            :                 struct list_head *uf_unmap_early,
     502                 :            :                 struct list_head *uf_unmap)
     503                 :            : {
     504                 :          0 :         struct mm_struct *mm = current->mm;
     505                 :            :         struct vm_area_struct *vma;
     506                 :            :         unsigned long ret = -EINVAL;
     507                 :          0 :         unsigned long charged = 0;
     508                 :            :         unsigned long map_flags;
     509                 :            : 
                                                /* The destination must be page aligned ... */
     510                 :          0 :         if (offset_in_page(new_addr))
     511                 :            :                 goto out;
     512                 :            : 
                                                /* ... and must fit below TASK_SIZE (written so the sum cannot wrap) */
     513                 :          0 :         if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
     514                 :            :                 goto out;
     515                 :            : 
     516                 :            :         /* Ensure the old/new locations do not overlap */
     517                 :          0 :         if (addr + old_len > new_addr && new_addr + new_len > addr)
     518                 :            :                 goto out;
     519                 :            : 
     520                 :            :         /*
     521                 :            :          * move_vma() needs us to stay 4 maps below the threshold, otherwise
     522                 :            :          * it will bail out at the very beginning.
     523                 :            :          * That is a problem if we have already unmapped the regions here
     524                 :            :          * (new_addr, and old_addr), because userspace will not know the
     525                 :            :          * state of the vma's after it gets -ENOMEM.
     526                 :            :          * So, to avoid such a scenario we can pre-compute whether the whole
     527                 :            :          * operation has a high chance of succeeding map-wise.
     528                 :            :          * Worst-scenario case is when both vma's (new_addr and old_addr) get
     529                 :            :          * split in 3 before unmapping them.
     530                 :            :          * That means 2 more maps (1 for each) to the ones we already hold.
     531                 :            :          * Check whether current map count plus 2 still leads us to 4 maps below
     532                 :            :          * the threshold, otherwise return -ENOMEM here to be more safe.
     533                 :            :          */
     534                 :          0 :         if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
     535                 :            :                 return -ENOMEM;
     536                 :            : 
                                                /* Clear out whatever currently occupies the destination range */
     537                 :          0 :         ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
     538                 :          0 :         if (ret)
     539                 :            :                 goto out;
     540                 :            : 
                                                /*
                                                 * Shrinking (or same size): drop the tail of the old range now so
                                                 * that only new_len bytes are moved.  When old_len == new_len the
                                                 * length passed is 0 and an error from that call is ignored.
                                                 */
     541                 :          0 :         if (old_len >= new_len) {
     542                 :          0 :                 ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
     543                 :          0 :                 if (ret && old_len != new_len)
     544                 :            :                         goto out;
     545                 :            :                 old_len = new_len;
     546                 :            :         }
     547                 :            : 
     548                 :          0 :         vma = vma_to_resize(addr, old_len, new_len, &charged);
     549                 :          0 :         if (IS_ERR(vma)) {
     550                 :          0 :                 ret = PTR_ERR(vma);
     551                 :          0 :                 goto out;
     552                 :            :         }
     553                 :            : 
     554                 :            :         map_flags = MAP_FIXED;
     555                 :          0 :         if (vma->vm_flags & VM_MAYSHARE)
     556                 :            :                 map_flags |= MAP_SHARED;
     557                 :            : 
     558                 :          0 :         ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
     559                 :          0 :                                 ((addr - vma->vm_start) >> PAGE_SHIFT),
     560                 :            :                                 map_flags);
                                                /* A result that is not page aligned is an error code */
     561                 :          0 :         if (offset_in_page(ret))
     562                 :            :                 goto out1;
     563                 :            : 
     564                 :          0 :         ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
     565                 :            :                        uf_unmap);
                                                /* Page-aligned result: success, so keep the accounting as it is */
     566                 :          0 :         if (!(offset_in_page(ret)))
     567                 :            :                 goto out;
     568                 :            : out1:
                                                /* Failure after vma_to_resize(): give back the pages it reported */
     569                 :          0 :         vm_unacct_memory(charged);
     570                 :            : 
     571                 :            : out:
     572                 :          0 :         return ret;
     573                 :            : }
     574                 :            : 
                                        /*
                                         * vma_expandable() - can @vma be grown in place by @delta bytes?
                                         *
                                         * @vma:   mapping to extend at its end
                                         * @delta: number of bytes to add to vma->vm_end
                                         *
                                         * Returns 1 if the extended range does not wrap around, does not run into
                                         * the next vma, and get_unmapped_area() accepts it at its current start
                                         * address with MAP_FIXED; returns 0 otherwise.
                                         */
     575                 :          3 : static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
     576                 :            : {
     577                 :          3 :         unsigned long end = vma->vm_end + delta;
     578                 :          3 :         if (end < vma->vm_end) /* overflow */
     579                 :            :                 return 0;
     580                 :          3 :         if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
     581                 :            :                 return 0;
                                                /* Any low (offset-in-page) bits in the result mean an error code */
     582                 :          0 :         if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
     583                 :          0 :                               0, MAP_FIXED) & ~PAGE_MASK)
     584                 :            :                 return 0;
     585                 :          0 :         return 1;
     586                 :            : }
     587                 :            : 
     588                 :            : /*
     589                 :            :  * Expand (or shrink) an existing mapping, potentially moving it at the
     590                 :            :  * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
     591                 :            :  *
     592                 :            :  * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
     593                 :            :  * This option implies MREMAP_MAYMOVE.
                                         *
                                         * @addr:     start of the existing mapping; must be page aligned once the
                                         *            pointer tag has been stripped
                                         * @old_len:  current length, rounded up to a page multiple
                                         * @new_len:  requested length, rounded up to a page multiple; must not
                                         *            be zero
                                         * @flags:    any combination of MREMAP_MAYMOVE and MREMAP_FIXED, where
                                         *            MREMAP_FIXED is rejected without MREMAP_MAYMOVE
                                         * @new_addr: destination address, only used as an input with MREMAP_FIXED
                                         *
                                         * Returns the start address of the resulting mapping on success.  On
                                         * failure returns a negative errno as unsigned long: -EINVAL for bad
                                         * arguments, -EINTR if down_write_killable() on mmap_sem fails, -ENOMEM if
                                         * the mapping cannot grow in place and may not (or cannot) be moved, or
                                         * the error reported by the helpers called below.
     594                 :            :  */
     595                 :          3 : SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
     596                 :            :                 unsigned long, new_len, unsigned long, flags,
     597                 :            :                 unsigned long, new_addr)
     598                 :            : {
     599                 :          3 :         struct mm_struct *mm = current->mm;
     600                 :            :         struct vm_area_struct *vma;
     601                 :            :         unsigned long ret = -EINVAL;
     602                 :          3 :         unsigned long charged = 0;
     603                 :          3 :         bool locked = false;
     604                 :            :         bool downgraded = false;
     605                 :          3 :         struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
     606                 :          3 :         LIST_HEAD(uf_unmap_early);
     607                 :          3 :         LIST_HEAD(uf_unmap);
     608                 :            : 
     609                 :            :         /*
     610                 :            :          * There is a deliberate asymmetry here: we strip the pointer tag
     611                 :            :          * from the old address but leave the new address alone. This is
     612                 :            :          * for consistency with mmap(), where we prevent the creation of
     613                 :            :          * aliasing mappings in userspace by leaving the tag bits of the
     614                 :            :          * mapping address intact. A non-zero tag will cause the subsequent
     615                 :            :          * range checks to reject the address as invalid.
     616                 :            :          *
     617                 :            :          * See Documentation/arm64/tagged-address-abi.rst for more information.
     618                 :            :          */
     619                 :            :         addr = untagged_addr(addr);
     620                 :            : 
                                                /* Unknown flag bits: fail with the initial -EINVAL */
     621                 :          3 :         if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
     622                 :            :                 return ret;
     623                 :            : 
                                                /* MREMAP_FIXED is only accepted together with MREMAP_MAYMOVE */
     624                 :          3 :         if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
     625                 :            :                 return ret;
     626                 :            : 
     627                 :          3 :         if (offset_in_page(addr))
     628                 :            :                 return ret;
     629                 :            : 
     630                 :          3 :         old_len = PAGE_ALIGN(old_len);
     631                 :          3 :         new_len = PAGE_ALIGN(new_len);
     632                 :            : 
     633                 :            :         /*
     634                 :            :          * We allow a zero old-len as a special case
     635                 :            :          * for DOS-emu "duplicate shm area" thing. But
     636                 :            :          * a zero new-len is nonsensical.
     637                 :            :          */
     638                 :          3 :         if (!new_len)
     639                 :            :                 return ret;
     640                 :            : 
                                                /* Everything from here to the out: label runs under mmap_sem */
     641                 :          3 :         if (down_write_killable(&current->mm->mmap_sem))
     642                 :            :                 return -EINTR;
     643                 :            : 
                                                /* A caller-chosen destination is handled entirely by mremap_to() */
     644                 :          3 :         if (flags & MREMAP_FIXED) {
     645                 :          0 :                 ret = mremap_to(addr, old_len, new_addr, new_len,
     646                 :            :                                 &locked, &uf, &uf_unmap_early, &uf_unmap);
     647                 :          0 :                 goto out;
     648                 :            :         }
     649                 :            : 
     650                 :            :         /*
     651                 :            :          * Always allow a shrinking remap: that just unmaps
     652                 :            :          * the unnecessary pages..
     653                 :            :          * __do_munmap does all the needed commit accounting, and
     654                 :            :          * downgrades mmap_sem to read if so directed.
     655                 :            :          */
     656                 :          3 :         if (old_len >= new_len) {
     657                 :            :                 int retval;
     658                 :            : 
     659                 :          3 :                 retval = __do_munmap(mm, addr+new_len, old_len - new_len,
     660                 :            :                                   &uf_unmap, true);
                                                        /*
                                                         * An error only counts when there was a tail to unmap;
                                                         * with old_len == new_len the length passed is 0 and the
                                                         * call's error is ignored.
                                                         */
     661                 :          3 :                 if (retval < 0 && old_len != new_len) {
     662                 :          0 :                         ret = retval;
     663                 :          0 :                         goto out;
     664                 :            :                 /* Returning 1 indicates mmap_sem is downgraded to read. */
     665                 :          3 :                 } else if (retval == 1)
     666                 :            :                         downgraded = true;
     667                 :            :                 ret = addr;
     668                 :          3 :                 goto out;
     669                 :            :         }
     670                 :            : 
     671                 :            :         /*
     672                 :            :          * Ok, we need to grow..
     673                 :            :          */
     674                 :          3 :         vma = vma_to_resize(addr, old_len, new_len, &charged);
     675                 :          3 :         if (IS_ERR(vma)) {
     676                 :          0 :                 ret = PTR_ERR(vma);
     677                 :          0 :                 goto out;
     678                 :            :         }
     679                 :            : 
     680                 :            :         /* old_len exactly to the end of the area..
     681                 :            :          */
     682                 :          3 :         if (old_len == vma->vm_end - addr) {
     683                 :            :                 /* can we just expand the current mapping? */
     684                 :          3 :                 if (vma_expandable(vma, new_len - old_len)) {
     685                 :          0 :                         int pages = (new_len - old_len) >> PAGE_SHIFT;
     686                 :            : 
                                                                /* Extend the end of the vma to addr + new_len */
     687                 :          0 :                         if (vma_adjust(vma, vma->vm_start, addr + new_len,
     688                 :            :                                        vma->vm_pgoff, NULL)) {
     689                 :            :                                 ret = -ENOMEM;
     690                 :            :                                 goto out;
     691                 :            :                         }
     692                 :            : 
     693                 :          0 :                         vm_stat_account(mm, vma->vm_flags, pages);
                                                                /*
                                                                 * new_addr is read again by the mm_populate() call
                                                                 * after out:, so point it at the unmoved mapping.
                                                                 */
     694                 :          0 :                         if (vma->vm_flags & VM_LOCKED) {
     695                 :          0 :                                 mm->locked_vm += pages;
     696                 :          0 :                                 locked = true;
     697                 :            :                                 new_addr = addr;
     698                 :            :                         }
     699                 :            :                         ret = addr;
     700                 :          0 :                         goto out;
     701                 :            :                 }
     702                 :            :         }
     703                 :            : 
     704                 :            :         /*
     705                 :            :          * We weren't able to just expand or shrink the area,
     706                 :            :          * we need to create a new one and move it..
     707                 :            :          */
                                                /* -ENOMEM is the result when MREMAP_MAYMOVE was not given */
     708                 :            :         ret = -ENOMEM;
     709                 :          3 :         if (flags & MREMAP_MAYMOVE) {
     710                 :            :                 unsigned long map_flags = 0;
     711                 :          3 :                 if (vma->vm_flags & VM_MAYSHARE)
     712                 :            :                         map_flags |= MAP_SHARED;
     713                 :            : 
     714                 :          3 :                 new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
     715                 :          3 :                                         vma->vm_pgoff +
     716                 :          3 :                                         ((addr - vma->vm_start) >> PAGE_SHIFT),
     717                 :            :                                         map_flags);
                                                        /* A result that is not page aligned is an error code */
     718                 :          3 :                 if (offset_in_page(new_addr)) {
     719                 :            :                         ret = new_addr;
     720                 :            :                         goto out;
     721                 :            :                 }
     722                 :            : 
     723                 :          3 :                 ret = move_vma(vma, addr, old_len, new_len, new_addr,
     724                 :            :                                &locked, &uf, &uf_unmap);
     725                 :            :         }
     726                 :            : out:
                                                /*
                                                 * ret is not page aligned, i.e. it is an error code: give back the
                                                 * pages reported by vma_to_resize() (charged is still 0 if none)
                                                 * and clear locked so the mm_populate() call below is skipped.
                                                 */
     727                 :          3 :         if (offset_in_page(ret)) {
     728                 :          0 :                 vm_unacct_memory(charged);
     729                 :          0 :                 locked = 0;
     730                 :            :         }
                                                /* Release mmap_sem in the mode it is currently held in */
     731                 :          3 :         if (downgraded)
     732                 :          3 :                 up_read(&current->mm->mmap_sem);
     733                 :            :         else
     734                 :          3 :                 up_write(&current->mm->mmap_sem);
                                                /* With mmap_sem dropped, populate only the newly added tail */
     735                 :          3 :         if (locked && new_len > old_len)
     736                 :          0 :                 mm_populate(new_addr + old_len, new_len - old_len);
     737                 :            :         userfaultfd_unmap_complete(mm, &uf_unmap_early);
     738                 :            :         mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
     739                 :            :         userfaultfd_unmap_complete(mm, &uf_unmap);
     740                 :          3 :         return ret;
     741                 :            : }

Generated by: LCOV version 1.14