LCOV - code coverage report
Current view: top level - mm - rmap.c (source / functions) Hit Total Coverage
Test: Real Lines: 168 371 45.3 %
Date: 2020-10-17 15:46:43 Functions: 4 41 9.8 %
Legend: Neither, QEMU, Real, Both Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /*
       2                 :            :  * mm/rmap.c - physical to virtual reverse mappings
       3                 :            :  *
       4                 :            :  * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
       5                 :            :  * Released under the General Public License (GPL).
       6                 :            :  *
       7                 :            :  * Simple, low overhead reverse mapping scheme.
       8                 :            :  * Please try to keep this thing as modular as possible.
       9                 :            :  *
      10                 :            :  * Provides methods for unmapping each kind of mapped page:
      11                 :            :  * the anon methods track anonymous pages, and
      12                 :            :  * the file methods track pages belonging to an inode.
      13                 :            :  *
      14                 :            :  * Original design by Rik van Riel <riel@conectiva.com.br> 2001
      15                 :            :  * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
      16                 :            :  * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
      17                 :            :  * Contributions by Hugh Dickins 2003, 2004
      18                 :            :  */
      19                 :            : 
      20                 :            : /*
      21                 :            :  * Lock ordering in mm:
      22                 :            :  *
      23                 :            :  * inode->i_mutex    (while writing or truncating, not reading or faulting)
      24                 :            :  *   mm->mmap_sem
      25                 :            :  *     page->flags PG_locked (lock_page)
      26                 :            :  *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
      27                 :            :  *         mapping->i_mmap_rwsem
      28                 :            :  *           anon_vma->rwsem
      29                 :            :  *             mm->page_table_lock or pte_lock
      30                 :            :  *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
      31                 :            :  *               swap_lock (in swap_duplicate, swap_info_get)
      32                 :            :  *                 mmlist_lock (in mmput, drain_mmlist and others)
      33                 :            :  *                 mapping->private_lock (in __set_page_dirty_buffers)
      34                 :            :  *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
      35                 :            :  *                     i_pages lock (widely used)
      36                 :            :  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
      37                 :            :  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
      38                 :            :  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
      39                 :            :  *                   i_pages lock (widely used, in set_page_dirty,
      40                 :            :  *                             in arch-dependent flush_dcache_mmap_lock,
      41                 :            :  *                             within bdi.wb->list_lock in __sync_single_inode)
      42                 :            :  *
      43                 :            :  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
      44                 :            :  *   ->tasklist_lock
      45                 :            :  *     pte map lock
      46                 :            :  */
      47                 :            : 
      48                 :            : #include <linux/mm.h>
      49                 :            : #include <linux/sched/mm.h>
      50                 :            : #include <linux/sched/task.h>
      51                 :            : #include <linux/pagemap.h>
      52                 :            : #include <linux/swap.h>
      53                 :            : #include <linux/swapops.h>
      54                 :            : #include <linux/slab.h>
      55                 :            : #include <linux/init.h>
      56                 :            : #include <linux/ksm.h>
      57                 :            : #include <linux/rmap.h>
      58                 :            : #include <linux/rcupdate.h>
      59                 :            : #include <linux/export.h>
      60                 :            : #include <linux/memcontrol.h>
      61                 :            : #include <linux/mmu_notifier.h>
      62                 :            : #include <linux/migrate.h>
      63                 :            : #include <linux/hugetlb.h>
      64                 :            : #include <linux/huge_mm.h>
      65                 :            : #include <linux/backing-dev.h>
      66                 :            : #include <linux/page_idle.h>
      67                 :            : #include <linux/memremap.h>
      68                 :            : #include <linux/userfaultfd_k.h>
      69                 :            : 
      70                 :            : #include <asm/tlbflush.h>
      71                 :            : 
      72                 :            : #include <trace/events/tlb.h>
      73                 :            : 
      74                 :            : #include "internal.h"
      75                 :            : 
      76                 :            : static struct kmem_cache *anon_vma_cachep;
      77                 :            : static struct kmem_cache *anon_vma_chain_cachep;
      78                 :            : 
      79                 :          3 : static inline struct anon_vma *anon_vma_alloc(void)
      80                 :            : {
      81                 :            :         struct anon_vma *anon_vma;
      82                 :            : 
      83                 :          3 :         anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
      84                 :          3 :         if (anon_vma) {
      85                 :            :                 atomic_set(&anon_vma->refcount, 1);
      86                 :          3 :                 anon_vma->degree = 1;        /* Reference for first vma */
      87                 :          3 :                 anon_vma->parent = anon_vma;
      88                 :            :                 /*
      89                 :            :                  * Initialise the anon_vma root to point to itself. If called
      90                 :            :                  * from fork, the root will be reset to the parent's anon_vma.
      91                 :            :                  */
      92                 :          3 :                 anon_vma->root = anon_vma;
      93                 :            :         }
      94                 :            : 
      95                 :          3 :         return anon_vma;
      96                 :            : }
      97                 :            : 
      98                 :          3 : static inline void anon_vma_free(struct anon_vma *anon_vma)
      99                 :            : {
     100                 :            :         VM_BUG_ON(atomic_read(&anon_vma->refcount));
     101                 :            : 
     102                 :            :         /*
     103                 :            :          * Synchronize against page_lock_anon_vma_read() such that
     104                 :            :          * we can safely hold the lock without the anon_vma getting
     105                 :            :          * freed.
     106                 :            :          *
     107                 :            :          * Relies on the full mb implied by the atomic_dec_and_test() from
     108                 :            :          * put_anon_vma() against the acquire barrier implied by
     109                 :            :          * down_read_trylock() from page_lock_anon_vma_read(). This orders:
     110                 :            :          *
     111                 :            :          * page_lock_anon_vma_read()    VS      put_anon_vma()
     112                 :            :          *   down_read_trylock()                  atomic_dec_and_test()
     113                 :            :          *   LOCK                                 MB
     114                 :            :          *   atomic_read()                        rwsem_is_locked()
     115                 :            :          *
     116                 :            :          * LOCK should suffice since the actual taking of the lock must
     117                 :            :          * happen _before_ what follows.
     118                 :            :          */
     119                 :          3 :         might_sleep();
     120                 :          3 :         if (rwsem_is_locked(&anon_vma->root->rwsem)) {
     121                 :            :                 anon_vma_lock_write(anon_vma);
     122                 :            :                 anon_vma_unlock_write(anon_vma);
     123                 :            :         }
     124                 :            : 
     125                 :          3 :         kmem_cache_free(anon_vma_cachep, anon_vma);
     126                 :          3 : }
     127                 :            : 
     128                 :            : static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
     129                 :            : {
     130                 :          3 :         return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
     131                 :            : }
     132                 :            : 
     133                 :            : static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
     134                 :            : {
     135                 :          3 :         kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
     136                 :            : }
     137                 :            : 
     138                 :            : static void anon_vma_chain_link(struct vm_area_struct *vma,
     139                 :            :                                 struct anon_vma_chain *avc,
     140                 :            :                                 struct anon_vma *anon_vma)
     141                 :            : {
     142                 :          3 :         avc->vma = vma;
     143                 :          3 :         avc->anon_vma = anon_vma;
     144                 :          3 :         list_add(&avc->same_vma, &vma->anon_vma_chain);
     145                 :          3 :         anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
     146                 :            : }
     147                 :            : 
     148                 :            : /**
     149                 :            :  * __anon_vma_prepare - attach an anon_vma to a memory region
     150                 :            :  * @vma: the memory region in question
     151                 :            :  *
     152                 :            :  * This makes sure the memory mapping described by 'vma' has
     153                 :            :  * an 'anon_vma' attached to it, so that we can associate the
     154                 :            :  * anonymous pages mapped into it with that anon_vma.
     155                 :            :  *
     156                 :            :  * The common case will be that we already have one, which
     157                 :            :  * is handled inline by anon_vma_prepare(). But if
     158                 :            :  * not we either need to find an adjacent mapping that we
     159                 :            :  * can re-use the anon_vma from (very common when the only
     160                 :            :  * reason for splitting a vma has been mprotect()), or we
     161                 :            :  * allocate a new one.
     162                 :            :  *
     163                 :            :  * Anon-vma allocations are very subtle, because we may have
     164                 :            :  * optimistically looked up an anon_vma in page_lock_anon_vma_read()
     165                 :            :  * and that may actually touch the rwsem even in the newly
     166                 :            :  * allocated vma (it depends on RCU to make sure that the
     167                 :            :  * anon_vma isn't actually destroyed).
     168                 :            :  *
     169                 :            :  * As a result, we need to do proper anon_vma locking even
     170                 :            :  * for the new allocation. At the same time, we do not want
     171                 :            :  * to do any locking for the common case of already having
     172                 :            :  * an anon_vma.
     173                 :            :  *
     174                 :            :  * This must be called with the mmap_sem held for reading.
     175                 :            :  */
     176                 :          3 : int __anon_vma_prepare(struct vm_area_struct *vma)
     177                 :            : {
     178                 :          3 :         struct mm_struct *mm = vma->vm_mm;
     179                 :            :         struct anon_vma *anon_vma, *allocated;
     180                 :            :         struct anon_vma_chain *avc;
     181                 :            : 
     182                 :          3 :         might_sleep();
     183                 :            : 
     184                 :            :         avc = anon_vma_chain_alloc(GFP_KERNEL);
     185                 :          3 :         if (!avc)
     186                 :            :                 goto out_enomem;
     187                 :            : 
     188                 :          3 :         anon_vma = find_mergeable_anon_vma(vma);
     189                 :            :         allocated = NULL;
     190                 :          3 :         if (!anon_vma) {
     191                 :          3 :                 anon_vma = anon_vma_alloc();
     192                 :          3 :                 if (unlikely(!anon_vma))
     193                 :            :                         goto out_enomem_free_avc;
     194                 :            :                 allocated = anon_vma;
     195                 :            :         }
     196                 :            : 
     197                 :            :         anon_vma_lock_write(anon_vma);
     198                 :            :         /* page_table_lock to protect against threads */
     199                 :            :         spin_lock(&mm->page_table_lock);
     200                 :          3 :         if (likely(!vma->anon_vma)) {
     201                 :          3 :                 vma->anon_vma = anon_vma;
     202                 :            :                 anon_vma_chain_link(vma, avc, anon_vma);
     203                 :            :                 /* vma reference or self-parent link for new root */
     204                 :          3 :                 anon_vma->degree++;
     205                 :            :                 allocated = NULL;
     206                 :            :                 avc = NULL;
     207                 :            :         }
     208                 :            :         spin_unlock(&mm->page_table_lock);
     209                 :            :         anon_vma_unlock_write(anon_vma);
     210                 :            : 
     211                 :          3 :         if (unlikely(allocated))
     212                 :          0 :                 put_anon_vma(allocated);
     213                 :          3 :         if (unlikely(avc))
     214                 :            :                 anon_vma_chain_free(avc);
     215                 :            : 
     216                 :            :         return 0;
     217                 :            : 
     218                 :            :  out_enomem_free_avc:
     219                 :            :         anon_vma_chain_free(avc);
     220                 :            :  out_enomem:
     221                 :            :         return -ENOMEM;
     222                 :            : }
     223                 :            : 
     224                 :            : /*
     225                 :            :  * This is a useful helper function for locking the anon_vma root as
     226                 :            :  * we traverse the vma->anon_vma_chain, looping over anon_vma's that
     227                 :            :  * have the same vma.
     228                 :            :  *
     229                 :            :  * Such anon_vma's should have the same root, so you'd expect to see
     230                 :            :  * just a single down_write() of the root rwsem for the whole traversal.
     231                 :            :  */
     232                 :          3 : static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
     233                 :            : {
     234                 :          3 :         struct anon_vma *new_root = anon_vma->root;
     235                 :          3 :         if (new_root != root) {
     236                 :          3 :                 if (WARN_ON_ONCE(root))
     237                 :          0 :                         up_write(&root->rwsem);
     238                 :            :                 root = new_root;
     239                 :          3 :                 down_write(&root->rwsem);
     240                 :            :         }
     241                 :          3 :         return root;
     242                 :            : }
     243                 :            : 
     244                 :            : static inline void unlock_anon_vma_root(struct anon_vma *root)
     245                 :            : {
     246                 :          3 :         if (root)
     247                 :          3 :                 up_write(&root->rwsem);
     248                 :            : }
     249                 :            : 
     250                 :            : /*
     251                 :            :  * Attach the anon_vmas from src to dst.
     252                 :            :  * Returns 0 on success, -ENOMEM on failure.
     253                 :            :  *
     254                 :            :  * If dst->anon_vma is NULL this function tries to find and reuse existing
     255                 :            :  * anon_vma which has no vmas and only one child anon_vma. This prevents
     256                 :            :  * degradation of anon_vma hierarchy to endless linear chain in case of
     257                 :            :  * constantly forking task. On the other hand, an anon_vma with more than one
     258                 :            :  * child isn't reused even if there was no alive vma, thus rmap walker has a
     259                 :            :  * good chance of avoiding scanning the whole hierarchy when it searches where
     260                 :            :  * page is mapped.
     261                 :            :  */
     262                 :          3 : int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
     263                 :            : {
     264                 :            :         struct anon_vma_chain *avc, *pavc;
     265                 :            :         struct anon_vma *root = NULL;
     266                 :            : 
     267                 :          3 :         list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
     268                 :            :                 struct anon_vma *anon_vma;
     269                 :            : 
     270                 :            :                 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
     271                 :          3 :                 if (unlikely(!avc)) {
     272                 :            :                         unlock_anon_vma_root(root);
     273                 :            :                         root = NULL;
     274                 :            :                         avc = anon_vma_chain_alloc(GFP_KERNEL);
     275                 :          0 :                         if (!avc)
     276                 :            :                                 goto enomem_failure;
     277                 :            :                 }
     278                 :          3 :                 anon_vma = pavc->anon_vma;
     279                 :          3 :                 root = lock_anon_vma_root(root, anon_vma);
     280                 :            :                 anon_vma_chain_link(dst, avc, anon_vma);
     281                 :            : 
     282                 :            :                 /*
     283                 :            :                  * Reuse existing anon_vma if its degree is lower than two,
     284                 :            :                  * that means it has no vma and only one anon_vma child.
     285                 :            :                  *
     286                 :            :                  * Do not choose parent anon_vma, otherwise first child
     287                 :            :                  * will always reuse it. Root anon_vma is never reused:
     288                 :            :                  * it has self-parent reference and at least one child.
     289                 :            :                  */
     290                 :          3 :                 if (!dst->anon_vma && anon_vma != src->anon_vma &&
     291                 :          3 :                                 anon_vma->degree < 2)
     292                 :          3 :                         dst->anon_vma = anon_vma;
     293                 :            :         }
     294                 :          3 :         if (dst->anon_vma)
     295                 :          3 :                 dst->anon_vma->degree++;
     296                 :            :         unlock_anon_vma_root(root);
     297                 :            :         return 0;
     298                 :            : 
     299                 :            :  enomem_failure:
     300                 :            :         /*
     301                 :            :          * dst->anon_vma is dropped here otherwise its degree can be incorrectly
     302                 :            :          * decremented in unlink_anon_vmas().
     303                 :            :          * We can safely do this because callers of anon_vma_clone() don't care
     304                 :            :          * about dst->anon_vma if anon_vma_clone() failed.
     305                 :            :          */
     306                 :          0 :         dst->anon_vma = NULL;
     307                 :          0 :         unlink_anon_vmas(dst);
     308                 :          0 :         return -ENOMEM;
     309                 :            : }
     310                 :            : 
     311                 :            : /*
     312                 :            :  * Attach vma to its own anon_vma, as well as to the anon_vmas that
     313                 :            :  * the corresponding VMA in the parent process is attached to.
     314                 :            :  * Returns 0 on success, non-zero on failure.
     315                 :            :  */
     316                 :          3 : int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
     317                 :            : {
     318                 :            :         struct anon_vma_chain *avc;
     319                 :            :         struct anon_vma *anon_vma;
     320                 :            :         int error;
     321                 :            : 
     322                 :            :         /* Don't bother if the parent process has no anon_vma here. */
     323                 :          3 :         if (!pvma->anon_vma)
     324                 :            :                 return 0;
     325                 :            : 
     326                 :            :         /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
     327                 :          3 :         vma->anon_vma = NULL;
     328                 :            : 
     329                 :            :         /*
     330                 :            :          * First, attach the new VMA to the parent VMA's anon_vmas,
     331                 :            :          * so rmap can find non-COWed pages in child processes.
     332                 :            :          */
     333                 :          3 :         error = anon_vma_clone(vma, pvma);
     334                 :          3 :         if (error)
     335                 :            :                 return error;
     336                 :            : 
     337                 :            :         /* An existing anon_vma has been reused, all done then. */
     338                 :          3 :         if (vma->anon_vma)
     339                 :            :                 return 0;
     340                 :            : 
     341                 :            :         /* Then add our own anon_vma. */
     342                 :          3 :         anon_vma = anon_vma_alloc();
     343                 :          3 :         if (!anon_vma)
     344                 :            :                 goto out_error;
     345                 :            :         avc = anon_vma_chain_alloc(GFP_KERNEL);
     346                 :          3 :         if (!avc)
     347                 :            :                 goto out_error_free_anon_vma;
     348                 :            : 
     349                 :            :         /*
     350                 :            :          * The root anon_vma's rwsem is the lock actually used when we
     351                 :            :          * lock any of the anon_vmas in this anon_vma tree.
     352                 :            :          */
     353                 :          3 :         anon_vma->root = pvma->anon_vma->root;
     354                 :          3 :         anon_vma->parent = pvma->anon_vma;
     355                 :            :         /*
     356                 :            :          * With refcounts, an anon_vma can stay around longer than the
     357                 :            :          * process it belongs to. The root anon_vma needs to be pinned until
     358                 :            :          * this anon_vma is freed, because the lock lives in the root.
     359                 :            :          */
     360                 :            :         get_anon_vma(anon_vma->root);
     361                 :            :         /* Mark this anon_vma as the one where our new (COWed) pages go. */
     362                 :          3 :         vma->anon_vma = anon_vma;
     363                 :            :         anon_vma_lock_write(anon_vma);
     364                 :            :         anon_vma_chain_link(vma, avc, anon_vma);
     365                 :          3 :         anon_vma->parent->degree++;
     366                 :            :         anon_vma_unlock_write(anon_vma);
     367                 :            : 
     368                 :          3 :         return 0;
     369                 :            : 
     370                 :            :  out_error_free_anon_vma:
     371                 :          0 :         put_anon_vma(anon_vma);
     372                 :            :  out_error:
     373                 :          3 :         unlink_anon_vmas(vma);
     374                 :          0 :         return -ENOMEM;
     375                 :            : }
     376                 :            : 
     377                 :          3 : void unlink_anon_vmas(struct vm_area_struct *vma)
     378                 :            : {
     379                 :            :         struct anon_vma_chain *avc, *next;
     380                 :            :         struct anon_vma *root = NULL;
     381                 :            : 
     382                 :            :         /*
     383                 :            :          * Unlink each anon_vma chained to the VMA.  This list is ordered
     384                 :            :          * from newest to oldest, ensuring the root anon_vma gets freed last.
     385                 :            :          */
     386                 :          3 :         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
     387                 :          3 :                 struct anon_vma *anon_vma = avc->anon_vma;
     388                 :            : 
     389                 :          3 :                 root = lock_anon_vma_root(root, anon_vma);
     390                 :          3 :                 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
     391                 :            : 
     392                 :            :                 /*
     393                 :            :                  * Leave empty anon_vmas on the list - we'll need
     394                 :            :                  * to free them outside the lock.
     395                 :            :                  */
     396                 :          3 :                 if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
     397                 :          3 :                         anon_vma->parent->degree--;
     398                 :          3 :                         continue;
     399                 :            :                 }
     400                 :            : 
     401                 :            :                 list_del(&avc->same_vma);
     402                 :            :                 anon_vma_chain_free(avc);
     403                 :            :         }
     404                 :          3 :         if (vma->anon_vma)
     405                 :          3 :                 vma->anon_vma->degree--;
     406                 :            :         unlock_anon_vma_root(root);
     407                 :            : 
     408                 :            :         /*
     409                 :            :          * Iterate the list once more, it now only contains empty and unlinked
     410                 :            :          * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
     411                 :            :          * needing to write-acquire the anon_vma->root->rwsem.
     412                 :            :          */
     413                 :          3 :         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
     414                 :          3 :                 struct anon_vma *anon_vma = avc->anon_vma;
     415                 :            : 
     416                 :            :                 VM_WARN_ON(anon_vma->degree);
     417                 :          3 :                 put_anon_vma(anon_vma);
     418                 :            : 
     419                 :            :                 list_del(&avc->same_vma);
     420                 :            :                 anon_vma_chain_free(avc);
     421                 :            :         }
     422                 :          3 : }
     423                 :            : /* Slab constructor for anon_vma objects (passed to kmem_cache_create() in anon_vma_init()): init the rwsem, zero the refcount, reset rb_root. */
     424                 :          3 : static void anon_vma_ctor(void *data)
     425                 :            : {
     426                 :            :         struct anon_vma *anon_vma = data;
     427                 :            : 
     428                 :          3 :         init_rwsem(&anon_vma->rwsem);
     429                 :            :         atomic_set(&anon_vma->refcount, 0);
     430                 :          3 :         anon_vma->rb_root = RB_ROOT_CACHED;
     431                 :          3 : }
     432                 :            : /* Boot-time (__init) setup: create the slab caches for struct anon_vma (SLAB_TYPESAFE_BY_RCU, built by anon_vma_ctor) and struct anon_vma_chain. */
     433                 :          3 : void __init anon_vma_init(void)
     434                 :            : {
     435                 :          3 :         anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
     436                 :            :                         0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
     437                 :            :                         anon_vma_ctor);
     438                 :          3 :         anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
     439                 :            :                         SLAB_PANIC|SLAB_ACCOUNT);
     440                 :          3 : }
     441                 :            : 
     442                 :            : /*
     443                 :            :  * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
     444                 :            :  *
     445                 :            :  * Since there is no serialization whatsoever against page_remove_rmap()
     446                 :            :  * the best this function can do is return a referenced anon_vma that might
     447                 :            :  * have been relevant to this page (or NULL when no reference could be taken).
     448                 :            :  *
     449                 :            :  * The page might have been remapped to a different anon_vma or the anon_vma
     450                 :            :  * returned may already be freed (and even reused).
     451                 :            :  *
     452                 :            :  * In case it was remapped to a different anon_vma, the new anon_vma will be a
     453                 :            :  * child of the old anon_vma, and the anon_vma lifetime rules will therefore
     454                 :            :  * ensure that any anon_vma obtained from the page will still be valid for as
     455                 :            :  * long as we observe page_mapped() [ hence all those page_mapped() tests ].
     456                 :            :  *
     457                 :            :  * All users of this function must be very careful when walking the anon_vma
     458                 :            :  * chain and verify that the page in question is indeed mapped in it
     459                 :            :  * [ something equivalent to page_mapped_in_vma() ].
     460                 :            :  *
     461                 :            :  * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from page_remove_rmap()
     462                 :            :  * that the anon_vma pointer from page->mapping is valid if there is a
     463                 :            :  * mapcount, we can dereference the anon_vma after observing those.
     464                 :            :  */
     465                 :          0 : struct anon_vma *page_get_anon_vma(struct page *page)
     466                 :            : {
     467                 :            :         struct anon_vma *anon_vma = NULL;
     468                 :            :         unsigned long anon_mapping;
     469                 :            : 
     470                 :            :         rcu_read_lock();
     471                 :            :         anon_mapping = (unsigned long)READ_ONCE(page->mapping);
     472                 :          0 :         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
     473                 :            :                 goto out;
     474                 :          0 :         if (!page_mapped(page))
     475                 :            :                 goto out;
     476                 :            :         /* Anon and mapped: strip the PAGE_MAPPING_ANON tag to recover the anon_vma pointer, then try to pin it. */
     477                 :          0 :         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
     478                 :          0 :         if (!atomic_inc_not_zero(&anon_vma->refcount)) {
     479                 :            :                 anon_vma = NULL;
     480                 :            :                 goto out;
     481                 :            :         }
     482                 :            : 
     483                 :            :         /*
     484                 :            :          * If this page is still mapped, then its anon_vma cannot have been
     485                 :            :          * freed.  But if it has been unmapped, we have no security against the
     486                 :            :          * anon_vma structure being freed and reused (for another anon_vma:
     487                 :            :          * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
     488                 :            :          * above cannot corrupt).  In that case drop the reference again and fail.
     489                 :            :          */
     490                 :          0 :         if (!page_mapped(page)) {
     491                 :            :                 rcu_read_unlock();
     492                 :          0 :                 put_anon_vma(anon_vma);
     493                 :          0 :                 return NULL;
     494                 :            :         }
     495                 :            : out:
     496                 :            :         rcu_read_unlock();
     497                 :            : 
     498                 :          0 :         return anon_vma;
     499                 :            : }
     500                 :            : 
     501                 :            : /*
     502                 :            :  * Similar to page_get_anon_vma() except it returns the anon_vma read-locked (or NULL on failure).
     503                 :            :  *
     504                 :            :  * It's a little more complex as it tries to keep the fast path to a single
     505                 :            :  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
     506                 :            :  * reference like with page_get_anon_vma() and then block on the rwsem.
     507                 :            :  */
     508                 :          0 : struct anon_vma *page_lock_anon_vma_read(struct page *page)
     509                 :            : {
     510                 :            :         struct anon_vma *anon_vma = NULL;
     511                 :            :         struct anon_vma *root_anon_vma;
     512                 :            :         unsigned long anon_mapping;
     513                 :            : 
     514                 :            :         rcu_read_lock();
     515                 :            :         anon_mapping = (unsigned long)READ_ONCE(page->mapping);
     516                 :          0 :         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
     517                 :            :                 goto out;
     518                 :          0 :         if (!page_mapped(page))
     519                 :            :                 goto out;
     520                 :            :         /* Fast path: recover the anon_vma pointer and try to read-lock its root's rwsem without sleeping. */
     521                 :          0 :         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
     522                 :          0 :         root_anon_vma = READ_ONCE(anon_vma->root);
     523                 :          0 :         if (down_read_trylock(&root_anon_vma->rwsem)) {
     524                 :            :                 /*
     525                 :            :                  * If the page is still mapped, then this anon_vma is still
     526                 :            :                  * its anon_vma, and holding the rwsem ensures that it will
     527                 :            :                  * not go away, see anon_vma_free().
     528                 :            :                  */
     529                 :          0 :                 if (!page_mapped(page)) {
     530                 :          0 :                         up_read(&root_anon_vma->rwsem);
     531                 :            :                         anon_vma = NULL;
     532                 :            :                 }
     533                 :            :                 goto out;
     534                 :            :         }
     535                 :            : 
     536                 :            :         /* trylock failed, we have to sleep: pin the anon_vma with a reference first */
     537                 :          0 :         if (!atomic_inc_not_zero(&anon_vma->refcount)) {
     538                 :            :                 anon_vma = NULL;
     539                 :            :                 goto out;
     540                 :            :         }
     541                 :            :         /* Same unmapped-after-pin recheck as in page_get_anon_vma() */
     542                 :          0 :         if (!page_mapped(page)) {
     543                 :            :                 rcu_read_unlock();
     544                 :          0 :                 put_anon_vma(anon_vma);
     545                 :          0 :                 return NULL;
     546                 :            :         }
     547                 :            : 
     548                 :            :         /* we pinned the anon_vma, it's safe to sleep */
     549                 :            :         rcu_read_unlock();
     550                 :            :         anon_vma_lock_read(anon_vma);
     551                 :            :         /* Lock is held: give back the temporary reference taken above */
     552                 :          0 :         if (atomic_dec_and_test(&anon_vma->refcount)) {
     553                 :            :                 /*
     554                 :            :                  * Oops, we held the last refcount, release the lock
     555                 :            :                  * and bail -- can't simply use put_anon_vma() because
     556                 :            :                  * we'll deadlock on the anon_vma_lock_write() recursion.
     557                 :            :                  */
     558                 :            :                 anon_vma_unlock_read(anon_vma);
     559                 :          0 :                 __put_anon_vma(anon_vma);
     560                 :            :                 anon_vma = NULL;
     561                 :            :         }
     562                 :            :         /* Slow-path result: the read-locked anon_vma, or NULL if ours was the last reference */
     563                 :          0 :         return anon_vma;
     564                 :            : 
     565                 :            : out:
     566                 :            :         rcu_read_unlock();
     567                 :          0 :         return anon_vma;
     568                 :            : }
     569                 :            : /* Counterpart of page_lock_anon_vma_read(): drops the read lock via anon_vma_unlock_read(). */
     570                 :          0 : void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
     571                 :            : {
     572                 :            :         anon_vma_unlock_read(anon_vma);
     573                 :          0 : }
     574                 :            : 
     575                 :            : #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
     576                 :            : /*
     577                 :            :  * Flush TLB entries for recently unmapped pages from remote CPUs. It is
     578                 :            :  * important if a PTE was dirty when it was unmapped that it's flushed
     579                 :            :  * before any IO is initiated on the page to prevent lost writes. Similarly,
     580                 :            :  * it must be flushed before freeing to prevent data leakage.
     581                 :            :  */
     582                 :            : void try_to_unmap_flush(void)
     583                 :            : {
     584                 :            :         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
     585                 :            :         /* Nothing recorded in current's batch since the last flush: nothing to do */
     586                 :            :         if (!tlb_ubc->flush_required)
     587                 :            :                 return;
     588                 :            :         /* Flush the whole batch, then mark it empty and no longer writable */
     589                 :            :         arch_tlbbatch_flush(&tlb_ubc->arch);
     590                 :            :         tlb_ubc->flush_required = false;
     591                 :            :         tlb_ubc->writable = false;
     592                 :            : }
     593                 :            : 
     594                 :            : /* Flush iff current's batch holds potentially writable TLB entries that can race with IO; otherwise a no-op */
     595                 :            : void try_to_unmap_flush_dirty(void)
     596                 :            : {
     597                 :            :         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
     598                 :            : 
     599                 :            :         if (tlb_ubc->writable)
     600                 :            :                 try_to_unmap_flush();
     601                 :            : }
     602                 :            : /* Record in current's unmap batch that @mm needs a deferred TLB flush; @writable says the unmapped PTE was dirty. */
     603                 :            : static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
     604                 :            : {
     605                 :            :         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
     606                 :            : 
     607                 :            :         arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
     608                 :            :         tlb_ubc->flush_required = true;
     609                 :            : 
     610                 :            :         /*
     611                 :            :          * Ensure compiler does not re-order the setting of tlb_flush_batched
     612                 :            :          * before the PTE is cleared.
     613                 :            :          */
     614                 :            :         barrier();
     615                 :            :         mm->tlb_flush_batched = true;
     616                 :            : 
     617                 :            :         /*
     618                 :            :          * If the PTE was dirty then it's best to assume it's writable. The
     619                 :            :          * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
     620                 :            :          * before the page is queued for IO.  The flag is sticky until the next flush.
     621                 :            :          */
     622                 :            :         if (writable)
     623                 :            :                 tlb_ubc->writable = true;
     624                 :            : }
     625                 :            : 
     626                 :            : /*
     627                 :            :  * Returns true if the TLB flush should be deferred to the end of a batch of
     628                 :            :  * unmap operations to reduce IPIs.  Always false unless @flags has TTU_BATCH_FLUSH.
     629                 :            :  */
     630                 :            : static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
     631                 :            : {
     632                 :            :         bool should_defer = false;
     633                 :            : 
     634                 :            :         if (!(flags & TTU_BATCH_FLUSH))
     635                 :            :                 return false;
     636                 :            : 
     637                 :            :         /* If remote CPUs need to be flushed then defer and batch the flush; get_cpu()/put_cpu() bracket the cpumask test */
     638                 :            :         if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
     639                 :            :                 should_defer = true;
     640                 :            :         put_cpu();
     641                 :            : 
     642                 :            :         return should_defer;
     643                 :            : }
     644                 :            : 
     645                 :            : /*
     646                 :            :  * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
     647                 :            :  * releasing the PTL if TLB flushes are batched. It's possible for a parallel
     648                 :            :  * operation such as mprotect or munmap to race between reclaim unmapping
     649                 :            :  * the page and flushing the page. If this race occurs, it potentially allows
     650                 :            :  * access to data via a stale TLB entry. Tracking all mm's that have TLB
     651                 :            :  * batching in flight would be expensive during reclaim so instead track
     652                 :            :  * whether TLB batching occurred in the past and if so then do a flush here
     653                 :            :  * if required. This will cost one additional flush per reclaim cycle paid
     654                 :            :  * by the first operation at risk such as mprotect and munmap.
     655                 :            :  *
     656                 :            :  * This must be called under the PTL so that an access to tlb_flush_batched
     657                 :            :  * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
     658                 :            :  * via the PTL.
     659                 :            :  */
     660                 :            : void flush_tlb_batched_pending(struct mm_struct *mm)
     661                 :            : {
     662                 :            :         if (mm->tlb_flush_batched) {
     663                 :            :                 flush_tlb_mm(mm);
     664                 :            : 
     665                 :            :                 /*
     666                 :            :                  * Do not allow the compiler to re-order the clearing of
     667                 :            :                  * tlb_flush_batched before the tlb is flushed.
     668                 :            :                  */
     669                 :            :                 barrier();
     670                 :            :                 mm->tlb_flush_batched = false;
     671                 :            :         }
     672                 :            : }
     673                 :            : #else /* !CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH: stubs, nothing is ever recorded for a batched flush */
     674                 :            : static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
     675                 :            : {
     676                 :            : }
     677                 :            : /* Stub: with batching compiled out the flush is never deferred */
     678                 :            : static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
     679                 :            : {
     680                 :            :         return false;
     681                 :            : }
     682                 :            : #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
     683                 :            : 
     684                 :            : /*
     685                 :            :  * At what user virtual address is page expected in vma?  Returns that address, or -EFAULT (as an unsigned long) when
     686                 :            :  * the page's mapping does not match the vma or the address lies outside it.  Caller should check the page is actually part of the vma.
     687                 :            :  */
     688                 :          0 : unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
     689                 :            : {
     690                 :            :         unsigned long address;
     691                 :          0 :         if (PageAnon(page)) {
     692                 :          0 :                 struct anon_vma *page__anon_vma = page_anon_vma(page);
     693                 :            :                 /*
     694                 :            :                  * Note: swapoff's unuse_vma() is more efficient with this
     695                 :            :                  * check, and needs it to match anon_vma when KSM is active.
     696                 :            :                  */
     697                 :          0 :                 if (!vma->anon_vma || !page__anon_vma ||
     698                 :          0 :                     vma->anon_vma->root != page__anon_vma->root)
     699                 :            :                         return -EFAULT; /* missing anon_vma or different anon_vma root */
     700                 :          0 :         } else if (page->mapping) {
     701                 :          0 :                 if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
     702                 :            :                         return -EFAULT; /* vma's file mapping is not page->mapping */
     703                 :            :         } else
     704                 :            :                 return -EFAULT; /* page has no mapping at all */
     705                 :            :         address = __vma_address(page, vma);
     706                 :          0 :         if (unlikely(address < vma->vm_start || address >= vma->vm_end))
     707                 :            :                 return -EFAULT; /* computed address falls outside [vm_start, vm_end) */
     708                 :          0 :         return address;
     709                 :            : }
     710                 :            : /* Walk @mm's page tables (pgd -> p4d -> pud -> pmd) for @address; returns the pmd pointer, or NULL if a level is not present or the pmd is not present / is a transparent huge pmd. */
     711                 :          0 : pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
     712                 :            : {
     713                 :            :         pgd_t *pgd;
     714                 :            :         p4d_t *p4d;
     715                 :            :         pud_t *pud;
     716                 :            :         pmd_t *pmd = NULL;
     717                 :            :         pmd_t pmde;
     718                 :            : 
     719                 :          0 :         pgd = pgd_offset(mm, address);
     720                 :            :         if (!pgd_present(*pgd))
     721                 :            :                 goto out;
     722                 :            : 
     723                 :            :         p4d = p4d_offset(pgd, address);
     724                 :            :         if (!p4d_present(*p4d))
     725                 :            :                 goto out;
     726                 :            : 
     727                 :            :         pud = pud_offset(p4d, address);
     728                 :            :         if (!pud_present(*pud))
     729                 :            :                 goto out;
     730                 :            : 
     731                 :            :         pmd = pmd_offset(pud, address);
     732                 :            :         /*
     733                 :            :          * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
     734                 :            :          * without holding anon_vma lock for write.  So when looking for a
     735                 :            :          * genuine pmde (in which to find pte), test present and !THP together, on one snapshot of *pmd.
     736                 :            :          */
     737                 :          0 :         pmde = *pmd;
     738                 :          0 :         barrier();
     739                 :          0 :         if (!pmd_present(pmde) || pmd_trans_huge(pmde))
     740                 :            :                 pmd = NULL;
     741                 :            : out:
     742                 :          0 :         return pmd;
     743                 :            : }
     744                 :            : /* State shared between page_referenced() and its rmap walk callbacks. */
     745                 :            : struct page_referenced_arg {
     746                 :            :         int mapcount; /* mappings still to visit; page_referenced() seeds it with total_mapcount(page) */
     747                 :            :         int referenced; /* bumped once per vma in which the page was found referenced */
     748                 :            :         unsigned long vm_flags; /* OR of vm_flags of referencing vmas, plus VM_LOCKED if a locked vma was hit */
     749                 :            :         struct mem_cgroup *memcg; /* target cgroup checked by invalid_page_referenced_vma() */
     750                 :            : };
     751                 :            : /*
     752                 :            :  * rmap walk callback used by page_referenced(): @arg is a struct page_referenced_arg. Returns false to stop the walk, true to continue.
     753                 :            :  */
     754                 :          0 : static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
     755                 :            :                         unsigned long address, void *arg)
     756                 :            : {
     757                 :            :         struct page_referenced_arg *pra = arg;
     758                 :          0 :         struct page_vma_mapped_walk pvmw = {
     759                 :            :                 .page = page,
     760                 :            :                 .vma = vma,
     761                 :            :                 .address = address,
     762                 :            :         };
     763                 :            :         int referenced = 0;
     764                 :            : 
     765                 :          0 :         while (page_vma_mapped_walk(&pvmw)) {
     766                 :          0 :                 address = pvmw.address;
     767                 :            :                 /* A VM_LOCKED vma ends the whole walk; the flag is reported back through pra->vm_flags */
     768                 :          0 :                 if (vma->vm_flags & VM_LOCKED) {
     769                 :            :                         page_vma_mapped_walk_done(&pvmw);
     770                 :          0 :                         pra->vm_flags |= VM_LOCKED;
     771                 :          0 :                         return false; /* To break the loop */
     772                 :            :                 }
     773                 :            : 
     774                 :          0 :                 if (pvmw.pte) {
     775                 :          0 :                         if (ptep_clear_flush_young_notify(vma, address,
     776                 :            :                                                 pvmw.pte)) {
     777                 :            :                                 /*
     778                 :            :                                  * Don't treat a reference through
     779                 :            :                                  * a sequentially read mapping as such.
     780                 :            :                                  * If the page has been used in another mapping,
     781                 :            :                                  * we will catch it; if this other mapping is
     782                 :            :                                  * already gone, the unmap path will have set
     783                 :            :                                  * PG_referenced or activated the page.
     784                 :            :                                  */
     785                 :          0 :                                 if (likely(!(vma->vm_flags & VM_SEQ_READ)))
     786                 :          0 :                                         referenced++;
     787                 :            :                         }
     788                 :            :                 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
     789                 :            :                         if (pmdp_clear_flush_young_notify(vma, address,
     790                 :            :                                                 pvmw.pmd))
     791                 :            :                                 referenced++;
     792                 :            :                 } else {
     793                 :            :                         /* unexpected pmd-mapped page? */
     794                 :          0 :                         WARN_ON_ONCE(1);
     795                 :            :                 }
     796                 :            :                 /* One more mapping of the page has been examined */
     797                 :          0 :                 pra->mapcount--;
     798                 :            :         }
     799                 :            : 
     800                 :            :         if (referenced)
     801                 :            :                 clear_page_idle(page);
     802                 :            :         if (test_and_clear_page_young(page))
     803                 :            :                 referenced++;
     804                 :            :         /* Count this vma at most once, however many of its mappings were young */
     805                 :          0 :         if (referenced) {
     806                 :          0 :                 pra->referenced++;
     807                 :          0 :                 pra->vm_flags |= vma->vm_flags;
     808                 :            :         }
     809                 :            :         /* Stop early once pra->mapcount has dropped to zero */
     810                 :          0 :         if (!pra->mapcount)
     811                 :            :                 return false; /* To break the loop */
     812                 :            : 
     813                 :          0 :         return true;
     814                 :            : }
     815                 :            : /* invalid_vma filter installed by page_referenced() when a memcg is given: returns true when mm_match_cgroup() fails for vma->vm_mm against the memcg in @arg. */
     816                 :          0 : static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
     817                 :            : {
     818                 :            :         struct page_referenced_arg *pra = arg;
     819                 :          0 :         struct mem_cgroup *memcg = pra->memcg;
     820                 :            : 
     821                 :          0 :         if (!mm_match_cgroup(vma->vm_mm, memcg))
     822                 :            :                 return true;
     823                 :            : 
     824                 :          0 :         return false;
     825                 :            : }
     826                 :            : 
     827                 :            : /**
     828                 :            :  * page_referenced - test if the page was referenced
     829                 :            :  * @page: the page to test
     830                 :            :  * @is_locked: caller holds lock on the page
     831                 :            :  * @memcg: target memory cgroup
     832                 :            :  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
     833                 :            :  *
     834                 :            :  * Quick test_and_clear_referenced for all mappings to a page,
     835                 :            :  * returns the number of vmas in which the page was found referenced (0 if unmapped, 1 if the page lock could not be taken).
     836                 :            :  */
     837                 :          0 : int page_referenced(struct page *page,
     838                 :            :                     int is_locked,
     839                 :            :                     struct mem_cgroup *memcg,
     840                 :            :                     unsigned long *vm_flags)
     841                 :            : {
     842                 :            :         int we_locked = 0;
     843                 :          0 :         struct page_referenced_arg pra = {
     844                 :            :                 .mapcount = total_mapcount(page),
     845                 :            :                 .memcg = memcg,
     846                 :            :         };
     847                 :          0 :         struct rmap_walk_control rwc = {
     848                 :            :                 .rmap_one = page_referenced_one,
     849                 :            :                 .arg = (void *)&pra,
     850                 :            :                 .anon_lock = page_lock_anon_vma_read,
     851                 :            :         };
     852                 :            : 
     853                 :          0 :         *vm_flags = 0;
     854                 :          0 :         if (!pra.mapcount)
     855                 :            :                 return 0;
     856                 :            : 
     857                 :          0 :         if (!page_rmapping(page))
     858                 :            :                 return 0;
     859                 :            :         /* Non-anon and KSM pages are walked under the page lock; if the trylock fails, report the page as referenced */
     860                 :          0 :         if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
     861                 :          0 :                 we_locked = trylock_page(page);
     862                 :          0 :                 if (!we_locked)
     863                 :            :                         return 1;
     864                 :            :         }
     865                 :            : 
     866                 :            :         /*
     867                 :            :          * If we are reclaiming on behalf of a cgroup, skip
     868                 :            :          * counting on behalf of references from different
     869                 :            :          * cgroups
     870                 :            :          */
     871                 :          0 :         if (memcg) {
     872                 :          0 :                 rwc.invalid_vma = invalid_page_referenced_vma;
     873                 :            :         }
     874                 :            : 
     875                 :          0 :         rmap_walk(page, &rwc);
     876                 :          0 :         *vm_flags = pra.vm_flags;
     877                 :            :         /* Only drop the page lock if it was taken here, not by the caller */
     878                 :          0 :         if (we_locked)
     879                 :          0 :                 unlock_page(page);
     880                 :            : 
     881                 :          0 :         return pra.referenced;
     882                 :            : }
     883                 :            : 
     884                 :          2 : static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
     885                 :            :                             unsigned long address, void *arg)
     886                 :            : {
     887                 :          2 :         struct page_vma_mapped_walk pvmw = {
     888                 :            :                 .page = page,
     889                 :            :                 .vma = vma,
     890                 :            :                 .address = address,
     891                 :            :                 .flags = PVMW_SYNC,
     892                 :            :         };
     893                 :            :         struct mmu_notifier_range range;
     894                 :            :         int *cleaned = arg;
     895                 :            : 
     896                 :            :         /*
     897                 :            :          * We have to assume the worst case, i.e. pmd, for invalidation. Note that
     898                 :            :          * the page cannot be freed from this function.
     899                 :            :          */
     900                 :            :         mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
     901                 :            :                                 0, vma, vma->vm_mm, address,
     902                 :            :                                 min(vma->vm_end, address + page_size(page)));
     903                 :            :         mmu_notifier_invalidate_range_start(&range);
     904                 :            : 
     905                 :          2 :         while (page_vma_mapped_walk(&pvmw)) {
     906                 :            :                 int ret = 0;
     907                 :            : 
     908                 :          2 :                 address = pvmw.address;
     909                 :          2 :                 if (pvmw.pte) {
     910                 :            :                         pte_t entry;
     911                 :            :                         pte_t *pte = pvmw.pte;
     912                 :            : 
     913                 :          2 :                         if (!pte_dirty(*pte) && !pte_write(*pte))
     914                 :          0 :                                 continue;
     915                 :            : 
     916                 :          2 :                         flush_cache_page(vma, address, pte_pfn(*pte));
     917                 :          2 :                         entry = ptep_clear_flush(vma, address, pte);
     918                 :            :                         entry = pte_wrprotect(entry);
     919                 :            :                         entry = pte_mkclean(entry);
     920                 :          2 :                         set_pte_at(vma->vm_mm, address, pte, entry);
     921                 :            :                         ret = 1;
     922                 :            :                 } else {
     923                 :            : #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
     924                 :            :                         pmd_t *pmd = pvmw.pmd;
     925                 :            :                         pmd_t entry;
     926                 :            : 
     927                 :            :                         if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
     928                 :            :                                 continue;
     929                 :            : 
     930                 :            :                         flush_cache_page(vma, address, page_to_pfn(page));
     931                 :            :                         entry = pmdp_invalidate(vma, address, pmd);
     932                 :            :                         entry = pmd_wrprotect(entry);
     933                 :            :                         entry = pmd_mkclean(entry);
     934                 :            :                         set_pmd_at(vma->vm_mm, address, pmd, entry);
     935                 :            :                         ret = 1;
     936                 :            : #else
     937                 :            :                         /* unexpected pmd-mapped page? */
     938                 :          0 :                         WARN_ON_ONCE(1);
     939                 :            : #endif
     940                 :            :                 }
     941                 :            : 
     942                 :            :                 /*
     943                 :            :                  * No need to call mmu_notifier_invalidate_range() as we are
     944                 :            :                  * downgrading page table protection not changing it to point
     945                 :            :                  * to a new page.
     946                 :            :                  *
     947                 :            :                  * See Documentation/vm/mmu_notifier.rst
     948                 :            :                  */
     949                 :          2 :                 if (ret)
     950                 :          2 :                         (*cleaned)++;
     951                 :            :         }
     952                 :            : 
     953                 :            :         mmu_notifier_invalidate_range_end(&range);
     954                 :            : 
     955                 :          2 :         return true;
     956                 :            : }
     957                 :            : 
     958                 :          2 : static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
     959                 :            : {
     960                 :          2 :         if (vma->vm_flags & VM_SHARED)
     961                 :            :                 return false;
     962                 :            : 
     963                 :          0 :         return true;
     964                 :            : }
     965                 :            : 
     966                 :          3 : int page_mkclean(struct page *page)
     967                 :            : {
     968                 :          3 :         int cleaned = 0;
     969                 :            :         struct address_space *mapping;
     970                 :          3 :         struct rmap_walk_control rwc = {
     971                 :            :                 .arg = (void *)&cleaned,
     972                 :            :                 .rmap_one = page_mkclean_one,
     973                 :            :                 .invalid_vma = invalid_mkclean_vma,
     974                 :            :         };
     975                 :            : 
     976                 :          3 :         BUG_ON(!PageLocked(page));
     977                 :            : 
     978                 :          3 :         if (!page_mapped(page))
     979                 :            :                 return 0;
     980                 :            : 
     981                 :          2 :         mapping = page_mapping(page);
     982                 :          2 :         if (!mapping)
     983                 :            :                 return 0;
     984                 :            : 
     985                 :          2 :         rmap_walk(page, &rwc);
     986                 :            : 
     987                 :          2 :         return cleaned;
     988                 :            : }
     989                 :            : EXPORT_SYMBOL_GPL(page_mkclean);
     990                 :            : 
     991                 :            : /**
     992                 :            :  * page_move_anon_rmap - move a page to our anon_vma
     993                 :            :  * @page:       the page to move to our anon_vma
     994                 :            :  * @vma:        the vma the page belongs to
     995                 :            :  *
     996                 :            :  * When a page belongs exclusively to one process after a COW event,
     997                 :            :  * that page can be moved into the anon_vma that belongs to just that
     998                 :            :  * process, so the rmap code will not search the parent or sibling
     999                 :            :  * processes.
    1000                 :            :  */
    1001                 :          3 : void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
    1002                 :            : {
    1003                 :          3 :         struct anon_vma *anon_vma = vma->anon_vma;
    1004                 :            : 
    1005                 :            :         page = compound_head(page);
    1006                 :            : 
    1007                 :            :         VM_BUG_ON_PAGE(!PageLocked(page), page);
    1008                 :            :         VM_BUG_ON_VMA(!anon_vma, vma);
    1009                 :            : 
    1010                 :          3 :         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
    1011                 :            :         /*
    1012                 :            :          * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
    1013                 :            :          * simultaneously, so a concurrent reader (eg page_referenced()'s
    1014                 :            :          * PageAnon()) will not see one without the other.
    1015                 :            :          */
    1016                 :          3 :         WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
    1017                 :          3 : }
    1018                 :            : 
    1019                 :            : /**
    1020                 :            :  * __page_set_anon_rmap - set up new anonymous rmap
    1021                 :            :  * @page:       Page or Hugepage to add to rmap
    1022                 :            :  * @vma:        VM area to add page to.
    1023                 :            :  * @address:    User virtual address of the mapping     
    1024                 :            :  * @exclusive:  the page is exclusively owned by the current process
    1025                 :            :  */
    1026                 :          3 : static void __page_set_anon_rmap(struct page *page,
    1027                 :            :         struct vm_area_struct *vma, unsigned long address, int exclusive)
    1028                 :            : {
    1029                 :          3 :         struct anon_vma *anon_vma = vma->anon_vma;
    1030                 :            : 
    1031                 :          3 :         BUG_ON(!anon_vma);
    1032                 :            : 
    1033                 :          3 :         if (PageAnon(page))
    1034                 :          3 :                 return;
    1035                 :            : 
    1036                 :            :         /*
    1037                 :            :          * If the page isn't exclusively mapped into this vma,
    1038                 :            :          * we must use the _oldest_ possible anon_vma for the
    1039                 :            :          * page mapping!
    1040                 :            :          */
    1041                 :          3 :         if (!exclusive)
    1042                 :          0 :                 anon_vma = anon_vma->root;
    1043                 :            : 
    1044                 :          3 :         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
    1045                 :          3 :         page->mapping = (struct address_space *) anon_vma;
    1046                 :          3 :         page->index = linear_page_index(vma, address);
    1047                 :            : }
    1048                 :            : 
    1049                 :            : /**
    1050                 :            :  * __page_check_anon_rmap - sanity check anonymous rmap addition
    1051                 :            :  * @page:       the page to add the mapping to
    1052                 :            :  * @vma:        the vm area in which the mapping is added
    1053                 :            :  * @address:    the user virtual address mapped
    1054                 :            :  */
    1055                 :            : static void __page_check_anon_rmap(struct page *page,
    1056                 :            :         struct vm_area_struct *vma, unsigned long address)
    1057                 :            : {
    1058                 :            : #ifdef CONFIG_DEBUG_VM
    1059                 :            :         /*
    1060                 :            :          * The page's anon-rmap details (mapping and index) are guaranteed to
    1061                 :            :          * be set up correctly at this point.
    1062                 :            :          *
    1063                 :            :          * We have exclusion against page_add_anon_rmap because the caller
    1064                 :            :          * always holds the page locked, except if called from page_dup_rmap,
    1065                 :            :          * in which case the page is already known to be setup.
    1066                 :            :          *
    1067                 :            :          * We have exclusion against page_add_new_anon_rmap because those pages
    1068                 :            :          * are initially only visible via the pagetables, and the pte is locked
    1069                 :            :          * over the call to page_add_new_anon_rmap.
    1070                 :            :          */
    1071                 :            :         BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
    1072                 :            :         BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
    1073                 :            : #endif
    1074                 :            : }
    1075                 :            : 
    1076                 :            : /**
    1077                 :            :  * page_add_anon_rmap - add pte mapping to an anonymous page
    1078                 :            :  * @page:       the page to add the mapping to
    1079                 :            :  * @vma:        the vm area in which the mapping is added
    1080                 :            :  * @address:    the user virtual address mapped
    1081                 :            :  * @compound:   charge the page as compound or small page
    1082                 :            :  *
    1083                 :            :  * The caller needs to hold the pte lock, and the page must be locked in
    1084                 :            :  * the anon_vma case: to serialize mapping,index checking after setting,
    1085                 :            :  * and to ensure that PageAnon is not being upgraded racily to PageKsm
    1086                 :            :  * (but PageKsm is never downgraded to PageAnon).
    1087                 :            :  */
    1088                 :          0 : void page_add_anon_rmap(struct page *page,
    1089                 :            :         struct vm_area_struct *vma, unsigned long address, bool compound)
    1090                 :            : {
    1091                 :          0 :         do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
    1092                 :          0 : }
    1093                 :            : 
    1094                 :            : /*
    1095                 :            :  * Special version of the above for do_swap_page, which often runs
    1096                 :            :  * into pages that are exclusively owned by the current process.
    1097                 :            :  * Everybody else should continue to use page_add_anon_rmap above.
    1098                 :            :  */
    1099                 :          0 : void do_page_add_anon_rmap(struct page *page,
    1100                 :            :         struct vm_area_struct *vma, unsigned long address, int flags)
    1101                 :            : {
    1102                 :          0 :         bool compound = flags & RMAP_COMPOUND;
    1103                 :            :         bool first;
    1104                 :            : 
    1105                 :          0 :         if (compound) {
    1106                 :            :                 atomic_t *mapcount;
    1107                 :            :                 VM_BUG_ON_PAGE(!PageLocked(page), page);
    1108                 :            :                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
    1109                 :            :                 mapcount = compound_mapcount_ptr(page);
    1110                 :            :                 first = atomic_inc_and_test(mapcount);
    1111                 :            :         } else {
    1112                 :          0 :                 first = atomic_inc_and_test(&page->_mapcount);
    1113                 :            :         }
    1114                 :            : 
    1115                 :          0 :         if (first) {
    1116                 :            :                 int nr = compound ? hpage_nr_pages(page) : 1;
    1117                 :            :                 /*
    1118                 :            :                  * We use the irq-unsafe __{inc|mod}_node_page_state because
    1119                 :            :                  * these counters are not modified in interrupt context, and
    1120                 :            :                  * pte lock(a spinlock) is held, which implies preemption
    1121                 :            :                  * disabled.
    1122                 :            :                  */
    1123                 :          0 :                 if (compound)
    1124                 :          0 :                         __inc_node_page_state(page, NR_ANON_THPS);
    1125                 :          0 :                 __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
    1126                 :            :         }
    1127                 :            :         if (unlikely(PageKsm(page)))
    1128                 :          0 :                 return;
    1129                 :            : 
    1130                 :            :         VM_BUG_ON_PAGE(!PageLocked(page), page);
    1131                 :            : 
    1132                 :            :         /* address might be in next vma when migration races vma_adjust */
    1133                 :          0 :         if (first)
    1134                 :          0 :                 __page_set_anon_rmap(page, vma, address,
    1135                 :            :                                 flags & RMAP_EXCLUSIVE);
    1136                 :            :         else
    1137                 :            :                 __page_check_anon_rmap(page, vma, address);
    1138                 :            : }
    1139                 :            : 
    1140                 :            : /**
    1141                 :            :  * page_add_new_anon_rmap - add pte mapping to a new anonymous page
    1142                 :            :  * @page:       the page to add the mapping to
    1143                 :            :  * @vma:        the vm area in which the mapping is added
    1144                 :            :  * @address:    the user virtual address mapped
    1145                 :            :  * @compound:   charge the page as compound or small page
    1146                 :            :  *
    1147                 :            :  * Same as page_add_anon_rmap but must only be called on *new* pages.
    1148                 :            :  * This means the inc-and-test can be bypassed.
    1149                 :            :  * Page does not have to be locked.
    1150                 :            :  */
    1151                 :          3 : void page_add_new_anon_rmap(struct page *page,
    1152                 :            :         struct vm_area_struct *vma, unsigned long address, bool compound)
    1153                 :            : {
    1154                 :            :         int nr = compound ? hpage_nr_pages(page) : 1;
    1155                 :            : 
    1156                 :            :         VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
    1157                 :            :         __SetPageSwapBacked(page);
    1158                 :          3 :         if (compound) {
    1159                 :            :                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
    1160                 :            :                 /* increment count (starts at -1) */
    1161                 :            :                 atomic_set(compound_mapcount_ptr(page), 0);
    1162                 :          0 :                 __inc_node_page_state(page, NR_ANON_THPS);
    1163                 :            :         } else {
    1164                 :            :                 /* Anon THP always mapped first with PMD */
    1165                 :            :                 VM_BUG_ON_PAGE(PageTransCompound(page), page);
    1166                 :            :                 /* increment count (starts at -1) */
    1167                 :            :                 atomic_set(&page->_mapcount, 0);
    1168                 :            :         }
    1169                 :          3 :         __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
    1170                 :          3 :         __page_set_anon_rmap(page, vma, address, 1);
    1171                 :          3 : }
    1172                 :            : 
    1173                 :            : /**
    1174                 :            :  * page_add_file_rmap - add pte mapping to a file page
    1175                 :            :  * @page: the page to add the mapping to
    1176                 :            :  * @compound: charge the page as compound or small page
    1177                 :            :  *
    1178                 :            :  * The caller needs to hold the pte lock.
    1179                 :            :  */
    1180                 :          3 : void page_add_file_rmap(struct page *page, bool compound)
    1181                 :            : {
    1182                 :            :         int i, nr = 1;
    1183                 :            : 
    1184                 :            :         VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
    1185                 :          3 :         lock_page_memcg(page);
    1186                 :            :         if (compound && PageTransHuge(page)) {
    1187                 :            :                 for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
    1188                 :            :                         if (atomic_inc_and_test(&page[i]._mapcount))
    1189                 :            :                                 nr++;
    1190                 :            :                 }
    1191                 :            :                 if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
    1192                 :            :                         goto out;
    1193                 :            :                 if (PageSwapBacked(page))
    1194                 :            :                         __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
    1195                 :            :                 else
    1196                 :            :                         __inc_node_page_state(page, NR_FILE_PMDMAPPED);
    1197                 :            :         } else {
    1198                 :            :                 if (PageTransCompound(page) && page_mapping(page)) {
    1199                 :            :                         VM_WARN_ON_ONCE(!PageLocked(page));
    1200                 :            : 
    1201                 :            :                         SetPageDoubleMap(compound_head(page));
    1202                 :            :                         if (PageMlocked(page))
    1203                 :            :                                 clear_page_mlock(compound_head(page));
    1204                 :            :                 }
    1205                 :          3 :                 if (!atomic_inc_and_test(&page->_mapcount))
    1206                 :            :                         goto out;
    1207                 :            :         }
    1208                 :          3 :         __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
    1209                 :            : out:
    1210                 :          3 :         unlock_page_memcg(page);
    1211                 :          3 : }
    1212                 :            : 
    1213                 :          3 : static void page_remove_file_rmap(struct page *page, bool compound)
    1214                 :            : {
    1215                 :            :         int i, nr = 1;
    1216                 :            : 
    1217                 :            :         VM_BUG_ON_PAGE(compound && !PageHead(page), page);
    1218                 :          3 :         lock_page_memcg(page);
    1219                 :            : 
    1220                 :            :         /* Hugepages are not counted in NR_FILE_MAPPED for now. */
    1221                 :            :         if (unlikely(PageHuge(page))) {
    1222                 :            :                 /* hugetlb pages are always mapped with pmds */
    1223                 :            :                 atomic_dec(compound_mapcount_ptr(page));
    1224                 :            :                 goto out;
    1225                 :            :         }
    1226                 :            : 
    1227                 :            :         /* page still mapped by someone else? */
    1228                 :            :         if (compound && PageTransHuge(page)) {
    1229                 :            :                 for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
    1230                 :            :                         if (atomic_add_negative(-1, &page[i]._mapcount))
    1231                 :            :                                 nr++;
    1232                 :            :                 }
    1233                 :            :                 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
    1234                 :            :                         goto out;
    1235                 :            :                 if (PageSwapBacked(page))
    1236                 :            :                         __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
    1237                 :            :                 else
    1238                 :            :                         __dec_node_page_state(page, NR_FILE_PMDMAPPED);
    1239                 :            :         } else {
    1240                 :          3 :                 if (!atomic_add_negative(-1, &page->_mapcount))
    1241                 :            :                         goto out;
    1242                 :            :         }
    1243                 :            : 
    1244                 :            :         /*
    1245                 :            :          * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
    1246                 :            :          * these counters are not modified in interrupt context, and
    1247                 :            :          * pte lock(a spinlock) is held, which implies preemption disabled.
    1248                 :            :          */
    1249                 :          3 :         __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
    1250                 :            : 
    1251                 :          3 :         if (unlikely(PageMlocked(page)))
    1252                 :          0 :                 clear_page_mlock(page);
    1253                 :            : out:
    1254                 :          3 :         unlock_page_memcg(page);
    1255                 :          3 : }
    1256                 :            : 
    1257                 :            : static void page_remove_anon_compound_rmap(struct page *page)
    1258                 :            : {
    1259                 :            :         int i, nr;
    1260                 :            : 
    1261                 :          0 :         if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
    1262                 :            :                 return;
    1263                 :            : 
    1264                 :            :         /* Hugepages are not counted in NR_ANON_PAGES for now. */
    1265                 :            :         if (unlikely(PageHuge(page)))
    1266                 :            :                 return;
    1267                 :            : 
    1268                 :            :         if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
    1269                 :            :                 return;
    1270                 :            : 
    1271                 :            :         __dec_node_page_state(page, NR_ANON_THPS);
    1272                 :            : 
    1273                 :            :         if (TestClearPageDoubleMap(page)) {
    1274                 :            :                 /*
    1275                 :            :                  * Subpages can be mapped with PTEs too. Check how many of
    1276                 :            :                  * them are still mapped.
    1277                 :            :                  */
    1278                 :            :                 for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
    1279                 :            :                         if (atomic_add_negative(-1, &page[i]._mapcount))
    1280                 :            :                                 nr++;
    1281                 :            :                 }
    1282                 :            :         } else {
    1283                 :            :                 nr = HPAGE_PMD_NR;
    1284                 :            :         }
    1285                 :            : 
    1286                 :            :         if (unlikely(PageMlocked(page)))
    1287                 :            :                 clear_page_mlock(page);
    1288                 :            : 
    1289                 :            :         if (nr) {
    1290                 :            :                 __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
    1291                 :            :                 deferred_split_huge_page(page);
    1292                 :            :         }
    1293                 :            : }
    1294                 :            : 
    1295                 :            : /**
    1296                 :            :  * page_remove_rmap - take down pte mapping from a page
    1297                 :            :  * @page:       page to remove mapping from
    1298                 :            :  * @compound:   uncharge the page as compound or small page
    1299                 :            :  *
    1300                 :            :  * The caller needs to hold the pte lock.
                                         *
                                         * Non-anonymous pages are handed to page_remove_file_rmap() and
                                         * compound anonymous mappings to page_remove_anon_compound_rmap().
                                         * For a small anonymous mapping, _mapcount is decremented and, only
                                         * if it goes negative, NR_ANON_MAPPED is decremented, the mlock
                                         * state is cleared and a transparent compound page is passed to
                                         * deferred_split_huge_page().
    1301                 :            :  */
    1302                 :          3 : void page_remove_rmap(struct page *page, bool compound)
    1303                 :            : {
    1304                 :          3 :         if (!PageAnon(page))
    1305                 :          3 :                 return page_remove_file_rmap(page, compound);
    1306                 :            : 
    1307                 :          3 :         if (compound)
    1308                 :            :                 return page_remove_anon_compound_rmap(page);
    1309                 :            : 
    1310                 :            :         /* page still mapped by someone else? */
    1311                 :          3 :         if (!atomic_add_negative(-1, &page->_mapcount))
    1312                 :            :                 return;
    1313                 :            : 
    1314                 :            :         /*
    1315                 :            :          * We use the irq-unsafe __dec_node_page_state() because
    1316                 :            :          * these counters are not modified in interrupt context, and
    1317                 :            :          * pte lock(a spinlock) is held, which implies preemption disabled.
    1318                 :            :          */
    1319                 :          3 :         __dec_node_page_state(page, NR_ANON_MAPPED);
    1320                 :            : 
    1321                 :          3 :         if (unlikely(PageMlocked(page)))
    1322                 :          0 :                 clear_page_mlock(page);
    1323                 :            : 
                                                /*
                                                 * The page is part of a transparent compound page: pass its head
                                                 * page to deferred_split_huge_page().
                                                 */
    1324                 :            :         if (PageTransCompound(page))
    1325                 :            :                 deferred_split_huge_page(compound_head(page));
    1326                 :            : 
    1327                 :            :         /*
    1328                 :            :          * It would be tidy to reset the PageAnon mapping here,
    1329                 :            :          * but that might overwrite a racing page_add_anon_rmap
    1330                 :            :          * which increments mapcount after us but sets mapping
    1331                 :            :          * before us: so leave the reset to free_unref_page,
    1332                 :            :          * and remember that it's only reliable while mapped.
    1333                 :            :          * Leaving it set also helps swapoff to reinstate ptes
    1334                 :            :          * faster for those pages still in swapcache.
    1335                 :            :          */
    1336                 :            : }
    1337                 :            : 
    1338                 :            : /*
    1339                 :            :  * @arg: enum ttu_flags will be passed to this argument
    1340                 :            :  */
    1341                 :          0 : static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
    1342                 :            :                      unsigned long address, void *arg)
    1343                 :            : {
    1344                 :          0 :         struct mm_struct *mm = vma->vm_mm;
    1345                 :          0 :         struct page_vma_mapped_walk pvmw = {
    1346                 :            :                 .page = page,
    1347                 :            :                 .vma = vma,
    1348                 :            :                 .address = address,
    1349                 :            :         };
    1350                 :            :         pte_t pteval;
    1351                 :            :         struct page *subpage;
    1352                 :            :         bool ret = true;
    1353                 :            :         struct mmu_notifier_range range;
    1354                 :          0 :         enum ttu_flags flags = (enum ttu_flags)arg;
    1355                 :            : 
    1356                 :            :         /* munlock has nothing to gain from examining un-locked vmas */
    1357                 :          0 :         if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
    1358                 :            :                 return true;
    1359                 :            : 
    1360                 :            :         if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
    1361                 :            :             is_zone_device_page(page) && !is_device_private_page(page))
    1362                 :            :                 return true;
    1363                 :            : 
    1364                 :            :         if (flags & TTU_SPLIT_HUGE_PMD) {
    1365                 :            :                 split_huge_pmd_address(vma, address,
    1366                 :            :                                 flags & TTU_SPLIT_FREEZE, page);
    1367                 :            :         }
    1368                 :            : 
    1369                 :            :         /*
    1370                 :            :          * For THP, we have to assume the worse case ie pmd for invalidation.
    1371                 :            :          * For hugetlb, it could be much worse if we need to do pud
    1372                 :            :          * invalidation in the case of pmd sharing.
    1373                 :            :          *
    1374                 :            :          * Note that the page can not be free in this function as call of
    1375                 :            :          * try_to_unmap() must hold a reference on the page.
    1376                 :            :          */
    1377                 :            :         mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
    1378                 :            :                                 address,
    1379                 :            :                                 min(vma->vm_end, address + page_size(page)));
    1380                 :            :         if (PageHuge(page)) {
    1381                 :            :                 /*
    1382                 :            :                  * If sharing is possible, start and end will be adjusted
    1383                 :            :                  * accordingly.
    1384                 :            :                  */
    1385                 :            :                 adjust_range_if_pmd_sharing_possible(vma, &range.start,
    1386                 :            :                                                      &range.end);
    1387                 :            :         }
    1388                 :            :         mmu_notifier_invalidate_range_start(&range);
    1389                 :            : 
    1390                 :          0 :         while (page_vma_mapped_walk(&pvmw)) {
    1391                 :            : #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
    1392                 :            :                 /* PMD-mapped THP migration entry */
    1393                 :            :                 if (!pvmw.pte && (flags & TTU_MIGRATION)) {
    1394                 :            :                         VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
    1395                 :            : 
    1396                 :            :                         set_pmd_migration_entry(&pvmw, page);
    1397                 :            :                         continue;
    1398                 :            :                 }
    1399                 :            : #endif
    1400                 :            : 
    1401                 :            :                 /*
    1402                 :            :                  * If the page is mlock()d, we cannot swap it out.
    1403                 :            :                  * If it's recently referenced (perhaps page_referenced
    1404                 :            :                  * skipped over this mm) then we should reactivate it.
    1405                 :            :                  */
    1406                 :          0 :                 if (!(flags & TTU_IGNORE_MLOCK)) {
    1407                 :          0 :                         if (vma->vm_flags & VM_LOCKED) {
    1408                 :            :                                 /* PTE-mapped THP are never mlocked */
    1409                 :            :                                 if (!PageTransCompound(page)) {
    1410                 :            :                                         /*
    1411                 :            :                                          * Holding pte lock, we do *not* need
    1412                 :            :                                          * mmap_sem here
    1413                 :            :                                          */
    1414                 :          0 :                                         mlock_vma_page(page);
    1415                 :            :                                 }
    1416                 :            :                                 ret = false;
    1417                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1418                 :            :                                 break;
    1419                 :            :                         }
    1420                 :          0 :                         if (flags & TTU_MUNLOCK)
    1421                 :          0 :                                 continue;
    1422                 :            :                 }
    1423                 :            : 
    1424                 :            :                 /* Unexpected PMD-mapped THP? */
    1425                 :            :                 VM_BUG_ON_PAGE(!pvmw.pte, page);
    1426                 :            : 
    1427                 :          0 :                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
    1428                 :          0 :                 address = pvmw.address;
    1429                 :            : 
    1430                 :            :                 if (PageHuge(page)) {
    1431                 :            :                         if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
    1432                 :            :                                 /*
    1433                 :            :                                  * huge_pmd_unshare unmapped an entire PMD
    1434                 :            :                                  * page.  There is no way of knowing exactly
    1435                 :            :                                  * which PMDs may be cached for this mm, so
    1436                 :            :                                  * we must flush them all.  start/end were
    1437                 :            :                                  * already adjusted above to cover this range.
    1438                 :            :                                  */
    1439                 :            :                                 flush_cache_range(vma, range.start, range.end);
    1440                 :            :                                 flush_tlb_range(vma, range.start, range.end);
    1441                 :            :                                 mmu_notifier_invalidate_range(mm, range.start,
    1442                 :            :                                                               range.end);
    1443                 :            : 
    1444                 :            :                                 /*
    1445                 :            :                                  * The ref count of the PMD page was dropped
    1446                 :            :                                  * which is part of the way map counting
    1447                 :            :                                  * is done for shared PMDs.  Return 'true'
    1448                 :            :                                  * here.  When there is no other sharing,
    1449                 :            :                                  * huge_pmd_unshare returns false and we will
    1450                 :            :                                  * unmap the actual page and drop map count
    1451                 :            :                                  * to zero.
    1452                 :            :                                  */
    1453                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1454                 :            :                                 break;
    1455                 :            :                         }
    1456                 :            :                 }
    1457                 :            : 
    1458                 :            :                 if (IS_ENABLED(CONFIG_MIGRATION) &&
    1459                 :            :                     (flags & TTU_MIGRATION) &&
    1460                 :            :                     is_zone_device_page(page)) {
    1461                 :            :                         swp_entry_t entry;
    1462                 :            :                         pte_t swp_pte;
    1463                 :            : 
    1464                 :            :                         pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
    1465                 :            : 
    1466                 :            :                         /*
    1467                 :            :                          * Store the pfn of the page in a special migration
    1468                 :            :                          * pte. do_swap_page() will wait until the migration
    1469                 :            :                          * pte is removed and then restart fault handling.
    1470                 :            :                          */
    1471                 :            :                         entry = make_migration_entry(page, 0);
    1472                 :            :                         swp_pte = swp_entry_to_pte(entry);
    1473                 :            :                         if (pte_soft_dirty(pteval))
    1474                 :            :                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
    1475                 :            :                         set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
    1476                 :            :                         /*
    1477                 :            :                          * No need to invalidate here it will synchronize on
    1478                 :            :                          * against the special swap migration pte.
    1479                 :            :                          *
    1480                 :            :                          * The assignment to subpage above was computed from a
    1481                 :            :                          * swap PTE which results in an invalid pointer.
    1482                 :            :                          * Since only PAGE_SIZE pages can currently be
    1483                 :            :                          * migrated, just set it to page. This will need to be
    1484                 :            :                          * changed when hugepage migrations to device private
    1485                 :            :                          * memory are supported.
    1486                 :            :                          */
    1487                 :            :                         subpage = page;
    1488                 :            :                         goto discard;
    1489                 :            :                 }
    1490                 :            : 
    1491                 :          0 :                 if (!(flags & TTU_IGNORE_ACCESS)) {
    1492                 :          0 :                         if (ptep_clear_flush_young_notify(vma, address,
    1493                 :            :                                                 pvmw.pte)) {
    1494                 :            :                                 ret = false;
    1495                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1496                 :            :                                 break;
    1497                 :            :                         }
    1498                 :            :                 }
    1499                 :            : 
    1500                 :            :                 /* Nuke the page table entry. */
    1501                 :          0 :                 flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
    1502                 :            :                 if (should_defer_flush(mm, flags)) {
    1503                 :            :                         /*
    1504                 :            :                          * We clear the PTE but do not flush so potentially
    1505                 :            :                          * a remote CPU could still be writing to the page.
    1506                 :            :                          * If the entry was previously clean then the
    1507                 :            :                          * architecture must guarantee that a clear->dirty
    1508                 :            :                          * transition on a cached TLB entry is written through
    1509                 :            :                          * and traps if the PTE is unmapped.
    1510                 :            :                          */
    1511                 :            :                         pteval = ptep_get_and_clear(mm, address, pvmw.pte);
    1512                 :            : 
    1513                 :            :                         set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
    1514                 :            :                 } else {
    1515                 :          0 :                         pteval = ptep_clear_flush(vma, address, pvmw.pte);
    1516                 :            :                 }
    1517                 :            : 
    1518                 :            :                 /* Move the dirty bit to the page. Now the pte is gone. */
    1519                 :          0 :                 if (pte_dirty(pteval))
    1520                 :          0 :                         set_page_dirty(page);
    1521                 :            : 
    1522                 :            :                 /* Update high watermark before we lower rss */
    1523                 :            :                 update_hiwater_rss(mm);
    1524                 :            : 
    1525                 :            :                 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
    1526                 :            :                         pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
    1527                 :            :                         if (PageHuge(page)) {
    1528                 :            :                                 hugetlb_count_sub(compound_nr(page), mm);
    1529                 :            :                                 set_huge_swap_pte_at(mm, address,
    1530                 :            :                                                      pvmw.pte, pteval,
    1531                 :            :                                                      vma_mmu_pagesize(vma));
    1532                 :            :                         } else {
    1533                 :            :                                 dec_mm_counter(mm, mm_counter(page));
    1534                 :            :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1535                 :            :                         }
    1536                 :            : 
    1537                 :            :                 } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
    1538                 :            :                         /*
    1539                 :            :                          * The guest indicated that the page content is of no
    1540                 :            :                          * interest anymore. Simply discard the pte, vmscan
    1541                 :            :                          * will take care of the rest.
    1542                 :            :                          * A future reference will then fault in a new zero
    1543                 :            :                          * page. When userfaultfd is active, we must not drop
    1544                 :            :                          * this page though, as its main user (postcopy
    1545                 :            :                          * migration) will not expect userfaults on already
    1546                 :            :                          * copied pages.
    1547                 :            :                          */
    1548                 :            :                         dec_mm_counter(mm, mm_counter(page));
    1549                 :            :                         /* We have to invalidate as we cleared the pte */
    1550                 :            :                         mmu_notifier_invalidate_range(mm, address,
    1551                 :            :                                                       address + PAGE_SIZE);
    1552                 :          0 :                 } else if (IS_ENABLED(CONFIG_MIGRATION) &&
    1553                 :          0 :                                 (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
    1554                 :            :                         swp_entry_t entry;
    1555                 :            :                         pte_t swp_pte;
    1556                 :            : 
    1557                 :            :                         if (arch_unmap_one(mm, vma, address, pteval) < 0) {
    1558                 :            :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1559                 :            :                                 ret = false;
    1560                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1561                 :            :                                 break;
    1562                 :            :                         }
    1563                 :            : 
    1564                 :            :                         /*
    1565                 :            :                          * Store the pfn of the page in a special migration
    1566                 :            :                          * pte. do_swap_page() will wait until the migration
    1567                 :            :                          * pte is removed and then restart fault handling.
    1568                 :            :                          */
    1569                 :          0 :                         entry = make_migration_entry(subpage,
    1570                 :          0 :                                         pte_write(pteval));
    1571                 :            :                         swp_pte = swp_entry_to_pte(entry);
    1572                 :            :                         if (pte_soft_dirty(pteval))
    1573                 :            :                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
    1574                 :          0 :                         set_pte_at(mm, address, pvmw.pte, swp_pte);
    1575                 :            :                         /*
    1576                 :            :                          * No need to invalidate here it will synchronize on
    1577                 :            :                          * against the special swap migration pte.
    1578                 :            :                          */
    1579                 :          0 :                 } else if (PageAnon(page)) {
    1580                 :          0 :                         swp_entry_t entry = { .val = page_private(subpage) };
    1581                 :            :                         pte_t swp_pte;
    1582                 :            :                         /*
    1583                 :            :                          * Store the swap location in the pte.
    1584                 :            :                          * See handle_pte_fault() ...
    1585                 :            :                          */
    1586                 :          0 :                         if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
    1587                 :          0 :                                 WARN_ON_ONCE(1);
    1588                 :            :                                 ret = false;
    1589                 :            :                                 /* We have to invalidate as we cleared the pte */
    1590                 :            :                                 mmu_notifier_invalidate_range(mm, address,
    1591                 :            :                                                         address + PAGE_SIZE);
    1592                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1593                 :            :                                 break;
    1594                 :            :                         }
    1595                 :            : 
    1596                 :            :                         /* MADV_FREE page check */
    1597                 :          0 :                         if (!PageSwapBacked(page)) {
    1598                 :          0 :                                 if (!PageDirty(page)) {
    1599                 :            :                                         /* Invalidate as we cleared the pte */
    1600                 :            :                                         mmu_notifier_invalidate_range(mm,
    1601                 :            :                                                 address, address + PAGE_SIZE);
    1602                 :            :                                         dec_mm_counter(mm, MM_ANONPAGES);
    1603                 :            :                                         goto discard;
    1604                 :            :                                 }
    1605                 :            : 
    1606                 :            :                                 /*
    1607                 :            :                                  * If the page was redirtied, it cannot be
    1608                 :            :                                  * discarded. Remap the page to page table.
    1609                 :            :                                  */
    1610                 :          0 :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1611                 :            :                                 SetPageSwapBacked(page);
    1612                 :            :                                 ret = false;
    1613                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1614                 :            :                                 break;
    1615                 :            :                         }
    1616                 :            : 
    1617                 :          0 :                         if (swap_duplicate(entry) < 0) {
    1618                 :          0 :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1619                 :            :                                 ret = false;
    1620                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1621                 :            :                                 break;
    1622                 :            :                         }
    1623                 :            :                         if (arch_unmap_one(mm, vma, address, pteval) < 0) {
    1624                 :            :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1625                 :            :                                 ret = false;
    1626                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1627                 :            :                                 break;
    1628                 :            :                         }
    1629                 :          0 :                         if (list_empty(&mm->mmlist)) {
    1630                 :            :                                 spin_lock(&mmlist_lock);
    1631                 :          0 :                                 if (list_empty(&mm->mmlist))
    1632                 :            :                                         list_add(&mm->mmlist, &init_mm.mmlist);
    1633                 :            :                                 spin_unlock(&mmlist_lock);
    1634                 :            :                         }
    1635                 :            :                         dec_mm_counter(mm, MM_ANONPAGES);
    1636                 :            :                         inc_mm_counter(mm, MM_SWAPENTS);
    1637                 :            :                         swp_pte = swp_entry_to_pte(entry);
    1638                 :            :                         if (pte_soft_dirty(pteval))
    1639                 :            :                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
    1640                 :          0 :                         set_pte_at(mm, address, pvmw.pte, swp_pte);
    1641                 :            :                         /* Invalidate as we cleared the pte */
    1642                 :            :                         mmu_notifier_invalidate_range(mm, address,
    1643                 :            :                                                       address + PAGE_SIZE);
    1644                 :            :                 } else {
    1645                 :            :                         /*
    1646                 :            :                          * This is a locked file-backed page, thus it cannot
    1647                 :            :                          * be removed from the page cache and replaced by a new
    1648                 :            :                          * page before mmu_notifier_invalidate_range_end, so no
    1649                 :            :                          * concurrent thread might update its page table to
    1650                 :            :                          * point at new page while a device still is using this
    1651                 :            :                          * page.
    1652                 :            :                          *
    1653                 :            :                          * See Documentation/vm/mmu_notifier.rst
    1654                 :            :                          */
    1655                 :          0 :                         dec_mm_counter(mm, mm_counter_file(page));
    1656                 :            :                 }
    1657                 :            : discard:
    1658                 :            :                 /*
    1659                 :            :                  * No need to call mmu_notifier_invalidate_range(), it has been
    1660                 :            :                  * done above for all cases requiring it to happen under page
    1661                 :            :                  * table lock before mmu_notifier_invalidate_range_end()
    1662                 :            :                  *
    1663                 :            :                  * See Documentation/vm/mmu_notifier.rst
    1664                 :            :                  */
    1665                 :          0 :                 page_remove_rmap(subpage, PageHuge(page));
    1666                 :          0 :                 put_page(page);
    1667                 :            :         }
    1668                 :            : 
    1669                 :            :         mmu_notifier_invalidate_range_end(&range);
    1670                 :            : 
    1671                 :          0 :         return ret;
    1672                 :            : }
    1673                 :            : 
    1674                 :          0 : bool is_vma_temporary_stack(struct vm_area_struct *vma) /* true only if @vma may grow (down or up) and still has every VM_STACK_INCOMPLETE_SETUP bit set */
    1675                 :            : {
    1676                 :          0 :         int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); /* nonzero when either growth flag is set */
    1677                 :            : 
    1678                 :          0 :         if (!maybe_stack)
    1679                 :            :                 return false;
    1680                 :            : 
    1681                 :          0 :         if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == /* '==' against the mask: all of its bits must be set, not just one */
    1682                 :            :                                                 VM_STACK_INCOMPLETE_SETUP)
    1683                 :            :                 return true;
    1684                 :            : 
    1685                 :          0 :         return false;
    1686                 :            : }
    1687                 :            : 
    1688                 :          0 : static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) /* rwc->invalid_vma hook set by try_to_unmap(): a true result makes the walk skip @vma; @arg is not used */
    1689                 :            : {
    1690                 :          0 :         return is_vma_temporary_stack(vma);
    1691                 :            : }
    1692                 :            : 
    1693                 :          0 : static int page_mapcount_is_zero(struct page *page) /* rwc->done hook used by try_to_unmap(): nonzero once total_mapcount(page) is zero, which ends the walk */
    1694                 :            : {
    1695                 :          0 :         return !total_mapcount(page);
    1696                 :            : }
    1697                 :            : 
    1698                 :            : /**
    1699                 :            :  * try_to_unmap - try to remove all page table mappings to a page
    1700                 :            :  * @page: the page to get unmapped
    1701                 :            :  * @flags: ttu_flags bits; handed to try_to_unmap_one() through rwc.arg
    1702                 :            :  *
    1703                 :            :  * Tries to remove all the page table entries which are mapping this
    1704                 :            :  * page, used in the pageout path.  Caller must hold the page lock.
    1705                 :            :  *
    1706                 :            :  * Return: true if page_mapcount(page) is zero after the walk, else false.
    1707                 :            :  */
    1708                 :          0 : bool try_to_unmap(struct page *page, enum ttu_flags flags)
    1709                 :            : {
    1710                 :          0 :         struct rmap_walk_control rwc = {
    1711                 :            :                 .rmap_one = try_to_unmap_one,
    1712                 :          0 :                 .arg = (void *)flags, /* the flag bits travel in the pointer value itself; nothing is pointed to */
    1713                 :            :                 .done = page_mapcount_is_zero, /* lets the walk stop early once no mappings remain */
    1714                 :            :                 .anon_lock = page_lock_anon_vma_read,
    1715                 :            :         };
    1716                 :            : 
    1717                 :            :         /*
    1718                 :            :          * During exec, a temporary VMA is setup and later moved.
    1719                 :            :          * The VMA is moved under the anon_vma lock but not the
    1720                 :            :          * page tables leading to a race where migration cannot
    1721                 :            :          * find the migration ptes. Rather than increasing the
    1722                 :            :          * locking requirements of exec(), migration skips
    1723                 :            :          * temporary VMAs until after exec() completes.
    1724                 :            :          */
    1725                 :          0 :         if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
    1726                 :          0 :             && !PageKsm(page) && PageAnon(page)) /* filter only installed for non-KSM anonymous pages */
    1727                 :          0 :                 rwc.invalid_vma = invalid_migration_vma;
    1728                 :            : 
    1729                 :          0 :         if (flags & TTU_RMAP_LOCKED) /* caller says it already holds the relevant rmap lock */
    1730                 :          0 :                 rmap_walk_locked(page, &rwc);
    1731                 :            :         else
    1732                 :          0 :                 rmap_walk(page, &rwc);
    1733                 :            : 
    1734                 :          0 :         return !page_mapcount(page) ? true : false; /* NOTE(review): result tests page_mapcount(), while the early-exit hook tests total_mapcount() */
    1735                 :            : }
    1736                 :            : 
    1737                 :          0 : static int page_not_mapped(struct page *page) /* rwc->done hook used by try_to_munlock(): nonzero when page_mapped(page) is false */
    1738                 :            : {
    1739                 :          0 :         return !page_mapped(page);
    1740                 :            : }; /* NOTE(review): the ';' after the closing brace is not needed */
    1741                 :            : 
    1742                 :            : /**
    1743                 :            :  * try_to_munlock - try to munlock a page
    1744                 :            :  * @page: the page to be munlocked; must be locked and must not be on the LRU
    1745                 :            :  *
    1746                 :            :  * Called from munlock code.  Checks all of the VMAs mapping the page
    1747                 :            :  * to make sure nobody else has this page mlocked. The page will be
    1748                 :            :  * returned with PG_mlocked cleared if no other vmas have it mlocked.
    1749                 :            :  */
    1750                 :            : 
    1751                 :          0 : void try_to_munlock(struct page *page)
    1752                 :            : {
    1753                 :          0 :         struct rmap_walk_control rwc = {
    1754                 :            :                 .rmap_one = try_to_unmap_one,
    1755                 :            :                 .arg = (void *)TTU_MUNLOCK, /* TTU_MUNLOCK is the only flag given to try_to_unmap_one() */
    1756                 :            :                 .done = page_not_mapped, /* stop walking once the page has no mappings left */
    1757                 :            :                 .anon_lock = page_lock_anon_vma_read,
    1758                 :            : 
    1759                 :            :         };
    1760                 :            : 
    1761                 :            :         VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); /* precondition: page locked and off the LRU */
    1762                 :            :         VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); /* compound pages flagged PageDoubleMap are not expected here */
    1763                 :            : 
    1764                 :          0 :         rmap_walk(page, &rwc); /* rmap_walk() runs the anon/file walkers with locked == false */
    1765                 :          0 : }
    1766                 :            : 
    1767                 :          3 : void __put_anon_vma(struct anon_vma *anon_vma) /* free @anon_vma and, when it has a distinct root, drop one reference on that root */
    1768                 :            : {
    1769                 :          3 :         struct anon_vma *root = anon_vma->root; /* read before anon_vma is handed to anon_vma_free() below */
    1770                 :            : 
    1771                 :          3 :         anon_vma_free(anon_vma);
    1772                 :          3 :         if (root != anon_vma && atomic_dec_and_test(&root->refcount)) /* root is touched only if it is a different object; it is freed when its refcount drops to zero */
    1773                 :          0 :                 anon_vma_free(root);
    1774                 :          3 : }
    1775                 :            : 
    1776                 :          0 : static struct anon_vma *rmap_walk_anon_lock(struct page *page,
    1777                 :            :                                         struct rmap_walk_control *rwc) /* returns the anon_vma to walk for @page, or NULL if there is none */
    1778                 :            : {
    1779                 :            :         struct anon_vma *anon_vma;
    1780                 :            : 
    1781                 :          0 :         if (rwc->anon_lock) /* a walk-specific locking hook, when supplied, takes over completely */
    1782                 :          0 :                 return rwc->anon_lock(page);
    1783                 :            : 
    1784                 :            :         /*
    1785                 :            :          * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
    1786                 :            :          * because that depends on page_mapped(); but not all its usages
    1787                 :            :          * are holding mmap_sem. Users without mmap_sem are required to
    1788                 :            :          * take a reference count to prevent the anon_vma disappearing
    1789                 :            :          */
    1790                 :          0 :         anon_vma = page_anon_vma(page);
    1791                 :          0 :         if (!anon_vma)
    1792                 :            :                 return NULL;
    1793                 :            : 
    1794                 :            :         anon_vma_lock_read(anon_vma); /* default path: take the read lock here; rmap_walk_anon() drops it */
    1795                 :          0 :         return anon_vma;
    1796                 :            : }
    1797                 :            : 
    1798                 :            : /*
    1799                 :            :  * rmap_walk_anon - do something to anonymous page using the object-based
    1800                 :            :  * rmap method
    1801                 :            :  * @page: the page to be handled
    1802                 :            :  * @rwc: control variable according to each walk type
    1803                 :            :  *
    1804                 :            :  * Find all the mappings of a page using the mapping pointer and the vma chains
    1805                 :            :  * contained in the anon_vma struct it points to.
    1806                 :            :  *
    1807                 :            :  * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
    1808                 :            :  * where the page was found will be held for write.  So, we won't recheck
    1809                 :            :  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
    1810                 :            :  * LOCKED.
    1811                 :            :  */
    1812                 :          0 : static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
    1813                 :            :                 bool locked) /* @locked: true when the caller already holds the anon_vma lock; then none is taken or released here */
    1814                 :            : {
    1815                 :            :         struct anon_vma *anon_vma;
    1816                 :            :         pgoff_t pgoff_start, pgoff_end;
    1817                 :            :         struct anon_vma_chain *avc;
    1818                 :            : 
    1819                 :          0 :         if (locked) {
    1820                 :          0 :                 anon_vma = page_anon_vma(page);
    1821                 :            :                 /* anon_vma disappear under us? */
    1822                 :            :                 VM_BUG_ON_PAGE(!anon_vma, page);
    1823                 :            :         } else {
    1824                 :          0 :                 anon_vma = rmap_walk_anon_lock(page, rwc); /* NULL, or an anon_vma that is unlocked again at the end of this function */
    1825                 :            :         }
    1826                 :          0 :         if (!anon_vma)
    1827                 :          0 :                 return;
    1828                 :            : 
    1829                 :            :         pgoff_start = page_to_pgoff(page);
    1830                 :            :         pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; /* inclusive end of the offset range searched in the interval tree */
    1831                 :          0 :         anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
    1832                 :            :                         pgoff_start, pgoff_end) {
    1833                 :          0 :                 struct vm_area_struct *vma = avc->vma;
    1834                 :            :                 unsigned long address = vma_address(page, vma);
    1835                 :            : 
    1836                 :          0 :                 cond_resched();
    1837                 :            : 
    1838                 :          0 :                 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) /* optional filter: skip VMAs it rejects */
    1839                 :          0 :                         continue;
    1840                 :            : 
    1841                 :          0 :                 if (!rwc->rmap_one(page, vma, address, rwc->arg)) /* a false return from the per-VMA callback aborts the walk */
    1842                 :            :                         break;
    1843                 :          0 :                 if (rwc->done && rwc->done(page)) /* optional completion test: stop as soon as it reports done */
    1844                 :            :                         break;
    1845                 :            :         }
    1846                 :            : 
    1847                 :          0 :         if (!locked)
    1848                 :            :                 anon_vma_unlock_read(anon_vma);
    1849                 :            : }
    1850                 :            : 
    1851                 :            : /*
    1852                 :            :  * rmap_walk_file - do something to file page using the object-based rmap method
    1853                 :            :  * @page: the page to be handled
    1854                 :            :  * @rwc: control variable according to each walk type
    1855                 :            :  *
    1856                 :            :  * Find all the mappings of a page using the mapping pointer and the vma chains
    1857                 :            :  * contained in the address_space struct it points to.
    1858                 :            :  *
    1859                 :            :  * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
    1860                 :            :  * where the page was found will be held for write.  So, we won't recheck
    1861                 :            :  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
    1862                 :            :  * LOCKED.
    1863                 :            :  */
    1864                 :          2 : static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
    1865                 :            :                 bool locked) /* @locked: true when the caller already holds mapping->i_mmap_rwsem; then it is neither taken nor released here */
    1866                 :            : {
    1867                 :          2 :         struct address_space *mapping = page_mapping(page);
    1868                 :            :         pgoff_t pgoff_start, pgoff_end;
    1869                 :            :         struct vm_area_struct *vma;
    1870                 :            : 
    1871                 :            :         /*
    1872                 :            :          * The page lock not only makes sure that page->mapping cannot
    1873                 :            :          * suddenly be NULLified by truncation, it makes sure that the
    1874                 :            :          * structure at mapping cannot be freed and reused yet,
    1875                 :            :          * so we can safely take mapping->i_mmap_rwsem.
    1876                 :            :          */
    1877                 :            :         VM_BUG_ON_PAGE(!PageLocked(page), page);
    1878                 :            : 
    1879                 :          2 :         if (!mapping) /* no address_space for this page: nothing to walk */
    1880                 :          2 :                 return;
    1881                 :            : 
    1882                 :            :         pgoff_start = page_to_pgoff(page);
    1883                 :            :         pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; /* inclusive end of the offset range searched in the interval tree */
    1884                 :          2 :         if (!locked)
    1885                 :            :                 i_mmap_lock_read(mapping);
    1886                 :          2 :         vma_interval_tree_foreach(vma, &mapping->i_mmap,
    1887                 :            :                         pgoff_start, pgoff_end) {
    1888                 :            :                 unsigned long address = vma_address(page, vma);
    1889                 :            : 
    1890                 :          2 :                 cond_resched();
    1891                 :            : 
    1892                 :          2 :                 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) /* optional filter: skip VMAs it rejects */
    1893                 :          0 :                         continue;
    1894                 :            : 
    1895                 :          2 :                 if (!rwc->rmap_one(page, vma, address, rwc->arg)) /* a false return from the per-VMA callback aborts the walk */
    1896                 :            :                         goto done;
    1897                 :          2 :                 if (rwc->done && rwc->done(page)) /* optional completion test: stop as soon as it reports done */
    1898                 :            :                         goto done;
    1899                 :            :         }
    1900                 :            : 
    1901                 :            : done:
    1902                 :          2 :         if (!locked)
    1903                 :            :                 i_mmap_unlock_read(mapping);
    1904                 :            : }
    1905                 :            : 
    1906                 :          2 : void rmap_walk(struct page *page, struct rmap_walk_control *rwc) /* dispatch the walk described by @rwc by page type: KSM, anonymous, or (otherwise) file */
    1907                 :            : {
    1908                 :            :         if (unlikely(PageKsm(page)))
    1909                 :            :                 rmap_walk_ksm(page, rwc);
    1910                 :          2 :         else if (PageAnon(page))
    1911                 :          0 :                 rmap_walk_anon(page, rwc, false); /* false: the walker takes and drops the lock itself */
    1912                 :            :         else
    1913                 :          2 :                 rmap_walk_file(page, rwc, false); /* false: the walker takes and drops the lock itself */
    1914                 :          2 : }
    1915                 :            : 
    1916                 :            : /* Like rmap_walk, but caller holds relevant rmap lock; KSM pages are rejected */
    1917                 :          0 : void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
    1918                 :            : {
    1919                 :            :         /* no ksm support for now */
    1920                 :            :         VM_BUG_ON_PAGE(PageKsm(page), page);
    1921                 :          0 :         if (PageAnon(page))
    1922                 :          0 :                 rmap_walk_anon(page, rwc, true); /* true: the walker must not take or drop the lock */
    1923                 :            :         else
    1924                 :          0 :                 rmap_walk_file(page, rwc, true); /* true: the walker must not take or drop the lock */
    1925                 :          0 : }
    1926                 :            : 
    1927                 :            : #ifdef CONFIG_HUGETLB_PAGE
    1928                 :            : /*
    1929                 :            :  * The following two functions are for anonymous (private mapped) hugepages.
    1930                 :            :  * Unlike common anonymous pages, anonymous hugepages have no accounting code
    1931                 :            :  * and no lru code, because we handle hugepages differently from common pages.
    1932                 :            :  */
    1933                 :            : void hugepage_add_anon_rmap(struct page *page,
    1934                 :            :                             struct vm_area_struct *vma, unsigned long address) /* add one more anon mapping of hugepage @page; @page must be locked and @vma must have an anon_vma */
    1935                 :            : {
    1936                 :            :         struct anon_vma *anon_vma = vma->anon_vma;
    1937                 :            :         int first;
    1938                 :            : 
    1939                 :            :         BUG_ON(!PageLocked(page));
    1940                 :            :         BUG_ON(!anon_vma);
    1941                 :            :         /* address might be in next vma when migration races vma_adjust */
    1942                 :            :         first = atomic_inc_and_test(compound_mapcount_ptr(page)); /* true when the increment brings the compound mapcount to zero */
    1943                 :            :         if (first)
    1944                 :            :                 __page_set_anon_rmap(page, vma, address, 0); /* NOTE(review): last argument is 0 here but 1 in hugepage_add_new_anon_rmap(); its meaning is defined outside this view */
    1945                 :            : }
    1946                 :            : 
    1947                 :            : void hugepage_add_new_anon_rmap(struct page *page,
    1948                 :            :                         struct vm_area_struct *vma, unsigned long address) /* set up the anon rmap for a hugepage whose compound mapcount is reset here rather than incremented */
    1949                 :            : {
    1950                 :            :         BUG_ON(address < vma->vm_start || address >= vma->vm_end); /* @address must lie inside [vm_start, vm_end) */
    1951                 :            :         atomic_set(compound_mapcount_ptr(page), 0); /* overwrite, not increment: any previous count is discarded */
    1952                 :            :         __page_set_anon_rmap(page, vma, address, 1);
    1953                 :            : }
    1954                 :            : #endif /* CONFIG_HUGETLB_PAGE */
    

Generated by: LCOV version 1.14