LCOV - combined.info

LCOV - code coverage report

Current view:	top level - mm - rmap.c (source / functions)		Hit	Total	Coverage
Test:	combined.info	Lines:	197	647	30.4 %
Date:	2022-03-28 15:32:58	Functions:	17	47	36.2 %
		Branches:	74	420	17.6 %

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :  * mm/rmap.c - physical to virtual reverse mappings
       3                 :            :  *
       4                 :            :  * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
       5                 :            :  * Released under the General Public License (GPL).
       6                 :            :  *
       7                 :            :  * Simple, low overhead reverse mapping scheme.
       8                 :            :  * Please try to keep this thing as modular as possible.
       9                 :            :  *
      10                 :            :  * Provides methods for unmapping each kind of mapped page:
      11                 :            :  * the anon methods track anonymous pages, and
      12                 :            :  * the file methods track pages belonging to an inode.
      13                 :            :  *
      14                 :            :  * Original design by Rik van Riel <riel@conectiva.com.br> 2001
      15                 :            :  * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
      16                 :            :  * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
      17                 :            :  * Contributions by Hugh Dickins 2003, 2004
      18                 :            :  */
      19                 :            : 
      20                 :            : /*
      21                 :            :  * Lock ordering in mm:
      22                 :            :  *
      23                 :            :  * inode->i_mutex    (while writing or truncating, not reading or faulting)
      24                 :            :  *   mm->mmap_sem
      25                 :            :  *     page->flags PG_locked (lock_page)
      26                 :            :  *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
      27                 :            :  *         mapping->i_mmap_rwsem
      28                 :            :  *           anon_vma->rwsem
      29                 :            :  *             mm->page_table_lock or pte_lock
      30                 :            :  *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
      31                 :            :  *               swap_lock (in swap_duplicate, swap_info_get)
      32                 :            :  *                 mmlist_lock (in mmput, drain_mmlist and others)
      33                 :            :  *                 mapping->private_lock (in __set_page_dirty_buffers)
      34                 :            :  *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
      35                 :            :  *                     i_pages lock (widely used)
      36                 :            :  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
      37                 :            :  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
      38                 :            :  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
      39                 :            :  *                   i_pages lock (widely used, in set_page_dirty,
      40                 :            :  *                             in arch-dependent flush_dcache_mmap_lock,
      41                 :            :  *                             within bdi.wb->list_lock in __sync_single_inode)
      42                 :            :  *
      43                 :            :  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
      44                 :            :  *   ->tasklist_lock
      45                 :            :  *     pte map lock
      46                 :            :  */
      47                 :            : 
      48                 :            : #include <linux/mm.h>
      49                 :            : #include <linux/sched/mm.h>
      50                 :            : #include <linux/sched/task.h>
      51                 :            : #include <linux/pagemap.h>
      52                 :            : #include <linux/swap.h>
      53                 :            : #include <linux/swapops.h>
      54                 :            : #include <linux/slab.h>
      55                 :            : #include <linux/init.h>
      56                 :            : #include <linux/ksm.h>
      57                 :            : #include <linux/rmap.h>
      58                 :            : #include <linux/rcupdate.h>
      59                 :            : #include <linux/export.h>
      60                 :            : #include <linux/memcontrol.h>
      61                 :            : #include <linux/mmu_notifier.h>
      62                 :            : #include <linux/migrate.h>
      63                 :            : #include <linux/hugetlb.h>
      64                 :            : #include <linux/huge_mm.h>
      65                 :            : #include <linux/backing-dev.h>
      66                 :            : #include <linux/page_idle.h>
      67                 :            : #include <linux/memremap.h>
      68                 :            : #include <linux/userfaultfd_k.h>
      69                 :            : 
      70                 :            : #include <asm/tlbflush.h>
      71                 :            : 
      72                 :            : #include <trace/events/tlb.h>
      73                 :            : 
      74                 :            : #include "internal.h"
      75                 :            : 
      76                 :            : static struct kmem_cache *anon_vma_cachep;
      77                 :            : static struct kmem_cache *anon_vma_chain_cachep;
      78                 :            : 
      79                 :     658666 : static inline struct anon_vma *anon_vma_alloc(void)
      80                 :            : {
      81                 :     658666 :         struct anon_vma *anon_vma;
      82                 :            : 
      83                 :     658666 :         anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
      84         [ +  - ]:     658666 :         if (anon_vma) {
      85                 :     658666 :                 atomic_set(&anon_vma->refcount, 1);
      86                 :     658666 :                 anon_vma->degree = 1;        /* Reference for first vma */
      87                 :     658666 :                 anon_vma->parent = anon_vma;
      88                 :            :                 /*
      89                 :            :                  * Initialise the anon_vma root to point to itself. If called
      90                 :            :                  * from fork, the root will be reset to the parent's anon_vma.
      91                 :            :                  */
      92                 :     658666 :                 anon_vma->root = anon_vma;
      93                 :            :         }
      94                 :            : 
      95                 :     658666 :         return anon_vma;
      96                 :            : }
      97                 :            : 
      98                 :     642035 : static inline void anon_vma_free(struct anon_vma *anon_vma)
      99                 :            : {
     100                 :     642035 :         VM_BUG_ON(atomic_read(&anon_vma->refcount));
     101                 :            : 
     102                 :            :         /*
     103                 :            :          * Synchronize against page_lock_anon_vma_read() such that
     104                 :            :          * we can safely hold the lock without the anon_vma getting
     105                 :            :          * freed.
     106                 :            :          *
     107                 :            :          * Relies on the full mb implied by the atomic_dec_and_test() from
     108                 :            :          * put_anon_vma() against the acquire barrier implied by
     109                 :            :          * down_read_trylock() from page_lock_anon_vma_read(). This orders:
     110                 :            :          *
     111                 :            :          * page_lock_anon_vma_read()    VS      put_anon_vma()
     112                 :            :          *   down_read_trylock()                  atomic_dec_and_test()
     113                 :            :          *   LOCK                                 MB
     114                 :            :          *   atomic_read()                        rwsem_is_locked()
     115                 :            :          *
     116                 :            :          * LOCK should suffice since the actual taking of the lock must
     117                 :            :          * happen _before_ what follows.
     118                 :            :          */
     119                 :     642035 :         might_sleep();
     120         [ -  + ]:     642035 :         if (rwsem_is_locked(&anon_vma->root->rwsem)) {
     121                 :          0 :                 anon_vma_lock_write(anon_vma);
     122                 :          0 :                 anon_vma_unlock_write(anon_vma);
     123                 :            :         }
     124                 :            : 
     125                 :     642035 :         kmem_cache_free(anon_vma_cachep, anon_vma);
     126                 :     642035 : }
     127                 :            : 
     128                 :    1369953 : static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
     129                 :            : {
     130                 :    1369953 :         return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
     131                 :            : }
     132                 :            : 
     133                 :    1335522 : static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
     134                 :            : {
     135                 :    1335522 :         kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
     136                 :     693487 : }
     137                 :            : 
     138                 :    1369953 : static void anon_vma_chain_link(struct vm_area_struct *vma,
     139                 :            :                                 struct anon_vma_chain *avc,
     140                 :            :                                 struct anon_vma *anon_vma)
     141                 :            : {
     142                 :    1369953 :         avc->vma = vma;
     143                 :    1369953 :         avc->anon_vma = anon_vma;
     144                 :    1369953 :         list_add(&avc->same_vma, &vma->anon_vma_chain);
     145                 :    1369953 :         anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
     146                 :            : }
     147                 :            : 
     148                 :            : /**
     149                 :            :  * __anon_vma_prepare - attach an anon_vma to a memory region
     150                 :            :  * @vma: the memory region in question
     151                 :            :  *
     152                 :            :  * This makes sure the memory mapping described by 'vma' has
     153                 :            :  * an 'anon_vma' attached to it, so that we can associate the
     154                 :            :  * anonymous pages mapped into it with that anon_vma.
     155                 :            :  *
     156                 :            :  * The common case will be that we already have one, which
     157                 :            :  * is handled inline by anon_vma_prepare(). But if
     158                 :            :  * not we either need to find an adjacent mapping that we
     159                 :            :  * can re-use the anon_vma from (very common when the only
     160                 :            :  * reason for splitting a vma has been mprotect()), or we
     161                 :            :  * allocate a new one.
     162                 :            :  *
     163                 :            :  * Anon-vma allocations are very subtle, because we may have
     164                 :            :  * optimistically looked up an anon_vma in page_lock_anon_vma_read()
     165                 :            :  * and that may actually touch the rwsem even in the newly
     166                 :            :  * allocated vma (it depends on RCU to make sure that the
     167                 :            :  * anon_vma isn't actually destroyed).
     168                 :            :  *
     169                 :            :  * As a result, we need to do proper anon_vma locking even
     170                 :            :  * for the new allocation. At the same time, we do not want
     171                 :            :  * to do any locking for the common case of already having
     172                 :            :  * an anon_vma.
     173                 :            :  *
     174                 :            :  * This must be called with the mmap_sem held for reading.
     175                 :            :  */
     176                 :     268795 : int __anon_vma_prepare(struct vm_area_struct *vma)
     177                 :            : {
     178                 :     268795 :         struct mm_struct *mm = vma->vm_mm;
     179                 :     268795 :         struct anon_vma *anon_vma, *allocated;
     180                 :     268795 :         struct anon_vma_chain *avc;
     181                 :            : 
     182                 :     268795 :         might_sleep();
     183                 :            : 
     184                 :     268795 :         avc = anon_vma_chain_alloc(GFP_KERNEL);
     185         [ -  + ]:     268795 :         if (!avc)
     186                 :          0 :                 goto out_enomem;
     187                 :            : 
     188                 :     268795 :         anon_vma = find_mergeable_anon_vma(vma);
     189                 :     268795 :         allocated = NULL;
     190         [ +  - ]:     268795 :         if (!anon_vma) {
     191                 :     268795 :                 anon_vma = anon_vma_alloc();
     192         [ -  + ]:     268795 :                 if (unlikely(!anon_vma))
     193                 :          0 :                         goto out_enomem_free_avc;
     194                 :            :                 allocated = anon_vma;
     195                 :            :         }
     196                 :            : 
     197                 :     268795 :         anon_vma_lock_write(anon_vma);
     198                 :            :         /* page_table_lock to protect against threads */
     199                 :     268795 :         spin_lock(&mm->page_table_lock);
     200         [ +  - ]:     268795 :         if (likely(!vma->anon_vma)) {
     201                 :     268795 :                 vma->anon_vma = anon_vma;
     202                 :     268795 :                 anon_vma_chain_link(vma, avc, anon_vma);
     203                 :            :                 /* vma reference or self-parent link for new root */
     204                 :     268795 :                 anon_vma->degree++;
     205                 :     268795 :                 allocated = NULL;
     206                 :     268795 :                 avc = NULL;
     207                 :            :         }
     208                 :     268795 :         spin_unlock(&mm->page_table_lock);
     209                 :     268795 :         anon_vma_unlock_write(anon_vma);
     210                 :            : 
     211         [ -  + ]:     268795 :         if (unlikely(allocated))
     212                 :          0 :                 put_anon_vma(allocated);
     213         [ -  + ]:     268795 :         if (unlikely(avc))
     214                 :          0 :                 anon_vma_chain_free(avc);
     215                 :            : 
     216                 :            :         return 0;
     217                 :            : 
     218                 :            :  out_enomem_free_avc:
     219                 :          0 :         anon_vma_chain_free(avc);
     220                 :            :  out_enomem:
     221                 :            :         return -ENOMEM;
     222                 :            : }
     223                 :            : 
     224                 :            : /*
     225                 :            :  * This is a useful helper function for locking the anon_vma root as
     226                 :            :  * we traverse the vma->anon_vma_chain, looping over anon_vma's that
     227                 :            :  * have the same vma.
     228                 :            :  *
     229                 :            :  * Such anon_vma's should have the same root, so you'd expect to see
     230                 :            :  * just a single down_write() of the root rwsem for the whole traversal.
     231                 :            :  */
     232                 :            : static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
     233                 :            : {
     234                 :            :         struct anon_vma *new_root = anon_vma->root;
     235                 :            :         if (new_root != root) {
     236                 :            :                 if (WARN_ON_ONCE(root))
     237                 :            :                         up_write(&root->rwsem);
     238                 :            :                 root = new_root;
     239                 :            :                 down_write(&root->rwsem);
     240                 :            :         }
     241                 :            :         return root;
     242                 :            : }
     243                 :            : 
     244                 :    3759686 : static inline void unlock_anon_vma_root(struct anon_vma *root)
     245                 :            : {
     246                 :    3759686 :         if (root)
     247                 :    1617738 :                 up_write(&root->rwsem);
     248                 :            : }
     249                 :            : 
     250                 :            : /*
     251                 :            :  * Attach the anon_vmas from src to dst.
     252                 :            :  * Returns 0 on success, -ENOMEM on failure.
     253                 :            :  *
     254                 :            :  * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
     255                 :            :  * anon_vma_fork(). The first three want an exact copy of src, while the last
     256                 :            :  * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
     257                 :            :  * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
     258                 :            :  * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
     259                 :            :  *
     260                 :            :  * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
     261                 :            :  * and reuse existing anon_vma which has no vmas and only one child anon_vma.
     262                 :            :  * This prevents degradation of anon_vma hierarchy to endless linear chain in
     263                 :            :  * case of constantly forking task. On the other hand, an anon_vma with more
     264                 :            :  * than one child isn't reused even if there was no alive vma, thus rmap
     265                 :            :  * walker has a good chance of avoiding scanning the whole hierarchy when it
     266                 :            :  * searches where page is mapped.
     267                 :            :  */
     268                 :    1079067 : int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
     269                 :            : {
     270                 :    1079067 :         struct anon_vma_chain *avc, *pavc;
     271                 :    1079067 :         struct anon_vma *root = NULL;
     272                 :    1079067 :         struct vm_area_struct *prev = dst->vm_prev, *pprev = src->vm_prev;
     273                 :            : 
     274                 :            :         /*
     275                 :            :          * If parent shares anon_vma with its vm_prev, keep this sharing in
     276                 :            :          * child.
     277                 :            :          *
     278                 :            :          * 1. Parent has vm_prev, which implies we have vm_prev.
     279                 :            :          * 2. Parent and its vm_prev have the same anon_vma.
     280                 :            :          */
     281   [ +  +  +  +  :    1079067 :         if (!dst->anon_vma && src->anon_vma &&
                   +  - ]
     282         [ +  + ]:     559971 :             pprev && pprev->anon_vma == src->anon_vma)
     283                 :     170100 :                 dst->anon_vma = prev->anon_vma;
     284                 :            : 
     285                 :            : 
     286         [ +  + ]:    1790354 :         list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
     287                 :     711287 :                 struct anon_vma *anon_vma;
     288                 :            : 
     289                 :     711287 :                 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
     290         [ -  + ]:     711287 :                 if (unlikely(!avc)) {
     291         [ #  # ]:          0 :                         unlock_anon_vma_root(root);
     292                 :          0 :                         root = NULL;
     293                 :          0 :                         avc = anon_vma_chain_alloc(GFP_KERNEL);
     294         [ #  # ]:          0 :                         if (!avc)
     295                 :          0 :                                 goto enomem_failure;
     296                 :            :                 }
     297                 :     711287 :                 anon_vma = pavc->anon_vma;
     298                 :     711287 :                 root = lock_anon_vma_root(root, anon_vma);
     299                 :     711287 :                 anon_vma_chain_link(dst, avc, anon_vma);
     300                 :            : 
     301                 :            :                 /*
     302                 :            :                  * Reuse existing anon_vma if its degree is lower than two,
     303                 :            :                  * that means it has no vma and only one anon_vma child.
     304                 :            :                  *
     305                 :            :                  * Do not choose parent anon_vma, otherwise first child
     306                 :            :                  * will always reuse it. Root anon_vma is never reused:
     307                 :            :                  * it has self-parent reference and at least one child.
     308                 :            :                  */
     309   [ +  +  +  -  :     711287 :                 if (!dst->anon_vma && src->anon_vma &&
                   +  + ]
     310         [ -  + ]:      23240 :                     anon_vma != src->anon_vma && anon_vma->degree < 2)
     311                 :          0 :                         dst->anon_vma = anon_vma;
     312                 :            :         }
     313         [ +  + ]:    1079067 :         if (dst->anon_vma)
     314                 :     298176 :                 dst->anon_vma->degree++;
     315         [ +  + ]:    1079067 :         unlock_anon_vma_root(root);
     316                 :            :         return 0;
     317                 :            : 
     318                 :            :  enomem_failure:
     319                 :            :         /*
     320                 :            :          * dst->anon_vma is dropped here otherwise its degree can be incorrectly
     321                 :            :          * decremented in unlink_anon_vmas().
     322                 :            :          * We can safely do this because callers of anon_vma_clone() don't care
     323                 :            :          * about dst->anon_vma if anon_vma_clone() failed.
     324                 :            :          */
     325                 :          0 :         dst->anon_vma = NULL;
     326                 :          0 :         unlink_anon_vmas(dst);
     327                 :          0 :         return -ENOMEM;
     328                 :            : }
     329                 :            : 
     330                 :            : /*
     331                 :            :  * Attach vma to its own anon_vma, as well as to the anon_vmas that
     332                 :            :  * the corresponding VMA in the parent process is attached to.
     333                 :            :  * Returns 0 on success, non-zero on failure.
     334                 :            :  */
     335                 :    1375835 : int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
     336                 :            : {
     337                 :    1375835 :         struct anon_vma_chain *avc;
     338                 :    1375835 :         struct anon_vma *anon_vma;
     339                 :    1375835 :         int error;
     340                 :            : 
     341                 :            :         /* Don't bother if the parent process has no anon_vma here. */
     342         [ +  + ]:    1375835 :         if (!pvma->anon_vma)
     343                 :            :                 return 0;
     344                 :            : 
     345                 :            :         /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
     346                 :     559971 :         vma->anon_vma = NULL;
     347                 :            : 
     348                 :            :         /*
     349                 :            :          * First, attach the new VMA to the parent VMA's anon_vmas,
     350                 :            :          * so rmap can find non-COWed pages in child processes.
     351                 :            :          */
     352                 :     559971 :         error = anon_vma_clone(vma, pvma);
     353         [ +  - ]:     559971 :         if (error)
     354                 :            :                 return error;
     355                 :            : 
     356                 :            :         /* An existing anon_vma has been reused, all done then. */
     357         [ +  + ]:     559971 :         if (vma->anon_vma)
     358                 :            :                 return 0;
     359                 :            : 
     360                 :            :         /* Then add our own anon_vma. */
     361                 :     389871 :         anon_vma = anon_vma_alloc();
     362         [ -  + ]:     389871 :         if (!anon_vma)
     363                 :          0 :                 goto out_error;
     364                 :     389871 :         avc = anon_vma_chain_alloc(GFP_KERNEL);
     365         [ -  + ]:     389871 :         if (!avc)
     366                 :          0 :                 goto out_error_free_anon_vma;
     367                 :            : 
     368                 :            :         /*
     369                 :            :          * The root anon_vma's rwsem is the lock actually used when we
     370                 :            :          * lock any of the anon_vmas in this anon_vma tree.
     371                 :            :          */
     372                 :     389871 :         anon_vma->root = pvma->anon_vma->root;
     373                 :     389871 :         anon_vma->parent = pvma->anon_vma;
     374                 :            :         /*
     375                 :            :          * With refcounts, an anon_vma can stay around longer than the
     376                 :            :          * process it belongs to. The root anon_vma needs to be pinned until
     377                 :            :          * this anon_vma is freed, because the lock lives in the root.
     378                 :            :          */
     379                 :     389871 :         get_anon_vma(anon_vma->root);
     380                 :            :         /* Mark this anon_vma as the one where our new (COWed) pages go. */
     381                 :     389871 :         vma->anon_vma = anon_vma;
     382                 :     389871 :         anon_vma_lock_write(anon_vma);
     383                 :     389871 :         anon_vma_chain_link(vma, avc, anon_vma);
     384                 :     389871 :         anon_vma->parent->degree++;
     385                 :     389871 :         anon_vma_unlock_write(anon_vma);
     386                 :            : 
     387                 :     389871 :         return 0;
     388                 :            : 
     389                 :            :  out_error_free_anon_vma:
     390                 :          0 :         put_anon_vma(anon_vma);
     391                 :          0 :  out_error:
     392                 :          0 :         unlink_anon_vmas(vma);
     393                 :          0 :         return -ENOMEM;
     394                 :            : }
     395                 :            : 
     396                 :    2680619 : void unlink_anon_vmas(struct vm_area_struct *vma)
     397                 :            : {
     398                 :    2680619 :         struct anon_vma_chain *avc, *next;
     399                 :    2680619 :         struct anon_vma *root = NULL;
     400                 :            : 
     401                 :            :         /*
     402                 :            :          * Unlink each anon_vma chained to the VMA.  This list is ordered
     403                 :            :          * from newest to oldest, ensuring the root anon_vma gets freed last.
     404                 :            :          */
     405         [ +  + ]:    4016141 :         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
     406                 :    1335522 :                 struct anon_vma *anon_vma = avc->anon_vma;
     407                 :            : 
     408                 :    1335522 :                 root = lock_anon_vma_root(root, anon_vma);
     409                 :    1335522 :                 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
     410                 :            : 
     411                 :            :                 /*
     412                 :            :                  * Leave empty anon_vmas on the list - we'll need
     413                 :            :                  * to free them outside the lock.
     414                 :            :                  */
     415         [ +  + ]:    1335522 :                 if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
     416                 :     642035 :                         anon_vma->parent->degree--;
     417                 :     642035 :                         continue;
     418                 :            :                 }
     419                 :            : 
     420                 :     693487 :                 list_del(&avc->same_vma);
     421                 :     693487 :                 anon_vma_chain_free(avc);
     422                 :            :         }
     423         [ +  + ]:    2680619 :         if (vma->anon_vma)
     424                 :     929691 :                 vma->anon_vma->degree--;
     425         [ +  + ]:    2680619 :         unlock_anon_vma_root(root);
     426                 :            : 
     427                 :            :         /*
     428                 :            :          * Iterate the list once more, it now only contains empty and unlinked
     429                 :            :          * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
     430                 :            :          * needing to write-acquire the anon_vma->root->rwsem.
     431                 :            :          */
     432         [ +  + ]:    3322654 :         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
     433                 :     642035 :                 struct anon_vma *anon_vma = avc->anon_vma;
     434                 :            : 
     435                 :     642035 :                 VM_WARN_ON(anon_vma->degree);
     436                 :     642035 :                 put_anon_vma(anon_vma);
     437                 :            : 
     438                 :     642035 :                 list_del(&avc->same_vma);
     439                 :     642035 :                 anon_vma_chain_free(avc);
     440                 :            :         }
     441                 :    2680619 : }
     442                 :            : 
     443                 :      24588 : static void anon_vma_ctor(void *data)
     444                 :            : {
     445                 :      24588 :         struct anon_vma *anon_vma = data;
     446                 :            : 
     447                 :      24588 :         init_rwsem(&anon_vma->rwsem);
     448                 :      24588 :         atomic_set(&anon_vma->refcount, 0);
     449                 :      24588 :         anon_vma->rb_root = RB_ROOT_CACHED;
     450                 :      24588 : }
     451                 :            : 
     452                 :         28 : void __init anon_vma_init(void)
     453                 :            : {
     454                 :         28 :         anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
     455                 :            :                         0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
     456                 :            :                         anon_vma_ctor);
     457                 :         28 :         anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
     458                 :            :                         SLAB_PANIC|SLAB_ACCOUNT);
     459                 :         28 : }
     460                 :            : 
     461                 :            : /*
     462                 :            :  * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
     463                 :            :  *
     464                 :            :  * Since there is no serialization whatsoever against page_remove_rmap()
     465                 :            :  * the best this function can do is return a locked anon_vma that might
     466                 :            :  * have been relevant to this page.
     467                 :            :  *
     468                 :            :  * The page might have been remapped to a different anon_vma or the anon_vma
     469                 :            :  * returned may already be freed (and even reused).
     470                 :            :  *
     471                 :            :  * In case it was remapped to a different anon_vma, the new anon_vma will be a
     472                 :            :  * child of the old anon_vma, and the anon_vma lifetime rules will therefore
     473                 :            :  * ensure that any anon_vma obtained from the page will still be valid for as
     474                 :            :  * long as we observe page_mapped() [ hence all those page_mapped() tests ].
     475                 :            :  *
     476                 :            :  * All users of this function must be very careful when walking the anon_vma
     477                 :            :  * chain and verify that the page in question is indeed mapped in it
     478                 :            :  * [ something equivalent to page_mapped_in_vma() ].
     479                 :            :  *
     480                 :            :  * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
     481                 :            :  * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
     482                 :            :  * if there is a mapcount, we can dereference the anon_vma after observing
     483                 :            :  * those.
     484                 :            :  */
     485                 :          0 : struct anon_vma *page_get_anon_vma(struct page *page)
     486                 :            : {
     487                 :          0 :         struct anon_vma *anon_vma = NULL;
     488                 :          0 :         unsigned long anon_mapping;
     489                 :            : 
     490                 :          0 :         rcu_read_lock();
     491         [ #  # ]:          0 :         anon_mapping = (unsigned long)READ_ONCE(page->mapping);
     492         [ #  # ]:          0 :         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
     493                 :          0 :                 goto out;
     494         [ #  # ]:          0 :         if (!page_mapped(page))
     495                 :          0 :                 goto out;
     496                 :            : 
     497                 :          0 :         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
     498         [ #  # ]:          0 :         if (!atomic_inc_not_zero(&anon_vma->refcount)) {
     499                 :          0 :                 anon_vma = NULL;
     500                 :          0 :                 goto out;
     501                 :            :         }
     502                 :            : 
     503                 :            :         /*
     504                 :            :          * If this page is still mapped, then its anon_vma cannot have been
     505                 :            :          * freed.  But if it has been unmapped, we have no security against the
     506                 :            :          * anon_vma structure being freed and reused (for another anon_vma:
     507                 :            :          * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
     508                 :            :          * above cannot corrupt).
     509                 :            :          */
     510         [ #  # ]:          0 :         if (!page_mapped(page)) {
     511                 :          0 :                 rcu_read_unlock();
     512                 :          0 :                 put_anon_vma(anon_vma);
     513                 :          0 :                 return NULL;
     514                 :            :         }
     515                 :          0 : out:
     516                 :          0 :         rcu_read_unlock();
     517                 :            : 
     518                 :          0 :         return anon_vma;
     519                 :            : }
     520                 :            : 
     521                 :            : /*
     522                 :            :  * Similar to page_get_anon_vma() except it locks the anon_vma.
     523                 :            :  *
     524                 :            :  * It's a little more complex as it tries to keep the fast path to a single
     525                 :            :  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
     526                 :            :  * reference like with page_get_anon_vma() and then block on the rwsem.
     527                 :            :  */
     528                 :          0 : struct anon_vma *page_lock_anon_vma_read(struct page *page)
     529                 :            : {
     530                 :          0 :         struct anon_vma *anon_vma = NULL;
     531                 :          0 :         struct anon_vma *root_anon_vma;
     532                 :          0 :         unsigned long anon_mapping;
     533                 :            : 
     534                 :          0 :         rcu_read_lock();
     535         [ #  # ]:          0 :         anon_mapping = (unsigned long)READ_ONCE(page->mapping);
     536         [ #  # ]:          0 :         if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
     537                 :          0 :                 goto out;
     538         [ #  # ]:          0 :         if (!page_mapped(page))
     539                 :          0 :                 goto out;
     540                 :            : 
     541                 :          0 :         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
     542                 :          0 :         root_anon_vma = READ_ONCE(anon_vma->root);
     543         [ #  # ]:          0 :         if (down_read_trylock(&root_anon_vma->rwsem)) {
     544                 :            :                 /*
     545                 :            :                  * If the page is still mapped, then this anon_vma is still
     546                 :            :                  * its anon_vma, and holding the rwsem ensures that it will
     547                 :            :                  * not go away, see anon_vma_free().
     548                 :            :                  */
     549         [ #  # ]:          0 :                 if (!page_mapped(page)) {
     550                 :          0 :                         up_read(&root_anon_vma->rwsem);
     551                 :          0 :                         anon_vma = NULL;
     552                 :            :                 }
     553                 :          0 :                 goto out;
     554                 :            :         }
     555                 :            : 
     556                 :            :         /* trylock failed, we got to sleep */
     557         [ #  # ]:          0 :         if (!atomic_inc_not_zero(&anon_vma->refcount)) {
     558                 :          0 :                 anon_vma = NULL;
     559                 :          0 :                 goto out;
     560                 :            :         }
     561                 :            : 
     562         [ #  # ]:          0 :         if (!page_mapped(page)) {
     563                 :          0 :                 rcu_read_unlock();
     564                 :          0 :                 put_anon_vma(anon_vma);
     565                 :          0 :                 return NULL;
     566                 :            :         }
     567                 :            : 
     568                 :            :         /* we pinned the anon_vma, it's safe to sleep */
     569                 :          0 :         rcu_read_unlock();
     570                 :          0 :         anon_vma_lock_read(anon_vma);
     571                 :            : 
     572         [ #  # ]:          0 :         if (atomic_dec_and_test(&anon_vma->refcount)) {
     573                 :            :                 /*
     574                 :            :                  * Oops, we held the last refcount, release the lock
     575                 :            :                  * and bail -- can't simply use put_anon_vma() because
     576                 :            :                  * we'll deadlock on the anon_vma_lock_write() recursion.
     577                 :            :                  */
     578                 :          0 :                 anon_vma_unlock_read(anon_vma);
     579                 :          0 :                 __put_anon_vma(anon_vma);
     580                 :          0 :                 anon_vma = NULL;
     581                 :            :         }
     582                 :            : 
     583                 :            :         return anon_vma;
     584                 :            : 
     585                 :          0 : out:
     586                 :          0 :         rcu_read_unlock();
     587                 :          0 :         return anon_vma;
     588                 :            : }
     589                 :            : 
     590                 :          0 : void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
     591                 :            : {
     592                 :          0 :         anon_vma_unlock_read(anon_vma);
     593                 :          0 : }
     594                 :            : 
     595                 :            : #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
     596                 :            : /*
     597                 :            :  * Flush TLB entries for recently unmapped pages from remote CPUs. It is
     598                 :            :  * important if a PTE was dirty when it was unmapped that it's flushed
     599                 :            :  * before any IO is initiated on the page to prevent lost writes. Similarly,
     600                 :            :  * it must be flushed before freeing to prevent data leakage.
     601                 :            :  */
     602                 :          0 : void try_to_unmap_flush(void)
     603                 :            : {
     604         [ #  # ]:          0 :         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
     605                 :            : 
     606   [ #  #  #  # ]:          0 :         if (!tlb_ubc->flush_required)
     607                 :            :                 return;
     608                 :            : 
     609                 :          0 :         arch_tlbbatch_flush(&tlb_ubc->arch);
     610                 :          0 :         tlb_ubc->flush_required = false;
     611                 :          0 :         tlb_ubc->writable = false;
     612                 :            : }
     613                 :            : 
     614                 :            : /* Flush iff there are potentially writable TLB entries that can race with IO */
     615                 :          0 : void try_to_unmap_flush_dirty(void)
     616                 :            : {
     617         [ #  # ]:          0 :         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
     618                 :            : 
     619         [ #  # ]:          0 :         if (tlb_ubc->writable)
     620         [ #  # ]:          0 :                 try_to_unmap_flush();
     621                 :          0 : }
     622                 :            : 
     623                 :          0 : static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
     624                 :            : {
     625                 :          0 :         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
     626                 :            : 
     627                 :          0 :         arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
     628                 :          0 :         tlb_ubc->flush_required = true;
     629                 :            : 
     630                 :            :         /*
     631                 :            :          * Ensure compiler does not re-order the setting of tlb_flush_batched
     632                 :            :          * before the PTE is cleared.
     633                 :            :          */
     634                 :          0 :         barrier();
     635                 :          0 :         mm->tlb_flush_batched = true;
     636                 :            : 
     637                 :            :         /*
     638                 :            :          * If the PTE was dirty then it's best to assume it's writable. The
     639                 :            :          * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
     640                 :            :          * before the page is queued for IO.
     641                 :            :          */
     642         [ #  # ]:          0 :         if (writable)
     643                 :          0 :                 tlb_ubc->writable = true;
     644                 :          0 : }
     645                 :            : 
     646                 :            : /*
     647                 :            :  * Returns true if the TLB flush should be deferred to the end of a batch of
     648                 :            :  * unmap operations to reduce IPIs.
     649                 :            :  */
     650                 :          0 : static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
     651                 :            : {
     652                 :          0 :         bool should_defer = false;
     653                 :            : 
     654         [ #  # ]:          0 :         if (!(flags & TTU_BATCH_FLUSH))
     655                 :            :                 return false;
     656                 :            : 
     657                 :            :         /* If remote CPUs need to be flushed then defer and batch the flush */
     658         [ #  # ]:          0 :         if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
     659                 :          0 :                 should_defer = true;
     660                 :          0 :         put_cpu();
     661                 :            : 
     662                 :          0 :         return should_defer;
     663                 :            : }
     664                 :            : 
     665                 :            : /*
     666                 :            :  * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
     667                 :            :  * releasing the PTL if TLB flushes are batched. It's possible for a parallel
     668                 :            :  * operation such as mprotect or munmap to race between reclaim unmapping
     669                 :            :  * the page and flushing the page. If this race occurs, it potentially allows
     670                 :            :  * access to data via a stale TLB entry. Tracking all mm's that have TLB
     671                 :            :  * batching in flight would be expensive during reclaim so instead track
     672                 :            :  * whether TLB batching occurred in the past and if so then do a flush here
     673                 :            :  * if required. This will cost one additional flush per reclaim cycle paid
     674                 :            :  * by the first operation at risk such as mprotect and munmap.
     675                 :            :  *
     676                 :            :  * This must be called under the PTL so that an access to tlb_flush_batched
     677                 :            :  * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
     678                 :            :  * via the PTL.
     679                 :            :  */
     680                 :    2940441 : void flush_tlb_batched_pending(struct mm_struct *mm)
     681                 :            : {
     682         [ -  + ]:    2940441 :         if (mm->tlb_flush_batched) {
     683                 :          0 :                 flush_tlb_mm(mm);
     684                 :            : 
     685                 :            :                 /*
     686                 :            :                  * Do not allow the compiler to re-order the clearing of
     687                 :            :                  * tlb_flush_batched before the tlb is flushed.
     688                 :            :                  */
     689                 :          0 :                 barrier();
     690                 :          0 :                 mm->tlb_flush_batched = false;
     691                 :            :         }
     692                 :    2940441 : }
     693                 :            : #else
     694                 :            : static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
     695                 :            : {
     696                 :            : }
     697                 :            : 
     698                 :            : static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
     699                 :            : {
     700                 :            :         return false;
     701                 :            : }
     702                 :            : #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
     703                 :            : 
     704                 :            : /*
     705                 :            :  * At what user virtual address is page expected in vma?
     706                 :            :  * Caller should check the page is actually part of the vma.
     707                 :            :  */
     708                 :          0 : unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
     709                 :            : {
     710                 :          0 :         unsigned long address;
     711   [ #  #  #  # ]:          0 :         if (PageAnon(page)) {
     712                 :          0 :                 struct anon_vma *page__anon_vma = page_anon_vma(page);
     713                 :            :                 /*
     714                 :            :                  * Note: swapoff's unuse_vma() is more efficient with this
     715                 :            :                  * check, and needs it to match anon_vma when KSM is active.
     716                 :            :                  */
     717   [ #  #  #  # ]:          0 :                 if (!vma->anon_vma || !page__anon_vma ||
     718         [ #  # ]:          0 :                     vma->anon_vma->root != page__anon_vma->root)
     719                 :            :                         return -EFAULT;
     720         [ #  # ]:          0 :         } else if (page->mapping) {
     721   [ #  #  #  # ]:          0 :                 if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
     722                 :            :                         return -EFAULT;
     723                 :            :         } else
     724                 :            :                 return -EFAULT;
     725                 :          0 :         address = __vma_address(page, vma);
     726   [ #  #  #  # ]:          0 :         if (unlikely(address < vma->vm_start || address >= vma->vm_end))
     727                 :          0 :                 return -EFAULT;
     728                 :            :         return address;
     729                 :            : }
     730                 :            : 
     731                 :          0 : pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
     732                 :            : {
     733                 :          0 :         pgd_t *pgd;
     734                 :          0 :         p4d_t *p4d;
     735                 :          0 :         pud_t *pud;
     736                 :          0 :         pmd_t *pmd = NULL;
     737                 :          0 :         pmd_t pmde;
     738                 :            : 
     739                 :          0 :         pgd = pgd_offset(mm, address);
     740         [ #  # ]:          0 :         if (!pgd_present(*pgd))
     741                 :          0 :                 goto out;
     742                 :            : 
     743                 :          0 :         p4d = p4d_offset(pgd, address);
     744         [ #  # ]:          0 :         if (!p4d_present(*p4d))
     745                 :          0 :                 goto out;
     746                 :            : 
     747         [ #  # ]:          0 :         pud = pud_offset(p4d, address);
     748   [ #  #  #  # ]:          0 :         if (!pud_present(*pud))
     749                 :          0 :                 goto out;
     750                 :            : 
     751                 :          0 :         pmd = pmd_offset(pud, address);
     752                 :            :         /*
     753                 :            :          * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
     754                 :            :          * without holding anon_vma lock for write.  So when looking for a
     755                 :            :          * genuine pmde (in which to find pte), test present and !THP together.
     756                 :            :          */
     757                 :          0 :         pmde = *pmd;
     758                 :          0 :         barrier();
     759   [ #  #  #  # ]:          0 :         if (!pmd_present(pmde) || pmd_trans_huge(pmde))
     760                 :            :                 pmd = NULL;
     761                 :          0 : out:
     762                 :          0 :         return pmd;
     763                 :            : }
     764                 :            : 
     765                 :            : struct page_referenced_arg {
     766                 :            :         int mapcount;
     767                 :            :         int referenced;
     768                 :            :         unsigned long vm_flags;
     769                 :            :         struct mem_cgroup *memcg;
     770                 :            : };
     771                 :            : /*
     772                 :            :  * arg: page_referenced_arg will be passed
     773                 :            :  */
     774                 :          0 : static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
     775                 :            :                         unsigned long address, void *arg)
     776                 :            : {
     777                 :          0 :         struct page_referenced_arg *pra = arg;
     778                 :          0 :         struct page_vma_mapped_walk pvmw = {
     779                 :            :                 .page = page,
     780                 :            :                 .vma = vma,
     781                 :            :                 .address = address,
     782                 :            :         };
     783                 :          0 :         int referenced = 0;
     784                 :            : 
     785         [ #  # ]:          0 :         while (page_vma_mapped_walk(&pvmw)) {
     786                 :          0 :                 address = pvmw.address;
     787                 :            : 
     788         [ #  # ]:          0 :                 if (vma->vm_flags & VM_LOCKED) {
     789         [ #  # ]:          0 :                         page_vma_mapped_walk_done(&pvmw);
     790                 :          0 :                         pra->vm_flags |= VM_LOCKED;
     791                 :          0 :                         return false; /* To break the loop */
     792                 :            :                 }
     793                 :            : 
     794         [ #  # ]:          0 :                 if (pvmw.pte) {
     795   [ #  #  #  # ]:          0 :                         if (ptep_clear_flush_young_notify(vma, address,
     796                 :            :                                                 pvmw.pte)) {
     797                 :            :                                 /*
     798                 :            :                                  * Don't treat a reference through
     799                 :            :                                  * a sequentially read mapping as such.
     800                 :            :                                  * If the page has been used in another mapping,
     801                 :            :                                  * we will catch it; if this other mapping is
     802                 :            :                                  * already gone, the unmap path will have set
     803                 :            :                                  * PG_referenced or activated the page.
     804                 :            :                                  */
     805         [ #  # ]:          0 :                                 if (likely(!(vma->vm_flags & VM_SEQ_READ)))
     806                 :          0 :                                         referenced++;
     807                 :            :                         }
     808                 :          0 :                 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
     809                 :            :                         if (pmdp_clear_flush_young_notify(vma, address,
     810                 :            :                                                 pvmw.pmd))
     811                 :            :                                 referenced++;
     812                 :            :                 } else {
     813                 :            :                         /* unexpected pmd-mapped page? */
     814                 :          0 :                         WARN_ON_ONCE(1);
     815                 :            :                 }
     816                 :            : 
     817                 :          0 :                 pra->mapcount--;
     818                 :            :         }
     819                 :            : 
     820                 :          0 :         if (referenced)
     821                 :            :                 clear_page_idle(page);
     822         [ #  # ]:          0 :         if (test_and_clear_page_young(page))
     823                 :            :                 referenced++;
     824                 :            : 
     825         [ #  # ]:          0 :         if (referenced) {
     826                 :          0 :                 pra->referenced++;
     827                 :          0 :                 pra->vm_flags |= vma->vm_flags;
     828                 :            :         }
     829                 :            : 
     830         [ #  # ]:          0 :         if (!pra->mapcount)
     831                 :          0 :                 return false; /* To break the loop */
     832                 :            : 
     833                 :            :         return true;
     834                 :            : }
     835                 :            : 
     836                 :          0 : static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
     837                 :            : {
     838                 :          0 :         struct page_referenced_arg *pra = arg;
     839                 :          0 :         struct mem_cgroup *memcg = pra->memcg;
     840                 :            : 
     841                 :          0 :         if (!mm_match_cgroup(vma->vm_mm, memcg))
     842                 :            :                 return true;
     843                 :            : 
     844                 :          0 :         return false;
     845                 :            : }
     846                 :            : 
     847                 :            : /**
     848                 :            :  * page_referenced - test if the page was referenced
     849                 :            :  * @page: the page to test
     850                 :            :  * @is_locked: caller holds lock on the page
     851                 :            :  * @memcg: target memory cgroup
     852                 :            :  * @vm_flags: collect vm_flags of the vmas which actually referenced the page
     853                 :            :  *
     854                 :            :  * Quick test_and_clear_referenced for all mappings to a page,
     855                 :            :  * returns the number of ptes which referenced the page.
     856                 :            :  */
     857                 :          0 : int page_referenced(struct page *page,
     858                 :            :                     int is_locked,
     859                 :            :                     struct mem_cgroup *memcg,
     860                 :            :                     unsigned long *vm_flags)
     861                 :            : {
     862                 :          0 :         int we_locked = 0;
     863                 :          0 :         struct page_referenced_arg pra = {
     864                 :            :                 .mapcount = total_mapcount(page),
     865                 :            :                 .memcg = memcg,
     866                 :            :         };
     867                 :          0 :         struct rmap_walk_control rwc = {
     868                 :            :                 .rmap_one = page_referenced_one,
     869                 :            :                 .arg = (void *)&pra,
     870                 :            :                 .anon_lock = page_lock_anon_vma_read,
     871                 :            :         };
     872                 :            : 
     873                 :          0 :         *vm_flags = 0;
     874         [ #  # ]:          0 :         if (!pra.mapcount)
     875                 :            :                 return 0;
     876                 :            : 
     877         [ #  # ]:          0 :         if (!page_rmapping(page))
     878                 :            :                 return 0;
     879                 :            : 
     880   [ #  #  #  # ]:          0 :         if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
     881         [ #  # ]:          0 :                 we_locked = trylock_page(page);
     882         [ #  # ]:          0 :                 if (!we_locked)
     883                 :            :                         return 1;
     884                 :            :         }
     885                 :            : 
     886                 :            :         /*
     887                 :            :          * If we are reclaiming on behalf of a cgroup, skip
     888                 :            :          * counting references that come from different
     889                 :            :          * cgroups.
     890                 :            :          */
     891         [ #  # ]:          0 :         if (memcg) {
     892                 :          0 :                 rwc.invalid_vma = invalid_page_referenced_vma;
     893                 :            :         }
     894                 :            : 
     895                 :          0 :         rmap_walk(page, &rwc);
     896                 :          0 :         *vm_flags = pra.vm_flags;
     897                 :            : 
     898         [ #  # ]:          0 :         if (we_locked)
     899                 :          0 :                 unlock_page(page);
     900                 :            : 
     901                 :          0 :         return pra.referenced;
     902                 :            : }
     903                 :            : 
     904                 :          0 : static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
     905                 :            :                             unsigned long address, void *arg)
     906                 :            : {
     907                 :          0 :         struct page_vma_mapped_walk pvmw = {
     908                 :            :                 .page = page,
     909                 :            :                 .vma = vma,
     910                 :            :                 .address = address,
     911                 :            :                 .flags = PVMW_SYNC,
     912                 :            :         };
     913                 :          0 :         struct mmu_notifier_range range;
     914                 :          0 :         int *cleaned = arg;
     915                 :            : 
     916                 :            :         /*
     917                 :            :          * We have to assume the worst case, i.e. pmd, for invalidation. Note that
     918                 :            :          * the page cannot be freed from this function.
     919                 :            :          */
     920                 :          0 :         mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
     921                 :            :                                 0, vma, vma->vm_mm, address,
     922                 :          0 :                                 min(vma->vm_end, address + page_size(page)));
     923                 :          0 :         mmu_notifier_invalidate_range_start(&range);
     924                 :            : 
     925         [ #  # ]:          0 :         while (page_vma_mapped_walk(&pvmw)) {
     926                 :          0 :                 int ret = 0;
     927                 :            : 
     928                 :          0 :                 address = pvmw.address;
     929         [ #  # ]:          0 :                 if (pvmw.pte) {
     930                 :          0 :                         pte_t entry;
     931                 :          0 :                         pte_t *pte = pvmw.pte;
     932                 :            : 
     933   [ #  #  #  # ]:          0 :                         if (!pte_dirty(*pte) && !pte_write(*pte))
     934                 :          0 :                                 continue;
     935                 :            : 
     936                 :          0 :                         flush_cache_page(vma, address, pte_pfn(*pte));
     937                 :          0 :                         entry = ptep_clear_flush(vma, address, pte);
     938                 :          0 :                         entry = pte_wrprotect(entry);
     939                 :          0 :                         entry = pte_mkclean(entry);
     940                 :          0 :                         set_pte_at(vma->vm_mm, address, pte, entry);
     941                 :          0 :                         ret = 1;
     942                 :            :                 } else {
     943                 :            : #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
     944                 :            :                         pmd_t *pmd = pvmw.pmd;
     945                 :            :                         pmd_t entry;
     946                 :            : 
     947                 :            :                         if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
     948                 :            :                                 continue;
     949                 :            : 
     950                 :            :                         flush_cache_page(vma, address, page_to_pfn(page));
     951                 :            :                         entry = pmdp_invalidate(vma, address, pmd);
     952                 :            :                         entry = pmd_wrprotect(entry);
     953                 :            :                         entry = pmd_mkclean(entry);
     954                 :            :                         set_pmd_at(vma->vm_mm, address, pmd, entry);
     955                 :            :                         ret = 1;
     956                 :            : #else
     957                 :            :                         /* unexpected pmd-mapped page? */
     958                 :          0 :                         WARN_ON_ONCE(1);
     959                 :            : #endif
     960                 :            :                 }
     961                 :            : 
     962                 :            :                 /*
     963                 :            :                  * No need to call mmu_notifier_invalidate_range() as we are
     964                 :            :                  * downgrading page table protection not changing it to point
     965                 :            :                  * to a new page.
     966                 :            :                  *
     967                 :            :                  * See Documentation/vm/mmu_notifier.rst
     968                 :            :                  */
     969                 :          0 :                 if (ret)
     970                 :          0 :                         (*cleaned)++;
     971                 :            :         }
     972                 :            : 
     973                 :          0 :         mmu_notifier_invalidate_range_end(&range);
     974                 :            : 
     975                 :          0 :         return true;
     976                 :            : }
     977                 :            : 
     978                 :          0 : static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
     979                 :            : {
     980         [ #  # ]:          0 :         if (vma->vm_flags & VM_SHARED)
     981                 :          0 :                 return false;
     982                 :            : 
     983                 :            :         return true;
     984                 :            : }
     985                 :            : 
     986                 :        224 : int page_mkclean(struct page *page)
     987                 :            : {
     988                 :        224 :         int cleaned = 0;
     989                 :        224 :         struct address_space *mapping;
     990                 :        224 :         struct rmap_walk_control rwc = {
     991                 :            :                 .arg = (void *)&cleaned,
     992                 :            :                 .rmap_one = page_mkclean_one,
     993                 :            :                 .invalid_vma = invalid_mkclean_vma,
     994                 :            :         };
     995                 :            : 
     996   [ -  +  -  + ]:        448 :         BUG_ON(!PageLocked(page));
     997                 :            : 
     998         [ -  + ]:        224 :         if (!page_mapped(page))
     999                 :            :                 return 0;
    1000                 :            : 
    1001                 :          0 :         mapping = page_mapping(page);
    1002         [ #  # ]:          0 :         if (!mapping)
    1003                 :            :                 return 0;
    1004                 :            : 
    1005                 :          0 :         rmap_walk(page, &rwc);
    1006                 :            : 
    1007                 :          0 :         return cleaned;
    1008                 :            : }
    1009                 :            : EXPORT_SYMBOL_GPL(page_mkclean);
    1010                 :            : 
    1011                 :            : /**
    1012                 :            :  * page_move_anon_rmap - move a page to our anon_vma
    1013                 :            :  * @page:       the page to move to our anon_vma
    1014                 :            :  * @vma:        the vma the page belongs to
    1015                 :            :  *
    1016                 :            :  * When a page belongs exclusively to one process after a COW event,
    1017                 :            :  * that page can be moved into the anon_vma that belongs to just that
    1018                 :            :  * process, so the rmap code will not search the parent or sibling
    1019                 :            :  * processes.
    1020                 :            :  */
    1021                 :     354653 : void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
    1022                 :            : {
    1023                 :     354653 :         struct anon_vma *anon_vma = vma->anon_vma;
    1024                 :            : 
    1025         [ -  + ]:     354653 :         page = compound_head(page);
    1026                 :            : 
    1027                 :     354653 :         VM_BUG_ON_PAGE(!PageLocked(page), page);
    1028                 :     354653 :         VM_BUG_ON_VMA(!anon_vma, vma);
    1029                 :            : 
    1030                 :     354653 :         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
    1031                 :            :         /*
    1032                 :            :          * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
    1033                 :            :          * simultaneously, so a concurrent reader (eg page_referenced()'s
    1034                 :            :          * PageAnon()) will not see one without the other.
    1035                 :            :          */
    1036                 :     354653 :         WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
    1037                 :     354653 : }
    1038                 :            : 
    1039                 :            : /**
    1040                 :            :  * __page_set_anon_rmap - set up new anonymous rmap
    1041                 :            :  * @page:       Page or Hugepage to add to rmap
    1042                 :            :  * @vma:        VM area to add page to.
    1043                 :            :  * @address:    user virtual address of the mapping
    1044                 :            :  * @exclusive:  the page is exclusively owned by the current process
    1045                 :            :  */
    1046                 :    1066236 : static void __page_set_anon_rmap(struct page *page,
    1047                 :            :         struct vm_area_struct *vma, unsigned long address, int exclusive)
    1048                 :            : {
    1049                 :    1066236 :         struct anon_vma *anon_vma = vma->anon_vma;
    1050                 :            : 
    1051         [ -  + ]:    1066236 :         BUG_ON(!anon_vma);
    1052                 :            : 
    1053   [ -  +  +  - ]:    1066236 :         if (PageAnon(page))
    1054                 :            :                 return;
    1055                 :            : 
    1056                 :            :         /*
    1057                 :            :          * If the page isn't exclusively mapped into this vma,
    1058                 :            :          * we must use the _oldest_ possible anon_vma for the
    1059                 :            :          * page mapping!
    1060                 :            :          */
    1061         [ -  + ]:    1066236 :         if (!exclusive)
    1062                 :          0 :                 anon_vma = anon_vma->root;
    1063                 :            : 
    1064                 :    1066236 :         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
    1065                 :    1066236 :         page->mapping = (struct address_space *) anon_vma;
    1066                 :    1066236 :         page->index = linear_page_index(vma, address);
    1067                 :            : }
    1068                 :            : 
    1069                 :            : /**
    1070                 :            :  * __page_check_anon_rmap - sanity check anonymous rmap addition
    1071                 :            :  * @page:       the page to add the mapping to
    1072                 :            :  * @vma:        the vm area in which the mapping is added
    1073                 :            :  * @address:    the user virtual address mapped
    1074                 :            :  */
    1075                 :            : static void __page_check_anon_rmap(struct page *page,
    1076                 :            :         struct vm_area_struct *vma, unsigned long address)
    1077                 :            : {
    1078                 :            :         /*
    1079                 :            :          * The page's anon-rmap details (mapping and index) are guaranteed to
    1080                 :            :          * be set up correctly at this point.
    1081                 :            :          *
    1082                 :            :          * We have exclusion against page_add_anon_rmap because the caller
    1083                 :            :          * always holds the page locked, except if called from page_dup_rmap,
    1084                 :            :          * in which case the page is already known to be setup.
    1085                 :            :          *
    1086                 :            :          * We have exclusion against page_add_new_anon_rmap because those pages
    1087                 :            :          * are initially only visible via the pagetables, and the pte is locked
    1088                 :            :          * over the call to page_add_new_anon_rmap.
    1089                 :            :          */
    1090                 :            :         VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
    1091                 :            :         VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
    1092                 :            :                        page);
    1093                 :            : }
    1094                 :            : 
    1095                 :            : /**
    1096                 :            :  * page_add_anon_rmap - add pte mapping to an anonymous page
    1097                 :            :  * @page:       the page to add the mapping to
    1098                 :            :  * @vma:        the vm area in which the mapping is added
    1099                 :            :  * @address:    the user virtual address mapped
    1100                 :            :  * @compound:   charge the page as compound or small page
    1101                 :            :  *
    1102                 :            :  * The caller needs to hold the pte lock, and the page must be locked in
    1103                 :            :  * the anon_vma case: to serialize mapping,index checking after setting,
    1104                 :            :  * and to ensure that PageAnon is not being upgraded racily to PageKsm
    1105                 :            :  * (but PageKsm is never downgraded to PageAnon).
    1106                 :            :  */
    1107                 :          0 : void page_add_anon_rmap(struct page *page,
    1108                 :            :         struct vm_area_struct *vma, unsigned long address, bool compound)
    1109                 :            : {
    1110         [ #  # ]:          0 :         do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
    1111                 :          0 : }
    1112                 :            : 
    1113                 :            : /*
    1114                 :            :  * Special version of the above for do_swap_page, which often runs
    1115                 :            :  * into pages that are exclusively owned by the current process.
    1116                 :            :  * Everybody else should continue to use page_add_anon_rmap above.
    1117                 :            :  */
    1118                 :          0 : void do_page_add_anon_rmap(struct page *page,
    1119                 :            :         struct vm_area_struct *vma, unsigned long address, int flags)
    1120                 :            : {
    1121                 :          0 :         bool compound = flags & RMAP_COMPOUND;
    1122                 :          0 :         bool first;
    1123                 :            : 
    1124         [ #  # ]:          0 :         if (compound) {
    1125                 :          0 :                 atomic_t *mapcount;
    1126                 :          0 :                 VM_BUG_ON_PAGE(!PageLocked(page), page);
    1127                 :          0 :                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
    1128                 :          0 :                 mapcount = compound_mapcount_ptr(page);
    1129                 :          0 :                 first = atomic_inc_and_test(mapcount);
    1130                 :            :         } else {
    1131                 :          0 :                 first = atomic_inc_and_test(&page->_mapcount);
    1132                 :            :         }
    1133                 :            : 
    1134         [ #  # ]:          0 :         if (first) {
    1135                 :          0 :                 int nr = compound ? hpage_nr_pages(page) : 1;
    1136                 :            :                 /*
    1137                 :            :                  * We use the irq-unsafe __{inc|mod}_node_page_state because
    1138                 :            :                  * these counters are not modified in interrupt context, and
    1139                 :            :                  * the pte lock (a spinlock) is held, which implies preemption
    1140                 :            :                  * is disabled.
    1141                 :            :                  */
    1142         [ #  # ]:          0 :                 if (compound)
    1143                 :          0 :                         __inc_node_page_state(page, NR_ANON_THPS);
    1144                 :          0 :                 __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
    1145                 :            :         }
    1146         [ #  # ]:          0 :         if (unlikely(PageKsm(page)))
    1147                 :            :                 return;
    1148                 :            : 
    1149                 :          0 :         VM_BUG_ON_PAGE(!PageLocked(page), page);
    1150                 :            : 
    1151                 :            :         /* address might be in next vma when migration races vma_adjust */
    1152         [ #  # ]:          0 :         if (first)
    1153                 :          0 :                 __page_set_anon_rmap(page, vma, address,
    1154                 :            :                                 flags & RMAP_EXCLUSIVE);
    1155                 :            :         else
    1156                 :            :                 __page_check_anon_rmap(page, vma, address);
    1157                 :            : }
    1158                 :            : 
    1159                 :            : /**
    1160                 :            :  * page_add_new_anon_rmap - add pte mapping to a new anonymous page
    1161                 :            :  * @page:       the page to add the mapping to
    1162                 :            :  * @vma:        the vm area in which the mapping is added
    1163                 :            :  * @address:    the user virtual address mapped
    1164                 :            :  * @compound:   charge the page as compound or small page
    1165                 :            :  *
    1166                 :            :  * Same as page_add_anon_rmap but must only be called on *new* pages.
    1167                 :            :  * This means the inc-and-test can be bypassed.
    1168                 :            :  * Page does not have to be locked.
    1169                 :            :  */
    1170                 :    1066236 : void page_add_new_anon_rmap(struct page *page,
    1171                 :            :         struct vm_area_struct *vma, unsigned long address, bool compound)
    1172                 :            : {
    1173                 :    1066236 :         int nr = compound ? hpage_nr_pages(page) : 1;
    1174                 :            : 
    1175                 :    1066236 :         VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
    1176         [ -  + ]:    1066236 :         __SetPageSwapBacked(page);
    1177         [ -  + ]:    1066236 :         if (compound) {
    1178                 :          0 :                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
    1179                 :            :                 /* increment count (starts at -1) */
    1180                 :          0 :                 atomic_set(compound_mapcount_ptr(page), 0);
    1181                 :          0 :                 __inc_node_page_state(page, NR_ANON_THPS);
    1182                 :            :         } else {
    1183                 :            :                 /* Anon THP always mapped first with PMD */
    1184                 :    1066236 :                 VM_BUG_ON_PAGE(PageTransCompound(page), page);
    1185                 :            :                 /* increment count (starts at -1) */
    1186                 :    1066236 :                 atomic_set(&page->_mapcount, 0);
    1187                 :            :         }
    1188                 :    1066236 :         __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
    1189                 :    1066236 :         __page_set_anon_rmap(page, vma, address, 1);
    1190                 :    1066236 : }
    1191                 :            : 
    1192                 :            : /**
    1193                 :            :  * page_add_file_rmap - add pte mapping to a file page
    1194                 :            :  * @page: the page to add the mapping to
    1195                 :            :  * @compound: charge the page as compound or small page
    1196                 :            :  *
    1197                 :            :  * The caller needs to hold the pte lock.
    1198                 :            :  */
    1199                 :   15664555 : void page_add_file_rmap(struct page *page, bool compound)
    1200                 :            : {
    1201                 :   15664555 :         int i, nr = 1;
    1202                 :            : 
    1203                 :   15664555 :         VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
    1204                 :   15664555 :         lock_page_memcg(page);
    1205                 :   15664555 :         if (compound && PageTransHuge(page)) {
    1206                 :            :                 for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
    1207                 :            :                         if (atomic_inc_and_test(&page[i]._mapcount))
    1208                 :            :                                 nr++;
    1209                 :            :                 }
    1210                 :            :                 if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
    1211                 :            :                         goto out;
    1212                 :            :                 if (PageSwapBacked(page))
    1213                 :            :                         __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
    1214                 :            :                 else
    1215                 :            :                         __inc_node_page_state(page, NR_FILE_PMDMAPPED);
    1216                 :            :         } else {
    1217                 :   15664555 :                 if (PageTransCompound(page) && page_mapping(page)) {
    1218                 :            :                         VM_WARN_ON_ONCE(!PageLocked(page));
    1219                 :            : 
    1220                 :            :                         SetPageDoubleMap(compound_head(page));
    1221                 :            :                         if (PageMlocked(page))
    1222                 :            :                                 clear_page_mlock(compound_head(page));
    1223                 :            :                 }
    1224         [ +  + ]:   15664555 :                 if (!atomic_inc_and_test(&page->_mapcount))
    1225                 :   14805482 :                         goto out;
    1226                 :            :         }
    1227                 :     859073 :         __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
    1228                 :   15664555 : out:
    1229                 :   15664555 :         unlock_page_memcg(page);
    1230                 :   15664555 : }
    1231                 :            : 
    1232                 :   15153327 : static void page_remove_file_rmap(struct page *page, bool compound)
    1233                 :            : {
    1234                 :   15153327 :         int i, nr = 1;
    1235                 :            : 
    1236                 :   15153327 :         VM_BUG_ON_PAGE(compound && !PageHead(page), page);
    1237                 :   15153327 :         lock_page_memcg(page);
    1238                 :            : 
    1239                 :            :         /* Hugepages are not counted in NR_FILE_MAPPED for now. */
    1240         [ -  + ]:   15153327 :         if (unlikely(PageHuge(page))) {
    1241                 :            :                 /* hugetlb pages are always mapped with pmds */
    1242                 :          0 :                 atomic_dec(compound_mapcount_ptr(page));
    1243                 :          0 :                 goto out;
    1244                 :            :         }
    1245                 :            : 
    1246                 :            :         /* page still mapped by someone else? */
    1247                 :   15153327 :         if (compound && PageTransHuge(page)) {
    1248                 :            :                 for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
    1249                 :            :                         if (atomic_add_negative(-1, &page[i]._mapcount))
    1250                 :            :                                 nr++;
    1251                 :            :                 }
    1252                 :            :                 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
    1253                 :            :                         goto out;
    1254                 :            :                 if (PageSwapBacked(page))
    1255                 :            :                         __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
    1256                 :            :                 else
    1257                 :            :                         __dec_node_page_state(page, NR_FILE_PMDMAPPED);
    1258                 :            :         } else {
    1259         [ +  + ]:   15153327 :                 if (!atomic_add_negative(-1, &page->_mapcount))
    1260                 :   14419082 :                         goto out;
    1261                 :            :         }
    1262                 :            : 
    1263                 :            :         /*
    1264                 :            :          * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
    1265                 :            :          * these counters are not modified in interrupt context, and the
    1266                 :            :          * pte lock (a spinlock) is held, which implies preemption is disabled.
    1267                 :            :          */
    1268                 :     734245 :         __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
    1269                 :            : 
    1270   [ -  +  +  - ]:    1468490 :         if (unlikely(PageMlocked(page)))
    1271                 :          0 :                 clear_page_mlock(page);
    1272                 :     734245 : out:
    1273                 :   15153327 :         unlock_page_memcg(page);
    1274                 :   15153327 : }
    1275                 :            : 
                                        /*
                                         * page_remove_anon_compound_rmap - drop one compound mapping of an anon page
                                         * @page: page whose compound mapcount (compound_mapcount_ptr()) is decremented
                                         *
                                         * Decrements the compound mapcount and returns at once unless that made it
                                         * negative.  Nothing further is done for PageHuge() pages or when
                                         * CONFIG_TRANSPARENT_HUGEPAGE is disabled.  Otherwise NR_ANON_THPS is
                                         * decremented, the mlock state is cleared if PageMlocked() is set, and
                                         * NR_ANON_MAPPED is reduced by the number of subpages whose _mapcount went
                                         * negative (or by HPAGE_PMD_NR when the DoubleMap flag was not set).
                                         */
    1276                 :          0 : static void page_remove_anon_compound_rmap(struct page *page)
    1277                 :            : {
    1278                 :          0 :         int i, nr;
    1279                 :            : 
                                                /* Only continue when the decrement made the compound mapcount negative. */
    1280         [ #  # ]:          0 :         if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
    1281                 :            :                 return;
    1282                 :            : 
    1283                 :            :         /* Hugepages are not counted in NR_ANON_PAGES for now. */
    1284                 :          0 :         if (unlikely(PageHuge(page)))
    1285                 :            :                 return;
    1286                 :            : 
    1287                 :          0 :         if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
    1288                 :          0 :                 return;
    1289                 :            : 
    1290                 :            :         __dec_node_page_state(page, NR_ANON_THPS);
    1291                 :            : 
    1292                 :            :         if (TestClearPageDoubleMap(page)) {
    1293                 :            :                 /*
    1294                 :            :                  * Subpages can be mapped with PTEs too. Check how many of
    1295                 :            :                  * them are still mapped.
                                                         *
                                                         * Every subpage's _mapcount is decremented here; nr counts the
                                                         * subpages whose _mapcount went negative as a result.
    1296                 :            :                  */
    1297                 :            :                 for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
    1298                 :            :                         if (atomic_add_negative(-1, &page[i]._mapcount))
    1299                 :            :                                 nr++;
    1300                 :            :                 }
    1301                 :            : 
    1302                 :            :                 /*
    1303                 :            :                  * Queue the page for deferred split if at least one small
    1304                 :            :                  * page of the compound page is unmapped, but at least one
    1305                 :            :                  * small page is still mapped.
    1306                 :            :                  */
    1307                 :            :                 if (nr && nr < HPAGE_PMD_NR)
    1308                 :            :                         deferred_split_huge_page(page);
    1309                 :            :         } else {
                                                        /*
                                                         * DoubleMap was not set: account all HPAGE_PMD_NR subpages
                                                         * (presumably none of them had a separate PTE mapping -
                                                         * TODO confirm against the PageDoubleMap definition).
                                                         */
    1310                 :            :                 nr = HPAGE_PMD_NR;
    1311                 :            :         }
    1312                 :            : 
    1313                 :            :         if (unlikely(PageMlocked(page)))
    1314                 :            :                 clear_page_mlock(page);
    1315                 :            : 
                                                /* nr can be 0 when no subpage's _mapcount went negative in the loop above. */
    1316                 :            :         if (nr)
    1317                 :            :                 __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
    1318                 :            : }
    1319                 :            : 
    1320                 :            : /**
    1321                 :            :  * page_remove_rmap - take down pte mapping from a page
    1322                 :            :  * @page:       page to remove mapping from
    1323                 :            :  * @compound:   uncharge the page as compound or small page
    1324                 :            :  *
                                         * Pages that are not PageAnon() are handed to page_remove_file_rmap()
                                         * together with @compound.  For anonymous pages, a true @compound selects
                                         * page_remove_anon_compound_rmap(); otherwise page->_mapcount is decremented
                                         * and, only if that made it negative, NR_ANON_MAPPED is decremented, the
                                         * mlock state is cleared when PageMlocked() is set, and a page for which
                                         * PageTransCompound() is true has its compound head queued for deferred
                                         * split.
                                         *
    1325                 :            :  * The caller needs to hold the pte lock.
    1326                 :            :  */
    1327                 :   17744708 : void page_remove_rmap(struct page *page, bool compound)
    1328                 :            : {
    1329   [ -  +  +  + ]:   17744708 :         if (!PageAnon(page))
    1330                 :   15153327 :                 return page_remove_file_rmap(page, compound);
    1331                 :            : 
                                                /* Anonymous page mapped as a compound unit: handled separately. */
    1332         [ -  + ]:    2591381 :         if (compound)
    1333                 :          0 :                 return page_remove_anon_compound_rmap(page);
    1334                 :            : 
    1335                 :            :         /* page still mapped by someone else? */
    1336         [ +  + ]:    2591381 :         if (!atomic_add_negative(-1, &page->_mapcount))
    1337                 :            :                 return;
    1338                 :            : 
    1339                 :            :         /*
    1340                 :            :          * We use the irq-unsafe __dec_node_page_state because
    1341                 :            :          * these counters are not modified in interrupt context, and
    1342                 :            :          * pte lock(a spinlock) is held, which implies preemption disabled.
    1343                 :            :          */
    1344                 :     995174 :         __dec_node_page_state(page, NR_ANON_MAPPED);
    1345                 :            : 
    1346   [ -  +  -  + ]:    1990348 :         if (unlikely(PageMlocked(page)))
    1347                 :          0 :                 clear_page_mlock(page);
    1348                 :            : 
                                                /* PageTransCompound() page: queue its compound head for deferred split. */
    1349                 :            :         if (PageTransCompound(page))
    1350                 :            :                 deferred_split_huge_page(compound_head(page));
    1351                 :            : 
    1352                 :            :         /*
    1353                 :            :          * It would be tidy to reset the PageAnon mapping here,
    1354                 :            :          * but that might overwrite a racing page_add_anon_rmap
    1355                 :            :          * which increments mapcount after us but sets mapping
    1356                 :            :          * before us: so leave the reset to free_unref_page,
    1357                 :            :          * and remember that it's only reliable while mapped.
    1358                 :            :          * Leaving it set also helps swapoff to reinstate ptes
    1359                 :            :          * faster for those pages still in swapcache.
    1360                 :            :          */
    1361                 :            : }
    1362                 :            : 
    1363                 :            : /*
    1364                 :            :  * @arg: enum ttu_flags will be passed to this argument
    1365                 :            :  */
    1366                 :          0 : static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
    1367                 :            :                      unsigned long address, void *arg)
    1368                 :            : {
    1369                 :          0 :         struct mm_struct *mm = vma->vm_mm;
    1370                 :          0 :         struct page_vma_mapped_walk pvmw = {
    1371                 :            :                 .page = page,
    1372                 :            :                 .vma = vma,
    1373                 :            :                 .address = address,
    1374                 :            :         };
    1375                 :          0 :         pte_t pteval;
    1376                 :          0 :         struct page *subpage;
    1377                 :          0 :         bool ret = true;
    1378                 :          0 :         struct mmu_notifier_range range;
    1379                 :          0 :         enum ttu_flags flags = (enum ttu_flags)arg;
    1380                 :            : 
    1381                 :            :         /* munlock has nothing to gain from examining un-locked vmas */
    1382   [ #  #  #  # ]:          0 :         if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
    1383                 :            :                 return true;
    1384                 :            : 
    1385                 :          0 :         if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
    1386                 :            :             is_zone_device_page(page) && !is_device_private_page(page))
    1387                 :            :                 return true;
    1388                 :            : 
    1389                 :          0 :         if (flags & TTU_SPLIT_HUGE_PMD) {
    1390                 :            :                 split_huge_pmd_address(vma, address,
    1391                 :            :                                 flags & TTU_SPLIT_FREEZE, page);
    1392                 :            :         }
    1393                 :            : 
    1394                 :            :         /*
    1395                 :            :          * For THP, we have to assume the worst case, i.e. pmd, for invalidation.
    1396                 :            :          * For hugetlb, it could be much worse if we need to do pud
    1397                 :            :          * invalidation in the case of pmd sharing.
    1398                 :            :          *
    1399                 :            :          * Note that the page cannot be freed in this function, as the caller
    1400                 :            :          * of try_to_unmap() must hold a reference on the page.
    1401                 :            :          */
    1402                 :          0 :         mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
    1403                 :            :                                 address,
    1404                 :          0 :                                 min(vma->vm_end, address + page_size(page)));
    1405         [ #  # ]:          0 :         if (PageHuge(page)) {
    1406                 :            :                 /*
    1407                 :            :                  * If sharing is possible, start and end will be adjusted
    1408                 :            :                  * accordingly.
    1409                 :            :                  */
    1410                 :          0 :                 adjust_range_if_pmd_sharing_possible(vma, &range.start,
    1411                 :            :                                                      &range.end);
    1412                 :            :         }
    1413                 :          0 :         mmu_notifier_invalidate_range_start(&range);
    1414                 :            : 
    1415         [ #  # ]:          0 :         while (page_vma_mapped_walk(&pvmw)) {
    1416                 :            : #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
    1417                 :            :                 /* PMD-mapped THP migration entry */
    1418                 :            :                 if (!pvmw.pte && (flags & TTU_MIGRATION)) {
    1419                 :            :                         VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
    1420                 :            : 
    1421                 :            :                         set_pmd_migration_entry(&pvmw, page);
    1422                 :            :                         continue;
    1423                 :            :                 }
    1424                 :            : #endif
    1425                 :            : 
    1426                 :            :                 /*
    1427                 :            :                  * If the page is mlock()d, we cannot swap it out.
    1428                 :            :                  * If it's recently referenced (perhaps page_referenced
    1429                 :            :                  * skipped over this mm) then we should reactivate it.
    1430                 :            :                  */
    1431         [ #  # ]:          0 :                 if (!(flags & TTU_IGNORE_MLOCK)) {
    1432         [ #  # ]:          0 :                         if (vma->vm_flags & VM_LOCKED) {
    1433                 :            :                                 /* PTE-mapped THP are never mlocked */
    1434                 :          0 :                                 if (!PageTransCompound(page)) {
    1435                 :            :                                         /*
    1436                 :            :                                          * Holding pte lock, we do *not* need
    1437                 :            :                                          * mmap_sem here
    1438                 :            :                                          */
    1439                 :          0 :                                         mlock_vma_page(page);
    1440                 :            :                                 }
    1441                 :          0 :                                 ret = false;
    1442         [ #  # ]:          0 :                                 page_vma_mapped_walk_done(&pvmw);
    1443                 :            :                                 break;
    1444                 :            :                         }
    1445         [ #  # ]:          0 :                         if (flags & TTU_MUNLOCK)
    1446                 :          0 :                                 continue;
    1447                 :            :                 }
    1448                 :            : 
    1449                 :            :                 /* Unexpected PMD-mapped THP? */
    1450                 :          0 :                 VM_BUG_ON_PAGE(!pvmw.pte, page);
    1451                 :            : 
    1452         [ #  # ]:          0 :                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
    1453                 :          0 :                 address = pvmw.address;
    1454                 :            : 
    1455         [ #  # ]:          0 :                 if (PageHuge(page)) {
    1456         [ #  # ]:          0 :                         if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
    1457                 :            :                                 /*
    1458                 :            :                                  * huge_pmd_unshare unmapped an entire PMD
    1459                 :            :                                  * page.  There is no way of knowing exactly
    1460                 :            :                                  * which PMDs may be cached for this mm, so
    1461                 :            :                                  * we must flush them all.  start/end were
    1462                 :            :                                  * already adjusted above to cover this range.
    1463                 :            :                                  */
    1464         [ #  # ]:          0 :                                 flush_cache_range(vma, range.start, range.end);
    1465         [ #  # ]:          0 :                                 flush_tlb_range(vma, range.start, range.end);
    1466         [ #  # ]:          0 :                                 mmu_notifier_invalidate_range(mm, range.start,
    1467                 :            :                                                               range.end);
    1468                 :            : 
    1469                 :            :                                 /*
    1470                 :            :                                  * The ref count of the PMD page was dropped
    1471                 :            :                                  * which is part of the way map counting
    1472                 :            :                                  * is done for shared PMDs.  Return 'true'
    1473                 :            :                                  * here.  When there is no other sharing,
    1474                 :            :                                  * huge_pmd_unshare returns false and we will
    1475                 :            :                                  * unmap the actual page and drop map count
    1476                 :            :                                  * to zero.
    1477                 :            :                                  */
    1478         [ #  # ]:          0 :                                 page_vma_mapped_walk_done(&pvmw);
    1479                 :            :                                 break;
    1480                 :            :                         }
    1481                 :            :                 }
    1482                 :            : 
    1483                 :          0 :                 if (IS_ENABLED(CONFIG_MIGRATION) &&
    1484                 :            :                     (flags & TTU_MIGRATION) &&
    1485                 :            :                     is_zone_device_page(page)) {
    1486                 :            :                         swp_entry_t entry;
    1487                 :            :                         pte_t swp_pte;
    1488                 :            : 
    1489                 :            :                         pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
    1490                 :            : 
    1491                 :            :                         /*
    1492                 :            :                          * Store the pfn of the page in a special migration
    1493                 :            :                          * pte. do_swap_page() will wait until the migration
    1494                 :            :                          * pte is removed and then restart fault handling.
    1495                 :            :                          */
    1496                 :            :                         entry = make_migration_entry(page, 0);
    1497                 :            :                         swp_pte = swp_entry_to_pte(entry);
    1498                 :            :                         if (pte_soft_dirty(pteval))
    1499                 :            :                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
    1500                 :            :                         set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
    1501                 :            :                         /*
    1502                 :            :                          * No need to invalidate here: it will synchronize
    1503                 :            :                          * against the special swap migration pte.
    1504                 :            :                          *
    1505                 :            :                          * The assignment to subpage above was computed from a
    1506                 :            :                          * swap PTE which results in an invalid pointer.
    1507                 :            :                          * Since only PAGE_SIZE pages can currently be
    1508                 :            :                          * migrated, just set it to page. This will need to be
    1509                 :            :                          * changed when hugepage migrations to device private
    1510                 :            :                          * memory are supported.
    1511                 :            :                          */
    1512                 :            :                         subpage = page;
    1513                 :            :                         goto discard;
    1514                 :            :                 }
    1515                 :            : 
    1516         [ #  # ]:          0 :                 if (!(flags & TTU_IGNORE_ACCESS)) {
    1517   [ #  #  #  # ]:          0 :                         if (ptep_clear_flush_young_notify(vma, address,
    1518                 :            :                                                 pvmw.pte)) {
    1519                 :          0 :                                 ret = false;
    1520         [ #  # ]:          0 :                                 page_vma_mapped_walk_done(&pvmw);
    1521                 :            :                                 break;
    1522                 :            :                         }
    1523                 :            :                 }
    1524                 :            : 
    1525                 :            :                 /* Nuke the page table entry. */
    1526                 :          0 :                 flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
    1527         [ #  # ]:          0 :                 if (should_defer_flush(mm, flags)) {
    1528                 :            :                         /*
    1529                 :            :                          * We clear the PTE but do not flush so potentially
    1530                 :            :                          * a remote CPU could still be writing to the page.
    1531                 :            :                          * If the entry was previously clean then the
    1532                 :            :                          * architecture must guarantee that a clear->dirty
    1533                 :            :                          * transition on a cached TLB entry is written through
    1534                 :            :                          * and traps if the PTE is unmapped.
    1535                 :            :                          */
    1536                 :          0 :                         pteval = ptep_get_and_clear(mm, address, pvmw.pte);
    1537                 :            : 
    1538                 :          0 :                         set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
    1539                 :            :                 } else {
    1540                 :          0 :                         pteval = ptep_clear_flush(vma, address, pvmw.pte);
    1541                 :            :                 }
    1542                 :            : 
    1543                 :            :                 /* Move the dirty bit to the page. Now the pte is gone. */
    1544         [ #  # ]:          0 :                 if (pte_dirty(pteval))
    1545                 :          0 :                         set_page_dirty(page);
    1546                 :            : 
    1547                 :            :                 /* Update high watermark before we lower rss */
    1548                 :          0 :                 update_hiwater_rss(mm);
    1549                 :            : 
    1550         [ #  # ]:          0 :                 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
    1551                 :            :                         pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
    1552                 :            :                         if (PageHuge(page)) {
    1553                 :            :                                 hugetlb_count_sub(compound_nr(page), mm);
    1554                 :            :                                 set_huge_swap_pte_at(mm, address,
    1555                 :            :                                                      pvmw.pte, pteval,
    1556                 :            :                                                      vma_mmu_pagesize(vma));
    1557                 :            :                         } else {
    1558                 :            :                                 dec_mm_counter(mm, mm_counter(page));
    1559                 :            :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1560                 :            :                         }
    1561                 :            : 
    1562         [ #  # ]:          0 :                 } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
    1563                 :            :                         /*
    1564                 :            :                          * The guest indicated that the page content is of no
    1565                 :            :                          * interest anymore. Simply discard the pte, vmscan
    1566                 :            :                          * will take care of the rest.
    1567                 :            :                          * A future reference will then fault in a new zero
    1568                 :            :                          * page. When userfaultfd is active, we must not drop
    1569                 :            :                          * this page though, as its main user (postcopy
    1570                 :            :                          * migration) will not expect userfaults on already
    1571                 :            :                          * copied pages.
    1572                 :            :                          */
    1573                 :            :                         dec_mm_counter(mm, mm_counter(page));
    1574                 :            :                         /* We have to invalidate as we cleared the pte */
    1575                 :            :                         mmu_notifier_invalidate_range(mm, address,
    1576                 :            :                                                       address + PAGE_SIZE);
    1577                 :          0 :                 } else if (IS_ENABLED(CONFIG_MIGRATION) &&
    1578         [ #  # ]:          0 :                                 (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
    1579                 :          0 :                         swp_entry_t entry;
    1580                 :          0 :                         pte_t swp_pte;
    1581                 :            : 
    1582                 :          0 :                         if (arch_unmap_one(mm, vma, address, pteval) < 0) {
    1583                 :            :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1584                 :            :                                 ret = false;
    1585                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1586                 :            :                                 break;
    1587                 :            :                         }
    1588                 :            : 
    1589                 :            :                         /*
    1590                 :            :                          * Store the pfn of the page in a special migration
    1591                 :            :                          * pte. do_swap_page() will wait until the migration
    1592                 :            :                          * pte is removed and then restart fault handling.
    1593                 :            :                          */
    1594                 :          0 :                         entry = make_migration_entry(subpage,
    1595                 :            :                                         pte_write(pteval));
    1596                 :          0 :                         swp_pte = swp_entry_to_pte(entry);
    1597                 :          0 :                         if (pte_soft_dirty(pteval))
    1598                 :            :                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
    1599                 :          0 :                         set_pte_at(mm, address, pvmw.pte, swp_pte);
    1600                 :            :                         /*
    1601                 :            :                          * No need to invalidate here: it will synchronize
    1602                 :            :                          * against the special swap migration pte.
    1603                 :            :                          */
    1604   [ #  #  #  # ]:          0 :                 } else if (PageAnon(page)) {
    1605                 :          0 :                         swp_entry_t entry = { .val = page_private(subpage) };
    1606                 :          0 :                         pte_t swp_pte;
    1607                 :            :                         /*
    1608                 :            :                          * Store the swap location in the pte.
    1609                 :            :                          * See handle_pte_fault() ...
    1610                 :            :                          */
    1611   [ #  #  #  # ]:          0 :                         if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
    1612                 :          0 :                                 WARN_ON_ONCE(1);
    1613                 :          0 :                                 ret = false;
    1614                 :            :                                 /* We have to invalidate as we cleared the pte */
    1615         [ #  # ]:          0 :                                 mmu_notifier_invalidate_range(mm, address,
    1616                 :            :                                                         address + PAGE_SIZE);
    1617         [ #  # ]:          0 :                                 page_vma_mapped_walk_done(&pvmw);
    1618                 :            :                                 break;
    1619                 :            :                         }
    1620                 :            : 
    1621                 :            :                         /* MADV_FREE page check */
    1622   [ #  #  #  # ]:          0 :                         if (!PageSwapBacked(page)) {
    1623   [ #  #  #  # ]:          0 :                                 if (!PageDirty(page)) {
    1624                 :            :                                         /* Invalidate as we cleared the pte */
    1625         [ #  # ]:          0 :                                         mmu_notifier_invalidate_range(mm,
    1626                 :            :                                                 address, address + PAGE_SIZE);
    1627                 :          0 :                                         dec_mm_counter(mm, MM_ANONPAGES);
    1628                 :          0 :                                         goto discard;
    1629                 :            :                                 }
    1630                 :            : 
    1631                 :            :                                 /*
    1632                 :            :                                  * If the page was redirtied, it cannot be
    1633                 :            :                                  * discarded. Remap the page to page table.
    1634                 :            :                                  */
    1635         [ #  # ]:          0 :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1636         [ #  # ]:          0 :                                 SetPageSwapBacked(page);
    1637                 :          0 :                                 ret = false;
    1638         [ #  # ]:          0 :                                 page_vma_mapped_walk_done(&pvmw);
    1639                 :            :                                 break;
    1640                 :            :                         }
    1641                 :            : 
    1642         [ #  # ]:          0 :                         if (swap_duplicate(entry) < 0) {
    1643         [ #  # ]:          0 :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1644                 :          0 :                                 ret = false;
    1645         [ #  # ]:          0 :                                 page_vma_mapped_walk_done(&pvmw);
    1646                 :            :                                 break;
    1647                 :            :                         }
    1648         [ #  # ]:          0 :                         if (arch_unmap_one(mm, vma, address, pteval) < 0) {
    1649                 :            :                                 set_pte_at(mm, address, pvmw.pte, pteval);
    1650                 :            :                                 ret = false;
    1651                 :            :                                 page_vma_mapped_walk_done(&pvmw);
    1652                 :            :                                 break;
    1653                 :            :                         }
    1654         [ #  # ]:          0 :                         if (list_empty(&mm->mmlist)) {
    1655                 :          0 :                                 spin_lock(&mmlist_lock);
    1656         [ #  # ]:          0 :                                 if (list_empty(&mm->mmlist))
    1657                 :          0 :                                         list_add(&mm->mmlist, &init_mm.mmlist);
    1658                 :          0 :                                 spin_unlock(&mmlist_lock);
    1659                 :            :                         }
    1660                 :          0 :                         dec_mm_counter(mm, MM_ANONPAGES);
    1661                 :          0 :                         inc_mm_counter(mm, MM_SWAPENTS);
    1662         [ #  # ]:          0 :                         swp_pte = swp_entry_to_pte(entry);
    1663         [ #  # ]:          0 :                         if (pte_soft_dirty(pteval))
    1664                 :            :                                 swp_pte = pte_swp_mksoft_dirty(swp_pte);
    1665         [ #  # ]:          0 :                         set_pte_at(mm, address, pvmw.pte, swp_pte);
    1666                 :            :                         /* Invalidate as we cleared the pte */
    1667         [ #  # ]:          0 :                         mmu_notifier_invalidate_range(mm, address,
    1668                 :            :                                                       address + PAGE_SIZE);
    1669                 :            :                 } else {
    1670                 :            :                         /*
    1671                 :            :                          * This is a locked file-backed page, thus it cannot
    1672                 :            :                          * be removed from the page cache and replaced by a new
    1673                 :            :                          * page before mmu_notifier_invalidate_range_end, so no
    1674                 :            :                          * concurrent thread might update its page table to
    1675                 :            :                          * point at new page while a device still is using this
    1676                 :            :                          * page.
    1677                 :            :                          *
    1678                 :            :                          * See Documentation/vm/mmu_notifier.rst
    1679                 :            :                          */
    1680                 :          0 :                         dec_mm_counter(mm, mm_counter_file(page));
    1681                 :            :                 }
    1682                 :          0 : discard:
    1683                 :            :                 /*
    1684                 :            :                  * No need to call mmu_notifier_invalidate_range(): it has been
    1685                 :            :                  * done above for all cases requiring it to happen under page
    1686                 :            :                  * table lock before mmu_notifier_invalidate_range_end()
    1687                 :            :                  *
    1688                 :            :                  * See Documentation/vm/mmu_notifier.rst
    1689                 :            :                  */
    1690                 :          0 :                 page_remove_rmap(subpage, PageHuge(page));
    1691                 :          0 :                 put_page(page);
    1692                 :            :         }
    1693                 :            : 
    1694                 :          0 :         mmu_notifier_invalidate_range_end(&range);
    1695                 :            : 
    1696                 :          0 :         return ret;
    1697                 :            : }
    1698                 :            : 
    1699                 :          0 : bool is_vma_temporary_stack(struct vm_area_struct *vma) /* true iff vma has VM_GROWSDOWN or VM_GROWSUP set and every VM_STACK_INCOMPLETE_SETUP bit set */
    1700                 :            : {
    1701                 :          0 :         int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
    1702                 :            : 
    1703         [ #  # ]:          0 :         if (!maybe_stack)
    1704                 :            :                 return false; /* neither growth flag is set */
    1705                 :            : 
    1706   [ #  #  #  # ]:          0 :         if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == /* all mask bits must be set, not just some of them */
    1707                 :            :                                                 VM_STACK_INCOMPLETE_SETUP)
    1708                 :          0 :                 return true;
    1709                 :            : 
    1710                 :            :         return false;
    1711                 :            : }
    1712                 :            : 
    1713                 :          0 : static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) /* invalid_vma callback installed by try_to_unmap(): skips VMAs for which is_vma_temporary_stack() is true; arg is unused */
    1714                 :            : {
    1715         [ #  # ]:          0 :         return is_vma_temporary_stack(vma);
    1716                 :            : }
    1717                 :            : 
    1718                 :          0 : static int page_mapcount_is_zero(struct page *page) /* .done callback of try_to_unmap(): nonzero once total_mapcount(page) is 0 */
    1719                 :            : {
    1720                 :          0 :         return !total_mapcount(page);
    1721                 :            : }
    1722                 :            : 
    1723                 :            : /**
    1724                 :            :  * try_to_unmap - try to remove all page table mappings to a page
    1725                 :            :  * @page: the page to get unmapped
    1726                 :            :  * @flags: action and flags
    1727                 :            :  *
    1728                 :            :  * Tries to remove all the page table entries which are mapping this
    1729                 :            :  * page, used in the pageout path.  Caller must hold the page lock.
    1730                 :            :  *
                         :            :  * The walk invokes try_to_unmap_one() for each VMA it visits and stops
                         :            :  * early once total_mapcount(@page) drops to zero.  If TTU_RMAP_LOCKED is
                         :            :  * set in @flags the walk goes through rmap_walk_locked(), otherwise
                         :            :  * through rmap_walk().
                         :            :  *
    1731                 :            :  * If unmap is successful, return true. Otherwise, false.
                         :            :  * Success here means page_mapcount(@page) is zero after the walk.
    1732                 :            :  */
    1733                 :          0 : bool try_to_unmap(struct page *page, enum ttu_flags flags)
    1734                 :            : {
    1735                 :          0 :         struct rmap_walk_control rwc = {
    1736                 :            :                 .rmap_one = try_to_unmap_one,
    1737                 :          0 :                 .arg = (void *)flags, /* flags are passed by value inside the void * argument */
    1738                 :            :                 .done = page_mapcount_is_zero,
    1739                 :            :                 .anon_lock = page_lock_anon_vma_read,
    1740                 :            :         };
    1741                 :            : 
    1742                 :            :         /*
    1743                 :            :          * During exec, a temporary VMA is set up and later moved.
    1744                 :            :          * The VMA is moved under the anon_vma lock but not the
    1745                 :            :          * page tables leading to a race where migration cannot
    1746                 :            :          * find the migration ptes. Rather than increasing the
    1747                 :            :          * locking requirements of exec(), migration skips
    1748                 :            :          * temporary VMAs until after exec() completes.
                         :            :          *
                         :            :          * The filter is only installed for non-KSM anonymous pages
                         :            :          * when TTU_MIGRATION or TTU_SPLIT_FREEZE is requested.
    1749                 :            :          */
    1750         [ #  # ]:          0 :         if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
    1751         [ #  # ]:          0 :             && !PageKsm(page) && PageAnon(page))
    1752                 :          0 :                 rwc.invalid_vma = invalid_migration_vma;
    1753                 :            : 
    1754         [ #  # ]:          0 :         if (flags & TTU_RMAP_LOCKED)
    1755                 :          0 :                 rmap_walk_locked(page, &rwc);
    1756                 :            :         else
    1757                 :          0 :                 rmap_walk(page, &rwc);
    1758                 :            : 
    1759                 :          0 :         return !page_mapcount(page) ? true : false; /* NOTE(review): checks page_mapcount() while .done uses total_mapcount() - confirm this is intended */
    1760                 :            : }
    1761                 :            : 
    1762                 :          0 : static int page_not_mapped(struct page *page) /* .done callback of try_to_munlock(): nonzero once page_mapped(page) is false */
    1763                 :            : {
    1764                 :          0 :         return !page_mapped(page);
    1765                 :            : }; /* NOTE(review): the ';' after the closing brace is redundant */
    1766                 :            : 
    1767                 :            : /**
    1768                 :            :  * try_to_munlock - try to munlock a page
    1769                 :            :  * @page: the page to be munlocked
    1770                 :            :  *
    1771                 :            :  * Called from munlock code.  Checks all of the VMAs mapping the page
    1772                 :            :  * to make sure nobody else has this page mlocked. The page will be
    1773                 :            :  * returned with PG_mlocked cleared if no other vmas have it mlocked.
                         :            :  *
                         :            :  * The check is an rmap_walk() that runs try_to_unmap_one() with
                         :            :  * TTU_MUNLOCK as its argument and stops once page_mapped(@page) is false.
                         :            :  * The VM_BUG_ON_PAGE() checks below require @page to be locked and not
                         :            :  * on the LRU, and reject a compound page that has PageDoubleMap set.
    1774                 :            :  */
    1775                 :            : 
    1776                 :          0 : void try_to_munlock(struct page *page)
    1777                 :            : {
    1778                 :          0 :         struct rmap_walk_control rwc = {
    1779                 :            :                 .rmap_one = try_to_unmap_one,
    1780                 :            :                 .arg = (void *)TTU_MUNLOCK,
    1781                 :            :                 .done = page_not_mapped,
    1782                 :            :                 .anon_lock = page_lock_anon_vma_read,
    1783                 :            : 
    1784                 :            :         };
    1785                 :            : 
    1786                 :          0 :         VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
    1787                 :          0 :         VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
    1788                 :            : 
    1789                 :          0 :         rmap_walk(page, &rwc);
    1790                 :          0 : }
    1791                 :            : 
    1792                 :     642035 : void __put_anon_vma(struct anon_vma *anon_vma) /* frees anon_vma, then drops one reference on its root when the root is a different object */
    1793                 :            : {
    1794                 :     642035 :         struct anon_vma *root = anon_vma->root; /* read before anon_vma is freed below */
    1795                 :            : 
    1796                 :     642035 :         anon_vma_free(anon_vma);
    1797   [ +  +  -  + ]:     642035 :         if (root != anon_vma && atomic_dec_and_test(&root->refcount))
    1798                 :          0 :                 anon_vma_free(root); /* the root's refcount just reached zero */
    1799                 :     642035 : }
    1800                 :            : 
    1801                 :            : static struct anon_vma *rmap_walk_anon_lock(struct page *page,
    1802                 :            :                                         struct rmap_walk_control *rwc)
    1803                 :            : {
    1804                 :            :         struct anon_vma *anon_vma;
    1805                 :            : 
    1806                 :            :         if (rwc->anon_lock)
    1807                 :            :                 return rwc->anon_lock(page); /* the walk supplied its own locking callback */
    1808                 :            : 
    1809                 :            :         /*
                         :            :          * No anon_lock callback was supplied: look up the page's
                         :            :          * anon_vma and take its read lock here.  Returns NULL,
                         :            :          * with nothing locked, if the page has no anon_vma.
                         :            :          *
    1810                 :            :          * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
    1811                 :            :          * because that depends on page_mapped(); but not all its usages
    1812                 :            :          * are holding mmap_sem. Users without mmap_sem are required to
    1813                 :            :          * take a reference count to prevent the anon_vma disappearing.
    1814                 :            :          */
    1815                 :            :         anon_vma = page_anon_vma(page);
    1816                 :            :         if (!anon_vma)
    1817                 :            :                 return NULL;
    1818                 :            : 
    1819                 :            :         anon_vma_lock_read(anon_vma);
    1820                 :            :         return anon_vma;
    1821                 :            : }
    1822                 :            : 
    1823                 :            : /*
    1824                 :            :  * rmap_walk_anon - do something to anonymous page using the object-based
    1825                 :            :  * rmap method
    1826                 :            :  * @page: the page to be handled
    1827                 :            :  * @rwc: control variable according to each walk type
                         :            :  * @locked: true if the caller already holds the anon_vma lock; in that
                         :            :  *          case the lock is neither taken nor released here
    1828                 :            :  *
    1829                 :            :  * Find all the mappings of a page using the mapping pointer and the vma chains
    1830                 :            :  * contained in the anon_vma struct it points to.
    1831                 :            :  *
                         :            :  * For every VMA found, @rwc->invalid_vma (if set) may veto the VMA, then
                         :            :  * @rwc->rmap_one is called.  The walk ends when rmap_one returns false or
                         :            :  * when @rwc->done (if set) returns nonzero.
                         :            :  *
    1832                 :            :  * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
    1833                 :            :  * where the page was found will be held for write.  So, we won't recheck
    1834                 :            :  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
    1835                 :            :  * LOCKED.
    1836                 :            :  */
    1837                 :          0 : static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
    1838                 :            :                 bool locked)
    1839                 :            : {
    1840                 :          0 :         struct anon_vma *anon_vma;
    1841                 :          0 :         pgoff_t pgoff_start, pgoff_end;
    1842                 :          0 :         struct anon_vma_chain *avc;
    1843                 :            : 
    1844         [ #  # ]:          0 :         if (locked) {
    1845                 :          0 :                 anon_vma = page_anon_vma(page);
    1846                 :            :                 /* Did the anon_vma disappear under us? */
    1847                 :          0 :                 VM_BUG_ON_PAGE(!anon_vma, page);
    1848                 :            :         } else {
    1849                 :          0 :                 anon_vma = rmap_walk_anon_lock(page, rwc);
    1850                 :            :         }
    1851         [ #  # ]:          0 :         if (!anon_vma)
    1852                 :            :                 return; /* nothing to walk */
    1853                 :            : 
    1854                 :          0 :         pgoff_start = page_to_pgoff(page);
    1855                 :          0 :         pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; /* inclusive end of the page's offset range */
    1856         [ #  # ]:          0 :         anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
    1857                 :            :                         pgoff_start, pgoff_end) {
    1858                 :          0 :                 struct vm_area_struct *vma = avc->vma;
    1859                 :          0 :                 unsigned long address = vma_address(page, vma);
    1860                 :            : 
    1861                 :          0 :                 cond_resched();
    1862                 :            : 
    1863   [ #  #  #  # ]:          0 :                 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
    1864                 :          0 :                         continue;
    1865                 :            : 
    1866         [ #  # ]:          0 :                 if (!rwc->rmap_one(page, vma, address, rwc->arg))
    1867                 :            :                         break; /* rmap_one returned false: stop the walk */
    1868   [ #  #  #  # ]:          0 :                 if (rwc->done && rwc->done(page))
    1869                 :            :                         break;
    1870                 :            :         }
    1871                 :            : 
    1872         [ #  # ]:          0 :         if (!locked)
    1873                 :          0 :                 anon_vma_unlock_read(anon_vma);
    1874                 :            : }
    1875                 :            : 
    1876                 :            : /*
    1877                 :            :  * rmap_walk_file - do something to file page using the object-based rmap method
    1878                 :            :  * @page: the page to be handled
    1879                 :            :  * @rwc: control variable according to each walk type
                         :            :  * @locked: true if the caller already holds the i_mmap lock of the page's
                         :            :  *          mapping; in that case it is neither taken nor released here
    1880                 :            :  *
    1881                 :            :  * Find all the mappings of a page using the mapping pointer and the vma chains
    1882                 :            :  * contained in the address_space struct it points to.
    1883                 :            :  *
                         :            :  * For every VMA found, @rwc->invalid_vma (if set) may veto the VMA, then
                         :            :  * @rwc->rmap_one is called.  The walk ends when rmap_one returns false or
                         :            :  * when @rwc->done (if set) returns nonzero.  Returns without walking if
                         :            :  * the page has no mapping.
                         :            :  *
    1884                 :            :  * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
    1885                 :            :  * where the page was found will be held for write.  So, we won't recheck
    1886                 :            :  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
    1887                 :            :  * LOCKED.
    1888                 :            :  */
    1889                 :          0 : static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
    1890                 :            :                 bool locked)
    1891                 :            : {
    1892                 :          0 :         struct address_space *mapping = page_mapping(page);
    1893                 :          0 :         pgoff_t pgoff_start, pgoff_end;
    1894                 :          0 :         struct vm_area_struct *vma;
    1895                 :            : 
    1896                 :            :         /*
    1897                 :            :          * The page lock not only makes sure that page->mapping cannot
    1898                 :            :          * suddenly be NULLified by truncation, it makes sure that the
    1899                 :            :          * structure at mapping cannot be freed and reused yet,
    1900                 :            :          * so we can safely take mapping->i_mmap_rwsem.
    1901                 :            :          */
    1902                 :          0 :         VM_BUG_ON_PAGE(!PageLocked(page), page);
    1903                 :            : 
    1904         [ #  # ]:          0 :         if (!mapping)
    1905                 :            :                 return;
    1906                 :            : 
    1907                 :          0 :         pgoff_start = page_to_pgoff(page);
    1908                 :          0 :         pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; /* inclusive end of the page's offset range */
    1909         [ #  # ]:          0 :         if (!locked)
    1910                 :          0 :                 i_mmap_lock_read(mapping);
    1911         [ #  # ]:          0 :         vma_interval_tree_foreach(vma, &mapping->i_mmap,
    1912                 :            :                         pgoff_start, pgoff_end) {
    1913                 :          0 :                 unsigned long address = vma_address(page, vma);
    1914                 :            : 
    1915                 :          0 :                 cond_resched();
    1916                 :            : 
    1917   [ #  #  #  # ]:          0 :                 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
    1918                 :          0 :                         continue;
    1919                 :            : 
    1920         [ #  # ]:          0 :                 if (!rwc->rmap_one(page, vma, address, rwc->arg))
    1921                 :          0 :                         goto done; /* rmap_one returned false: stop the walk */
    1922   [ #  #  #  # ]:          0 :                 if (rwc->done && rwc->done(page))
    1923                 :          0 :                         goto done;
    1924                 :            :         }
    1925                 :            : 
    1926                 :          0 : done:
    1927         [ #  # ]:          0 :         if (!locked)
    1928                 :          0 :                 i_mmap_unlock_read(mapping);
    1929                 :            : }
    1930                 :            : 
    1931                 :          0 : void rmap_walk(struct page *page, struct rmap_walk_control *rwc) /* dispatches to the KSM, anon or file walker; the anon/file walkers run with locked == false */
    1932                 :            : {
    1933         [ #  # ]:          0 :         if (unlikely(PageKsm(page)))
    1934                 :            :                 rmap_walk_ksm(page, rwc);
    1935   [ #  #  #  # ]:          0 :         else if (PageAnon(page))
    1936                 :          0 :                 rmap_walk_anon(page, rwc, false);
    1937                 :            :         else
    1938                 :          0 :                 rmap_walk_file(page, rwc, false);
    1939                 :          0 : }
    1940                 :            : 
    1941                 :            : /* Like rmap_walk(), but the caller already holds the relevant rmap lock; KSM pages are rejected. */
    1942                 :          0 : void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
    1943                 :            : {
    1944                 :            :         /* no ksm support for now */
    1945                 :          0 :         VM_BUG_ON_PAGE(PageKsm(page), page);
    1946   [ #  #  #  # ]:          0 :         if (PageAnon(page))
    1947                 :          0 :                 rmap_walk_anon(page, rwc, true);
    1948                 :            :         else
    1949                 :          0 :                 rmap_walk_file(page, rwc, true);
    1950                 :          0 : }
    1951                 :            : 
    1952                 :            : #ifdef CONFIG_HUGETLB_PAGE
    1953                 :            : /*
    1954                 :            :  * The following two functions are for anonymous (private mapped) hugepages.
    1955                 :            :  * Unlike common anonymous pages, anonymous hugepages have no accounting code
    1956                 :            :  * and no lru code, because we handle hugepages differently from common pages.
                         :            :  *
                         :            :  * hugepage_add_anon_rmap() requires a locked page and a vma that already
                         :            :  * has an anon_vma (both enforced with BUG_ON).  It increments the compound
                         :            :  * mapcount and calls __page_set_anon_rmap() only when atomic_inc_and_test()
                         :            :  * returns true, which the code treats as the first mapping.
    1957                 :            :  */
    1958                 :          0 : void hugepage_add_anon_rmap(struct page *page,
    1959                 :            :                             struct vm_area_struct *vma, unsigned long address)
    1960                 :            : {
    1961                 :          0 :         struct anon_vma *anon_vma = vma->anon_vma;
    1962                 :          0 :         int first;
    1963                 :            : 
    1964   [ #  #  #  # ]:          0 :         BUG_ON(!PageLocked(page));
    1965         [ #  # ]:          0 :         BUG_ON(!anon_vma);
    1966                 :            :         /* address might be in next vma when migration races vma_adjust */
    1967                 :          0 :         first = atomic_inc_and_test(compound_mapcount_ptr(page));
    1968         [ #  # ]:          0 :         if (first)
    1969                 :          0 :                 __page_set_anon_rmap(page, vma, address, 0);
    1970                 :          0 : }
    1971                 :            : 
    1972                 :          0 : void hugepage_add_new_anon_rmap(struct page *page,
    1973                 :            :                         struct vm_area_struct *vma, unsigned long address)
    1974                 :            : {
    1975   [ #  #  #  # ]:          0 :         BUG_ON(address < vma->vm_start || address >= vma->vm_end); /* address must lie inside [vm_start, vm_end) */
    1976                 :          0 :         atomic_set(compound_mapcount_ptr(page), 0); /* NOTE(review): presumably 0 encodes a single mapping, matching atomic_inc_and_test() in hugepage_add_anon_rmap() - confirm */
    1977                 :          0 :         __page_set_anon_rmap(page, vma, address, 1); /* last argument is 1 here, 0 in hugepage_add_anon_rmap() */
    1978                 :          0 : }
    1979                 :            : #endif /* CONFIG_HUGETLB_PAGE */

Generated by: LCOV version 1.14