LCOV - code coverage report
Current view: top level - arch/x86/mm - tlb.c (source / functions)                Hit    Total    Coverage
Test:         combined.info                                        Lines:         175      272      64.3 %
Date:         2022-04-01 14:58:12                                  Functions:      14       22      63.6 %
                                                                   Branches:       60      134      44.8 %

           Branch data     Line data    Source code
       1                 :            : // SPDX-License-Identifier: GPL-2.0-only
       2                 :            : #include <linux/init.h>
       3                 :            : 
       4                 :            : #include <linux/mm.h>
       5                 :            : #include <linux/spinlock.h>
       6                 :            : #include <linux/smp.h>
       7                 :            : #include <linux/interrupt.h>
       8                 :            : #include <linux/export.h>
       9                 :            : #include <linux/cpu.h>
      10                 :            : #include <linux/debugfs.h>
      11                 :            : 
      12                 :            : #include <asm/tlbflush.h>
      13                 :            : #include <asm/mmu_context.h>
      14                 :            : #include <asm/nospec-branch.h>
      15                 :            : #include <asm/cache.h>
      16                 :            : #include <asm/apic.h>
      17                 :            : #include <asm/uv/uv.h>
      18                 :            : 
      19                 :            : #include "mm_internal.h"
      20                 :            : 
      21                 :            : /*
      22                 :            :  *      TLB flushing, formerly SMP-only
      23                 :            :  *              c/o Linus Torvalds.
      24                 :            :  *
      25                 :            :  *      These mean you can really definitely utterly forget about
       26                 :            :  *      writing to user space from interrupts. (It's not allowed anyway).
      27                 :            :  *
      28                 :            :  *      Optimizations Manfred Spraul <manfred@colorfullife.com>
      29                 :            :  *
      30                 :            :  *      More scalable flush, from Andi Kleen
      31                 :            :  *
      32                 :            :  *      Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
      33                 :            :  */
      34                 :            : 
      35                 :            : /*
      36                 :            :  * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
       37                 :            :  * stored in cpu_tlbstate.last_user_mm_ibpb.
      38                 :            :  */
      39                 :            : #define LAST_USER_MM_IBPB       0x1UL
      40                 :            : 
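
Bit 0 is free for this mangling because mm_struct allocations are at least word-aligned, so the low bit of the pointer is always zero. Below is a minimal userspace sketch of the same pack-and-compare idea; mangle_mm() and ibpb_needed() are illustrative names, while the in-kernel equivalents are mm_mangle_tif_spec_ib() and the checks inside cond_ibpb() further down.

#include <stdint.h>
#include <stdio.h>

#define LAST_USER_MM_IBPB	0x1UL

/* Pack the spec-IB flag into bit 0 of an (aligned) mm pointer. */
static uintptr_t mangle_mm(const void *mm, int spec_ib)
{
	return (uintptr_t)mm | (spec_ib ? LAST_USER_MM_IBPB : 0);
}

/* IBPB is needed only when the mms differ and either side has the bit set. */
static int ibpb_needed(uintptr_t prev, uintptr_t next)
{
	return prev != next && ((prev | next) & LAST_USER_MM_IBPB);
}

int main(void)
{
	static long mm_a, mm_b;	/* aligned stand-ins for two mm_structs */

	/* Same process, flag set on both sides: no barrier needed. */
	printf("%d\n", ibpb_needed(mangle_mm(&mm_a, 1), mangle_mm(&mm_a, 1)));	/* 0 */
	/* Different processes, previous task requested IB protection: barrier. */
	printf("%d\n", ibpb_needed(mangle_mm(&mm_a, 1), mangle_mm(&mm_b, 0)));	/* 1 */
	return 0;
}
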
      41                 :            : /*
      42                 :            :  * We get here when we do something requiring a TLB invalidation
      43                 :            :  * but could not go invalidate all of the contexts.  We do the
      44                 :            :  * necessary invalidation by clearing out the 'ctx_id' which
      45                 :            :  * forces a TLB flush when the context is loaded.
      46                 :            :  */
      47                 :          0 : static void clear_asid_other(void)
      48                 :            : {
      49                 :          0 :         u16 asid;
      50                 :            : 
      51                 :            :         /*
      52                 :            :          * This is only expected to be set if we have disabled
      53                 :            :          * kernel _PAGE_GLOBAL pages.
      54                 :            :          */
      55      [ #  #  # ]:          0 :         if (!static_cpu_has(X86_FEATURE_PTI)) {
      56                 :          0 :                 WARN_ON_ONCE(1);
      57                 :          0 :                 return;
      58                 :            :         }
      59                 :            : 
      60         [ #  # ]:          0 :         for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
      61                 :            :                 /* Do not need to flush the current asid */
      62         [ #  # ]:          0 :                 if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
      63                 :          0 :                         continue;
      64                 :            :                 /*
      65                 :            :                  * Make sure the next time we go to switch to
      66                 :            :                  * this asid, we do a flush:
      67                 :            :                  */
      68                 :          0 :                 this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
      69                 :            :         }
      70                 :          0 :         this_cpu_write(cpu_tlbstate.invalidate_other, false);
      71                 :            : }
      72                 :            : 
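
Zeroing ctx_id works as a deferred flush because context IDs are allocated by incrementing last_mm_ctx_id (defined just below, initialized to 1), so no live mm ever has ctx_id 0; the ctx_id comparison in choose_new_asid() then misses and the slot is refilled with need_flush set. A tiny illustrative sketch of that guaranteed mismatch (all names here are made up for the example):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for last_mm_ctx_id: it starts at 1, so ctx_ids
 * handed to real mms are always non-zero. */
static uint64_t ctx_id_counter = 1;

static uint64_t alloc_ctx_id(void)
{
	return ++ctx_id_counter;	/* mirrors an atomic increment-and-return */
}

int main(void)
{
	uint64_t mm_ctx_id = alloc_ctx_id();	/* some live mm */
	uint64_t slot_ctx_id = mm_ctx_id;	/* ASID slot currently caching that mm */

	slot_ctx_id = 0;			/* what clear_asid_other() does */

	/* The slot no longer matches any mm, so the next switch must flush. */
	printf("reuse without flush possible: %d\n", slot_ctx_id == mm_ctx_id);	/* 0 */
	return 0;
}
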
      73                 :            : atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
      74                 :            : 
      75                 :            : 
      76                 :     129742 : static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
      77                 :            :                             u16 *new_asid, bool *need_flush)
      78                 :            : {
      79                 :     129742 :         u16 asid;
      80                 :            : 
      81      [ -  +  - ]:     129742 :         if (!static_cpu_has(X86_FEATURE_PCID)) {
      82                 :     129742 :                 *new_asid = 0;
      83                 :     129742 :                 *need_flush = true;
      84                 :     129742 :                 return;
      85                 :            :         }
      86                 :            : 
      87         [ #  # ]:          0 :         if (this_cpu_read(cpu_tlbstate.invalidate_other))
      88                 :          0 :                 clear_asid_other();
      89                 :            : 
      90         [ #  # ]:          0 :         for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
      91                 :          0 :                 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
      92         [ #  # ]:          0 :                     next->context.ctx_id)
      93                 :          0 :                         continue;
      94                 :            : 
      95                 :          0 :                 *new_asid = asid;
      96                 :          0 :                 *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
      97                 :            :                                next_tlb_gen);
      98                 :          0 :                 return;
      99                 :            :         }
     100                 :            : 
     101                 :            :         /*
     102                 :            :          * We don't currently own an ASID slot on this CPU.
     103                 :            :          * Allocate a slot.
     104                 :            :          */
     105                 :          0 :         *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
     106         [ #  # ]:          0 :         if (*new_asid >= TLB_NR_DYN_ASIDS) {
     107                 :          0 :                 *new_asid = 0;
     108                 :          0 :                 this_cpu_write(cpu_tlbstate.next_asid, 1);
     109                 :            :         }
     110                 :          0 :         *need_flush = true;
     111                 :            : }
     112                 :            : 
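
When no slot matches, the allocation at the end of choose_new_asid() is a plain per-CPU round robin over the dynamic ASID slots. Here is a standalone, single-threaded sketch of that wraparound, assuming TLB_NR_DYN_ASIDS is 6 and next_asid starts at 1 as set by initialize_tlbstate_and_flush() later in this file:

#include <stdio.h>

#define TLB_NR_DYN_ASIDS	6	/* assumed value of the kernel constant */

/* Per-CPU state in the kernel; a plain variable is enough for the sketch. */
static unsigned short next_asid = 1;

static unsigned short alloc_asid_slot(void)
{
	unsigned short new_asid = next_asid++;	/* take the slot, advance */

	if (new_asid >= TLB_NR_DYN_ASIDS) {	/* wrapped: restart at slot 0 */
		new_asid = 0;
		next_asid = 1;
	}
	return new_asid;
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		printf("%u ", alloc_asid_slot());
	printf("\n");	/* 1 2 3 4 5 0 1 2 */
	return 0;
}
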
     113                 :     129742 : static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
     114                 :            : {
     115                 :     129742 :         unsigned long new_mm_cr3;
     116                 :            : 
     117         [ +  - ]:     129742 :         if (need_flush) {
     118                 :     129742 :                 invalidate_user_asid(new_asid);
     119                 :     129742 :                 new_mm_cr3 = build_cr3(pgdir, new_asid);
     120                 :            :         } else {
     121         [ #  # ]:          0 :                 new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
     122                 :            :         }
     123                 :            : 
     124                 :            :         /*
     125                 :            :          * Caution: many callers of this function expect
     126                 :            :          * that load_cr3() is serializing and orders TLB
     127                 :            :          * fills with respect to the mm_cpumask writes.
     128                 :            :          */
     129                 :     129742 :         write_cr3(new_mm_cr3);
     130                 :     129742 : }
     131                 :            : 
     132                 :          0 : void leave_mm(int cpu)
     133                 :            : {
     134                 :          0 :         struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
     135                 :            : 
     136                 :            :         /*
     137                 :            :          * It's plausible that we're in lazy TLB mode while our mm is init_mm.
     138                 :            :          * If so, our callers still expect us to flush the TLB, but there
     139                 :            :          * aren't any user TLB entries in init_mm to worry about.
     140                 :            :          *
     141                 :            :          * This needs to happen before any other sanity checks due to
     142                 :            :          * intel_idle's shenanigans.
     143                 :            :          */
     144         [ #  # ]:          0 :         if (loaded_mm == &init_mm)
     145                 :            :                 return;
     146                 :            : 
     147                 :            :         /* Warn if we're not lazy. */
     148         [ #  # ]:          0 :         WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
     149                 :            : 
     150                 :          0 :         switch_mm(NULL, &init_mm, NULL);
     151                 :            : }
     152                 :            : EXPORT_SYMBOL_GPL(leave_mm);
     153                 :            : 
     154                 :      15777 : void switch_mm(struct mm_struct *prev, struct mm_struct *next,
     155                 :            :                struct task_struct *tsk)
     156                 :            : {
     157                 :      15777 :         unsigned long flags;
     158                 :            : 
     159                 :      15777 :         local_irq_save(flags);
     160                 :      15777 :         switch_mm_irqs_off(prev, next, tsk);
     161                 :      15777 :         local_irq_restore(flags);
     162                 :          0 : }
     163                 :            : 
     164                 :            : static void sync_current_stack_to_mm(struct mm_struct *mm)
     165                 :            : {
     166                 :            :         unsigned long sp = current_stack_pointer;
     167                 :            :         pgd_t *pgd = pgd_offset(mm, sp);
     168                 :            : 
     169                 :            :         if (pgtable_l5_enabled()) {
     170                 :            :                 if (unlikely(pgd_none(*pgd))) {
     171                 :            :                         pgd_t *pgd_ref = pgd_offset_k(sp);
     172                 :            : 
     173                 :            :                         set_pgd(pgd, *pgd_ref);
     174                 :            :                 }
     175                 :            :         } else {
     176                 :            :                 /*
     177                 :            :                  * "pgd" is faked.  The top level entries are "p4d"s, so sync
     178                 :            :                  * the p4d.  This compiles to approximately the same code as
     179                 :            :                  * the 5-level case.
     180                 :            :                  */
     181                 :            :                 p4d_t *p4d = p4d_offset(pgd, sp);
     182                 :            : 
     183                 :            :                 if (unlikely(p4d_none(*p4d))) {
     184                 :            :                         pgd_t *pgd_ref = pgd_offset_k(sp);
     185                 :            :                         p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
     186                 :            : 
     187                 :            :                         set_p4d(p4d, *p4d_ref);
     188                 :            :                 }
     189                 :            :         }
     190                 :            : }
     191                 :            : 
     192                 :          0 : static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
     193                 :            : {
     194                 :          0 :         unsigned long next_tif = task_thread_info(next)->flags;
     195                 :          0 :         unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
     196                 :            : 
     197                 :          0 :         return (unsigned long)next->mm | ibpb;
     198                 :            : }
     199                 :            : 
     200                 :     129742 : static void cond_ibpb(struct task_struct *next)
     201                 :            : {
     202   [ +  +  +  + ]:     129742 :         if (!next || !next->mm)
     203                 :            :                 return;
     204                 :            : 
     205                 :            :         /*
      206                 :            :          * Both the conditional and the always IBPB mode use the mm
     207                 :            :          * pointer to avoid the IBPB when switching between tasks of the
     208                 :            :          * same process. Using the mm pointer instead of mm->context.ctx_id
     209                 :            :          * opens a hypothetical hole vs. mm_struct reuse, which is more or
      210                 :            :          * less impossible to control by an attacker. Aside from that, it
     211                 :            :          * would only affect the first schedule so the theoretically
     212                 :            :          * exposed data is not really interesting.
     213                 :            :          */
     214   [ -  +  +  - ]:      38982 :         if (static_branch_likely(&switch_mm_cond_ibpb)) {
     215                 :          0 :                 unsigned long prev_mm, next_mm;
     216                 :            : 
     217                 :            :                 /*
     218                 :            :                  * This is a bit more complex than the always mode because
     219                 :            :                  * it has to handle two cases:
     220                 :            :                  *
     221                 :            :                  * 1) Switch from a user space task (potential attacker)
     222                 :            :                  *    which has TIF_SPEC_IB set to a user space task
     223                 :            :                  *    (potential victim) which has TIF_SPEC_IB not set.
     224                 :            :                  *
     225                 :            :                  * 2) Switch from a user space task (potential attacker)
     226                 :            :                  *    which has TIF_SPEC_IB not set to a user space task
     227                 :            :                  *    (potential victim) which has TIF_SPEC_IB set.
     228                 :            :                  *
     229                 :            :                  * This could be done by unconditionally issuing IBPB when
     230                 :            :                  * a task which has TIF_SPEC_IB set is either scheduled in
     231                 :            :                  * or out. Though that results in two flushes when:
     232                 :            :                  *
     233                 :            :                  * - the same user space task is scheduled out and later
     234                 :            :                  *   scheduled in again and only a kernel thread ran in
     235                 :            :                  *   between.
     236                 :            :                  *
     237                 :            :                  * - a user space task belonging to the same process is
     238                 :            :                  *   scheduled in after a kernel thread ran in between
     239                 :            :                  *
     240                 :            :                  * - a user space task belonging to the same process is
     241                 :            :                  *   scheduled in immediately.
     242                 :            :                  *
     243                 :            :                  * Optimize this with reasonably small overhead for the
     244                 :            :                  * above cases. Mangle the TIF_SPEC_IB bit into the mm
     245                 :            :                  * pointer of the incoming task which is stored in
     246                 :            :                  * cpu_tlbstate.last_user_mm_ibpb for comparison.
     247                 :            :                  */
     248                 :          0 :                 next_mm = mm_mangle_tif_spec_ib(next);
     249                 :          0 :                 prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
     250                 :            : 
     251                 :            :                 /*
     252                 :            :                  * Issue IBPB only if the mm's are different and one or
     253                 :            :                  * both have the IBPB bit set.
     254                 :            :                  */
     255         [ #  # ]:          0 :                 if (next_mm != prev_mm &&
     256         [ #  # ]:          0 :                     (next_mm | prev_mm) & LAST_USER_MM_IBPB)
     257                 :          0 :                         indirect_branch_prediction_barrier();
     258                 :            : 
     259                 :      38982 :                 this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
     260                 :            :         }
     261                 :            : 
     262   [ +  -  +  - ]:      77964 :         if (static_branch_unlikely(&switch_mm_always_ibpb)) {
     263                 :            :                 /*
     264                 :            :                  * Only flush when switching to a user space task with a
     265                 :            :                  * different context than the user space task which ran
     266                 :            :                  * last on this CPU.
     267                 :            :                  */
     268         [ #  # ]:          0 :                 if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
     269                 :          0 :                         indirect_branch_prediction_barrier();
     270                 :      38982 :                         this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
     271                 :            :                 }
     272                 :            :         }
     273                 :            : }
     274                 :            : 
     275                 :     140820 : void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
     276                 :            :                         struct task_struct *tsk)
     277                 :            : {
     278                 :     140820 :         struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
     279                 :     140820 :         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
     280                 :     140820 :         bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
     281                 :     140820 :         unsigned cpu = smp_processor_id();
     282                 :     140820 :         u64 next_tlb_gen;
     283                 :     140820 :         bool need_flush;
     284                 :     140820 :         u16 new_asid;
     285                 :            : 
     286                 :            :         /*
     287                 :            :          * NB: The scheduler will call us with prev == next when switching
     288                 :            :          * from lazy TLB mode to normal mode if active_mm isn't changing.
     289                 :            :          * When this happens, we don't assume that CR3 (and hence
     290                 :            :          * cpu_tlbstate.loaded_mm) matches next.
     291                 :            :          *
     292                 :            :          * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
     293                 :            :          */
     294                 :            : 
     295                 :            :         /* We don't want flush_tlb_func_* to run concurrently with us. */
     296                 :     140820 :         if (IS_ENABLED(CONFIG_PROVE_LOCKING))
     297                 :            :                 WARN_ON_ONCE(!irqs_disabled());
     298                 :            : 
     299                 :            :         /*
     300                 :            :          * Verify that CR3 is what we think it is.  This will catch
     301                 :            :          * hypothetical buggy code that directly switches to swapper_pg_dir
     302                 :            :          * without going through leave_mm() / switch_mm_irqs_off() or that
     303                 :            :          * does something like write_cr3(read_cr3_pa()).
     304                 :            :          *
     305                 :            :          * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
     306                 :            :          * isn't free.
     307                 :            :          */
     308                 :            : #ifdef CONFIG_DEBUG_VM
     309                 :            :         if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
     310                 :            :                 /*
     311                 :            :                  * If we were to BUG here, we'd be very likely to kill
     312                 :            :                  * the system so hard that we don't see the call trace.
     313                 :            :                  * Try to recover instead by ignoring the error and doing
     314                 :            :                  * a global flush to minimize the chance of corruption.
     315                 :            :                  *
     316                 :            :                  * (This is far from being a fully correct recovery.
     317                 :            :                  *  Architecturally, the CPU could prefetch something
     318                 :            :                  *  back into an incorrect ASID slot and leave it there
     319                 :            :                  *  to cause trouble down the road.  It's better than
     320                 :            :                  *  nothing, though.)
     321                 :            :                  */
     322                 :            :                 __flush_tlb_all();
     323                 :            :         }
     324                 :            : #endif
     325                 :     140820 :         this_cpu_write(cpu_tlbstate.is_lazy, false);
     326                 :            : 
     327                 :            :         /*
     328                 :            :          * The membarrier system call requires a full memory barrier and
     329                 :            :          * core serialization before returning to user-space, after
     330                 :            :          * storing to rq->curr. Writing to CR3 provides that full
     331                 :            :          * memory barrier and core serializing instruction.
     332                 :            :          */
     333         [ +  + ]:     140820 :         if (real_prev == next) {
     334                 :      11078 :                 VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
     335                 :            :                            next->context.ctx_id);
     336                 :            : 
     337                 :            :                 /*
     338                 :            :                  * Even in lazy TLB mode, the CPU should stay set in the
     339                 :            :                  * mm_cpumask. The TLB shootdown code can figure out from
      340                 :            :                  * cpu_tlbstate.is_lazy whether or not to send an IPI.
     341                 :            :                  */
     342   [ +  -  +  -  :      33234 :                 if (WARN_ON_ONCE(real_prev != &init_mm &&
             -  +  -  + ]
     343                 :            :                                  !cpumask_test_cpu(cpu, mm_cpumask(next))))
     344                 :          0 :                         cpumask_set_cpu(cpu, mm_cpumask(next));
     345                 :            : 
     346                 :            :                 /*
     347                 :            :                  * If the CPU is not in lazy TLB mode, we are just switching
     348                 :            :                  * from one thread in a process to another thread in the same
     349                 :            :                  * process. No TLB flush required.
     350                 :            :                  */
     351         [ +  + ]:      11078 :                 if (!was_lazy)
     352                 :      11078 :                         return;
     353                 :            : 
     354                 :            :                 /*
     355                 :            :                  * Read the tlb_gen to check whether a flush is needed.
     356                 :            :                  * If the TLB is up to date, just use it.
     357                 :            :                  * The barrier synchronizes with the tlb_gen increment in
     358                 :            :                  * the TLB shootdown code.
     359                 :            :                  */
     360                 :      10996 :                 smp_mb();
     361                 :      10996 :                 next_tlb_gen = atomic64_read(&next->context.tlb_gen);
     362         [ -  + ]:      10996 :                 if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
     363                 :            :                                 next_tlb_gen)
     364                 :            :                         return;
     365                 :            : 
     366                 :            :                 /*
     367                 :            :                  * TLB contents went out of date while we were in lazy
     368                 :            :                  * mode. Fall through to the TLB switching code below.
     369                 :            :                  */
     370                 :          0 :                 new_asid = prev_asid;
     371                 :          0 :                 need_flush = true;
     372                 :            :         } else {
     373                 :            :                 /*
     374                 :            :                  * Avoid user/user BTB poisoning by flushing the branch
     375                 :            :                  * predictor when switching between processes. This stops
     376                 :            :                  * one process from doing Spectre-v2 attacks on another.
     377                 :            :                  */
     378                 :     129742 :                 cond_ibpb(tsk);
     379                 :            : 
     380                 :     129742 :                 if (IS_ENABLED(CONFIG_VMAP_STACK)) {
     381                 :            :                         /*
     382                 :            :                          * If our current stack is in vmalloc space and isn't
     383                 :            :                          * mapped in the new pgd, we'll double-fault.  Forcibly
     384                 :            :                          * map it.
     385                 :            :                          */
     386                 :            :                         sync_current_stack_to_mm(next);
     387                 :            :                 }
     388                 :            : 
     389                 :            :                 /*
     390                 :            :                  * Stop remote flushes for the previous mm.
     391                 :            :                  * Skip kernel threads; we never send init_mm TLB flushing IPIs,
     392                 :            :                  * but the bitmap manipulation can cause cache line contention.
     393                 :            :                  */
     394         [ +  + ]:     129742 :                 if (real_prev != &init_mm) {
     395                 :      84399 :                         VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
     396                 :            :                                                 mm_cpumask(real_prev)));
     397                 :      84399 :                         cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
     398                 :            :                 }
     399                 :            : 
     400                 :            :                 /*
     401                 :            :                  * Start remote flushes and then read tlb_gen.
     402                 :            :                  */
     403         [ +  + ]:     129742 :                 if (next != &init_mm)
     404                 :      84402 :                         cpumask_set_cpu(cpu, mm_cpumask(next));
     405                 :     129742 :                 next_tlb_gen = atomic64_read(&next->context.tlb_gen);
     406                 :            : 
     407                 :     129742 :                 choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
     408                 :            : 
     409                 :            :                 /* Let nmi_uaccess_okay() know that we're changing CR3. */
     410                 :     129742 :                 this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
     411                 :     129742 :                 barrier();
     412                 :            :         }
     413                 :            : 
     414         [ +  - ]:     129742 :         if (need_flush) {
     415                 :     129742 :                 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
     416                 :     129742 :                 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
     417                 :     129742 :                 load_new_mm_cr3(next->pgd, new_asid, true);
     418                 :            : 
     419                 :            :                 /*
     420                 :            :                  * NB: This gets called via leave_mm() in the idle path
     421                 :            :                  * where RCU functions differently.  Tracing normally
     422                 :            :                  * uses RCU, so we need to use the _rcuidle variant.
     423                 :            :                  *
     424                 :            :                  * (There is no good reason for this.  The idle code should
     425                 :            :                  *  be rearranged to call this before rcu_idle_enter().)
     426                 :            :                  */
     427                 :     129742 :                 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
     428                 :            :         } else {
     429                 :            :                 /* The new ASID is already up to date. */
     430                 :          0 :                 load_new_mm_cr3(next->pgd, new_asid, false);
     431                 :            : 
     432                 :            :                 /* See above wrt _rcuidle. */
     433                 :          0 :                 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
     434                 :            :         }
     435                 :            : 
     436                 :            :         /* Make sure we write CR3 before loaded_mm. */
     437                 :     129742 :         barrier();
     438                 :            : 
     439                 :     129742 :         this_cpu_write(cpu_tlbstate.loaded_mm, next);
     440                 :     129742 :         this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
     441                 :            : 
     442         [ +  - ]:     129742 :         if (next != real_prev) {
     443                 :     129742 :                 load_mm_cr4_irqsoff(next);
     444                 :     129742 :                 switch_ldt(real_prev, next);
     445                 :            :         }
     446                 :            : }
     447                 :            : 
     448                 :            : /*
     449                 :            :  * Please ignore the name of this function.  It should be called
     450                 :            :  * switch_to_kernel_thread().
     451                 :            :  *
     452                 :            :  * enter_lazy_tlb() is a hint from the scheduler that we are entering a
     453                 :            :  * kernel thread or other context without an mm.  Acceptable implementations
     454                 :            :  * include doing nothing whatsoever, switching to init_mm, or various clever
     455                 :            :  * lazy tricks to try to minimize TLB flushes.
     456                 :            :  *
     457                 :            :  * The scheduler reserves the right to call enter_lazy_tlb() several times
     458                 :            :  * in a row.  It will notify us that we're going back to a real mm by
     459                 :            :  * calling switch_mm_irqs_off().
     460                 :            :  */
     461                 :      53883 : void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
     462                 :            : {
     463         [ +  + ]:      53883 :         if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
     464                 :            :                 return;
     465                 :            : 
     466                 :      48442 :         this_cpu_write(cpu_tlbstate.is_lazy, true);
     467                 :            : }
     468                 :            : 
     469                 :            : /*
     470                 :            :  * Call this when reinitializing a CPU.  It fixes the following potential
     471                 :            :  * problems:
     472                 :            :  *
     473                 :            :  * - The ASID changed from what cpu_tlbstate thinks it is (most likely
     474                 :            :  *   because the CPU was taken down and came back up with CR3's PCID
      475                 :            :  *   bits clear).  CPU hotplug can do this.
     476                 :            :  *
     477                 :            :  * - The TLB contains junk in slots corresponding to inactive ASIDs.
     478                 :            :  *
     479                 :            :  * - The CPU went so far out to lunch that it may have missed a TLB
     480                 :            :  *   flush.
     481                 :            :  */
     482                 :          3 : void initialize_tlbstate_and_flush(void)
     483                 :            : {
     484                 :          3 :         int i;
     485                 :          3 :         struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
     486                 :          3 :         u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
     487                 :          3 :         unsigned long cr3 = __read_cr3();
     488                 :            : 
     489                 :            :         /* Assert that CR3 already references the right mm. */
     490   [ -  +  -  + ]:          6 :         WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
     491                 :            : 
     492                 :            :         /*
     493                 :            :          * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
     494                 :            :          * doesn't work like other CR4 bits because it can only be set from
     495                 :            :          * long mode.)
     496                 :            :          */
     497   [ -  +  -  -  :          3 :         WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
                   -  + ]
     498                 :            :                 !(cr4_read_shadow() & X86_CR4_PCIDE));
     499                 :            : 
     500                 :            :         /* Force ASID 0 and force a TLB flush. */
     501                 :          3 :         write_cr3(build_cr3(mm->pgd, 0));
     502                 :            : 
     503                 :            :         /* Reinitialize tlbstate. */
     504                 :          3 :         this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
     505                 :          3 :         this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
     506                 :          3 :         this_cpu_write(cpu_tlbstate.next_asid, 1);
     507                 :          3 :         this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
     508                 :          3 :         this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
     509                 :            : 
     510         [ +  + ]:         21 :         for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
     511                 :         15 :                 this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
     512                 :          3 : }
     513                 :            : 
     514                 :            : /*
     515                 :            :  * flush_tlb_func_common()'s memory ordering requirement is that any
     516                 :            :  * TLB fills that happen after we flush the TLB are ordered after we
     517                 :            :  * read active_mm's tlb_gen.  We don't need any explicit barriers
     518                 :            :  * because all x86 flush operations are serializing and the
     519                 :            :  * atomic64_read operation won't be reordered by the compiler.
     520                 :            :  */
     521                 :     367453 : static void flush_tlb_func_common(const struct flush_tlb_info *f,
     522                 :            :                                   bool local, enum tlb_flush_reason reason)
     523                 :            : {
     524                 :            :         /*
     525                 :            :          * We have three different tlb_gen values in here.  They are:
     526                 :            :          *
     527                 :            :          * - mm_tlb_gen:     the latest generation.
     528                 :            :          * - local_tlb_gen:  the generation that this CPU has already caught
     529                 :            :          *                   up to.
     530                 :            :          * - f->new_tlb_gen: the generation that the requester of the flush
     531                 :            :          *                   wants us to catch up to.
     532                 :            :          */
     533                 :     367453 :         struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
     534                 :     367453 :         u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
     535                 :     367453 :         u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
     536                 :     367453 :         u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
     537                 :            : 
     538                 :            :         /* This code cannot presently handle being reentered. */
     539                 :     367453 :         VM_WARN_ON(!irqs_disabled());
     540                 :            : 
     541         [ +  + ]:     367453 :         if (unlikely(loaded_mm == &init_mm))
     542                 :            :                 return;
     543                 :            : 
     544                 :     367450 :         VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
     545                 :            :                    loaded_mm->context.ctx_id);
     546                 :            : 
     547         [ +  + ]:     367450 :         if (this_cpu_read(cpu_tlbstate.is_lazy)) {
     548                 :            :                 /*
     549                 :            :                  * We're in lazy mode.  We need to at least flush our
     550                 :            :                  * paging-structure cache to avoid speculatively reading
     551                 :            :                  * garbage into our TLB.  Since switching to init_mm is barely
     552                 :            :                  * slower than a minimal flush, just switch to init_mm.
     553                 :            :                  *
     554                 :            :                  * This should be rare, with native_flush_tlb_others skipping
     555                 :            :                  * IPIs to lazy TLB mode CPUs.
     556                 :            :                  */
     557                 :      15733 :                 switch_mm_irqs_off(NULL, &init_mm, NULL);
     558                 :      15733 :                 return;
     559                 :            :         }
     560                 :            : 
     561         [ -  + ]:     351717 :         if (unlikely(local_tlb_gen == mm_tlb_gen)) {
     562                 :            :                 /*
     563                 :            :                  * There's nothing to do: we're already up to date.  This can
     564                 :            :                  * happen if two concurrent flushes happen -- the first flush to
     565                 :            :                  * be handled can catch us all the way up, leaving no work for
     566                 :            :                  * the second flush.
     567                 :            :                  */
     568                 :          0 :                 trace_tlb_flush(reason, 0);
     569                 :          0 :                 return;
     570                 :            :         }
     571                 :            : 
     572         [ -  + ]:     351717 :         WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
     573         [ -  + ]:     351717 :         WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
     574                 :            : 
     575                 :            :         /*
     576                 :            :          * If we get to this point, we know that our TLB is out of date.
     577                 :            :          * This does not strictly imply that we need to flush (it's
     578                 :            :          * possible that f->new_tlb_gen <= local_tlb_gen), but we're
     579                 :            :          * going to need to flush in the very near future, so we might
     580                 :            :          * as well get it over with.
     581                 :            :          *
     582                 :            :          * The only question is whether to do a full or partial flush.
     583                 :            :          *
     584                 :            :          * We do a partial flush if requested and two extra conditions
     585                 :            :          * are met:
     586                 :            :          *
     587                 :            :          * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
     588                 :            :          *    we've always done all needed flushes to catch up to
     589                 :            :          *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
     590                 :            :          *    f->new_tlb_gen == 3, then we know that the flush needed to bring
     591                 :            :          *    us up to date for tlb_gen 3 is the partial flush we're
     592                 :            :          *    processing.
     593                 :            :          *
     594                 :            :          *    As an example of why this check is needed, suppose that there
     595                 :            :          *    are two concurrent flushes.  The first is a full flush that
     596                 :            :          *    changes context.tlb_gen from 1 to 2.  The second is a partial
     597                 :            :          *    flush that changes context.tlb_gen from 2 to 3.  If they get
     598                 :            :          *    processed on this CPU in reverse order, we'll see
     599                 :            :          *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
     600                 :            :          *    If we were to use __flush_tlb_one_user() and set local_tlb_gen to
      601                 :            :          *    3, we'd break the invariant: we'd update local_tlb_gen above
     602                 :            :          *    1 without the full flush that's needed for tlb_gen 2.
     603                 :            :          *
      604                 :            :          * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
     605                 :            :          *    Partial TLB flushes are not all that much cheaper than full TLB
     606                 :            :          *    flushes, so it seems unlikely that it would be a performance win
     607                 :            :          *    to do a partial flush if that won't bring our TLB fully up to
     608                 :            :          *    date.  By doing a full flush instead, we can increase
     609                 :            :          *    local_tlb_gen all the way to mm_tlb_gen and we can probably
     610                 :            :          *    avoid another flush in the very near future.
     611                 :            :          */
     612         [ +  + ]:     351717 :         if (f->end != TLB_FLUSH_ALL &&
     613   [ +  -  +  - ]:     320403 :             f->new_tlb_gen == local_tlb_gen + 1 &&
     614                 :     320403 :             f->new_tlb_gen == mm_tlb_gen) {
     615                 :            :                 /* Partial flush */
     616                 :     320403 :                 unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
     617                 :     320403 :                 unsigned long addr = f->start;
     618                 :            : 
     619         [ +  + ]:     740135 :                 while (addr < f->end) {
     620                 :     419732 :                         __flush_tlb_one_user(addr);
     621                 :     419732 :                         addr += 1UL << f->stride_shift;
     622                 :            :                 }
     623                 :     320403 :                 if (local)
     624                 :     320403 :                         count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
     625                 :     320403 :                 trace_tlb_flush(reason, nr_invalidate);
     626                 :            :         } else {
     627                 :            :                 /* Full flush. */
     628                 :      31314 :                 local_flush_tlb();
     629                 :      31314 :                 if (local)
     630                 :      31314 :                         count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
     631                 :      31314 :                 trace_tlb_flush(reason, TLB_FLUSH_ALL);
     632                 :            :         }
     633                 :            : 
     634                 :            :         /* Both paths above update our state to mm_tlb_gen. */
     635                 :     351717 :         this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
     636                 :            : }
     637                 :            : 
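
The partial-versus-full decision above reduces to two generation checks. Below is a standalone sketch of just that predicate, exercised with the worked example from the comment; do_partial_flush() is an illustrative helper, not a kernel function:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * A partial (ranged) flush is only safe when the requested generation is
 * exactly one ahead of what this CPU has already done, and only worthwhile
 * when it also brings the CPU fully up to date.
 */
static bool do_partial_flush(bool ranged, uint64_t local_gen,
			     uint64_t new_gen, uint64_t mm_gen)
{
	return ranged && new_gen == local_gen + 1 && new_gen == mm_gen;
}

int main(void)
{
	/* Flushes processed in reverse order, as in the comment's example. */
	printf("%d\n", do_partial_flush(true, 1, 3, 3));	/* 0: full flush required */
	/* Common case: one ranged flush outstanding, nothing else pending. */
	printf("%d\n", do_partial_flush(true, 2, 3, 3));	/* 1: partial flush is safe */
	return 0;
}
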
     638                 :     367453 : static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
     639                 :            : {
     640                 :     367453 :         const struct flush_tlb_info *f = info;
     641                 :            : 
     642                 :     367453 :         flush_tlb_func_common(f, true, reason);
     643                 :            : }
     644                 :            : 
     645                 :          0 : static void flush_tlb_func_remote(void *info)
     646                 :            : {
     647                 :          0 :         const struct flush_tlb_info *f = info;
     648                 :            : 
     649                 :          0 :         inc_irq_stat(irq_tlb_count);
     650                 :            : 
     651   [ #  #  #  # ]:          0 :         if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
     652                 :            :                 return;
     653                 :            : 
     654                 :          0 :         count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
     655                 :          0 :         flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
     656                 :            : }
     657                 :            : 
     658                 :          0 : static bool tlb_is_not_lazy(int cpu, void *data)
     659                 :            : {
     660                 :          0 :         return !per_cpu(cpu_tlbstate.is_lazy, cpu);
     661                 :            : }
     662                 :            : 
     663                 :          0 : void native_flush_tlb_others(const struct cpumask *cpumask,
     664                 :            :                              const struct flush_tlb_info *info)
     665                 :            : {
     666                 :          0 :         count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
     667         [ #  # ]:          0 :         if (info->end == TLB_FLUSH_ALL)
     668                 :          0 :                 trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
     669                 :            :         else
     670                 :          0 :                 trace_tlb_flush(TLB_REMOTE_SEND_IPI,
     671                 :          0 :                                 (info->end - info->start) >> PAGE_SHIFT);
     672                 :            : 
     673         [ #  # ]:          0 :         if (is_uv_system()) {
     674                 :            :                 /*
     675                 :            :                  * This whole special case is confused.  UV has a "Broadcast
     676                 :            :                  * Assist Unit", which seems to be a fancy way to send IPIs.
     677                 :            :                  * Back when x86 used an explicit TLB flush IPI, UV was
     678                 :            :                  * optimized to use its own mechanism.  These days, x86 uses
     679                 :            :                  * smp_call_function_many(), but UV still uses a manual IPI,
     680                 :            :                  * and that IPI's action is out of date -- it does a manual
     681                 :            :                  * flush instead of calling flush_tlb_func_remote().  This
     682                 :            :                  * means that the percpu tlb_gen variables won't be updated
     683                 :            :                  * and we'll do pointless flushes on future context switches.
     684                 :            :                  *
     685                 :            :                  * Rather than hooking native_flush_tlb_others() here, I think
     686                 :            :                  * that UV should be updated so that smp_call_function_many(),
     687                 :            :                  * etc, are optimal on UV.
     688                 :            :                  */
     689                 :            :                 cpumask = uv_flush_tlb_others(cpumask, info);
     690                 :            :                 if (cpumask)
     691                 :            :                         smp_call_function_many(cpumask, flush_tlb_func_remote,
     692                 :            :                                                (void *)info, 1);
     693                 :            :                 return;
     694                 :            :         }
     695                 :            : 
     696                 :            :         /*
     697                 :            :          * If no page tables were freed, we can skip sending IPIs to
     698                 :            :          * CPUs in lazy TLB mode. They will flush the CPU themselves
     699                 :            :          * at the next context switch.
     700                 :            :          *
     701                 :            :          * However, if page tables are getting freed, we need to send the
     702                 :            :          * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
     703                 :            :          * up on the new contents of what used to be page tables, while
     704                 :            :          * doing a speculative memory access.
     705                 :            :          */
     706         [ #  # ]:          0 :         if (info->freed_tables)
     707                 :          0 :                 smp_call_function_many(cpumask, flush_tlb_func_remote,
     708                 :            :                                (void *)info, 1);
     709                 :            :         else
     710                 :          0 :                 on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
     711                 :            :                                 (void *)info, 1, cpumask);
     712                 :            : }
     713                 :            : 
     714                 :            : /*
     715                 :            :  * See Documentation/x86/tlb.rst for details.  We choose 33
     716                 :            :  * because it is large enough to cover the vast majority (at
     717                 :            :  * least 95%) of allocations, and is small enough that we are
     718                 :            :  * confident it will not cause too much overhead.  Each single
     719                 :            :  * flush is about 100 ns, so this caps the maximum overhead at
      720                 :            :  * _about_ 3,300 ns (33 * ~100 ns).
     721                 :            :  *
     722                 :            :  * This is in units of pages.
     723                 :            :  */
     724                 :            : unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
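
To make the ceiling concrete, here is a small, self-contained userspace model of the
range-vs-full-flush decision applied in flush_tlb_mm_range() further down; the 4 KiB
page size and the ~100 ns per-invlpg cost are taken from the comment above and are
assumptions for illustration, not kernel API:

    #include <stdbool.h>
    #include <stdio.h>

    #define EXAMPLE_PAGE_SHIFT  12  /* assume 4 KiB pages */
    #define EXAMPLE_CEILING     33  /* mirrors tlb_single_page_flush_ceiling */

    /* True when the range is large enough that flushing the whole TLB is
     * cheaper than issuing one invlpg per page. */
    static bool would_flush_whole_tlb(unsigned long start, unsigned long end)
    {
            return ((end - start) >> EXAMPLE_PAGE_SHIFT) > EXAMPLE_CEILING;
    }

    int main(void)
    {
            /* 32 pages: under the ceiling, flushed page by page (~3.2 us). */
            printf("128 KiB -> full flush? %d\n",
                   would_flush_whole_tlb(0, 128UL << 10));
            /* 64 pages: over the ceiling, the whole TLB is flushed instead. */
            printf("256 KiB -> full flush? %d\n",
                   would_flush_whole_tlb(0, 256UL << 10));
            return 0;
    }
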
     725                 :            : 
     726                 :            : static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
     727                 :            : 
     728                 :            : #ifdef CONFIG_DEBUG_VM
     729                 :            : static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
     730                 :            : #endif
     731                 :            : 
     732                 :     413053 : static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
     733                 :            :                         unsigned long start, unsigned long end,
     734                 :            :                         unsigned int stride_shift, bool freed_tables,
     735                 :            :                         u64 new_tlb_gen)
     736                 :            : {
     737                 :     826106 :         struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
     738                 :            : 
     739                 :            : #ifdef CONFIG_DEBUG_VM
     740                 :            :         /*
     741                 :            :          * Ensure that the following code is non-reentrant and flush_tlb_info
     742                 :            :          * is not overwritten. This means no TLB flushing is initiated by
     743                 :            :          * interrupt handlers and machine-check exception handlers.
     744                 :            :          */
     745                 :            :         BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
     746                 :            : #endif
     747                 :            : 
     748                 :     413053 :         info->start          = start;
     749                 :     413053 :         info->end            = end;
     750                 :     413053 :         info->mm             = mm;
     751                 :     413053 :         info->stride_shift   = stride_shift;
     752                 :     413053 :         info->freed_tables   = freed_tables;
     753                 :     413053 :         info->new_tlb_gen    = new_tlb_gen;
     754                 :            : 
     755                 :     413053 :         return info;
     756                 :            : }
     757                 :            : 
     758                 :     413053 : static inline void put_flush_tlb_info(void)
     759                 :            : {
     760                 :            : #ifdef CONFIG_DEBUG_VM
      761                 :            :         /* Complete reentrancy prevention checks */
     762                 :            :         barrier();
     763                 :            :         this_cpu_dec(flush_tlb_info_idx);
     764                 :            : #endif
     765                 :     413053 : }
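
The two helpers above bracket a single flush operation on the current CPU, since
flush_tlb_info is one per-CPU slot; a hypothetical caller (illustrative only,
mirroring the pattern flush_tlb_kernel_range() uses further down) would pair them
like this:

    static void example_flush_section(unsigned long start, unsigned long end)
    {
            struct flush_tlb_info *info;

            preempt_disable();      /* keep the per-CPU slot stable */
            info = get_flush_tlb_info(NULL, start, end, 0, false, 0);

            /* ... issue the local and/or remote flushes that need @info ... */

            put_flush_tlb_info();   /* drops the CONFIG_DEBUG_VM nesting count */
            preempt_enable();
    }
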
     766                 :            : 
     767                 :     413040 : void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
     768                 :            :                                 unsigned long end, unsigned int stride_shift,
     769                 :            :                                 bool freed_tables)
     770                 :            : {
     771                 :     413040 :         struct flush_tlb_info *info;
     772                 :     413040 :         u64 new_tlb_gen;
     773                 :     413040 :         int cpu;
     774                 :            : 
     775         [ +  + ]:     413040 :         cpu = get_cpu();
     776                 :            : 
     777                 :            :         /* Should we flush just the requested range? */
     778         [ +  + ]:     413040 :         if ((end == TLB_FLUSH_ALL) ||
     779         [ +  + ]:     365727 :             ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
     780                 :      62826 :                 start = 0;
     781                 :      62826 :                 end = TLB_FLUSH_ALL;
     782                 :            :         }
     783                 :            : 
     784                 :            :         /* This is also a barrier that synchronizes with switch_mm(). */
     785                 :     413040 :         new_tlb_gen = inc_mm_tlb_gen(mm);
     786                 :            : 
     787                 :     413040 :         info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
     788                 :            :                                   new_tlb_gen);
     789                 :            : 
     790         [ +  + ]:     413040 :         if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
     791                 :     367453 :                 lockdep_assert_irqs_enabled();
     792                 :     367453 :                 local_irq_disable();
     793                 :     367453 :                 flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
     794                 :     367453 :                 local_irq_enable();
     795                 :            :         }
     796                 :            : 
     797         [ -  + ]:     413040 :         if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
     798                 :          0 :                 flush_tlb_others(mm_cpumask(mm), info);
     799                 :            : 
     800                 :     413040 :         put_flush_tlb_info();
     801                 :     413040 :         put_cpu();
     802                 :     413040 : }
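
As an illustration of a typical caller (a sketch, roughly what the flush_tlb_page()
helper in <asm/tlbflush.h> boils down to in this kernel generation): flush one
4 KiB page of an mm whose PTE just changed, with no page-table pages freed:

    static inline void example_flush_one_user_page(struct mm_struct *mm,
                                                   unsigned long addr)
    {
            /* stride_shift = PAGE_SHIFT: one entry per 4 KiB page;
             * freed_tables = false: lazy-TLB CPUs can be skipped. */
            flush_tlb_mm_range(mm, addr, addr + PAGE_SIZE, PAGE_SHIFT, false);
    }
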
     803                 :            : 
     804                 :            : 
     805                 :        175 : static void do_flush_tlb_all(void *info)
     806                 :            : {
     807                 :        175 :         count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
     808                 :        175 :         __flush_tlb_all();
     809                 :        175 : }
     810                 :            : 
     811                 :         85 : void flush_tlb_all(void)
     812                 :            : {
     813                 :         85 :         count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
     814                 :         85 :         on_each_cpu(do_flush_tlb_all, NULL, 1);
     815                 :         85 : }
     816                 :            : 
     817                 :         13 : static void do_kernel_range_flush(void *info)
     818                 :            : {
     819                 :         13 :         struct flush_tlb_info *f = info;
     820                 :         13 :         unsigned long addr;
     821                 :            : 
      822                 :            :         /* Flush the range one page at a time with 'invlpg' */
     823         [ +  + ]:        113 :         for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
     824                 :        100 :                 __flush_tlb_one_kernel(addr);
     825                 :         13 : }
     826                 :            : 
     827                 :        103 : void flush_tlb_kernel_range(unsigned long start, unsigned long end)
     828                 :            : {
      829                 :            :         /* Use the same balance point as a user space task's flush; a bit conservative */
     830         [ +  - ]:        103 :         if (end == TLB_FLUSH_ALL ||
     831         [ +  + ]:        103 :             (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
     832                 :         90 :                 on_each_cpu(do_flush_tlb_all, NULL, 1);
     833                 :            :         } else {
     834                 :         13 :                 struct flush_tlb_info *info;
     835                 :            : 
     836                 :         13 :                 preempt_disable();
     837                 :         13 :                 info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
     838                 :            : 
     839                 :         13 :                 on_each_cpu(do_kernel_range_flush, info, 1);
     840                 :            : 
     841                 :         13 :                 put_flush_tlb_info();
     842                 :         13 :                 preempt_enable();
     843                 :            :         }
     844                 :        103 : }
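
A hypothetical user of the interface above: once kernel PTEs for a range have been
cleared (for instance when a vmalloc area is torn down), the range has to be flushed
on every CPU, because kernel mappings are global and have no mm to track them:

    /* Hypothetical helper, not part of this file. */
    static void example_flush_unmapped_kernel_pages(unsigned long addr,
                                                    unsigned int nr_pages)
    {
            flush_tlb_kernel_range(addr,
                                   addr + (unsigned long)nr_pages * PAGE_SIZE);
    }
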
     845                 :            : 
     846                 :            : /*
     847                 :            :  * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
     848                 :            :  * This means that the 'struct flush_tlb_info' that describes which mappings to
     849                 :            :  * flush is actually fixed. We therefore set a single fixed struct and use it in
     850                 :            :  * arch_tlbbatch_flush().
     851                 :            :  */
     852                 :            : static const struct flush_tlb_info full_flush_tlb_info = {
     853                 :            :         .mm = NULL,
     854                 :            :         .start = 0,
     855                 :            :         .end = TLB_FLUSH_ALL,
     856                 :            : };
     857                 :            : 
     858                 :          0 : void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
     859                 :            : {
     860                 :          0 :         int cpu = get_cpu();
     861                 :            : 
     862         [ #  # ]:          0 :         if (cpumask_test_cpu(cpu, &batch->cpumask)) {
     863                 :          0 :                 lockdep_assert_irqs_enabled();
     864                 :          0 :                 local_irq_disable();
     865                 :          0 :                 flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
     866                 :          0 :                 local_irq_enable();
     867                 :            :         }
     868                 :            : 
     869         [ #  # ]:          0 :         if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
     870                 :          0 :                 flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
     871                 :            : 
     872                 :          0 :         cpumask_clear(&batch->cpumask);
     873                 :            : 
     874                 :          0 :         put_cpu();
     875                 :          0 : }
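
The cpumask in @batch is filled in by the page-reclaim side before this function
runs; a rough sketch of that pairing (arch_tlbbatch_add_mm() lives in
<asm/tlbflush.h> in this kernel generation, and the actual PTE-clearing step is
elided):

    static void example_batched_unmap(struct arch_tlbflush_unmap_batch *batch,
                                      struct mm_struct *mm)
    {
            /* Record which CPUs may hold stale entries for @mm ... */
            arch_tlbbatch_add_mm(batch, mm);

            /* ... clear PTEs for potentially many pages and mms here ... */

            /* ... then pay for one full flush at the very end. */
            arch_tlbbatch_flush(batch);
    }
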
     876                 :            : 
     877                 :          0 : static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
     878                 :            :                              size_t count, loff_t *ppos)
     879                 :            : {
     880                 :          0 :         char buf[32];
     881                 :          0 :         unsigned int len;
     882                 :            : 
      883                 :          0 :         len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
     884                 :          0 :         return simple_read_from_buffer(user_buf, count, ppos, buf, len);
     885                 :            : }
     886                 :            : 
     887                 :          0 : static ssize_t tlbflush_write_file(struct file *file,
     888                 :            :                  const char __user *user_buf, size_t count, loff_t *ppos)
     889                 :            : {
     890                 :          0 :         char buf[32];
     891                 :          0 :         ssize_t len;
     892                 :          0 :         int ceiling;
     893                 :            : 
     894                 :          0 :         len = min(count, sizeof(buf) - 1);
     895         [ #  # ]:          0 :         if (copy_from_user(buf, user_buf, len))
     896                 :            :                 return -EFAULT;
     897                 :            : 
     898                 :          0 :         buf[len] = '\0';
     899         [ #  # ]:          0 :         if (kstrtoint(buf, 0, &ceiling))
     900                 :            :                 return -EINVAL;
     901                 :            : 
     902         [ #  # ]:          0 :         if (ceiling < 0)
     903                 :            :                 return -EINVAL;
     904                 :            : 
     905                 :          0 :         tlb_single_page_flush_ceiling = ceiling;
     906                 :          0 :         return count;
     907                 :            : }
     908                 :            : 
     909                 :            : static const struct file_operations fops_tlbflush = {
     910                 :            :         .read = tlbflush_read_file,
     911                 :            :         .write = tlbflush_write_file,
     912                 :            :         .llseek = default_llseek,
     913                 :            : };
     914                 :            : 
     915                 :          3 : static int __init create_tlb_single_page_flush_ceiling(void)
     916                 :            : {
     917                 :          3 :         debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
     918                 :            :                             arch_debugfs_dir, NULL, &fops_tlbflush);
     919                 :          3 :         return 0;
     920                 :            : }
     921                 :            : late_initcall(create_tlb_single_page_flush_ceiling);
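
Since arch_debugfs_dir is the x86 directory under debugfs, the knob registered above
normally shows up as /sys/kernel/debug/x86/tlb_single_page_flush_ceiling. A small
userspace illustration (assuming debugfs is mounted at its usual location and the
process has the required privileges) that reads the current value:

    #include <stdio.h>

    int main(void)
    {
            const char *path =
                    "/sys/kernel/debug/x86/tlb_single_page_flush_ceiling";
            unsigned long ceiling;
            FILE *f = fopen(path, "r");

            if (!f) {
                    perror(path);   /* needs root and a mounted debugfs */
                    return 1;
            }
            if (fscanf(f, "%lu", &ceiling) == 1)
                    printf("per-page flush ceiling: %lu pages\n", ceiling);
            fclose(f);
            return 0;
    }

Writing a decimal value to the same file goes through tlbflush_write_file() above
and takes effect immediately.
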

Generated by: LCOV version 1.14