LCOV - combined.info - arch/x86/kernel/cpu/mce/core.c

LCOV - code coverage report

Current view:	top level - arch/x86/kernel/cpu/mce - core.c (source / functions)		Hit	Total	Coverage
Test:	combined.info	Lines:	261	1009	25.9 %
Date:	2022-04-01 13:59:58	Functions:	28	99	28.3 %
		Branches:	80	598	13.4 %

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: GPL-2.0-only
       2                 :            : /*
       3                 :            :  * Machine check handler.
       4                 :            :  *
       5                 :            :  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
       6                 :            :  * Rest from unknown author(s).
       7                 :            :  * 2004 Andi Kleen. Rewrote most of it.
       8                 :            :  * Copyright 2008 Intel Corporation
       9                 :            :  * Author: Andi Kleen
      10                 :            :  */
      11                 :            : 
      12                 :            : #include <linux/thread_info.h>
      13                 :            : #include <linux/capability.h>
      14                 :            : #include <linux/miscdevice.h>
      15                 :            : #include <linux/ratelimit.h>
      16                 :            : #include <linux/rcupdate.h>
      17                 :            : #include <linux/kobject.h>
      18                 :            : #include <linux/uaccess.h>
      19                 :            : #include <linux/kdebug.h>
      20                 :            : #include <linux/kernel.h>
      21                 :            : #include <linux/percpu.h>
      22                 :            : #include <linux/string.h>
      23                 :            : #include <linux/device.h>
      24                 :            : #include <linux/syscore_ops.h>
      25                 :            : #include <linux/delay.h>
      26                 :            : #include <linux/ctype.h>
      27                 :            : #include <linux/sched.h>
      28                 :            : #include <linux/sysfs.h>
      29                 :            : #include <linux/types.h>
      30                 :            : #include <linux/slab.h>
      31                 :            : #include <linux/init.h>
      32                 :            : #include <linux/kmod.h>
      33                 :            : #include <linux/poll.h>
      34                 :            : #include <linux/nmi.h>
      35                 :            : #include <linux/cpu.h>
      36                 :            : #include <linux/ras.h>
      37                 :            : #include <linux/smp.h>
      38                 :            : #include <linux/fs.h>
      39                 :            : #include <linux/mm.h>
      40                 :            : #include <linux/debugfs.h>
      41                 :            : #include <linux/irq_work.h>
      42                 :            : #include <linux/export.h>
      43                 :            : #include <linux/jump_label.h>
      44                 :            : #include <linux/set_memory.h>
      45                 :            : 
      46                 :            : #include <asm/intel-family.h>
      47                 :            : #include <asm/processor.h>
      48                 :            : #include <asm/traps.h>
      49                 :            : #include <asm/tlbflush.h>
      50                 :            : #include <asm/mce.h>
      51                 :            : #include <asm/msr.h>
      52                 :            : #include <asm/reboot.h>
      53                 :            : 
      54                 :            : #include "internal.h"
      55                 :            : 
      56                 :            : /* sysfs synchronization */
      57                 :            : static DEFINE_MUTEX(mce_sysfs_mutex);
      58                 :            : 
      59                 :            : #define CREATE_TRACE_POINTS
      60                 :            : #include <trace/events/mce.h>
      61                 :            : 
      62                 :            : #define SPINUNIT                100     /* 100ns */
      63                 :            : 
      64                 :            : DEFINE_PER_CPU(unsigned, mce_exception_count);
      65                 :            : 
      66                 :            : DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
      67                 :            : 
      68                 :            : struct mce_bank {
      69                 :            :         u64                     ctl;                    /* subevents to enable */
      70                 :            :         bool                    init;                   /* initialise bank? */
      71                 :            : };
      72                 :            : static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
      73                 :            : 
      74                 :            : #define ATTR_LEN               16
      75                 :            : /* One object for each MCE bank, shared by all CPUs */
      76                 :            : struct mce_bank_dev {
      77                 :            :         struct device_attribute attr;                   /* device attribute */
      78                 :            :         char                    attrname[ATTR_LEN];     /* attribute name */
      79                 :            :         u8                      bank;                   /* bank number */
      80                 :            : };
      81                 :            : static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
      82                 :            : 
      83                 :            : struct mce_vendor_flags mce_flags __read_mostly;
      84                 :            : 
      85                 :            : struct mca_config mca_cfg __read_mostly = {
      86                 :            :         .bootlog  = -1,
      87                 :            :         /*
      88                 :            :          * Tolerant levels:
      89                 :            :          * 0: always panic on uncorrected errors, log corrected errors
      90                 :            :          * 1: panic or SIGBUS on uncorrected errors, log corrected errors
      91                 :            :          * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
      92                 :            :          * 3: never panic or SIGBUS, log all errors (for testing only)
      93                 :            :          */
      94                 :            :         .tolerant = 1,
      95                 :            :         .monarch_timeout = -1
      96                 :            : };
      97                 :            : 
      98                 :            : static DEFINE_PER_CPU(struct mce, mces_seen);
      99                 :            : static unsigned long mce_need_notify;
     100                 :            : static int cpu_missing;
     101                 :            : 
     102                 :            : /*
     103                 :            :  * MCA banks polled by the periodic polling timer for corrected events.
     104                 :            :  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
     105                 :            :  */
     106                 :            : DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
     107                 :            :         [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
     108                 :            : };
     109                 :            : 
     110                 :            : /*
     111                 :            :  * MCA banks controlled through firmware first for corrected errors.
     112                 :            :  * This is a global list of banks for which we won't enable CMCI and we
     113                 :            :  * won't poll. Firmware controls these banks and is responsible for
     114                 :            :  * reporting corrected errors through GHES. Uncorrected/recoverable
     115                 :            :  * errors are still notified through a machine check.
     116                 :            :  */
     117                 :            : mce_banks_t mce_banks_ce_disabled;
     118                 :            : 
     119                 :            : static struct work_struct mce_work;
     120                 :            : static struct irq_work mce_irq_work;
     121                 :            : 
     122                 :            : static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
     123                 :            : 
     124                 :            : /*
     125                 :            :  * CPU/chipset specific EDAC code can register a notifier call here to print
     126                 :            :  * MCE errors in a human-readable form.
     127                 :            :  */
     128                 :            : BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
     129                 :            : 
     130                 :            : /* Do initial initialization of a struct mce */
     131                 :         78 : void mce_setup(struct mce *m)
     132                 :            : {
     133                 :         78 :         memset(m, 0, sizeof(struct mce));
     134                 :         78 :         m->cpu = m->extcpu = smp_processor_id();
     135                 :            :         /* need the internal __ version to avoid deadlocks */
     136                 :         78 :         m->time = __ktime_get_real_seconds();
     137                 :         78 :         m->cpuvendor = boot_cpu_data.x86_vendor;
     138                 :         78 :         m->cpuid = cpuid_eax(1);
     139                 :         78 :         m->socketid = cpu_data(m->extcpu).phys_proc_id;
     140                 :         78 :         m->apicid = cpu_data(m->extcpu).initial_apicid;
     141                 :         78 :         rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
     142                 :            : 
     143         [ -  + ]:         78 :         if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
     144                 :          0 :                 rdmsrl(MSR_PPIN, m->ppin);
     145                 :            : 
     146                 :         78 :         m->microcode = boot_cpu_data.microcode;
     147                 :         78 : }
     148                 :            : 
     149                 :            : DEFINE_PER_CPU(struct mce, injectm);
     150                 :            : EXPORT_PER_CPU_SYMBOL_GPL(injectm);
     151                 :            : 
     152                 :          0 : void mce_log(struct mce *m)
     153                 :            : {
     154         [ #  # ]:          0 :         if (!mce_gen_pool_add(m))
     155                 :          0 :                 irq_work_queue(&mce_irq_work);
     156                 :          0 : }
     157                 :            : EXPORT_SYMBOL_GPL(mce_log);
     158                 :            : 
     159                 :            : /*
     160                 :            :  * We run the default notifier if we have only the UC, the first and the
     161                 :            :  * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
     162                 :            :  * notifiers registered on the chain.
     163                 :            :  */
     164                 :            : #define NUM_DEFAULT_NOTIFIERS   3
     165                 :            : static atomic_t num_notifiers;
     166                 :            : 
     167                 :        234 : void mce_register_decode_chain(struct notifier_block *nb)
     168                 :            : {
     169   [ -  +  +  - ]:        234 :         if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC))
     170                 :            :                 return;
     171                 :            : 
     172                 :        234 :         atomic_inc(&num_notifiers);
     173                 :            : 
     174                 :        234 :         blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
     175                 :            : }
     176                 :            : EXPORT_SYMBOL_GPL(mce_register_decode_chain);
     177                 :            : 
     178                 :          0 : void mce_unregister_decode_chain(struct notifier_block *nb)
     179                 :            : {
     180                 :          0 :         atomic_dec(&num_notifiers);
     181                 :            : 
     182                 :          0 :         blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
     183                 :          0 : }
     184                 :            : EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
     185                 :            : 
     186                 :       2262 : static inline u32 ctl_reg(int bank)
     187                 :            : {
     188                 :       2262 :         return MSR_IA32_MCx_CTL(bank);
     189                 :            : }
     190                 :            : 
     191                 :       1482 : static inline u32 status_reg(int bank)
     192                 :            : {
     193                 :       1482 :         return MSR_IA32_MCx_STATUS(bank);
     194                 :            : }
     195                 :            : 
     196                 :          0 : static inline u32 addr_reg(int bank)
     197                 :            : {
     198                 :          0 :         return MSR_IA32_MCx_ADDR(bank);
     199                 :            : }
     200                 :            : 
     201                 :        780 : static inline u32 misc_reg(int bank)
     202                 :            : {
     203                 :        780 :         return MSR_IA32_MCx_MISC(bank);
     204                 :            : }
     205                 :            : 
     206                 :          0 : static inline u32 smca_ctl_reg(int bank)
     207                 :            : {
     208                 :          0 :         return MSR_AMD64_SMCA_MCx_CTL(bank);
     209                 :            : }
     210                 :            : 
     211                 :          0 : static inline u32 smca_status_reg(int bank)
     212                 :            : {
     213                 :          0 :         return MSR_AMD64_SMCA_MCx_STATUS(bank);
     214                 :            : }
     215                 :            : 
     216                 :          0 : static inline u32 smca_addr_reg(int bank)
     217                 :            : {
     218                 :          0 :         return MSR_AMD64_SMCA_MCx_ADDR(bank);
     219                 :            : }
     220                 :            : 
     221                 :          0 : static inline u32 smca_misc_reg(int bank)
     222                 :            : {
     223                 :          0 :         return MSR_AMD64_SMCA_MCx_MISC(bank);
     224                 :            : }
     225                 :            : 
     226                 :            : struct mca_msr_regs msr_ops = {
     227                 :            :         .ctl    = ctl_reg,
     228                 :            :         .status = status_reg,
     229                 :            :         .addr   = addr_reg,
     230                 :            :         .misc   = misc_reg
     231                 :            : };
     232                 :            : 
     233                 :          0 : static void __print_mce(struct mce *m)
     234                 :            : {
     235         [ #  # ]:          0 :         pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
     236                 :            :                  m->extcpu,
     237                 :            :                  (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
     238                 :            :                  m->mcgstatus, m->bank, m->status);
     239                 :            : 
     240         [ #  # ]:          0 :         if (m->ip) {
     241         [ #  # ]:          0 :                 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
     242                 :            :                         !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
     243                 :            :                         m->cs, m->ip);
     244                 :            : 
     245         [ #  # ]:          0 :                 if (m->cs == __KERNEL_CS)
     246                 :          0 :                         pr_cont("{%pS}", (void *)(unsigned long)m->ip);
     247                 :          0 :                 pr_cont("\n");
     248                 :            :         }
     249                 :            : 
     250                 :          0 :         pr_emerg(HW_ERR "TSC %llx ", m->tsc);
     251         [ #  # ]:          0 :         if (m->addr)
     252                 :          0 :                 pr_cont("ADDR %llx ", m->addr);
     253         [ #  # ]:          0 :         if (m->misc)
     254                 :          0 :                 pr_cont("MISC %llx ", m->misc);
     255                 :            : 
     256         [ #  # ]:          0 :         if (mce_flags.smca) {
     257         [ #  # ]:          0 :                 if (m->synd)
     258                 :          0 :                         pr_cont("SYND %llx ", m->synd);
     259         [ #  # ]:          0 :                 if (m->ipid)
     260                 :          0 :                         pr_cont("IPID %llx ", m->ipid);
     261                 :            :         }
     262                 :            : 
     263                 :          0 :         pr_cont("\n");
     264                 :            :         /*
     265                 :            :          * Note this output is parsed by external tools and old fields
     266                 :            :          * should not be changed.
     267                 :            :          */
     268                 :          0 :         pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
     269                 :            :                 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
     270                 :            :                 m->microcode);
     271                 :          0 : }
     272                 :            : 
     273                 :          0 : static void print_mce(struct mce *m)
     274                 :            : {
     275                 :          0 :         __print_mce(m);
     276                 :            : 
     277         [ #  # ]:          0 :         if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
     278         [ #  # ]:          0 :                 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
     279                 :          0 : }
     280                 :            : 
     281                 :            : #define PANIC_TIMEOUT 5 /* 5 seconds */
     282                 :            : 
     283                 :            : static atomic_t mce_panicked;
     284                 :            : 
     285                 :            : static int fake_panic;
     286                 :            : static atomic_t mce_fake_panicked;
     287                 :            : 
     288                 :            : /* Panic in progress. Enable interrupts and wait for final IPI */
     289                 :          0 : static void wait_for_panic(void)
     290                 :            : {
     291                 :          0 :         long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
     292                 :            : 
     293                 :          0 :         preempt_disable();
     294                 :          0 :         local_irq_enable();
     295         [ #  # ]:          0 :         while (timeout-- > 0)
     296                 :          0 :                 udelay(1);
     297         [ #  # ]:          0 :         if (panic_timeout == 0)
     298                 :          0 :                 panic_timeout = mca_cfg.panic_timeout;
     299                 :          0 :         panic("Panicing machine check CPU died");
     300                 :            : }
     301                 :            : 
     302                 :          0 : static void mce_panic(const char *msg, struct mce *final, char *exp)
     303                 :            : {
     304                 :          0 :         int apei_err = 0;
     305                 :          0 :         struct llist_node *pending;
     306                 :          0 :         struct mce_evt_llist *l;
     307                 :            : 
     308         [ #  # ]:          0 :         if (!fake_panic) {
     309                 :            :                 /*
     310                 :            :                  * Make sure only one CPU runs in machine check panic
     311                 :            :                  */
     312         [ #  # ]:          0 :                 if (atomic_inc_return(&mce_panicked) > 1)
     313                 :          0 :                         wait_for_panic();
     314                 :          0 :                 barrier();
     315                 :            : 
     316                 :          0 :                 bust_spinlocks(1);
     317         [ #  # ]:          0 :                 console_verbose();
     318                 :            :         } else {
     319                 :            :                 /* Don't log too much for fake panic */
     320         [ #  # ]:          0 :                 if (atomic_inc_return(&mce_fake_panicked) > 1)
     321                 :            :                         return;
     322                 :            :         }
     323                 :          0 :         pending = mce_gen_pool_prepare_records();
     324                 :            :         /* First print corrected ones that are still unlogged */
     325         [ #  # ]:          0 :         llist_for_each_entry(l, pending, llnode) {
     326                 :          0 :                 struct mce *m = &l->mce;
     327         [ #  # ]:          0 :                 if (!(m->status & MCI_STATUS_UC)) {
     328                 :          0 :                         print_mce(m);
     329                 :          0 :                         if (!apei_err)
     330                 :            :                                 apei_err = apei_write_mce(m);
     331                 :            :                 }
     332                 :            :         }
     333                 :            :         /* Now print uncorrected but with the final one last */
     334         [ #  # ]:          0 :         llist_for_each_entry(l, pending, llnode) {
     335                 :          0 :                 struct mce *m = &l->mce;
     336         [ #  # ]:          0 :                 if (!(m->status & MCI_STATUS_UC))
     337                 :          0 :                         continue;
     338   [ #  #  #  # ]:          0 :                 if (!final || mce_cmp(m, final)) {
     339                 :          0 :                         print_mce(m);
     340                 :          0 :                         if (!apei_err)
     341                 :            :                                 apei_err = apei_write_mce(m);
     342                 :            :                 }
     343                 :            :         }
     344         [ #  # ]:          0 :         if (final) {
     345                 :          0 :                 print_mce(final);
     346                 :          0 :                 if (!apei_err)
     347                 :            :                         apei_err = apei_write_mce(final);
     348                 :            :         }
     349         [ #  # ]:          0 :         if (cpu_missing)
     350                 :          0 :                 pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
     351         [ #  # ]:          0 :         if (exp)
     352                 :          0 :                 pr_emerg(HW_ERR "Machine check: %s\n", exp);
     353         [ #  # ]:          0 :         if (!fake_panic) {
     354         [ #  # ]:          0 :                 if (panic_timeout == 0)
     355                 :          0 :                         panic_timeout = mca_cfg.panic_timeout;
     356                 :          0 :                 panic(msg);
     357                 :            :         } else
     358                 :          0 :                 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
     359                 :            : }
     360                 :            : 
     361                 :            : /* Support code for software error injection */
     362                 :            : 
     363                 :          0 : static int msr_to_offset(u32 msr)
     364                 :            : {
     365         [ #  # ]:          0 :         unsigned bank = __this_cpu_read(injectm.bank);
     366                 :            : 
     367         [ #  # ]:          0 :         if (msr == mca_cfg.rip_msr)
     368                 :            :                 return offsetof(struct mce, ip);
     369         [ #  # ]:          0 :         if (msr == msr_ops.status(bank))
     370                 :            :                 return offsetof(struct mce, status);
     371         [ #  # ]:          0 :         if (msr == msr_ops.addr(bank))
     372                 :            :                 return offsetof(struct mce, addr);
     373         [ #  # ]:          0 :         if (msr == msr_ops.misc(bank))
     374                 :            :                 return offsetof(struct mce, misc);
     375         [ #  # ]:          0 :         if (msr == MSR_IA32_MCG_STATUS)
     376                 :          0 :                 return offsetof(struct mce, mcgstatus);
     377                 :            :         return -1;
     378                 :            : }
     379                 :            : 
     380                 :            : /* MSR access wrappers used for error injection */
     381                 :        780 : static u64 mce_rdmsrl(u32 msr)
     382                 :            : {
     383                 :        780 :         u64 v;
     384                 :            : 
     385         [ -  + ]:        780 :         if (__this_cpu_read(injectm.finished)) {
     386                 :          0 :                 int offset = msr_to_offset(msr);
     387                 :            : 
     388         [ #  # ]:          0 :                 if (offset < 0)
     389                 :            :                         return 0;
     390                 :          0 :                 return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
     391                 :            :         }
     392                 :            : 
     393         [ -  + ]:        780 :         if (rdmsrl_safe(msr, &v)) {
     394         [ #  # ]:          0 :                 WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
     395                 :            :                 /*
     396                 :            :                  * Return zero in case the access faulted. This should
     397                 :            :                  * not happen normally but can happen if the CPU does
     398                 :            :                  * something weird, or if the code is buggy.
     399                 :            :                  */
     400                 :            :                 v = 0;
     401                 :            :         }
     402                 :            : 
     403                 :            :         return v;
     404                 :            : }
     405                 :            : 
     406                 :          0 : static void mce_wrmsrl(u32 msr, u64 v)
     407                 :            : {
     408         [ #  # ]:          0 :         if (__this_cpu_read(injectm.finished)) {
     409                 :          0 :                 int offset = msr_to_offset(msr);
     410                 :            : 
     411         [ #  # ]:          0 :                 if (offset >= 0)
     412                 :          0 :                         *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
     413                 :          0 :                 return;
     414                 :            :         }
     415                 :          0 :         wrmsrl(msr, v);
     416                 :            : }
     417                 :            : 
     418                 :            : /*
     419                 :            :  * Collect all global (w.r.t. this processor) status about this machine
     420                 :            :  * check into our "mce" struct so that we can use it later to assess
     421                 :            :  * the severity of the problem as we read per-bank specific details.
     422                 :            :  */
     423                 :         78 : static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
     424                 :            : {
     425                 :          0 :         mce_setup(m);
     426                 :            : 
     427                 :         78 :         m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
     428         [ #  # ]:          0 :         if (regs) {
     429                 :            :                 /*
     430                 :            :                  * Get the address of the instruction at the time of
     431                 :            :                  * the machine check error.
     432                 :            :                  */
     433         [ #  # ]:          0 :                 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
     434                 :          0 :                         m->ip = regs->ip;
     435                 :          0 :                         m->cs = regs->cs;
     436                 :            : 
     437                 :            :                         /*
     438                 :            :                          * When in VM86 mode make the cs look like ring 3
     439                 :            :                          * always. This is a lie, but it's better than passing
     440                 :            :                          * the additional vm86 bit around everywhere.
     441                 :            :                          */
     442                 :          0 :                         if (v8086_mode(regs))
     443                 :            :                                 m->cs |= 3;
     444                 :            :                 }
     445                 :            :                 /* Use accurate RIP reporting if available. */
     446         [ #  # ]:          0 :                 if (mca_cfg.rip_msr)
     447                 :          0 :                         m->ip = mce_rdmsrl(mca_cfg.rip_msr);
     448                 :            :         }
     449                 :          0 : }
     450                 :            : 
     451                 :        390 : int mce_available(struct cpuinfo_x86 *c)
     452                 :            : {
     453         [ +  - ]:        390 :         if (mca_cfg.disabled)
     454                 :            :                 return 0;
     455   [ +  +  -  + ]:        702 :         return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
     456                 :            : }
     457                 :            : 
     458                 :         78 : static void mce_schedule_work(void)
     459                 :            : {
     460         [ -  + ]:         78 :         if (!mce_gen_pool_empty())
     461                 :          0 :                 schedule_work(&mce_work);
     462                 :         78 : }
     463                 :            : 
     464                 :          0 : static void mce_irq_work_cb(struct irq_work *entry)
     465                 :            : {
     466                 :          0 :         mce_schedule_work();
     467                 :          0 : }
     468                 :            : 
     469                 :            : /*
     470                 :            :  * Check if the address reported by the CPU is in a format we can parse.
     471                 :            :  * It would be possible to add code for most other cases, but all would
     472                 :            :  * be somewhat complicated (e.g. segment offset would require an instruction
     473                 :            :  * parser). So only support physical addresses up to page granularity for now.
     474                 :            :  */
     475                 :          0 : int mce_usable_address(struct mce *m)
     476                 :            : {
     477         [ #  # ]:          0 :         if (!(m->status & MCI_STATUS_ADDRV))
     478                 :            :                 return 0;
     479                 :            : 
     480                 :            :         /* Checks after this one are Intel/Zhaoxin-specific: */
     481         [ #  # ]:          0 :         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
     482                 :            :             boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
     483                 :            :                 return 1;
     484                 :            : 
     485         [ #  # ]:          0 :         if (!(m->status & MCI_STATUS_MISCV))
     486                 :            :                 return 0;
     487                 :            : 
     488         [ #  # ]:          0 :         if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
     489                 :            :                 return 0;
     490                 :            : 
     491         [ #  # ]:          0 :         if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
     492                 :          0 :                 return 0;
     493                 :            : 
     494                 :            :         return 1;
     495                 :            : }
     496                 :            : EXPORT_SYMBOL_GPL(mce_usable_address);
     497                 :            : 
     498                 :          0 : bool mce_is_memory_error(struct mce *m)
     499                 :            : {
     500      [ #  #  # ]:          0 :         switch (m->cpuvendor) {
     501                 :          0 :         case X86_VENDOR_AMD:
     502                 :            :         case X86_VENDOR_HYGON:
     503                 :          0 :                 return amd_mce_is_memory_error(m);
     504                 :            : 
     505                 :          0 :         case X86_VENDOR_INTEL:
     506                 :            :         case X86_VENDOR_ZHAOXIN:
     507                 :            :                 /*
     508                 :            :                  * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
     509                 :            :                  *
     510                 :            :                  * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
     511                 :            :                  * indicating a memory error. Bit 8 is used for indicating a
     512                 :            :                  * cache hierarchy error. The combination of bit 2 and bit 3
     513                 :            :                  * is used for indicating a `generic' cache hierarchy error.
     514                 :            :                  * But we can't just blindly check the above bits, because if
     515                 :            :                  * bit 11 is set, then it is a bus/interconnect error - and
     516                 :            :                  * either way the above bits just give more detail on what
     517                 :            :                  * bus/interconnect error happened. Note that bit 12 can be
     518                 :            :                  * ignored, as it's the "filter" bit.
     519                 :            :                  */
     520                 :          0 :                 return (m->status & 0xef80) == BIT(7) ||
     521   [ #  #  #  # ]:          0 :                        (m->status & 0xef00) == BIT(8) ||
     522         [ #  # ]:          0 :                        (m->status & 0xeffc) == 0xc;
     523                 :            : 
     524                 :            :         default:
     525                 :            :                 return false;
     526                 :            :         }
     527                 :            : }
     528                 :            : EXPORT_SYMBOL_GPL(mce_is_memory_error);
     529                 :            : 
     530                 :          0 : bool mce_is_correctable(struct mce *m)
     531                 :            : {
     532   [ #  #  #  # ]:          0 :         if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
     533                 :            :                 return false;
     534                 :            : 
     535   [ #  #  #  # ]:          0 :         if (m->cpuvendor == X86_VENDOR_HYGON && m->status & MCI_STATUS_DEFERRED)
     536                 :            :                 return false;
     537                 :            : 
     538         [ #  # ]:          0 :         if (m->status & MCI_STATUS_UC)
     539                 :          0 :                 return false;
     540                 :            : 
     541                 :            :         return true;
     542                 :            : }
     543                 :            : EXPORT_SYMBOL_GPL(mce_is_correctable);
     544                 :            : 
     545                 :          0 : static bool cec_add_mce(struct mce *m)
     546                 :            : {
     547                 :          0 :         if (!m)
     548                 :            :                 return false;
     549                 :            : 
     550                 :            :         /* We eat only correctable DRAM errors with usable addresses. */
     551                 :          0 :         if (mce_is_memory_error(m) &&
     552                 :            :             mce_is_correctable(m)  &&
     553                 :            :             mce_usable_address(m))
     554                 :            :                 if (!cec_add_elem(m->addr >> PAGE_SHIFT))
     555                 :            :                         return true;
     556                 :            : 
     557                 :            :         return false;
     558                 :            : }
     559                 :            : 
     560                 :          0 : static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
     561                 :            :                               void *data)
     562                 :            : {
     563                 :          0 :         struct mce *m = (struct mce *)data;
     564                 :            : 
     565         [ #  # ]:          0 :         if (!m)
     566                 :            :                 return NOTIFY_DONE;
     567                 :            : 
     568                 :          0 :         if (cec_add_mce(m))
     569                 :            :                 return NOTIFY_STOP;
     570                 :            : 
     571                 :            :         /* Emit the trace record: */
     572                 :          0 :         trace_mce_record(m);
     573                 :            : 
     574                 :          0 :         set_bit(0, &mce_need_notify);
     575                 :            : 
     576                 :          0 :         mce_notify_irq();
     577                 :            : 
     578                 :          0 :         return NOTIFY_DONE;
     579                 :            : }
     580                 :            : 
     581                 :            : static struct notifier_block first_nb = {
     582                 :            :         .notifier_call  = mce_first_notifier,
     583                 :            :         .priority       = MCE_PRIO_FIRST,
     584                 :            : };
     585                 :            : 
     586                 :          0 : static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
     587                 :            :                               void *data)
     588                 :            : {
     589                 :          0 :         struct mce *mce = (struct mce *)data;
     590                 :          0 :         unsigned long pfn;
     591                 :            : 
     592   [ #  #  #  # ]:          0 :         if (!mce || !mce_usable_address(mce))
     593                 :            :                 return NOTIFY_DONE;
     594                 :            : 
     595         [ #  # ]:          0 :         if (mce->severity != MCE_AO_SEVERITY &&
     596                 :            :             mce->severity != MCE_DEFERRED_SEVERITY)
     597                 :            :                 return NOTIFY_DONE;
     598                 :            : 
     599                 :          0 :         pfn = mce->addr >> PAGE_SHIFT;
     600                 :          0 :         if (!memory_failure(pfn, 0))
     601                 :          0 :                 set_mce_nospec(pfn);
     602                 :            : 
     603                 :          0 :         return NOTIFY_OK;
     604                 :            : }
     605                 :            : 
     606                 :            : static struct notifier_block mce_uc_nb = {
     607                 :            :         .notifier_call  = uc_decode_notifier,
     608                 :            :         .priority       = MCE_PRIO_UC,
     609                 :            : };
     610                 :            : 
     611                 :          0 : static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
     612                 :            :                                 void *data)
     613                 :            : {
     614                 :          0 :         struct mce *m = (struct mce *)data;
     615                 :            : 
     616         [ #  # ]:          0 :         if (!m)
     617                 :            :                 return NOTIFY_DONE;
     618                 :            : 
     619         [ #  # ]:          0 :         if (atomic_read(&num_notifiers) > NUM_DEFAULT_NOTIFIERS)
     620                 :            :                 return NOTIFY_DONE;
     621                 :            : 
     622                 :          0 :         __print_mce(m);
     623                 :            : 
     624                 :          0 :         return NOTIFY_DONE;
     625                 :            : }
     626                 :            : 
     627                 :            : static struct notifier_block mce_default_nb = {
     628                 :            :         .notifier_call  = mce_default_notifier,
     629                 :            :         /* lowest prio, we want it to run last. */
     630                 :            :         .priority       = MCE_PRIO_LOWEST,
     631                 :            : };
     632                 :            : 
     633                 :            : /*
     634                 :            :  * Read ADDR and MISC registers.
     635                 :            :  */
     636                 :          0 : static void mce_read_aux(struct mce *m, int i)
     637                 :            : {
     638         [ #  # ]:          0 :         if (m->status & MCI_STATUS_MISCV)
     639                 :          0 :                 m->misc = mce_rdmsrl(msr_ops.misc(i));
     640                 :            : 
     641         [ #  # ]:          0 :         if (m->status & MCI_STATUS_ADDRV) {
     642                 :          0 :                 m->addr = mce_rdmsrl(msr_ops.addr(i));
     643                 :            : 
     644                 :            :                 /*
     645                 :            :                  * Mask the reported address by the reported granularity.
     646                 :            :                  */
     647   [ #  #  #  # ]:          0 :                 if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
     648                 :          0 :                         u8 shift = MCI_MISC_ADDR_LSB(m->misc);
     649                 :          0 :                         m->addr >>= shift;
     650                 :          0 :                         m->addr <<= shift;
     651                 :            :                 }
     652                 :            : 
     653                 :            :                 /*
     654                 :            :                  * Extract [55:<lsb>] where lsb is the least significant
     655                 :            :                  * *valid* bit of the address bits.
     656                 :            :                  */
     657         [ #  # ]:          0 :                 if (mce_flags.smca) {
     658                 :          0 :                         u8 lsb = (m->addr >> 56) & 0x3f;
     659                 :            : 
     660                 :          0 :                         m->addr &= GENMASK_ULL(55, lsb);
     661                 :            :                 }
     662                 :            :         }
     663                 :            : 
     664         [ #  # ]:          0 :         if (mce_flags.smca) {
     665                 :          0 :                 m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
     666                 :            : 
     667         [ #  # ]:          0 :                 if (m->status & MCI_STATUS_SYNDV)
     668                 :          0 :                         m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
     669                 :            :         }
     670                 :          0 : }
     671                 :            : 
     672                 :            : DEFINE_PER_CPU(unsigned, mce_poll_count);
     673                 :            : 
     674                 :            : /*
     675                 :            :  * Poll for corrected events or events that happened before reset.
     676                 :            :  * Those are just logged through /dev/mcelog.
     677                 :            :  *
     678                 :            :  * This is executed in standard interrupt context.
     679                 :            :  *
     680                 :            :  * Note: spec recommends to panic for fatal unsignalled
     681                 :            :  * errors here. However this would be quite problematic --
     682                 :            :  * we would need to reimplement the Monarch handling and
     683                 :            :  * it would mess up the exclusion between exception handler
     684                 :            :  * and poll handler -- so we skip this for now.
     685                 :            :  * These cases should not happen anyways, or only when the CPU
     686                 :            :  * is already totally confused. In this case it's likely it will
     687                 :            :  * not fully execute the machine check handler either.
     688                 :            :  */
     689                 :         78 : bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
     690                 :            : {
     691                 :         78 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
     692                 :         78 :         bool error_seen = false;
     693                 :         78 :         struct mce m;
     694                 :         78 :         int i;
     695                 :            : 
     696                 :         78 :         this_cpu_inc(mce_poll_count);
     697                 :            : 
     698                 :         78 :         mce_gather_info(&m, NULL);
     699                 :            : 
     700         [ -  + ]:         78 :         if (flags & MCP_TIMESTAMP)
     701                 :          0 :                 m.tsc = rdtsc();
     702                 :            : 
     703         [ +  + ]:        858 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
     704   [ +  +  -  + ]:        780 :                 if (!mce_banks[i].ctl || !test_bit(i, *b))
     705                 :         78 :                         continue;
     706                 :            : 
     707                 :        702 :                 m.misc = 0;
     708                 :        702 :                 m.addr = 0;
     709                 :        702 :                 m.bank = i;
     710                 :            : 
     711                 :        702 :                 barrier();
     712                 :        702 :                 m.status = mce_rdmsrl(msr_ops.status(i));
     713                 :            : 
     714                 :            :                 /* If this entry is not valid, ignore it */
     715         [ +  - ]:        702 :                 if (!(m.status & MCI_STATUS_VAL))
     716                 :        702 :                         continue;
     717                 :            : 
     718                 :            :                 /*
     719                 :            :                  * If we are logging everything (at CPU online) or this
     720                 :            :                  * is a corrected error, then we must log it.
     721                 :            :                  */
     722   [ #  #  #  # ]:          0 :                 if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
     723                 :          0 :                         goto log_it;
     724                 :            : 
     725                 :            :                 /*
     726                 :            :                  * Newer Intel systems that support software error
     727                 :            :                  * recovery need to make additional checks. Other
     728                 :            :                  * CPUs should skip over uncorrected errors, but log
     729                 :            :                  * everything else.
     730                 :            :                  */
     731         [ #  # ]:          0 :                 if (!mca_cfg.ser) {
     732                 :          0 :                         if (m.status & MCI_STATUS_UC)
     733                 :          0 :                                 continue;
     734                 :            :                         goto log_it;
     735                 :            :                 }
     736                 :            : 
     737                 :            :                 /* Log "not enabled" (speculative) errors */
     738         [ #  # ]:          0 :                 if (!(m.status & MCI_STATUS_EN))
     739                 :          0 :                         goto log_it;
     740                 :            : 
     741                 :            :                 /*
     742                 :            :                  * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
     743                 :            :                  * UC == 1 && PCC == 0 && S == 0
     744                 :            :                  */
     745         [ #  # ]:          0 :                 if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
     746                 :          0 :                         goto log_it;
     747                 :            : 
     748                 :            :                 /*
     749                 :            :                  * Skip anything else. Presumption is that our read of this
     750                 :            :                  * bank is racing with a machine check. Leave the log alone
     751                 :            :                  * for do_machine_check() to deal with it.
     752                 :            :                  */
     753                 :          0 :                 continue;
     754                 :            : 
     755                 :          0 : log_it:
     756                 :          0 :                 error_seen = true;
     757                 :            : 
     758         [ #  # ]:          0 :                 if (flags & MCP_DONTLOG)
     759                 :          0 :                         goto clear_it;
     760                 :            : 
     761                 :          0 :                 mce_read_aux(&m, i);
     762                 :          0 :                 m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
     763                 :            :                 /*
     764                 :            :                  * Don't get the IP here because it's unlikely to
     765                 :            :                  * have anything to do with the actual error location.
     766                 :            :                  */
     767                 :            : 
     768   [ #  #  #  # ]:          0 :                 if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
     769                 :          0 :                         goto clear_it;
     770                 :            : 
     771                 :          0 :                 mce_log(&m);
     772                 :            : 
     773                 :          0 : clear_it:
     774                 :            :                 /*
     775                 :            :                  * Clear state for this bank.
     776                 :            :                  */
     777                 :          0 :                 mce_wrmsrl(msr_ops.status(i), 0);
     778                 :            :         }
     779                 :            : 
     780                 :            :         /*
     781                 :            :          * Don't clear MCG_STATUS here because it's only defined for
     782                 :            :          * exceptions.
     783                 :            :          */
     784                 :            : 
     785                 :         78 :         sync_core();
     786                 :            : 
     787                 :         78 :         return error_seen;
     788                 :            : }
     789                 :            : EXPORT_SYMBOL_GPL(machine_check_poll);
     790                 :            : 
     791                 :            : /*
     792                 :            :  * Do a quick check if any of the events requires a panic.
     793                 :            :  * This decides if we keep the events around or clear them.
     794                 :            :  */
     795                 :          0 : static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
     796                 :            :                           struct pt_regs *regs)
     797                 :            : {
     798                 :          0 :         char *tmp = *msg;
     799                 :          0 :         int i;
     800                 :            : 
     801         [ #  # ]:          0 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
     802                 :          0 :                 m->status = mce_rdmsrl(msr_ops.status(i));
     803         [ #  # ]:          0 :                 if (!(m->status & MCI_STATUS_VAL))
     804                 :          0 :                         continue;
     805                 :            : 
     806                 :          0 :                 __set_bit(i, validp);
     807         [ #  # ]:          0 :                 if (quirk_no_way_out)
     808                 :          0 :                         quirk_no_way_out(i, m, regs);
     809                 :            : 
     810                 :          0 :                 m->bank = i;
     811         [ #  # ]:          0 :                 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
     812                 :          0 :                         mce_read_aux(m, i);
     813                 :          0 :                         *msg = tmp;
     814                 :          0 :                         return 1;
     815                 :            :                 }
     816                 :            :         }
     817                 :            :         return 0;
     818                 :            : }
     819                 :            : 
     820                 :            : /*
     821                 :            :  * Variable to establish order between CPUs while scanning.
     822                 :            :  * Each CPU spins initially until executing is equal to its number.
     823                 :            :  */
     824                 :            : static atomic_t mce_executing;
     825                 :            : 
     826                 :            : /*
     827                 :            :  * Defines order of CPUs on entry. First CPU becomes Monarch.
     828                 :            :  */
     829                 :            : static atomic_t mce_callin;
     830                 :            : 
     831                 :            : /*
     832                 :            :  * Check if a timeout waiting for other CPUs happened.
     833                 :            :  */
     834                 :          0 : static int mce_timed_out(u64 *t, const char *msg)
     835                 :            : {
     836                 :            :         /*
     837                 :            :          * The others already did panic for some reason.
     838                 :            :          * Bail out like in a timeout.
     839                 :            :          * rmb() to tell the compiler that system_state
     840                 :            :          * might have been modified by someone else.
     841                 :            :          */
     842                 :          0 :         rmb();
     843         [ #  # ]:          0 :         if (atomic_read(&mce_panicked))
     844                 :          0 :                 wait_for_panic();
     845         [ #  # ]:          0 :         if (!mca_cfg.monarch_timeout)
     846                 :          0 :                 goto out;
     847         [ #  # ]:          0 :         if ((s64)*t < SPINUNIT) {
     848         [ #  # ]:          0 :                 if (mca_cfg.tolerant <= 1)
     849                 :          0 :                         mce_panic(msg, NULL, NULL);
     850                 :          0 :                 cpu_missing = 1;
     851                 :          0 :                 return 1;
     852                 :            :         }
     853                 :          0 :         *t -= SPINUNIT;
     854                 :            : out:
     855                 :            :         touch_nmi_watchdog();
     856                 :            :         return 0;
     857                 :            : }
     858                 :            : 
     859                 :            : /*
     860                 :            :  * The Monarch's reign.  The Monarch is the CPU who entered
     861                 :            :  * the machine check handler first. It waits for the others to
     862                 :            :  * raise the exception too and then grades them. When any
     863                 :            :  * error is fatal panic. Only then let the others continue.
     864                 :            :  *
     865                 :            :  * The other CPUs entering the MCE handler will be controlled by the
     866                 :            :  * Monarch. They are called Subjects.
     867                 :            :  *
     868                 :            :  * This way we prevent any potential data corruption in an unrecoverable case
     869                 :            :  * and also make sure that all CPUs' errors are always examined.
     870                 :            :  *
     871                 :            :  * Also this detects the case of a machine check event coming from outer
     872                 :            :  * space (not detected by any CPUs). In this case some external agent wants
     873                 :            :  * us to shut down, so panic too.
     874                 :            :  *
     875                 :            :  * The other CPUs might still decide to panic if the handler happens
     876                 :            :  * in an unrecoverable place, but in this case the system is in a semi-stable
     877                 :            :  * state and won't corrupt anything by itself. It's ok to let the others
     878                 :            :  * continue for a bit first.
     879                 :            :  *
     880                 :            :  * All the spin loops have timeouts; when a timeout happens a CPU
     881                 :            :  * typically elects itself to be Monarch.
     882                 :            :  */
     883                 :          0 : static void mce_reign(void)
     884                 :            : {
     885                 :          0 :         int cpu;
     886                 :          0 :         struct mce *m = NULL;
     887                 :          0 :         int global_worst = 0;
     888                 :          0 :         char *msg = NULL;
     889                 :          0 :         char *nmsg = NULL;
     890                 :            : 
     891                 :            :         /*
     892                 :            :          * This CPU is the Monarch and the other CPUs have run
     893                 :            :          * through their handlers.
     894                 :            :          * Grade the severity of the errors of all the CPUs.
                         :            :          * Remember the record with the highest grade in m and its
                         :            :          * message in msg. The comparison is strict, so on a tie the
                         :            :          * first CPU visited is kept.
     895                 :            :          */
     896         [ #  # ]:          0 :         for_each_possible_cpu(cpu) {
     897                 :          0 :                 int severity = mce_severity(&per_cpu(mces_seen, cpu),
     898                 :            :                                             mca_cfg.tolerant,
     899                 :            :                                             &nmsg, true);
     900         [ #  # ]:          0 :                 if (severity > global_worst) {
     901                 :          0 :                         msg = nmsg;
     902                 :          0 :                         global_worst = severity;
     903                 :          0 :                         m = &per_cpu(mces_seen, cpu);
     904                 :            :                 }
     905                 :            :         }
     906                 :            : 
     907                 :            :         /*
     908                 :            :          * Cannot recover? Panic here then.
     909                 :            :          * This dumps all the mces in the log buffer and stops the
     910                 :            :          * other CPUs.
                         :            :          * Not done when mca_cfg.tolerant is 3 or higher.
     911                 :            :          */
     912   [ #  #  #  # ]:          0 :         if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
     913                 :          0 :                 mce_panic("Fatal machine check", m, msg);
     914                 :            : 
     915                 :            :         /*
     916                 :            :          * For UC somewhere we let the CPU that detects it handle it.
     917                 :            :          * The others must also be allowed to continue, otherwise the
     918                 :            :          * handling CPU could deadlock on a lock.
     919                 :            :          */
     920                 :            : 
     921                 :            :         /*
     922                 :            :          * No machine check event found (worst grade is at most
     923                 :            :          * MCE_KEEP_SEVERITY). Must be some external source or one CPU is
                         :            :          * hung. Panic, again unless mca_cfg.tolerant is 3 or higher.
     924                 :            :          */
     925   [ #  #  #  # ]:          0 :         if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
     926                 :          0 :                 mce_panic("Fatal machine check from unknown source", NULL, NULL);
     927                 :            : 
     928                 :            :         /*
     929                 :            :          * Now clear all the mces_seen so that they don't reappear on
     930                 :            :          * the next mce.
     931                 :            :          */
     932         [ #  # ]:          0 :         for_each_possible_cpu(cpu)
     933                 :          0 :                 memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
     934                 :          0 : }
     935                 :            : 
     936                 :            : static atomic_t global_nwo;
     937                 :            : 
     938                 :            : /*
     939                 :            :  * Start of Monarch synchronization. This waits until all CPUs have
     940                 :            :  * entered the exception handler and then determines if any of them
     941                 :            :  * saw a fatal event that requires panic. Then it lets them run their
     942                 :            :  * scanning loops one at a time, in the entry order.
     943                 :            :  * TBD double check parallel CPU hotunplug
                         :            :  *
                         :            :  * "All CPUs" means num_online_cpus() as sampled on entry.
                         :            :  *
                         :            :  * @no_way_out: in: this CPU's no_way_out value, which is added to the
                         :            :  *              shared counter global_nwo; out: overwritten with the value
                         :            :  *              of global_nwo once it is this CPU's turn to run.
                         :            :  *
                         :            :  * Returns the callin order of this CPU (1 means Monarch), or -1 if
                         :            :  * mca_cfg.monarch_timeout is 0 or if mce_timed_out() reports a timeout
                         :            :  * in one of the wait loops. In the timeout case global_nwo is reset
                         :            :  * to 0 and *no_way_out is left untouched.
     944                 :            :  */
     945                 :          0 : static int mce_start(int *no_way_out)
     946                 :            : {
     947                 :          0 :         int order;
     948                 :          0 :         int cpus = num_online_cpus();
     949                 :          0 :         u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
     950                 :            : 
     951         [ #  # ]:          0 :         if (!timeout)
     952                 :            :                 return -1;
     953                 :            : 
     954                 :          0 :         atomic_add(*no_way_out, &global_nwo);
     955                 :            :         /*
     956                 :            :          * Rely on the implied barrier below, such that global_nwo
     957                 :            :          * is updated before mce_callin.
     958                 :            :          */
     959                 :          0 :         order = atomic_inc_return(&mce_callin);
     960                 :            : 
     961                 :            :         /*
     962                 :            :          * Wait for everyone.
     963                 :            :          */
     964         [ #  # ]:          0 :         while (atomic_read(&mce_callin) != cpus) {
     965         [ #  # ]:          0 :                 if (mce_timed_out(&timeout,
     966                 :            :                                   "Timeout: Not all CPUs entered broadcast exception handler")) {
     967                 :          0 :                         atomic_set(&global_nwo, 0);
     968                 :          0 :                         return -1;
     969                 :            :                 }
     970                 :          0 :                 ndelay(SPINUNIT);
     971                 :            :         }
     972                 :            : 
     973                 :            :         /*
     974                 :            :          * mce_callin should be read before global_nwo
     975                 :            :          */
     976                 :          0 :         smp_rmb();
     977                 :            : 
     978         [ #  # ]:          0 :         if (order == 1) {
     979                 :            :                 /*
     980                 :            :                  * Monarch: Starts executing now, the others wait.
     981                 :            :                  */
     982                 :          0 :                 atomic_set(&mce_executing, 1);
     983                 :            :         } else {
     984                 :            :                 /*
     985                 :            :                  * Subject: Now start the scanning loop one by one in
     986                 :            :                  * the original callin order.
     987                 :            :                  * This way when there are any shared banks it will be
     988                 :            :                  * only seen by one CPU before cleared, avoiding duplicates.
                         :            :                  * The same timeout budget as the first wait loop is used.
     989                 :            :                  */
     990         [ #  # ]:          0 :                 while (atomic_read(&mce_executing) < order) {
     991         [ #  # ]:          0 :                         if (mce_timed_out(&timeout,
     992                 :            :                                           "Timeout: Subject CPUs unable to finish machine check processing")) {
     993                 :          0 :                                 atomic_set(&global_nwo, 0);
     994                 :          0 :                                 return -1;
     995                 :            :                         }
     996                 :          0 :                         ndelay(SPINUNIT);
     997                 :            :                 }
     998                 :            :         }
     999                 :            : 
    1000                 :            :         /*
    1001                 :            :          * Cache the global no_way_out state.
    1002                 :            :          */
    1003                 :          0 :         *no_way_out = atomic_read(&global_nwo);
    1004                 :            : 
    1005                 :          0 :         return order;
    1006                 :            : }
    1007                 :            : 
    1008                 :            : /*
    1009                 :            :  * Synchronize between CPUs after main scanning loop.
    1010                 :            :  * This invokes the bulk of the Monarch processing.
                         :            :  *
                         :            :  * @order: callin order as returned by mce_start(); 1 is the Monarch,
                         :            :  *         a negative value means the rendezvous did not succeed.
                         :            :  *
                         :            :  * The Monarch waits until mce_executing exceeds num_online_cpus(),
                         :            :  * calls mce_reign(), resets global_nwo, mce_callin and mce_executing
                         :            :  * to 0 and returns 0. A Subject waits until mce_executing reads 0 and
                         :            :  * returns 0 without resetting anything.
                         :            :  *
                         :            :  * Returns -1 if mca_cfg.monarch_timeout is 0, if @order is negative, or
                         :            :  * if mce_timed_out() reports a timeout; the global state is reset on
                         :            :  * all of those paths as well.
    1011                 :            :  */
    1012                 :          0 : static int mce_end(int order)
    1013                 :            : {
    1014                 :          0 :         int ret = -1;
    1015                 :          0 :         u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
    1016                 :            : 
    1017         [ #  # ]:          0 :         if (!timeout)
    1018                 :          0 :                 goto reset;
    1019         [ #  # ]:          0 :         if (order < 0)
    1020                 :          0 :                 goto reset;
    1021                 :            : 
    1022                 :            :         /*
    1023                 :            :          * Allow others to run.
    1024                 :            :          */
    1025                 :          0 :         atomic_inc(&mce_executing);
    1026                 :            : 
    1027         [ #  # ]:          0 :         if (order == 1) {
    1028                 :            :                 /* CHECKME: Can this race with a parallel hotplug? */
    1029                 :          0 :                 int cpus = num_online_cpus();
    1030                 :            : 
    1031                 :            :                 /*
    1032                 :            :                  * Monarch: Wait for everyone to go through their scanning
    1033                 :            :                  * loops.
    1034                 :            :                  */
    1035         [ #  # ]:          0 :                 while (atomic_read(&mce_executing) <= cpus) {
    1036         [ #  # ]:          0 :                         if (mce_timed_out(&timeout,
    1037                 :            :                                           "Timeout: Monarch CPU unable to finish machine check processing"))
    1038                 :          0 :                                 goto reset;
    1039                 :          0 :                         ndelay(SPINUNIT);
    1040                 :            :                 }
    1041                 :            : 
    1042                 :          0 :                 mce_reign();
    1043                 :          0 :                 barrier();
    1044                 :          0 :                 ret = 0;
    1045                 :            :         } else {
    1046                 :            :                 /*
    1047                 :            :                  * Subject: Wait for Monarch to finish.
    1048                 :            :                  */
    1049         [ #  # ]:          0 :                 while (atomic_read(&mce_executing) != 0) {
    1050         [ #  # ]:          0 :                         if (mce_timed_out(&timeout,
    1051                 :            :                                           "Timeout: Monarch CPU did not finish machine check processing"))
    1052                 :          0 :                                 goto reset;
    1053                 :          0 :                         ndelay(SPINUNIT);
    1054                 :            :                 }
    1055                 :            : 
    1056                 :            :                 /*
    1057                 :            :                  * Don't reset anything. That's done by the Monarch.
    1058                 :            :                  */
    1059                 :            :                 return 0;
    1060                 :            :         }
    1061                 :            : 
    1062                 :            :         /*
    1063                 :            :          * Reset all global state.
    1064                 :            :          */
    1065                 :          0 : reset:
    1066                 :          0 :         atomic_set(&global_nwo, 0);
    1067                 :          0 :         atomic_set(&mce_callin, 0);
    1068                 :          0 :         barrier();
    1069                 :            : 
    1070                 :            :         /*
    1071                 :            :          * Let others run again.
    1072                 :            :          */
    1073                 :          0 :         atomic_set(&mce_executing, 0);
    1074                 :          0 :         return ret;
    1075                 :            : }
    1076                 :            : /*
                         :            :  * Write 0 to the status MSR (msr_ops.status(i)) of every bank i below
                         :            :  * this CPU's mce_num_banks whose bit is set in @toclear. Banks whose
                         :            :  * bit is clear are left untouched.
                         :            :  */
    1077                 :          0 : static void mce_clear_state(unsigned long *toclear)
    1078                 :            : {
    1079                 :          0 :         int i;
    1080                 :            : 
    1081         [ #  # ]:          0 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
    1082         [ #  # ]:          0 :                 if (test_bit(i, toclear))
    1083                 :          0 :                         mce_wrmsrl(msr_ops.status(i), 0);
    1084                 :            :         }
    1085                 :          0 : }
    1086                 :            : /*
                         :            :  * Hand the page containing m->addr to memory_failure().
                         :            :  *
                         :            :  * MF_ACTION_REQUIRED is always passed; MF_MUST_KILL is added when
                         :            :  * MCG_STATUS_RIPV is clear in m->mcgstatus. If memory_failure() returns
                         :            :  * 0, set_mce_nospec() is called on the same page frame; otherwise an
                         :            :  * error is logged.
                         :            :  *
                         :            :  * Returns the value returned by memory_failure().
                         :            :  */
    1087                 :            : static int do_memory_failure(struct mce *m)
    1088                 :            : {
    1089                 :            :         int flags = MF_ACTION_REQUIRED;
    1090                 :            :         int ret;
    1091                 :            : 
    1092                 :            :         pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
    1093                 :            :         if (!(m->mcgstatus & MCG_STATUS_RIPV))
    1094                 :            :                 flags |= MF_MUST_KILL;
    1095                 :            :         ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
    1096                 :            :         if (ret)
    1097                 :            :                 pr_err("Memory error not recovered");
    1098                 :            :         else
    1099                 :            :                 set_mce_nospec(m->addr >> PAGE_SHIFT);
    1100                 :            :         return ret;
    1101                 :            : }
    1102                 :            : 
    1103                 :            : 
    1104                 :            : /*
    1105                 :            :  * Cases where we avoid rendezvous handler timeout:
    1106                 :            :  * 1) If this CPU is offline.
    1107                 :            :  *
    1108                 :            :  * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
    1109                 :            :  *  skip those CPUs which remain looping in the 1st kernel - see
    1110                 :            :  *  crash_nmi_callback().
    1111                 :            :  *
    1112                 :            :  * Note: there still is a small window between kexec-ing and the new,
    1113                 :            :  * kdump kernel establishing a new #MC handler where a broadcasted MCE
    1114                 :            :  * might not get handled properly.
                         :            :  *
                         :            :  * Case 2 applies only when crashing_cpu names a CPU other than @cpu.
                         :            :  *
                         :            :  * Returns true, after writing 0 to MSR_IA32_MCG_STATUS, only when one of
                         :            :  * the cases above holds and MCG_STATUS_RIPV is set; do_machine_check()
                         :            :  * then returns without further handling. Returns false in every other
                         :            :  * situation, including on Zhaoxin CPUs when MCG_STATUS_LMCES is set.
    1115                 :            :  */
    1116                 :          0 : static bool __mc_check_crashing_cpu(int cpu)
    1117                 :            : {
    1118         [ #  # ]:          0 :         if (cpu_is_offline(cpu) ||
    1119   [ #  #  #  # ]:          0 :             (crashing_cpu != -1 && crashing_cpu != cpu)) {
    1120                 :          0 :                 u64 mcgstatus;
    1121                 :            : 
    1122                 :          0 :                 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
    1123                 :            : 
    1124         [ #  # ]:          0 :                 if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
    1125         [ #  # ]:          0 :                         if (mcgstatus & MCG_STATUS_LMCES)
    1126                 :            :                                 return false;
    1127                 :            :                 }
    1128                 :            : 
    1129         [ #  # ]:          0 :                 if (mcgstatus & MCG_STATUS_RIPV) {
    1130                 :          0 :                         mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
    1131                 :          0 :                         return true;
    1132                 :            :                 }
    1133                 :            :         }
    1134                 :            :         return false;
    1135                 :            : }
    1136                 :            : /*
                         :            :  * Scan this CPU's machine check banks and log the errors found.
                         :            :  *
                         :            :  * @m:           record reused for each bank (bank, status, addr, misc and
                         :            :  *               severity are overwritten); on return it holds a copy of
                         :            :  *               *final.
                         :            :  * @final:       receives a copy of the logged record whose severity
                         :            :  *               exceeds *worst.
                         :            :  * @toclear:     bitmap; the bit of every scanned bank is cleared first and
                         :            :  *               set again only for banks that passed the skip checks below.
                         :            :  * @valid_banks: bitmap of banks to look at; other banks are skipped.
                         :            :  * @no_way_out:  when nonzero, errors that would otherwise be left for
                         :            :  *               machine_check_poll() are processed as well.
                         :            :  * @worst:       in/out: highest severity seen so far.
                         :            :  *
                         :            :  * A bank is also skipped when its ctl is 0 or MCI_STATUS_VAL is not set
                         :            :  * in its status register.
                         :            :  */
    1137                 :          0 : static void __mc_scan_banks(struct mce *m, struct mce *final,
    1138                 :            :                             unsigned long *toclear, unsigned long *valid_banks,
    1139                 :            :                             int no_way_out, int *worst)
    1140                 :            : {
    1141                 :          0 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    1142                 :          0 :         struct mca_config *cfg = &mca_cfg;
    1143                 :          0 :         int severity, i;
    1144                 :            : 
    1145         [ #  # ]:          0 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
    1146                 :          0 :                 __clear_bit(i, toclear);
    1147         [ #  # ]:          0 :                 if (!test_bit(i, valid_banks))
    1148                 :          0 :                         continue;
    1149                 :            : 
    1150         [ #  # ]:          0 :                 if (!mce_banks[i].ctl)
    1151                 :          0 :                         continue;
    1152                 :            : 
    1153                 :          0 :                 m->misc = 0;
    1154                 :          0 :                 m->addr = 0;
    1155                 :          0 :                 m->bank = i;
    1156                 :            : 
    1157                 :          0 :                 m->status = mce_rdmsrl(msr_ops.status(i));
    1158         [ #  # ]:          0 :                 if (!(m->status & MCI_STATUS_VAL))
    1159                 :          0 :                         continue;
    1160                 :            : 
    1161                 :            :                 /*
    1162                 :            :                  * Corrected or non-signaled errors are handled by
    1163                 :            :                  * machine_check_poll(). Leave them alone, unless this panics.
    1164                 :            :                  */
    1165   [ #  #  #  #  :          0 :                 if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
                   #  # ]
    1166                 :            :                         !no_way_out)
    1167                 :          0 :                         continue;
    1168                 :            : 
    1169                 :            :                 /* Set taint even when machine check was not enabled. */
    1170                 :          0 :                 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
    1171                 :            : 
    1172                 :          0 :                 severity = mce_severity(m, cfg->tolerant, NULL, true);
    1173                 :            : 
    1174                 :            :                 /*
    1175                 :            :                  * When machine check was for corrected/deferred handler don't
    1176                 :            :                  * touch, unless we're panicking.
    1177                 :            :                  */
    1178         [ #  # ]:          0 :                 if ((severity == MCE_KEEP_SEVERITY ||
    1179         [ #  # ]:          0 :                      severity == MCE_UCNA_SEVERITY) && !no_way_out)
    1180                 :          0 :                         continue;
    1181                 :            : 
    1182                 :          0 :                 __set_bit(i, toclear);
    1183                 :            : 
    1184                 :            :                 /* Machine check event was not enabled. Clear, but ignore. */
    1185         [ #  # ]:          0 :                 if (severity == MCE_NO_SEVERITY)
    1186                 :          0 :                         continue;
    1187                 :            : 
    1188                 :          0 :                 mce_read_aux(m, i);
    1189                 :            : 
    1190                 :            :                 /* assuming valid severity level != 0 */
    1191                 :          0 :                 m->severity = severity;
    1192                 :            : 
    1193                 :          0 :                 mce_log(m);
    1194                 :            : 
    1195         [ #  # ]:          0 :                 if (severity > *worst) {
    1196                 :          0 :                         *final = *m;
    1197                 :          0 :                         *worst = severity;
    1198                 :            :                 }
    1199                 :            :         }
    1200                 :            : 
    1201                 :            :         /* mce_clear_state will clear *final, save locally for use later */
    1202                 :          0 :         *m = *final;
    1203                 :          0 : }
    1204                 :            : 
    1205                 :            : /*
    1206                 :            :  * The actual machine check handler. This only handles real
    1207                 :            :  * exceptions when something got corrupted coming in through int 18.
    1208                 :            :  *
    1209                 :            :  * This is executed in NMI context not subject to normal locking rules. This
    1210                 :            :  * implies that most kernel services cannot be safely used. Don't even
    1211                 :            :  * think about putting a printk in there!
    1212                 :            :  *
    1213                 :            :  * On Intel systems this is entered on all CPUs in parallel through
    1214                 :            :  * MCE broadcast. However some CPUs might be broken beyond repair,
    1215                 :            :  * so be always careful when synchronizing with others.
                         :            :  *
                         :            :  * @regs:       register state of the interrupted context; passed to
                         :            :  *              ist_enter()/ist_exit(), mce_gather_info(), mce_no_way_out()
                         :            :  *              and fixup_exception(), among others.
                         :            :  * @error_code: forwarded unchanged to fixup_exception().
                         :            :  *
                         :            :  * Outline:
                         :            :  *  - return at once if __mc_check_crashing_cpu() says so;
                         :            :  *  - gather state into a local struct mce and into this CPU's mces_seen;
                         :            :  *  - for a local MCE panic right away if there is no way out, otherwise
                         :            :  *    rendezvous with the other CPUs through mce_start()/mce_end();
                         :            :  *  - scan the banks and, unless there is no way out, clear them;
                         :            :  *  - panic if there is still no way out and tolerant is not 3;
                         :            :  *  - for MCE_AR_SEVERITY or a pending kill: in user mode send SIGBUS if
                         :            :  *    kill_it is set or do_memory_failure() fails, in kernel mode panic
                         :            :  *    if fixup_exception() fails.
    1216                 :            :  */
    1217                 :          0 : void do_machine_check(struct pt_regs *regs, long error_code)
    1218                 :            : {
    1219                 :          0 :         DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
    1220                 :          0 :         DECLARE_BITMAP(toclear, MAX_NR_BANKS);
    1221                 :          0 :         struct mca_config *cfg = &mca_cfg;
    1222                 :          0 :         int cpu = smp_processor_id();
    1223                 :          0 :         struct mce m, *final;
    1224                 :          0 :         char *msg = NULL;
    1225                 :          0 :         int worst = 0;
    1226                 :            : 
    1227                 :            :         /*
    1228                 :            :          * Establish sequential order between the CPUs entering the machine
    1229                 :            :          * check handler.
    1230                 :            :          */
    1231                 :          0 :         int order = -1;
    1232                 :            : 
    1233                 :            :         /*
    1234                 :            :          * If no_way_out gets set, there is no safe way to recover from this
    1235                 :            :          * MCE.  If mca_cfg.tolerant is cranked up, we'll try anyway.
    1236                 :            :          */
    1237                 :          0 :         int no_way_out = 0;
    1238                 :            : 
    1239                 :            :         /*
    1240                 :            :          * If kill_it gets set, there might be a way to recover from this
    1241                 :            :          * error.
    1242                 :            :          */
    1243                 :          0 :         int kill_it = 0;
    1244                 :            : 
    1245                 :            :         /*
    1246                 :            :          * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
    1247                 :            :          * on Intel.
    1248                 :            :          */
    1249                 :          0 :         int lmce = 1;
    1250                 :            : 
    1251         [ #  # ]:          0 :         if (__mc_check_crashing_cpu(cpu))
    1252                 :          0 :                 return;
    1253                 :            : 
    1254                 :          0 :         ist_enter(regs);
    1255                 :            : 
    1256                 :          0 :         this_cpu_inc(mce_exception_count);
    1257                 :            : 
    1258                 :          0 :         mce_gather_info(&m, regs);
    1259                 :          0 :         m.tsc = rdtsc();
    1260                 :            : 
    1261                 :          0 :         final = this_cpu_ptr(&mces_seen);
    1262                 :          0 :         *final = m;
    1263                 :            : 
    1264                 :          0 :         memset(valid_banks, 0, sizeof(valid_banks));
    1265                 :          0 :         no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
    1266                 :            : 
    1267                 :          0 :         barrier();
    1268                 :            : 
    1269                 :            :         /*
    1270                 :            :          * When there is no valid restart IP (MCG_STATUS_RIPV is clear) we
    1271                 :            :          * might need to kill or panic. Assume the worst for now, but if we
    1272                 :            :          * find the severity is MCE_AR_SEVERITY we have other options.
    1273                 :            :          */
    1274                 :          0 :         if (!(m.mcgstatus & MCG_STATUS_RIPV))
    1275                 :            :                 kill_it = 1;
    1276                 :            : 
    1277                 :            :         /*
    1278                 :            :          * Check if this MCE is signaled to only this logical processor,
    1279                 :            :          * on Intel, Zhaoxin only.
    1280                 :            :          */
    1281         [ #  # ]:          0 :         if (m.cpuvendor == X86_VENDOR_INTEL ||
    1282                 :            :             m.cpuvendor == X86_VENDOR_ZHAOXIN)
    1283                 :          0 :                 lmce = m.mcgstatus & MCG_STATUS_LMCES;
    1284                 :            : 
    1285                 :            :         /*
    1286                 :            :          * Local machine check may already know that we have to panic.
    1287                 :            :          * Broadcast machine check begins rendezvous in mce_start()
    1288                 :            :          * Go through all banks in exclusion of the other CPUs. This way we
    1289                 :            :          * don't report duplicated events on shared banks because the first one
    1290                 :            :          * to see it will clear it.
    1291                 :            :          */
    1292         [ #  # ]:          0 :         if (lmce) {
    1293         [ #  # ]:          0 :                 if (no_way_out)
    1294                 :          0 :                         mce_panic("Fatal local machine check", &m, msg);
    1295                 :            :         } else {
    1296                 :          0 :                 order = mce_start(&no_way_out);
    1297                 :            :         }
    1298                 :            : 
    1299                 :          0 :         __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
    1300                 :            : 
    1301         [ #  # ]:          0 :         if (!no_way_out)
    1302                 :          0 :                 mce_clear_state(toclear);
    1303                 :            : 
    1304                 :            :         /*
    1305                 :            :          * Do most of the synchronization with other CPUs.
    1306                 :            :          * When there's any problem use only local no_way_out state.
    1307                 :            :          */
    1308         [ #  # ]:          0 :         if (!lmce) {
    1309         [ #  # ]:          0 :                 if (mce_end(order) < 0)
    1310                 :          0 :                         no_way_out = worst >= MCE_PANIC_SEVERITY;
    1311                 :            :         } else {
    1312                 :            :                 /*
    1313                 :            :                  * If there was a fatal machine check we should have
    1314                 :            :                  * already called mce_panic earlier in this function.
    1315                 :            :                  * Since we re-read the banks, we might have found
    1316                 :            :                  * something new. Check again to see if we found a
    1317                 :            :                  * fatal error. We call "mce_severity()" again to
    1318                 :            :                  * make sure we have the right "msg".
    1319                 :            :                  */
    1320   [ #  #  #  # ]:          0 :                 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
    1321                 :          0 :                         mce_severity(&m, cfg->tolerant, &msg, true);
    1322                 :          0 :                         mce_panic("Local fatal machine check!", &m, msg);
    1323                 :            :                 }
    1324                 :            :         }
    1325                 :            : 
    1326                 :            :         /*
    1327                 :            :          * If tolerant is at an insane level we drop requests to kill
    1328                 :            :          * processes and continue even when there is no way out.
    1329                 :            :          */
    1330         [ #  # ]:          0 :         if (cfg->tolerant == 3)
    1331                 :            :                 kill_it = 0;
    1332         [ #  # ]:          0 :         else if (no_way_out)
    1333                 :          0 :                 mce_panic("Fatal machine check on current CPU", &m, msg);
    1334                 :            : 
    1335         [ #  # ]:          0 :         if (worst > 0)
    1336                 :          0 :                 irq_work_queue(&mce_irq_work);
    1337                 :            : 
    1338                 :          0 :         mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
    1339                 :            : 
    1340                 :          0 :         sync_core();
    1341                 :            : 
    1342   [ #  #  #  # ]:          0 :         if (worst != MCE_AR_SEVERITY && !kill_it)
    1343                 :          0 :                 goto out_ist;
    1344                 :            : 
    1345                 :            :         /* Fault was in user mode and we need to take some action */
    1346         [ #  # ]:          0 :         if ((m.cs & 3) == 3) {
    1347                 :          0 :                 ist_begin_non_atomic(regs);
    1348                 :          0 :                 local_irq_enable();
    1349                 :            : 
    1350   [ #  #  #  # ]:          0 :                 if (kill_it || do_memory_failure(&m))
    1351                 :          0 :                         force_sig(SIGBUS);
    1352                 :          0 :                 local_irq_disable();
    1353                 :          0 :                 ist_end_non_atomic();
    1354                 :            :         } else {
    1355         [ #  # ]:          0 :                 if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
    1356                 :          0 :                         mce_panic("Failed kernel mode recovery", &m, msg);
    1357                 :            :         }
    1358                 :            : 
    1359                 :          0 : out_ist:
    1360                 :          0 :         ist_exit(regs);
    1361                 :            : }
    1362                 :            : EXPORT_SYMBOL_GPL(do_machine_check);
    1363                 :            : 
    1364                 :            : #ifndef CONFIG_MEMORY_FAILURE
    1365                 :          0 : int memory_failure(unsigned long pfn, int flags)
    1366                 :            : {
    1367                 :            :         /* mce_severity() should not hand us an ACTION_REQUIRED error */
    1368         [ #  # ]:          0 :         BUG_ON(flags & MF_ACTION_REQUIRED);
    1369                 :          0 :         pr_err("Uncorrected memory error in page 0x%lx ignored\n"
    1370                 :            :                "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
    1371                 :            :                pfn);
    1372                 :            : 
    1373                 :          0 :         return 0;
    1374                 :            : }
    1375                 :            : #endif
    1376                 :            : 
    1377                 :            : /*
    1378                 :            :  * Periodic polling timer for "silent" machine check errors.  If the
    1379                 :            :  * poller finds an MCE, poll 2x faster.  When the poller finds no more
    1380                 :            :  * errors, poll 2x slower (up to check_interval seconds).
    1381                 :            :  */
    1382                 :            : static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
    1383                 :            : 
    1384                 :            : static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
    1385                 :            : static DEFINE_PER_CPU(struct timer_list, mce_timer);
    1386                 :            : 
    1387                 :          0 : static unsigned long mce_adjust_timer_default(unsigned long interval)
    1388                 :            : {
    1389                 :          0 :         return interval;
    1390                 :            : }
    1391                 :            : 
    1392                 :            : static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
    1393                 :            : 
    1394                 :         78 : static void __start_timer(struct timer_list *t, unsigned long interval)
    1395                 :            : {
    1396                 :         78 :         unsigned long when = jiffies + interval;
    1397                 :         78 :         unsigned long flags;
    1398                 :            : 
    1399                 :         78 :         local_irq_save(flags);
    1400                 :            : 
    1401   [ -  +  -  - ]:         78 :         if (!timer_pending(t) || time_before(when, t->expires))
    1402                 :         78 :                 mod_timer(t, round_jiffies(when));
    1403                 :            : 
    1404                 :         78 :         local_irq_restore(flags);
    1405                 :         78 : }
    1406                 :            : 
    1407                 :          0 : static void mce_timer_fn(struct timer_list *t)
    1408                 :            : {
    1409                 :          0 :         struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
    1410                 :          0 :         unsigned long iv;
    1411                 :            : 
    1412         [ #  # ]:          0 :         WARN_ON(cpu_t != t);
    1413                 :            : 
    1414                 :          0 :         iv = __this_cpu_read(mce_next_interval);
    1415                 :            : 
    1416         [ #  # ]:          0 :         if (mce_available(this_cpu_ptr(&cpu_info))) {
    1417                 :          0 :                 machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
    1418                 :            : 
    1419         [ #  # ]:          0 :                 if (mce_intel_cmci_poll()) {
    1420                 :          0 :                         iv = mce_adjust_timer(iv);
    1421                 :          0 :                         goto done;
    1422                 :            :                 }
    1423                 :            :         }
    1424                 :            : 
    1425                 :            :         /*
    1426                 :            :          * Alert userspace if needed. If we logged an MCE, reduce the polling
    1427                 :            :          * interval, otherwise increase the polling interval.
    1428                 :            :          */
    1429         [ #  # ]:          0 :         if (mce_notify_irq())
    1430                 :          0 :                 iv = max(iv / 2, (unsigned long) HZ/100);
    1431                 :            :         else
    1432                 :          0 :                 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
    1433                 :            : 
    1434                 :          0 : done:
    1435                 :          0 :         __this_cpu_write(mce_next_interval, iv);
    1436                 :          0 :         __start_timer(t, iv);
    1437                 :          0 : }
    1438                 :            : 
    1439                 :            : /*
    1440                 :            :  * Ensure that the timer is firing in @interval from now.
    1441                 :            :  */
    1442                 :          0 : void mce_timer_kick(unsigned long interval)
    1443                 :            : {
    1444                 :          0 :         struct timer_list *t = this_cpu_ptr(&mce_timer);
    1445                 :          0 :         unsigned long iv = __this_cpu_read(mce_next_interval);
    1446                 :            : 
    1447                 :          0 :         __start_timer(t, interval);
    1448                 :            : 
    1449         [ #  # ]:          0 :         if (interval < iv)
    1450                 :          0 :                 __this_cpu_write(mce_next_interval, interval);
    1451                 :          0 : }
    1452                 :            : 
    1453                 :            : /* Must not be called in IRQ context where del_timer_sync() can deadlock */
    1454                 :          0 : static void mce_timer_delete_all(void)
    1455                 :            : {
    1456                 :          0 :         int cpu;
    1457                 :            : 
    1458         [ #  # ]:          0 :         for_each_online_cpu(cpu)
    1459                 :          0 :                 del_timer_sync(&per_cpu(mce_timer, cpu));
    1460                 :          0 : }
    1461                 :            : 
    1462                 :            : /*
    1463                 :            :  * Notify the user(s) about new machine check events.
    1464                 :            :  * Can be called from interrupt context, but not from machine check/NMI
    1465                 :            :  * context.
    1466                 :            :  */
    1467                 :          0 : int mce_notify_irq(void)
    1468                 :            : {
    1469                 :            :         /* Not more than two messages every minute */
    1470                 :          0 :         static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
    1471                 :            : 
    1472         [ #  # ]:          0 :         if (test_and_clear_bit(0, &mce_need_notify)) {
    1473                 :          0 :                 mce_work_trigger();
    1474                 :            : 
    1475         [ #  # ]:          0 :                 if (__ratelimit(&ratelimit))
    1476                 :          0 :                         pr_info(HW_ERR "Machine check events logged\n");
    1477                 :            : 
    1478                 :          0 :                 return 1;
    1479                 :            :         }
    1480                 :            :         return 0;
    1481                 :            : }
    1482                 :            : EXPORT_SYMBOL_GPL(mce_notify_irq);
    1483                 :            : 
    1484                 :         78 : static void __mcheck_cpu_mce_banks_init(void)
    1485                 :            : {
    1486                 :        156 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    1487                 :         78 :         u8 n_banks = this_cpu_read(mce_num_banks);
    1488                 :         78 :         int i;
    1489                 :            : 
    1490         [ +  + ]:        936 :         for (i = 0; i < n_banks; i++) {
    1491                 :        780 :                 struct mce_bank *b = &mce_banks[i];
    1492                 :            : 
    1493                 :            :                 /*
    1494                 :            :                  * Init them all, __mcheck_cpu_apply_quirks() is going to apply
    1495                 :            :                  * the required vendor quirks before
    1496                 :            :                  * __mcheck_cpu_init_clear_banks() does the final bank setup.
    1497                 :            :                  */
    1498                 :        780 :                 b->ctl = -1ULL;
    1499                 :        780 :                 b->init = 1;
    1500                 :            :         }
    1501                 :            : }
    1502                 :            : 
    1503                 :            : /*
    1504                 :            :  * Initialize Machine Checks for a CPU.
    1505                 :            :  */
    1506                 :         78 : static void __mcheck_cpu_cap_init(void)
    1507                 :            : {
    1508                 :         78 :         u64 cap;
    1509                 :         78 :         u8 b;
    1510                 :            : 
    1511                 :         78 :         rdmsrl(MSR_IA32_MCG_CAP, cap);
    1512                 :            : 
    1513                 :         78 :         b = cap & MCG_BANKCNT_MASK;
    1514                 :            : 
    1515         [ -  + ]:         78 :         if (b > MAX_NR_BANKS) {
    1516                 :          0 :                 pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
    1517                 :            :                         smp_processor_id(), MAX_NR_BANKS, b);
    1518                 :          0 :                 b = MAX_NR_BANKS;
    1519                 :            :         }
    1520                 :            : 
    1521                 :         78 :         this_cpu_write(mce_num_banks, b);
    1522                 :            : 
    1523                 :         78 :         __mcheck_cpu_mce_banks_init();
    1524                 :            : 
    1525                 :            :         /* Use accurate RIP reporting if available. */
    1526   [ -  +  -  - ]:         78 :         if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
    1527                 :          0 :                 mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
    1528                 :            : 
    1529         [ -  + ]:         78 :         if (cap & MCG_SER_P)
    1530                 :          0 :                 mca_cfg.ser = 1;
    1531                 :         78 : }
    1532                 :            : 
    1533                 :         78 : static void __mcheck_cpu_init_generic(void)
    1534                 :            : {
    1535                 :         78 :         enum mcp_flags m_fl = 0;
    1536                 :         78 :         mce_banks_t all_banks;
    1537                 :         78 :         u64 cap;
    1538                 :            : 
    1539         [ +  - ]:         78 :         if (!mca_cfg.bootlog)
    1540                 :         78 :                 m_fl = MCP_DONTLOG;
    1541                 :            : 
    1542                 :            :         /*
    1543                 :            :          * Log the machine checks left over from the previous reset.
    1544                 :            :          */
    1545                 :         78 :         bitmap_fill(all_banks, MAX_NR_BANKS);
    1546                 :         78 :         machine_check_poll(MCP_UC | m_fl, &all_banks);
    1547                 :            : 
    1548                 :         78 :         cr4_set_bits(X86_CR4_MCE);
    1549                 :            : 
    1550                 :         78 :         rdmsrl(MSR_IA32_MCG_CAP, cap);
    1551         [ +  - ]:         78 :         if (cap & MCG_CTL_P)
    1552                 :         78 :                 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
    1553                 :         78 : }
    1554                 :            : 
    1555                 :         78 : static void __mcheck_cpu_init_clear_banks(void)
    1556                 :            : {
    1557                 :         78 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    1558                 :         78 :         int i;
    1559                 :            : 
    1560         [ +  + ]:        858 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
    1561                 :        780 :                 struct mce_bank *b = &mce_banks[i];
    1562                 :            : 
    1563         [ -  + ]:        780 :                 if (!b->init)
    1564                 :          0 :                         continue;
    1565                 :        780 :                 wrmsrl(msr_ops.ctl(i), b->ctl);
    1566                 :        780 :                 wrmsrl(msr_ops.status(i), 0);
    1567                 :            :         }
    1568                 :         78 : }
    1569                 :            : 
    1570                 :            : /*
    1571                 :            :  * Do a final check to see if there are any unused/RAZ banks.
    1572                 :            :  *
    1573                 :            :  * This must be done after the banks have been initialized and any quirks have
    1574                 :            :  * been applied.
    1575                 :            :  *
    1576                 :            :  * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
    1577                 :            :  * Otherwise, a user who disables a bank will not be able to re-enable it
    1578                 :            :  * without a system reboot.
    1579                 :            :  */
    1580                 :         78 : static void __mcheck_cpu_check_banks(void)
    1581                 :            : {
    1582                 :         78 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    1583                 :         78 :         u64 msrval;
    1584                 :         78 :         int i;
    1585                 :            : 
    1586         [ +  + ]:        858 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
    1587                 :        780 :                 struct mce_bank *b = &mce_banks[i];
    1588                 :            : 
    1589         [ -  + ]:        780 :                 if (!b->init)
    1590                 :          0 :                         continue;
    1591                 :            : 
    1592                 :        780 :                 rdmsrl(msr_ops.ctl(i), msrval);
    1593                 :        780 :                 b->init = !!msrval;
    1594                 :            :         }
    1595                 :         78 : }
    1596                 :            : 
    1597                 :            : /*
    1598                 :            :  * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
    1599                 :            :  * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
    1600                 :            :  * Vol 3B Table 15-20). But this confuses both the code that determines
    1601                 :            :  * whether the machine check occurred in kernel or user mode, and also
    1602                 :            :  * the severity assessment code. Pretend that EIPV was set, and take the
    1603                 :            :  * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
    1604                 :            :  */
    1605                 :          0 : static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
    1606                 :            : {
    1607         [ #  # ]:          0 :         if (bank != 0)
    1608                 :            :                 return;
    1609         [ #  # ]:          0 :         if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
    1610                 :            :                 return;
    1611         [ #  # ]:          0 :         if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
    1612                 :            :                           MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
    1613                 :            :                           MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
    1614                 :            :                           MCACOD)) !=
    1615                 :            :                          (MCI_STATUS_UC|MCI_STATUS_EN|
    1616                 :            :                           MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
    1617                 :            :                           MCI_STATUS_AR|MCACOD_INSTR))
    1618                 :            :                 return;
    1619                 :            : 
    1620                 :          0 :         m->mcgstatus |= MCG_STATUS_EIPV;
    1621                 :          0 :         m->ip = regs->ip;
    1622                 :          0 :         m->cs = regs->cs;
    1623                 :            : }
    1624                 :            : 
    1625                 :            : /* Add per CPU specific workarounds here */
    1626                 :         78 : static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
    1627                 :            : {
    1628                 :         78 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    1629                 :         78 :         struct mca_config *cfg = &mca_cfg;
    1630                 :            : 
    1631         [ -  + ]:         78 :         if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
    1632                 :          0 :                 pr_info("unknown CPU type - not enabling MCE support\n");
    1633                 :          0 :                 return -EOPNOTSUPP;
    1634                 :            :         }
    1635                 :            : 
    1636                 :            :         /* This should be disabled by the BIOS, but isn't always */
    1637         [ +  - ]:         78 :         if (c->x86_vendor == X86_VENDOR_AMD) {
    1638   [ -  +  -  - ]:         78 :                 if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
    1639                 :            :                         /*
    1640                 :            :                          * disable GART TBL walk error reporting, which
    1641                 :            :                          * trips off incorrectly with the IOMMU & 3ware
    1642                 :            :                          * & Cerberus:
    1643                 :            :                          */
    1644                 :          0 :                         clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
    1645                 :            :                 }
    1646   [ +  -  +  - ]:         78 :                 if (c->x86 < 0x11 && cfg->bootlog < 0) {
    1647                 :            :                         /*
    1648                 :            :                          * Lots of broken BIOS around that don't clear them
    1649                 :            :                          * by default and leave crap in there. Don't log:
    1650                 :            :                          */
    1651                 :         78 :                         cfg->bootlog = 0;
    1652                 :            :                 }
    1653                 :            :                 /*
    1654                 :            :                  * Various K7s with broken bank 0 around. Always disable
    1655                 :            :                  * by default.
    1656                 :            :                  */
    1657   [ +  -  +  - ]:         78 :                 if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
    1658                 :         78 :                         mce_banks[0].ctl = 0;
    1659                 :            : 
    1660                 :            :                 /*
    1661                 :            :                  * overflow_recov is supported for F15h Models 00h-0fh
    1662                 :            :                  * even though we don't have a CPUID bit for it.
    1663                 :            :                  */
    1664   [ -  +  -  - ]:         78 :                 if (c->x86 == 0x15 && c->x86_model <= 0xf)
    1665                 :          0 :                         mce_flags.overflow_recov = 1;
    1666                 :            : 
    1667                 :            :         }
    1668                 :            : 
    1669         [ -  + ]:         78 :         if (c->x86_vendor == X86_VENDOR_INTEL) {
    1670                 :            :                 /*
    1671                 :            :                  * SDM documents that on family 6 bank 0 should not be written
    1672                 :            :                  * because it aliases to another special BIOS controlled
    1673                 :            :                  * register.
    1674                 :            :                  * But it's not aliased anymore on model 0x1a+
    1675                 :            :                  * Don't ignore bank 0 completely because there could be a
    1676                 :            :                  * valid event later, merely don't write CTL0.
    1677                 :            :                  */
    1678                 :            : 
    1679   [ #  #  #  #  :          0 :                 if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
                   #  # ]
    1680                 :          0 :                         mce_banks[0].init = 0;
    1681                 :            : 
    1682                 :            :                 /*
    1683                 :            :                  * All newer Intel systems support MCE broadcasting. Enable
    1684                 :            :                  * synchronization with a one second timeout.
    1685                 :            :                  */
    1686   [ #  #  #  #  :          0 :                 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
                   #  # ]
    1687         [ #  # ]:          0 :                         cfg->monarch_timeout < 0)
    1688                 :          0 :                         cfg->monarch_timeout = USEC_PER_SEC;
    1689                 :            : 
    1690                 :            :                 /*
    1691                 :            :                  * There are also broken BIOSes on some Pentium M and
    1692                 :            :                  * earlier systems:
    1693                 :            :                  */
    1694   [ #  #  #  #  :          0 :                 if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
                   #  # ]
    1695                 :          0 :                         cfg->bootlog = 0;
    1696                 :            : 
    1697         [ #  # ]:          0 :                 if (c->x86 == 6 && c->x86_model == 45)
    1698                 :          0 :                         quirk_no_way_out = quirk_sandybridge_ifu;
    1699                 :            :         }
    1700                 :            : 
    1701         [ -  + ]:         78 :         if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
    1702                 :            :                 /*
    1703                 :            :                  * All newer Zhaoxin CPUs support MCE broadcasting. Enable
    1704                 :            :                  * synchronization with a one second timeout.
    1705                 :            :                  */
    1706   [ #  #  #  # ]:          0 :                 if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
    1707         [ #  # ]:          0 :                         if (cfg->monarch_timeout < 0)
    1708                 :          0 :                                 cfg->monarch_timeout = USEC_PER_SEC;
    1709                 :            :                 }
    1710                 :            :         }
    1711                 :            : 
    1712         [ +  - ]:         78 :         if (cfg->monarch_timeout < 0)
    1713                 :         78 :                 cfg->monarch_timeout = 0;
    1714         [ -  + ]:         78 :         if (cfg->bootlog != 0)
    1715                 :          0 :                 cfg->panic_timeout = 30;
    1716                 :            : 
    1717                 :            :         return 0;
    1718                 :            : }
    1719                 :            : 
    1720                 :         78 : static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
    1721                 :            : {
    1722                 :         78 :         if (c->x86 != 5)
    1723                 :            :                 return 0;
    1724                 :            : 
    1725         [ #  # ]:          0 :         switch (c->x86_vendor) {
    1726                 :            :         case X86_VENDOR_INTEL:
    1727                 :            :                 intel_p5_mcheck_init(c);
    1728                 :            :                 return 1;
    1729                 :            :                 break;
    1730                 :            :         case X86_VENDOR_CENTAUR:
    1731                 :            :                 winchip_mcheck_init(c);
    1732                 :            :                 return 1;
    1733                 :            :                 break;
    1734                 :            :         default:
    1735                 :            :                 return 0;
    1736                 :            :         }
    1737                 :            : 
    1738                 :            :         return 0;
    1739                 :            : }
    1740                 :            : 
    1741                 :            : /*
    1742                 :            :  * Init basic CPU features needed for early decoding of MCEs.
    1743                 :            :  */
    1744                 :         78 : static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
    1745                 :            : {
    1746         [ +  - ]:         78 :         if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
    1747                 :         78 :                 mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
    1748                 :         78 :                 mce_flags.succor         = !!cpu_has(c, X86_FEATURE_SUCCOR);
    1749                 :         78 :                 mce_flags.smca           = !!cpu_has(c, X86_FEATURE_SMCA);
    1750                 :            : 
    1751         [ -  + ]:         78 :                 if (mce_flags.smca) {
    1752                 :          0 :                         msr_ops.ctl     = smca_ctl_reg;
    1753                 :          0 :                         msr_ops.status  = smca_status_reg;
    1754                 :          0 :                         msr_ops.addr    = smca_addr_reg;
    1755                 :          0 :                         msr_ops.misc    = smca_misc_reg;
    1756                 :            :                 }
    1757                 :            :         }
    1758                 :         78 : }
    1759                 :            : 
    1760                 :          0 : static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
    1761                 :            : {
    1762                 :          0 :         struct mca_config *cfg = &mca_cfg;
    1763                 :            : 
    1764                 :            :          /*
    1765                 :            :           * All newer Centaur CPUs support MCE broadcasting. Enable
    1766                 :            :           * synchronization with a one second timeout.
    1767                 :            :           */
    1768         [ #  # ]:          0 :         if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
    1769         [ #  # ]:          0 :              c->x86 > 6) {
    1770         [ #  # ]:          0 :                 if (cfg->monarch_timeout < 0)
    1771                 :          0 :                         cfg->monarch_timeout = USEC_PER_SEC;
    1772                 :            :         }
    1773                 :            : }
    1774                 :            : 
    1775                 :          0 : static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
    1776                 :            : {
    1777                 :          0 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    1778                 :            : 
    1779                 :            :         /*
    1780                 :            :          * These CPUs have MCA bank 8 which reports only one error type called
    1781                 :            :          * SVAD (System View Address Decoder). The reporting of that error is
    1782                 :            :          * controlled by IA32_MC8.CTL.0.
    1783                 :            :          *
    1784                 :            :          * If enabled, prefetching on these CPUs will cause SVAD MCE when
    1785                 :            :          * virtual machines start and result in a system  panic. Always disable
    1786                 :            :          * bank 8 SVAD error by default.
    1787                 :            :          */
    1788         [ #  # ]:          0 :         if ((c->x86 == 7 && c->x86_model == 0x1b) ||
    1789         [ #  # ]:          0 :             (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
    1790         [ #  # ]:          0 :                 if (this_cpu_read(mce_num_banks) > 8)
    1791                 :          0 :                         mce_banks[8].ctl = 0;
    1792                 :            :         }
    1793                 :            : 
    1794                 :          0 :         intel_init_cmci();
    1795                 :          0 :         intel_init_lmce();
    1796                 :          0 :         mce_adjust_timer = cmci_intel_adjust_timer;
    1797                 :          0 : }
    1798                 :            : 
    1799                 :          0 : static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
    1800                 :            : {
    1801                 :          0 :         intel_clear_lmce();
    1802                 :          0 : }
    1803                 :            : 
    1804                 :         78 : static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
    1805                 :            : {
    1806   [ -  +  -  -  :         78 :         switch (c->x86_vendor) {
                   -  - ]
    1807                 :          0 :         case X86_VENDOR_INTEL:
    1808                 :          0 :                 mce_intel_feature_init(c);
    1809                 :          0 :                 mce_adjust_timer = cmci_intel_adjust_timer;
    1810                 :          0 :                 break;
    1811                 :            : 
    1812                 :         78 :         case X86_VENDOR_AMD: {
    1813                 :         78 :                 mce_amd_feature_init(c);
    1814                 :         78 :                 break;
    1815                 :            :                 }
    1816                 :            : 
    1817                 :            :         case X86_VENDOR_HYGON:
    1818                 :          0 :                 mce_hygon_feature_init(c);
    1819                 :            :                 break;
    1820                 :            : 
    1821                 :            :         case X86_VENDOR_CENTAUR:
    1822         [ #  # ]:          0 :                 mce_centaur_feature_init(c);
    1823                 :            :                 break;
    1824                 :            : 
    1825                 :          0 :         case X86_VENDOR_ZHAOXIN:
    1826                 :          0 :                 mce_zhaoxin_feature_init(c);
    1827                 :          0 :                 break;
    1828                 :            : 
    1829                 :            :         default:
    1830                 :            :                 break;
    1831                 :            :         }
    1832                 :         78 : }
    1833                 :            : 
    1834                 :          0 : static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
    1835                 :            : {
    1836      [ #  #  # ]:          0 :         switch (c->x86_vendor) {
    1837                 :          0 :         case X86_VENDOR_INTEL:
    1838                 :          0 :                 mce_intel_feature_clear(c);
    1839                 :          0 :                 break;
    1840                 :            : 
    1841                 :            :         case X86_VENDOR_ZHAOXIN:
    1842                 :          0 :                 mce_zhaoxin_feature_clear(c);
    1843                 :            :                 break;
    1844                 :            : 
    1845                 :            :         default:
    1846                 :            :                 break;
    1847                 :            :         }
    1848                 :          0 : }
    1849                 :            : 
    1850                 :         78 : static void mce_start_timer(struct timer_list *t)
    1851                 :            : {
    1852                 :         78 :         unsigned long iv = check_interval * HZ;
    1853                 :            : 
    1854   [ +  -  +  - ]:         78 :         if (mca_cfg.ignore_ce || !iv)
    1855                 :            :                 return;
    1856                 :            : 
    1857                 :         78 :         this_cpu_write(mce_next_interval, iv);
    1858                 :         78 :         __start_timer(t, iv);
    1859                 :            : }
    1860                 :            : 
    1861                 :         78 : static void __mcheck_cpu_setup_timer(void)
    1862                 :            : {
    1863                 :        156 :         struct timer_list *t = this_cpu_ptr(&mce_timer);
    1864                 :            : 
    1865                 :         78 :         timer_setup(t, mce_timer_fn, TIMER_PINNED);
    1866                 :         78 : }
    1867                 :            : 
    1868                 :          0 : static void __mcheck_cpu_init_timer(void)
    1869                 :            : {
    1870                 :          0 :         struct timer_list *t = this_cpu_ptr(&mce_timer);
    1871                 :            : 
    1872                 :          0 :         timer_setup(t, mce_timer_fn, TIMER_PINNED);
    1873                 :          0 :         mce_start_timer(t);
    1874                 :          0 : }
    1875                 :            : 
    1876                 :          0 : bool filter_mce(struct mce *m)
    1877                 :            : {
    1878         [ #  # ]:          0 :         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
    1879                 :          0 :                 return amd_filter_mce(m);
    1880                 :            : 
    1881                 :            :         return false;
    1882                 :            : }
    1883                 :            : 
    1884                 :            : /* Handle unconfigured int18 (should never happen) */
    1885                 :          0 : static void unexpected_machine_check(struct pt_regs *regs, long error_code)
    1886                 :            : {
    1887                 :          0 :         pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
    1888                 :            :                smp_processor_id());
    1889                 :          0 : }
    1890                 :            : 
    1891                 :            : /* Call the installed machine check handler for this CPU setup. */
    1892                 :            : void (*machine_check_vector)(struct pt_regs *, long error_code) =
    1893                 :            :                                                 unexpected_machine_check;
    1894                 :            : 
    1895                 :          0 : dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
    1896                 :            : {
    1897                 :          0 :         machine_check_vector(regs, error_code);
    1898                 :          0 : }
    1899                 :            : 
    1900                 :            : /*
    1901                 :            :  * Called for each booted CPU to set up machine checks.
    1902                 :            :  * Must be called with preempt off:
    1903                 :            :  * Must be called with preemption disabled.
    1904                 :         78 : void mcheck_cpu_init(struct cpuinfo_x86 *c)
    1905                 :            : {
    1906         [ +  - ]:         78 :         if (mca_cfg.disabled)
    1907                 :            :                 return;
    1908                 :            : 
    1909         [ -  + ]:         78 :         if (__mcheck_cpu_ancient_init(c))
    1910                 :            :                 return;
    1911                 :            : 
    1912         [ +  - ]:         78 :         if (!mce_available(c))
    1913                 :            :                 return;
    1914                 :            : 
    1915                 :         78 :         __mcheck_cpu_cap_init();
    1916                 :            : 
    1917         [ -  + ]:         78 :         if (__mcheck_cpu_apply_quirks(c) < 0) {
    1918                 :          0 :                 mca_cfg.disabled = 1;
    1919                 :          0 :                 return;
    1920                 :            :         }
    1921                 :            : 
    1922         [ -  + ]:         78 :         if (mce_gen_pool_init()) {
    1923                 :          0 :                 mca_cfg.disabled = 1;
    1924                 :          0 :                 pr_emerg("Couldn't allocate MCE records pool!\n");
    1925                 :          0 :                 return;
    1926                 :            :         }
    1927                 :            : 
    1928                 :         78 :         machine_check_vector = do_machine_check;
    1929                 :            : 
    1930                 :         78 :         __mcheck_cpu_init_early(c);
    1931                 :         78 :         __mcheck_cpu_init_generic();
    1932                 :         78 :         __mcheck_cpu_init_vendor(c);
    1933                 :         78 :         __mcheck_cpu_init_clear_banks();
    1934                 :         78 :         __mcheck_cpu_check_banks();
    1935                 :         78 :         __mcheck_cpu_setup_timer();
    1936                 :            : }
    1937                 :            : 
    1938                 :            : /*
    1939                 :            :  * Called for each booted CPU to clear some machine check opt-ins.
    1940                 :            :  */
    1941                 :          0 : void mcheck_cpu_clear(struct cpuinfo_x86 *c)
    1942                 :            : {
    1943         [ #  # ]:          0 :         if (mca_cfg.disabled)
    1944                 :            :                 return;
    1945                 :            : 
    1946         [ #  # ]:          0 :         if (!mce_available(c))
    1947                 :            :                 return;
    1948                 :            : 
    1949                 :            :         /*
    1950                 :            :          * Possible place to also clear settings that are generic to x86:
    1951                 :            :          * __mcheck_cpu_clear_generic(c);
    1952                 :            :          */
    1953                 :          0 :         __mcheck_cpu_clear_vendor(c);
    1954                 :            : 
    1955                 :            : }
    1956                 :            : 
    1957                 :          0 : static void __mce_disable_bank(void *arg)
    1958                 :            : {
    1959                 :          0 :         int bank = *((int *)arg);
    1960                 :          0 :         __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
    1961                 :          0 :         cmci_disable_bank(bank);
    1962                 :          0 : }
    1963                 :            : 
    1964                 :          0 : void mce_disable_bank(int bank)
    1965                 :            : {
    1966         [ #  # ]:          0 :         if (bank >= this_cpu_read(mce_num_banks)) {
    1967                 :          0 :                 pr_warn(FW_BUG
    1968                 :            :                         "Ignoring request to disable invalid MCA bank %d.\n",
    1969                 :            :                         bank);
    1970                 :          0 :                 return;
    1971                 :            :         }
    1972                 :          0 :         set_bit(bank, mce_banks_ce_disabled);
    1973                 :          0 :         on_each_cpu(__mce_disable_bank, &bank, 1);
    1974                 :            : }
    1975                 :            : 
    1976                 :            : /*
    1977                 :            :  * mce=off Disables machine check
    1978                 :            :  * mce=no_cmci Disables CMCI
    1979                 :            :  * mce=no_lmce Disables LMCE
    1980                 :            :  * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
    1981                 :            :  * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
    1982                 :            :  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
    1983                 :            :  *      monarchtimeout is how long to wait for other CPUs on machine
    1984                 :            :  *      check, or 0 to not wait
    1985                 :            :  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD Fam10h
    1986                 :            :  *      and older.
    1987                 :            :  * mce=nobootlog Don't log MCEs from before booting.
    1988                 :            :  * mce=bios_cmci_threshold Don't program the CMCI threshold
    1989                 :            :  * mce=recovery Force-enables memcpy_mcsafe()
    1990                 :            :  */
    1991                 :          0 : static int __init mcheck_enable(char *str)
    1992                 :            : {
    1993                 :          0 :         struct mca_config *cfg = &mca_cfg;
    1994                 :            : 
    1995         [ #  # ]:          0 :         if (*str == 0) {
    1996                 :            :                 enable_p5_mce();
    1997                 :            :                 return 1;
    1998                 :            :         }
    1999         [ #  # ]:          0 :         if (*str == '=')
    2000                 :          0 :                 str++;
    2001         [ #  # ]:          0 :         if (!strcmp(str, "off"))
    2002                 :          0 :                 cfg->disabled = 1;
    2003         [ #  # ]:          0 :         else if (!strcmp(str, "no_cmci"))
    2004                 :          0 :                 cfg->cmci_disabled = true;
    2005         [ #  # ]:          0 :         else if (!strcmp(str, "no_lmce"))
    2006                 :          0 :                 cfg->lmce_disabled = 1;
    2007         [ #  # ]:          0 :         else if (!strcmp(str, "dont_log_ce"))
    2008                 :          0 :                 cfg->dont_log_ce = true;
    2009         [ #  # ]:          0 :         else if (!strcmp(str, "ignore_ce"))
    2010                 :          0 :                 cfg->ignore_ce = true;
    2011   [ #  #  #  # ]:          0 :         else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
    2012                 :          0 :                 cfg->bootlog = (str[0] == 'b');
    2013         [ #  # ]:          0 :         else if (!strcmp(str, "bios_cmci_threshold"))
    2014                 :          0 :                 cfg->bios_cmci_threshold = 1;
    2015         [ #  # ]:          0 :         else if (!strcmp(str, "recovery"))
    2016                 :          0 :                 cfg->recovery = 1;
    2017         [ #  # ]:          0 :         else if (isdigit(str[0])) {
    2018         [ #  # ]:          0 :                 if (get_option(&str, &cfg->tolerant) == 2)
    2019                 :          0 :                         get_option(&str, &(cfg->monarch_timeout));
    2020                 :            :         } else {
    2021                 :          0 :                 pr_info("mce argument %s ignored. Please use /sys\n", str);
    2022                 :          0 :                 return 0;
    2023                 :            :         }
    2024                 :            :         return 1;
    2025                 :            : }
    2026                 :            : __setup("mce", mcheck_enable);
    2027                 :            : 
    2028                 :         78 : int __init mcheck_init(void)
    2029                 :            : {
    2030                 :         78 :         mcheck_intel_therm_init();
    2031                 :         78 :         mce_register_decode_chain(&first_nb);
    2032                 :         78 :         mce_register_decode_chain(&mce_uc_nb);
    2033                 :         78 :         mce_register_decode_chain(&mce_default_nb);
    2034                 :         78 :         mcheck_vendor_init_severity();
    2035                 :            : 
    2036                 :         78 :         INIT_WORK(&mce_work, mce_gen_pool_process);
    2037                 :         78 :         init_irq_work(&mce_irq_work, mce_irq_work_cb);
    2038                 :            : 
    2039                 :         78 :         return 0;
    2040                 :            : }
    2041                 :            : 
    2042                 :            : /*
    2043                 :            :  * mce_syscore: PM support
    2044                 :            :  */
    2045                 :            : 
    2046                 :            : /*
    2047                 :            :  * Disable machine checks on suspend and shutdown. We can't really handle
    2048                 :            :  * them later.
    2049                 :            :  */
    2050                 :          0 : static void mce_disable_error_reporting(void)
    2051                 :            : {
    2052                 :          0 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    2053                 :          0 :         int i;
    2054                 :            : 
    2055         [ #  # ]:          0 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
    2056                 :          0 :                 struct mce_bank *b = &mce_banks[i];
    2057                 :            : 
    2058         [ #  # ]:          0 :                 if (b->init)
    2059                 :          0 :                         wrmsrl(msr_ops.ctl(i), 0);
    2060                 :            :         }
    2061                 :          0 :         return;
    2062                 :            : }
    2063                 :            : 
    2064                 :          0 : static void vendor_disable_error_reporting(void)
    2065                 :            : {
    2066                 :            :         /*
    2067                 :            :          * Don't clear on Intel or AMD or Hygon or Zhaoxin CPUs. Some of these
    2068                 :            :          * MSRs are socket-wide. Disabling them for just a single offlined CPU
    2069                 :            :          * is bad, since it will inhibit reporting for all shared resources on
    2070                 :            :          * the socket like the last level cache (LLC), the integrated memory
    2071                 :            :          * controller (iMC), etc.
    2072                 :            :          */
    2073         [ #  # ]:          0 :         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
    2074         [ #  # ]:          0 :             boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
    2075         [ #  # ]:          0 :             boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
    2076                 :            :             boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN)
    2077                 :            :                 return;
    2078                 :            : 
    2079                 :          0 :         mce_disable_error_reporting();
    2080                 :            : }
    2081                 :            : 
    2082                 :          0 : static int mce_syscore_suspend(void)
    2083                 :            : {
    2084                 :          0 :         vendor_disable_error_reporting();
    2085                 :          0 :         return 0;
    2086                 :            : }
    2087                 :            : 
    2088                 :          0 : static void mce_syscore_shutdown(void)
    2089                 :            : {
    2090                 :          0 :         vendor_disable_error_reporting();
    2091                 :          0 : }
    2092                 :            : 
    2093                 :            : /*
    2094                 :            :  * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
    2095                 :            :  * Only one CPU is active at this time, the others get re-added later using
    2096                 :            :  * CPU hotplug:
    2097                 :            :  */
    2098                 :          0 : static void mce_syscore_resume(void)
    2099                 :            : {
    2100                 :          0 :         __mcheck_cpu_init_generic();
    2101                 :          0 :         __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
    2102                 :          0 :         __mcheck_cpu_init_clear_banks();
    2103                 :          0 : }
    2104                 :            : 
    2105                 :            : static struct syscore_ops mce_syscore_ops = {
    2106                 :            :         .suspend        = mce_syscore_suspend,
    2107                 :            :         .shutdown       = mce_syscore_shutdown,
    2108                 :            :         .resume         = mce_syscore_resume,
    2109                 :            : };
    2110                 :            : 
    2111                 :            : /*
    2112                 :            :  * mce_device: Sysfs support
    2113                 :            :  */
    2114                 :            : 
    2115                 :          0 : static void mce_cpu_restart(void *data)
    2116                 :            : {
    2117         [ #  # ]:          0 :         if (!mce_available(raw_cpu_ptr(&cpu_info)))
    2118                 :            :                 return;
    2119                 :          0 :         __mcheck_cpu_init_generic();
    2120                 :          0 :         __mcheck_cpu_init_clear_banks();
    2121                 :          0 :         __mcheck_cpu_init_timer();
    2122                 :            : }
    2123                 :            : 
    2124                 :            : /* Reinit MCEs after user configuration changes */
    2125                 :          0 : static void mce_restart(void)
    2126                 :            : {
    2127                 :          0 :         mce_timer_delete_all();
    2128                 :          0 :         on_each_cpu(mce_cpu_restart, NULL, 1);
    2129                 :            : }
    2130                 :            : 
    2131                 :            : /* Toggle features for corrected errors */
    2132                 :          0 : static void mce_disable_cmci(void *data)
    2133                 :            : {
    2134         [ #  # ]:          0 :         if (!mce_available(raw_cpu_ptr(&cpu_info)))
    2135                 :            :                 return;
    2136                 :          0 :         cmci_clear();
    2137                 :            : }
    2138                 :            : 
    2139                 :          0 : static void mce_enable_ce(void *all)
    2140                 :            : {
    2141         [ #  # ]:          0 :         if (!mce_available(raw_cpu_ptr(&cpu_info)))
    2142                 :            :                 return;
    2143                 :          0 :         cmci_reenable();
    2144                 :          0 :         cmci_recheck();
    2145         [ #  # ]:          0 :         if (all)
    2146                 :          0 :                 __mcheck_cpu_init_timer();
    2147                 :            : }
    2148                 :            : 
    2149                 :            : static struct bus_type mce_subsys = {
    2150                 :            :         .name           = "machinecheck",
    2151                 :            :         .dev_name       = "machinecheck",
    2152                 :            : };
    2153                 :            : 
    2154                 :            : DEFINE_PER_CPU(struct device *, mce_device);
    2155                 :            : 
    2156                 :          0 : static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
    2157                 :            : {
    2158                 :          0 :         return container_of(attr, struct mce_bank_dev, attr);
    2159                 :            : }
    2160                 :            : 
    2161                 :          0 : static ssize_t show_bank(struct device *s, struct device_attribute *attr,
    2162                 :            :                          char *buf)
    2163                 :            : {
    2164                 :          0 :         u8 bank = attr_to_bank(attr)->bank;
    2165                 :          0 :         struct mce_bank *b;
    2166                 :            : 
    2167         [ #  # ]:          0 :         if (bank >= per_cpu(mce_num_banks, s->id))
    2168                 :            :                 return -EINVAL;
    2169                 :            : 
    2170                 :          0 :         b = &per_cpu(mce_banks_array, s->id)[bank];
    2171                 :            : 
    2172         [ #  # ]:          0 :         if (!b->init)
    2173                 :            :                 return -ENODEV;
    2174                 :            : 
    2175                 :          0 :         return sprintf(buf, "%llx\n", b->ctl);
    2176                 :            : }
    2177                 :            : 
    2178                 :          0 : static ssize_t set_bank(struct device *s, struct device_attribute *attr,
    2179                 :            :                         const char *buf, size_t size)
    2180                 :            : {
    2181                 :          0 :         u8 bank = attr_to_bank(attr)->bank;
    2182                 :          0 :         struct mce_bank *b;
    2183                 :          0 :         u64 new;
    2184                 :            : 
    2185         [ #  # ]:          0 :         if (kstrtou64(buf, 0, &new) < 0)
    2186                 :            :                 return -EINVAL;
    2187                 :            : 
    2188         [ #  # ]:          0 :         if (bank >= per_cpu(mce_num_banks, s->id))
    2189                 :            :                 return -EINVAL;
    2190                 :            : 
    2191                 :          0 :         b = &per_cpu(mce_banks_array, s->id)[bank];
    2192                 :            : 
    2193         [ #  # ]:          0 :         if (!b->init)
    2194                 :            :                 return -ENODEV;
    2195                 :            : 
    2196                 :          0 :         b->ctl = new;
    2197                 :          0 :         mce_restart();
    2198                 :            : 
    2199                 :          0 :         return size;
    2200                 :            : }
    2201                 :            : 
    2202                 :          0 : static ssize_t set_ignore_ce(struct device *s,
    2203                 :            :                              struct device_attribute *attr,
    2204                 :            :                              const char *buf, size_t size)
    2205                 :            : {
    2206                 :          0 :         u64 new;
    2207                 :            : 
    2208         [ #  # ]:          0 :         if (kstrtou64(buf, 0, &new) < 0)
    2209                 :            :                 return -EINVAL;
    2210                 :            : 
    2211                 :          0 :         mutex_lock(&mce_sysfs_mutex);
    2212         [ #  # ]:          0 :         if (mca_cfg.ignore_ce ^ !!new) {
    2213         [ #  # ]:          0 :                 if (new) {
    2214                 :            :                         /* disable ce features */
    2215                 :          0 :                         mce_timer_delete_all();
    2216                 :          0 :                         on_each_cpu(mce_disable_cmci, NULL, 1);
    2217                 :          0 :                         mca_cfg.ignore_ce = true;
    2218                 :            :                 } else {
    2219                 :            :                         /* enable ce features */
    2220                 :          0 :                         mca_cfg.ignore_ce = false;
    2221                 :          0 :                         on_each_cpu(mce_enable_ce, (void *)1, 1);
    2222                 :            :                 }
    2223                 :            :         }
    2224                 :          0 :         mutex_unlock(&mce_sysfs_mutex);
    2225                 :            : 
    2226                 :          0 :         return size;
    2227                 :            : }
    2228                 :            : 
    2229                 :          0 : static ssize_t set_cmci_disabled(struct device *s,
    2230                 :            :                                  struct device_attribute *attr,
    2231                 :            :                                  const char *buf, size_t size)
    2232                 :            : {
    2233                 :          0 :         u64 new;
    2234                 :            : 
    2235         [ #  # ]:          0 :         if (kstrtou64(buf, 0, &new) < 0)
    2236                 :            :                 return -EINVAL;
    2237                 :            : 
    2238                 :          0 :         mutex_lock(&mce_sysfs_mutex);
    2239         [ #  # ]:          0 :         if (mca_cfg.cmci_disabled ^ !!new) {
    2240         [ #  # ]:          0 :                 if (new) {
    2241                 :            :                         /* disable cmci */
    2242                 :          0 :                         on_each_cpu(mce_disable_cmci, NULL, 1);
    2243                 :          0 :                         mca_cfg.cmci_disabled = true;
    2244                 :            :                 } else {
    2245                 :            :                         /* enable cmci */
    2246                 :          0 :                         mca_cfg.cmci_disabled = false;
    2247                 :          0 :                         on_each_cpu(mce_enable_ce, NULL, 1);
    2248                 :            :                 }
    2249                 :            :         }
    2250                 :          0 :         mutex_unlock(&mce_sysfs_mutex);
    2251                 :            : 
    2252                 :          0 :         return size;
    2253                 :            : }
    2254                 :            : 
    2255                 :          0 : static ssize_t store_int_with_restart(struct device *s,
    2256                 :            :                                       struct device_attribute *attr,
    2257                 :            :                                       const char *buf, size_t size)
    2258                 :            : {
    2259                 :          0 :         unsigned long old_check_interval = check_interval;
    2260                 :          0 :         ssize_t ret = device_store_ulong(s, attr, buf, size);
    2261                 :            : 
    2262         [ #  # ]:          0 :         if (check_interval == old_check_interval)
    2263                 :            :                 return ret;
    2264                 :            : 
    2265                 :          0 :         mutex_lock(&mce_sysfs_mutex);
    2266                 :          0 :         mce_restart();
    2267                 :          0 :         mutex_unlock(&mce_sysfs_mutex);
    2268                 :            : 
    2269                 :          0 :         return ret;
    2270                 :            : }
    2271                 :            : 
    2272                 :            : static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
    2273                 :            : static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
    2274                 :            : static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
    2275                 :            : 
    2276                 :            : static struct dev_ext_attribute dev_attr_check_interval = {
    2277                 :            :         __ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
    2278                 :            :         &check_interval
    2279                 :            : };
    2280                 :            : 
    2281                 :            : static struct dev_ext_attribute dev_attr_ignore_ce = {
    2282                 :            :         __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
    2283                 :            :         &mca_cfg.ignore_ce
    2284                 :            : };
    2285                 :            : 
    2286                 :            : static struct dev_ext_attribute dev_attr_cmci_disabled = {
    2287                 :            :         __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
    2288                 :            :         &mca_cfg.cmci_disabled
    2289                 :            : };
    2290                 :            : 
    2291                 :            : static struct device_attribute *mce_device_attrs[] = {
    2292                 :            :         &dev_attr_tolerant.attr,
    2293                 :            :         &dev_attr_check_interval.attr,
    2294                 :            : #ifdef CONFIG_X86_MCELOG_LEGACY
    2295                 :            :         &dev_attr_trigger,
    2296                 :            : #endif
    2297                 :            :         &dev_attr_monarch_timeout.attr,
    2298                 :            :         &dev_attr_dont_log_ce.attr,
    2299                 :            :         &dev_attr_ignore_ce.attr,
    2300                 :            :         &dev_attr_cmci_disabled.attr,
    2301                 :            :         NULL
    2302                 :            : };
    2303                 :            : 
    2304                 :            : static cpumask_var_t mce_device_initialized;
    2305                 :            : 
    2306                 :          0 : static void mce_device_release(struct device *dev)
    2307                 :            : {
    2308                 :          0 :         kfree(dev);
    2309                 :          0 : }
    2310                 :            : 
    2311                 :            : /* Per CPU device init. All of the CPUs still share the same bank device: */
    2312                 :         78 : static int mce_device_create(unsigned int cpu)
    2313                 :            : {
    2314                 :         78 :         struct device *dev;
    2315                 :         78 :         int err;
    2316                 :         78 :         int i, j;
    2317                 :            : 
    2318         [ +  - ]:         78 :         if (!mce_available(&boot_cpu_data))
    2319                 :            :                 return -EIO;
    2320                 :            : 
    2321                 :         78 :         dev = per_cpu(mce_device, cpu);
    2322         [ +  - ]:         78 :         if (dev)
    2323                 :            :                 return 0;
    2324                 :            : 
    2325                 :         78 :         dev = kzalloc(sizeof(*dev), GFP_KERNEL);
    2326         [ +  - ]:         78 :         if (!dev)
    2327                 :            :                 return -ENOMEM;
    2328                 :         78 :         dev->id  = cpu;
    2329                 :         78 :         dev->bus = &mce_subsys;
    2330                 :         78 :         dev->release = &mce_device_release;
    2331                 :            : 
    2332                 :         78 :         err = device_register(dev);
    2333         [ -  + ]:         78 :         if (err) {
    2334                 :          0 :                 put_device(dev);
    2335                 :          0 :                 return err;
    2336                 :            :         }
    2337                 :            : 
    2338         [ +  + ]:        546 :         for (i = 0; mce_device_attrs[i]; i++) {
    2339                 :        468 :                 err = device_create_file(dev, mce_device_attrs[i]);
    2340         [ -  + ]:        468 :                 if (err)
    2341                 :          0 :                         goto error;
    2342                 :            :         }
    2343         [ +  + ]:        858 :         for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
    2344                 :        780 :                 err = device_create_file(dev, &mce_bank_devs[j].attr);
    2345         [ -  + ]:        780 :                 if (err)
    2346                 :          0 :                         goto error2;
    2347                 :            :         }
    2348                 :         78 :         cpumask_set_cpu(cpu, mce_device_initialized);
    2349                 :         78 :         per_cpu(mce_device, cpu) = dev;
    2350                 :            : 
    2351                 :         78 :         return 0;
    2352                 :            : error2:
    2353         [ #  # ]:          0 :         while (--j >= 0)
    2354                 :          0 :                 device_remove_file(dev, &mce_bank_devs[j].attr);
    2355                 :          0 : error:
    2356         [ #  # ]:          0 :         while (--i >= 0)
    2357                 :          0 :                 device_remove_file(dev, mce_device_attrs[i]);
    2358                 :            : 
    2359                 :          0 :         device_unregister(dev);
    2360                 :            : 
    2361                 :          0 :         return err;
    2362                 :            : }
    2363                 :            : 
    2364                 :          0 : static void mce_device_remove(unsigned int cpu)
    2365                 :            : {
    2366                 :          0 :         struct device *dev = per_cpu(mce_device, cpu);
    2367                 :          0 :         int i;
    2368                 :            : 
    2369         [ #  # ]:          0 :         if (!cpumask_test_cpu(cpu, mce_device_initialized))
    2370                 :            :                 return;
    2371                 :            : 
    2372         [ #  # ]:          0 :         for (i = 0; mce_device_attrs[i]; i++)
    2373                 :          0 :                 device_remove_file(dev, mce_device_attrs[i]);
    2374                 :            : 
    2375         [ #  # ]:          0 :         for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
    2376                 :          0 :                 device_remove_file(dev, &mce_bank_devs[i].attr);
    2377                 :            : 
    2378                 :          0 :         device_unregister(dev);
    2379                 :          0 :         cpumask_clear_cpu(cpu, mce_device_initialized);
    2380                 :          0 :         per_cpu(mce_device, cpu) = NULL;
    2381                 :            : }
    2382                 :            : 
    2383                 :            : /* Make sure there are no machine checks on offlined CPUs. */
    2384                 :          0 : static void mce_disable_cpu(void)
    2385                 :            : {
    2386         [ #  # ]:          0 :         if (!mce_available(raw_cpu_ptr(&cpu_info)))
    2387                 :            :                 return;
    2388                 :            : 
    2389         [ #  # ]:          0 :         if (!cpuhp_tasks_frozen)
    2390                 :          0 :                 cmci_clear();
    2391                 :            : 
    2392                 :          0 :         vendor_disable_error_reporting();
    2393                 :            : }
    2394                 :            : 
    2395                 :         78 : static void mce_reenable_cpu(void)
    2396                 :            : {
    2397                 :         78 :         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
    2398                 :         78 :         int i;
    2399                 :            : 
    2400         [ +  - ]:         78 :         if (!mce_available(raw_cpu_ptr(&cpu_info)))
    2401                 :            :                 return;
    2402                 :            : 
    2403         [ +  - ]:         78 :         if (!cpuhp_tasks_frozen)
    2404                 :         78 :                 cmci_reenable();
    2405         [ +  + ]:        858 :         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
    2406                 :        780 :                 struct mce_bank *b = &mce_banks[i];
    2407                 :            : 
    2408         [ +  + ]:        780 :                 if (b->init)
    2409                 :        702 :                         wrmsrl(msr_ops.ctl(i), b->ctl);
    2410                 :            :         }
    2411                 :            : }
    2412                 :            : 
    2413                 :          0 : static int mce_cpu_dead(unsigned int cpu)
    2414                 :            : {
    2415                 :          0 :         mce_intel_hcpu_update(cpu);
    2416                 :            : 
    2417                 :            :         /* intentionally ignoring frozen here */
    2418         [ #  # ]:          0 :         if (!cpuhp_tasks_frozen)
    2419                 :          0 :                 cmci_rediscover();
    2420                 :          0 :         return 0;
    2421                 :            : }
    2422                 :            : 
    2423                 :         78 : static int mce_cpu_online(unsigned int cpu)
    2424                 :            : {
    2425                 :         78 :         struct timer_list *t = this_cpu_ptr(&mce_timer);
    2426                 :         78 :         int ret;
    2427                 :            : 
    2428                 :         78 :         mce_device_create(cpu);
    2429                 :            : 
    2430                 :         78 :         ret = mce_threshold_create_device(cpu);
    2431         [ -  + ]:         78 :         if (ret) {
    2432                 :          0 :                 mce_device_remove(cpu);
    2433                 :          0 :                 return ret;
    2434                 :            :         }
    2435                 :         78 :         mce_reenable_cpu();
    2436                 :         78 :         mce_start_timer(t);
    2437                 :         78 :         return 0;
    2438                 :            : }
    2439                 :            : 
    2440                 :          0 : static int mce_cpu_pre_down(unsigned int cpu)
    2441                 :            : {
    2442                 :          0 :         struct timer_list *t = this_cpu_ptr(&mce_timer);
    2443                 :            : 
    2444                 :          0 :         mce_disable_cpu();
    2445                 :          0 :         del_timer_sync(t);
    2446                 :          0 :         mce_threshold_remove_device(cpu);
    2447                 :          0 :         mce_device_remove(cpu);
    2448                 :          0 :         return 0;
    2449                 :            : }
    2450                 :            : 
    2451                 :         78 : static __init void mce_init_banks(void)
    2452                 :            : {
    2453                 :         78 :         int i;
    2454                 :            : 
    2455         [ +  + ]:       2574 :         for (i = 0; i < MAX_NR_BANKS; i++) {
    2456                 :       2496 :                 struct mce_bank_dev *b = &mce_bank_devs[i];
    2457                 :       2496 :                 struct device_attribute *a = &b->attr;
    2458                 :            : 
    2459                 :       2496 :                 b->bank = i;
    2460                 :            : 
    2461                 :       2496 :                 sysfs_attr_init(&a->attr);
    2462                 :       2496 :                 a->attr.name = b->attrname;
    2463                 :       2496 :                 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
    2464                 :            : 
    2465                 :       2496 :                 a->attr.mode = 0644;
    2466                 :       2496 :                 a->show              = show_bank;
    2467                 :       2496 :                 a->store     = set_bank;
    2468                 :            :         }
    2469                 :         78 : }
    2470                 :            : 
    2471                 :         78 : static __init int mcheck_init_device(void)
    2472                 :            : {
    2473                 :         78 :         int err;
    2474                 :            : 
    2475                 :            :         /*
    2476                 :            :          * Check if we have a spare virtual bit. This will only become
    2477                 :            :          * a problem if/when we move beyond 5-level page tables.
    2478                 :            :          */
    2479         [ -  + ]:         78 :         MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
    2480                 :            : 
    2481         [ -  + ]:         78 :         if (!mce_available(&boot_cpu_data)) {
    2482                 :          0 :                 err = -EIO;
    2483                 :          0 :                 goto err_out;
    2484                 :            :         }
    2485                 :            : 
    2486                 :         78 :         if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
    2487                 :            :                 err = -ENOMEM;
    2488                 :            :                 goto err_out;
    2489                 :            :         }
    2490                 :            : 
    2491                 :         78 :         mce_init_banks();
    2492                 :            : 
    2493                 :         78 :         err = subsys_system_register(&mce_subsys, NULL);
    2494         [ -  + ]:         78 :         if (err)
    2495                 :          0 :                 goto err_out_mem;
    2496                 :            : 
    2497                 :         78 :         err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL,
    2498                 :            :                                 mce_cpu_dead);
    2499         [ -  + ]:         78 :         if (err)
    2500                 :          0 :                 goto err_out_mem;
    2501                 :            : 
    2502                 :         78 :         err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
    2503                 :            :                                 mce_cpu_online, mce_cpu_pre_down);
    2504         [ -  + ]:         78 :         if (err < 0)
    2505                 :          0 :                 goto err_out_online;
    2506                 :            : 
    2507                 :         78 :         register_syscore_ops(&mce_syscore_ops);
    2508                 :            : 
    2509                 :         78 :         return 0;
    2510                 :            : 
    2511                 :            : err_out_online:
    2512                 :          0 :         cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
    2513                 :            : 
    2514                 :            : err_out_mem:
    2515                 :            :         free_cpumask_var(mce_device_initialized);
    2516                 :            : 
    2517                 :          0 : err_out:
    2518                 :          0 :         pr_err("Unable to init MCE device (rc: %d)\n", err);
    2519                 :            : 
    2520                 :          0 :         return err;
    2521                 :            : }
    2522                 :            : device_initcall_sync(mcheck_init_device);
    2523                 :            : 
    2524                 :            : /*
    2525                 :            :  * Old style boot options parsing. Only for compatibility.
    2526                 :            :  */
    2527                 :          0 : static int __init mcheck_disable(char *str)
    2528                 :            : {
    2529                 :          0 :         mca_cfg.disabled = 1;
    2530                 :          0 :         return 1;
    2531                 :            : }
    2532                 :            : __setup("nomce", mcheck_disable);
    2533                 :            : 
    2534                 :            : #ifdef CONFIG_DEBUG_FS
    2535                 :        156 : struct dentry *mce_get_debugfs_dir(void)
    2536                 :            : {
    2537                 :        156 :         static struct dentry *dmce;
    2538                 :            : 
    2539         [ -  + ]:         78 :         if (!dmce)
    2540                 :         78 :                 dmce = debugfs_create_dir("mce", NULL);
    2541                 :            : 
    2542                 :        156 :         return dmce;
    2543                 :            : }
    2544                 :            : 
    2545                 :          0 : static void mce_reset(void)
    2546                 :            : {
    2547                 :          0 :         cpu_missing = 0;
    2548                 :          0 :         atomic_set(&mce_fake_panicked, 0);
    2549                 :          0 :         atomic_set(&mce_executing, 0);
    2550                 :          0 :         atomic_set(&mce_callin, 0);
    2551                 :          0 :         atomic_set(&global_nwo, 0);
    2552                 :          0 : }
    2553                 :            : 
    2554                 :          0 : static int fake_panic_get(void *data, u64 *val)
    2555                 :            : {
    2556                 :          0 :         *val = fake_panic;
    2557                 :          0 :         return 0;
    2558                 :            : }
    2559                 :            : 
    2560                 :          0 : static int fake_panic_set(void *data, u64 val)
    2561                 :            : {
    2562                 :          0 :         mce_reset();
    2563                 :          0 :         fake_panic = val;
    2564                 :          0 :         return 0;
    2565                 :            : }
    2566                 :            : 
    2567                 :          0 : DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
    2568                 :            :                          "%llu\n");
    2569                 :            : 
    2570                 :         78 : static void __init mcheck_debugfs_init(void)
    2571                 :            : {
    2572                 :         78 :         struct dentry *dmce;
    2573                 :            : 
    2574         [ +  - ]:         78 :         dmce = mce_get_debugfs_dir();
    2575                 :         78 :         debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
    2576                 :            :                                    &fake_panic_fops);
    2577                 :         78 : }
    2578                 :            : #else
    2579                 :            : static void __init mcheck_debugfs_init(void) { }
    2580                 :            : #endif
    2581                 :            : 
    2582                 :            : DEFINE_STATIC_KEY_FALSE(mcsafe_key);
    2583                 :            : EXPORT_SYMBOL_GPL(mcsafe_key);
    2584                 :            : 
    2585                 :         78 : static int __init mcheck_late_init(void)
    2586                 :            : {
    2587         [ -  + ]:         78 :         if (mca_cfg.recovery)
    2588                 :          0 :                 static_branch_inc(&mcsafe_key);
    2589                 :            : 
    2590                 :         78 :         mcheck_debugfs_init();
    2591                 :         78 :         cec_init();
    2592                 :            : 
    2593                 :            :         /*
    2594                 :            :          * Flush out everything that has been logged during early boot, now that
    2595                 :            :          * everything has been initialized (workqueues, decoders, ...).
    2596                 :            :          */
    2597                 :         78 :         mce_schedule_work();
    2598                 :            : 
    2599                 :         78 :         return 0;
    2600                 :            : }
    2601                 :            : late_initcall(mcheck_late_init);

Generated by: LCOV version 1.14