Commit c1f2ffe2 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ras_core_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Log and handle two new AMD-specific MCA registers: SYND1 and SYND2
   and report the Field Replaceable Unit (FRU) text info provided through them

 - Add support for handling variable-sized SMCA BERT records

 - Add the capability for reporting vendor-specific RAS error info
   without adding vendor-specific fields to struct mce

 - Cleanups

* tag 'ras_core_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/mce_amd: Add support for FRU text in MCA
  x86/mce/apei: Handle variable SMCA BERT record size
  x86/MCE/AMD: Add support for new MCA_SYND{1,2} registers
  tracing: Add __print_dynamic_array() helper
  x86/mce: Add wrapper for struct mce to export vendor specific info
  x86/mce/intel: Use MCG_BANKCNT_MASK instead of 0xff
  x86/mce/mcelog: Use xchg() to get and clear the flags
parents 77286b86 612c2add
Loading
Loading
Loading
Loading
+34 −2
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@
 *  - TCC bit is present in MCx_STATUS.
 */
#define MCI_CONFIG_MCAX		0x1
#define MCI_CONFIG_FRUTEXT	BIT_ULL(9)
#define MCI_IPID_MCATYPE	0xFFFF0000
#define MCI_IPID_HWID		0xFFF

@@ -122,6 +123,9 @@
#define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
/* Registers MISC2 to MISC4 are at offsets B to D. */
#define MSR_AMD64_SMCA_MC0_SYND1	0xc000200e
#define MSR_AMD64_SMCA_MC0_SYND2	0xc000200f
#define MSR_AMD64_SMCA_MCx_CTL(x)	(MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x)	(MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x)	(MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -132,6 +136,8 @@
#define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
#define MSR_AMD64_SMCA_MCx_SYND1(x)	(MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_SYND2(x)	(MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))

#define XEC(x, mask)			(((x) >> 16) & mask)

@@ -187,6 +193,32 @@ enum mce_notifier_prios {
	MCE_PRIO_HIGHEST = MCE_PRIO_CEC
};

/**
 * struct mce_hw_err - Hardware Error Record.
 * @m:		Machine Check record.
 * @vendor:	Vendor-specific error information.
 *
 * Vendor-specific fields should not be added to struct mce. Instead, vendors
 * should export their vendor-specific data through their structure in the
 * vendor union below.
 *
 * AMD's vendor data is parsed by error decoding tools for supplemental error
 * information. Thus, current offsets of existing fields must be maintained.
 * Only add new fields at the end of AMD's vendor structure.
 */
struct mce_hw_err {
	/*
	 * Generic machine check record.
	 *
	 * NOTE(review): callers convert a struct mce pointer back to this
	 * wrapper with to_mce_hw_err() and then NULL-check the result. That
	 * check only detects a NULL input while this member sits at offset 0,
	 * so keep it first unless those callers are reworked.
	 */
	struct mce m;

	/* One member per vendor; only AMD carries vendor data so far. */
	union vendor_info {
		/* Append-only: existing offsets are relied on by decoding tools. */
		struct {
			u64 synd1;		/* MCA_SYND1 MSR */
			u64 synd2;		/* MCA_SYND2 MSR */
		} amd;
	} vendor;
};

/* Recover the enclosing struct mce_hw_err from a pointer to its embedded 'm' member. */
#define	to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)

struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -221,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
					     u64 lapic_id) { return -EINVAL; }
#endif

void mce_prep_record(struct mce *m);
void mce_log(struct mce *m);
void mce_prep_record(struct mce_hw_err *err);
void mce_log(struct mce_hw_err *err);
DECLARE_PER_CPU(struct device *, mce_device);

/* Maximum number of MCA banks per CPU. */
+2 −1
Original line number Diff line number Diff line
@@ -8,7 +8,8 @@
/*
 * Fields are zero when not available. Also, this struct is shared with
 * userspace mcelog and thus must keep existing fields at current offsets.
 * Only add new fields to the end of the structure
 * Only add new, shared fields to the end of the structure.
 * Do not add vendor-specific fields.
 */
struct mce {
	__u64 status;		/* Bank's MCi_STATUS MSR */
+17 −13
Original line number Diff line number Diff line
@@ -778,29 +778,33 @@ bool amd_mce_usable_address(struct mce *m)

static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
	struct mce m;
	struct mce_hw_err err;
	struct mce *m = &err.m;

	mce_prep_record(&m);
	mce_prep_record(&err);

	m.status = status;
	m.misc   = misc;
	m.bank   = bank;
	m.tsc	 = rdtsc();
	m->status = status;
	m->misc   = misc;
	m->bank   = bank;
	m->tsc	 = rdtsc();

	if (m.status & MCI_STATUS_ADDRV) {
		m.addr = addr;
	if (m->status & MCI_STATUS_ADDRV) {
		m->addr = addr;

		smca_extract_err_addr(&m);
		smca_extract_err_addr(m);
	}

	if (mce_flags.smca) {
		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);

		if (m.status & MCI_STATUS_SYNDV)
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
		if (m->status & MCI_STATUS_SYNDV) {
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
		}
	}

	mce_log(&m);
	mce_log(&err);
}

DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
+78 −29
Original line number Diff line number Diff line
@@ -28,7 +28,8 @@

void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
	struct mce m;
	struct mce_hw_err err;
	struct mce *m;
	int lsb;

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
@@ -44,31 +45,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
	else
		lsb = PAGE_SHIFT;

	mce_prep_record(&m);
	m.bank = -1;
	mce_prep_record(&err);
	m = &err.m;
	m->bank = -1;
	/* Fake a memory read error with unknown channel */
	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
	m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
	m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
	m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;

	if (severity >= GHES_SEV_RECOVERABLE)
		m.status |= MCI_STATUS_UC;
		m->status |= MCI_STATUS_UC;

	if (severity >= GHES_SEV_PANIC) {
		m.status |= MCI_STATUS_PCC;
		m.tsc = rdtsc();
		m->status |= MCI_STATUS_PCC;
		m->tsc = rdtsc();
	}

	m.addr = mem_err->physical_addr;
	mce_log(&m);
	m->addr = mem_err->physical_addr;
	mce_log(&err);
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
	const u64 *i_mce = ((const u64 *) (ctx_info + 1));
	unsigned int cpu, num_regs;
	bool apicid_found = false;
	unsigned int cpu;
	struct mce m;
	struct mce_hw_err err;
	struct mce *m;

	if (!boot_cpu_has(X86_FEATURE_SMCA))
		return -EINVAL;
@@ -86,16 +89,12 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
		return -EINVAL;

	/*
	 * The register array size must be large enough to include all the
	 * SMCA registers which need to be extracted.
	 *
	 * The number of registers in the register array is determined by
	 * Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
	 * The register layout is fixed and currently the raw data in the
	 * register array includes 6 SMCA registers which the kernel can
	 * extract.
	 * Sanity-check registers array size.
	 */
	if (ctx_info->reg_arr_size < 48)
	num_regs = ctx_info->reg_arr_size >> 3;
	if (!num_regs)
		return -EINVAL;

	for_each_possible_cpu(cpu) {
@@ -108,18 +107,68 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
	if (!apicid_found)
		return -EINVAL;

	mce_prep_record_common(&m);
	mce_prep_record_per_cpu(cpu, &m);
	m = &err.m;
	memset(&err, 0, sizeof(struct mce_hw_err));
	mce_prep_record_common(m);
	mce_prep_record_per_cpu(cpu, m);

	m->bank = (ctx_info->msr_addr >> 4) & 0xFF;

	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
	m.status = *i_mce;
	m.addr = *(i_mce + 1);
	m.misc = *(i_mce + 2);
	/* Skipping MCA_CONFIG */
	m.ipid = *(i_mce + 4);
	m.synd = *(i_mce + 5);
	/*
	 * The SMCA register layout is fixed and includes 16 registers.
	 * The end of the array may be variable, but the beginning is known.
	 * Cap the number of registers to expected max (15).
	 */
	if (num_regs > 15)
		num_regs = 15;

	switch (num_regs) {
	/* MCA_SYND2 */
	case 15:
		err.vendor.amd.synd2 = *(i_mce + 14);
		fallthrough;
	/* MCA_SYND1 */
	case 14:
		err.vendor.amd.synd1 = *(i_mce + 13);
		fallthrough;
	/* MCA_MISC4 */
	case 13:
	/* MCA_MISC3 */
	case 12:
	/* MCA_MISC2 */
	case 11:
	/* MCA_MISC1 */
	case 10:
	/* MCA_DEADDR */
	case 9:
	/* MCA_DESTAT */
	case 8:
	/* reserved */
	case 7:
	/* MCA_SYND */
	case 6:
		m->synd = *(i_mce + 5);
		fallthrough;
	/* MCA_IPID */
	case 5:
		m->ipid = *(i_mce + 4);
		fallthrough;
	/* MCA_CONFIG */
	case 4:
	/* MCA_MISC0 */
	case 3:
		m->misc = *(i_mce + 2);
		fallthrough;
	/* MCA_ADDR */
	case 2:
		m->addr = *(i_mce + 1);
		fallthrough;
	/* MCA_STATUS */
	case 1:
		m->status = *i_mce;
	}

	mce_log(&m);
	mce_log(&err);

	return 0;
}
+123 −93
Original line number Diff line number Diff line
@@ -88,7 +88,7 @@ struct mca_config mca_cfg __read_mostly = {
	.monarch_timeout = -1
};

static DEFINE_PER_CPU(struct mce, mces_seen);
static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
static unsigned long mce_need_notify;

/*
@@ -119,8 +119,6 @@ BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);

void mce_prep_record_common(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));

	m->cpuid	= cpuid_eax(1);
	m->cpuvendor	= boot_cpu_data.x86_vendor;
	m->mcgcap	= __rdmsr(MSR_IA32_MCG_CAP);
@@ -138,9 +136,12 @@ void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
	m->socketid	= topology_physical_package_id(cpu);
}

/* Do initial initialization of a struct mce */
void mce_prep_record(struct mce *m)
/* Do initial initialization of struct mce_hw_err */
void mce_prep_record(struct mce_hw_err *err)
{
	struct mce *m = &err->m;

	memset(err, 0, sizeof(struct mce_hw_err));
	mce_prep_record_common(m);
	mce_prep_record_per_cpu(smp_processor_id(), m);
}
@@ -148,9 +149,9 @@ void mce_prep_record(struct mce *m)
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);

void mce_log(struct mce *m)
void mce_log(struct mce_hw_err *err)
{
	if (!mce_gen_pool_add(m))
	if (!mce_gen_pool_add(err))
		irq_work_queue(&mce_irq_work);
}
EXPORT_SYMBOL_GPL(mce_log);
@@ -171,8 +172,10 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);

static void __print_mce(struct mce *m)
static void __print_mce(struct mce_hw_err *err)
{
	struct mce *m = &err->m;

	pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
		 m->extcpu,
		 (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
@@ -199,6 +202,10 @@ static void __print_mce(struct mce *m)
	if (mce_flags.smca) {
		if (m->synd)
			pr_cont("SYND %llx ", m->synd);
		if (err->vendor.amd.synd1)
			pr_cont("SYND1 %llx ", err->vendor.amd.synd1);
		if (err->vendor.amd.synd2)
			pr_cont("SYND2 %llx ", err->vendor.amd.synd2);
		if (m->ipid)
			pr_cont("IPID %llx ", m->ipid);
	}
@@ -214,9 +221,11 @@ static void __print_mce(struct mce *m)
		m->microcode);
}

static void print_mce(struct mce *m)
static void print_mce(struct mce_hw_err *err)
{
	__print_mce(m);
	struct mce *m = &err->m;

	__print_mce(err);

	if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
		pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
@@ -251,7 +260,7 @@ static const char *mce_dump_aux_info(struct mce *m)
	return NULL;
}

static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
static noinstr void mce_panic(const char *msg, struct mce_hw_err *final, char *exp)
{
	struct llist_node *pending;
	struct mce_evt_llist *l;
@@ -282,20 +291,22 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
	pending = mce_gen_pool_prepare_records();
	/* First print corrected ones that are still unlogged */
	llist_for_each_entry(l, pending, llnode) {
		struct mce *m = &l->mce;
		struct mce_hw_err *err = &l->err;
		struct mce *m = &err->m;
		if (!(m->status & MCI_STATUS_UC)) {
			print_mce(m);
			print_mce(err);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	/* Now print uncorrected but with the final one last */
	llist_for_each_entry(l, pending, llnode) {
		struct mce *m = &l->mce;
		struct mce_hw_err *err = &l->err;
		struct mce *m = &err->m;
		if (!(m->status & MCI_STATUS_UC))
			continue;
		if (!final || mce_cmp(m, final)) {
			print_mce(m);
		if (!final || mce_cmp(m, &final->m)) {
			print_mce(err);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
@@ -303,12 +314,12 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
	if (final) {
		print_mce(final);
		if (!apei_err)
			apei_err = apei_write_mce(final);
			apei_err = apei_write_mce(&final->m);
	}
	if (exp)
		pr_emerg(HW_ERR "Machine check: %s\n", exp);

	memmsg = mce_dump_aux_info(final);
	memmsg = mce_dump_aux_info(&final->m);
	if (memmsg)
		pr_emerg(HW_ERR "Machine check: %s\n", memmsg);

@@ -323,9 +334,9 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
		 * panic.
		 */
		if (kexec_crash_loaded()) {
			if (final && (final->status & MCI_STATUS_ADDRV)) {
			if (final && (final->m.status & MCI_STATUS_ADDRV)) {
				struct page *p;
				p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
				p = pfn_to_online_page(final->m.addr >> PAGE_SHIFT);
				if (p)
					SetPageHWPoison(p);
			}
@@ -445,16 +456,18 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
 * check into our "mce" struct so that we can use it later to assess
 * the severity of the problem as we read per-bank specific details.
 */
static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs)
{
	struct mce *m;
	/*
	 * Enable instrumentation around mce_prep_record() which calls external
	 * facilities.
	 */
	instrumentation_begin();
	mce_prep_record(m);
	mce_prep_record(err);
	instrumentation_end();

	m = &err->m;
	m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
	if (regs) {
		/*
@@ -574,13 +587,13 @@ EXPORT_SYMBOL_GPL(mce_is_correctable);
static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *m = (struct mce *)data;
	struct mce_hw_err *err = to_mce_hw_err(data);

	if (!m)
	if (!err)
		return NOTIFY_DONE;

	/* Emit the trace record: */
	trace_mce_record(m);
	trace_mce_record(err);

	set_bit(0, &mce_need_notify);

@@ -624,13 +637,13 @@ static struct notifier_block mce_uc_nb = {
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *m = (struct mce *)data;
	struct mce_hw_err *err = to_mce_hw_err(data);

	if (!m)
	if (!err)
		return NOTIFY_DONE;

	if (mca_cfg.print_all || !m->kflags)
		__print_mce(m);
	if (mca_cfg.print_all || !(err->m.kflags))
		__print_mce(err);

	return NOTIFY_DONE;
}
@@ -644,8 +657,10 @@ static struct notifier_block mce_default_nb = {
/*
 * Read ADDR and MISC registers.
 */
static noinstr void mce_read_aux(struct mce *m, int i)
static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
{
	struct mce *m = &err->m;

	if (m->status & MCI_STATUS_MISCV)
		m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));

@@ -667,8 +682,11 @@ static noinstr void mce_read_aux(struct mce *m, int i)
	if (mce_flags.smca) {
		m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));

		if (m->status & MCI_STATUS_SYNDV)
		if (m->status & MCI_STATUS_SYNDV) {
			m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
			err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
			err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
		}
	}
}

@@ -692,26 +710,28 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
	struct mce m;
	struct mce_hw_err err;
	struct mce *m;
	int i;

	this_cpu_inc(mce_poll_count);

	mce_gather_info(&m, NULL);
	mce_gather_info(&err, NULL);
	m = &err.m;

	if (flags & MCP_TIMESTAMP)
		m.tsc = rdtsc();
		m->tsc = rdtsc();

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		if (!mce_banks[i].ctl || !test_bit(i, *b))
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m->misc = 0;
		m->addr = 0;
		m->bank = i;

		barrier();
		m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
		m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));

		/*
		 * Update storm tracking here, before checking for the
@@ -721,17 +741,17 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
		 * storm status.
		 */
		if (!mca_cfg.cmci_disabled)
			mce_track_storm(&m);
			mce_track_storm(m);

		/* If this entry is not valid, ignore it */
		if (!(m.status & MCI_STATUS_VAL))
		if (!(m->status & MCI_STATUS_VAL))
			continue;

		/*
		 * If we are logging everything (at CPU online) or this
		 * is a corrected error, then we must log it.
		 */
		if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
		if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
			goto log_it;

		/*
@@ -741,20 +761,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
		 * everything else.
		 */
		if (!mca_cfg.ser) {
			if (m.status & MCI_STATUS_UC)
			if (m->status & MCI_STATUS_UC)
				continue;
			goto log_it;
		}

		/* Log "not enabled" (speculative) errors */
		if (!(m.status & MCI_STATUS_EN))
		if (!(m->status & MCI_STATUS_EN))
			goto log_it;

		/*
		 * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
		 * UC == 1 && PCC == 0 && S == 0
		 */
		if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
		if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
			goto log_it;

		/*
@@ -768,20 +788,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
		if (flags & MCP_DONTLOG)
			goto clear_it;

		mce_read_aux(&m, i);
		m.severity = mce_severity(&m, NULL, NULL, false);
		mce_read_aux(&err, i);
		m->severity = mce_severity(m, NULL, NULL, false);
		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
		 */

		if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
		if (mca_cfg.dont_log_ce && !mce_usable_address(m))
			goto clear_it;

		if (flags & MCP_QUEUE_LOG)
			mce_gen_pool_add(&m);
			mce_gen_pool_add(&err);
		else
			mce_log(&m);
			mce_log(&err);

clear_it:
		/*
@@ -905,9 +925,10 @@ static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_reg
 * Do a quick check if any of the events requires a panic.
 * This decides if we keep the events around or clear them.
 */
static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, unsigned long *validp,
					  struct pt_regs *regs)
{
	struct mce *m = &err->m;
	char *tmp = *msg;
	int i;

@@ -925,7 +946,7 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo

		m->bank = i;
		if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
			mce_read_aux(m, i);
			mce_read_aux(err, i);
			*msg = tmp;
			return 1;
		}
@@ -1016,10 +1037,11 @@ static noinstr int mce_timed_out(u64 *t, const char *msg)
 */
static void mce_reign(void)
{
	int cpu;
	struct mce_hw_err *err = NULL;
	struct mce *m = NULL;
	int global_worst = 0;
	char *msg = NULL;
	int cpu;

	/*
	 * This CPU is the Monarch and the other CPUs have run
@@ -1027,11 +1049,13 @@ static void mce_reign(void)
	 * Grade the severity of the errors of all the CPUs.
	 */
	for_each_possible_cpu(cpu) {
		struct mce *mtmp = &per_cpu(mces_seen, cpu);
		struct mce_hw_err *etmp = &per_cpu(hw_errs_seen, cpu);
		struct mce *mtmp = &etmp->m;

		if (mtmp->severity > global_worst) {
			global_worst = mtmp->severity;
			m = &per_cpu(mces_seen, cpu);
			err = &per_cpu(hw_errs_seen, cpu);
			m = &err->m;
		}
	}

@@ -1043,7 +1067,7 @@ static void mce_reign(void)
	if (m && global_worst >= MCE_PANIC_SEVERITY) {
		/* call mce_severity() to get "msg" for panic */
		mce_severity(m, NULL, &msg, true);
		mce_panic("Fatal machine check", m, msg);
		mce_panic("Fatal machine check", err, msg);
	}

	/*
@@ -1060,11 +1084,11 @@ static void mce_reign(void)
		mce_panic("Fatal machine check from unknown source", NULL, NULL);

	/*
	 * Now clear all the mces_seen so that they don't reappear on
	 * Now clear all the hw_errs_seen so that they don't reappear on
	 * the next mce.
	 */
	for_each_possible_cpu(cpu)
		memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
		memset(&per_cpu(hw_errs_seen, cpu), 0, sizeof(struct mce_hw_err));
}

static atomic_t global_nwo;
@@ -1268,13 +1292,14 @@ static noinstr bool mce_check_crashing_cpu(void)
}

static __always_inline int
__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
		unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
		int *worst)
__mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs,
		struct mce_hw_err *final, unsigned long *toclear,
		unsigned long *valid_banks, int no_way_out, int *worst)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
	struct mca_config *cfg = &mca_cfg;
	int severity, i, taint = 0;
	struct mce *m = &err->m;

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		arch___clear_bit(i, toclear);
@@ -1319,7 +1344,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
		if (severity == MCE_NO_SEVERITY)
			continue;

		mce_read_aux(m, i);
		mce_read_aux(err, i);

		/* assuming valid severity level != 0 */
		m->severity = severity;
@@ -1329,17 +1354,17 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
		 * done in #MC context, where instrumentation is disabled.
		 */
		instrumentation_begin();
		mce_log(m);
		mce_log(err);
		instrumentation_end();

		if (severity > *worst) {
			*final = *m;
			*final = *err;
			*worst = severity;
		}
	}

	/* mce_clear_state will clear *final, save locally for use later */
	*m = *final;
	*err = *final;

	return taint;
}
@@ -1399,9 +1424,10 @@ static void kill_me_never(struct callback_head *cb)
		set_mce_nospec(pfn);
}

static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
static void queue_task_work(struct mce_hw_err *err, char *msg, void (*func)(struct callback_head *))
{
	int count = ++current->mce_count;
	struct mce *m = &err->m;

	/* First call, save all the details */
	if (count == 1) {
@@ -1414,11 +1440,12 @@ static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callba

	/* Ten is likely overkill. Don't expect more than two faults before task_work() */
	if (count > 10)
		mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
		mce_panic("Too many consecutive machine checks while accessing user data",
			  err, msg);

	/* Second or later call, make sure page address matches the one from first call */
	if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
		mce_panic("Consecutive machine checks to different user pages", m, msg);
		mce_panic("Consecutive machine checks to different user pages", err, msg);

	/* Do not call task_work_add() more than once */
	if (count > 1)
@@ -1467,8 +1494,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
	int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
	DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
	struct mce m, *final;
	struct mce_hw_err *final;
	struct mce_hw_err err;
	char *msg = NULL;
	struct mce *m;

	if (unlikely(mce_flags.p5))
		return pentium_machine_check(regs);
@@ -1506,13 +1535,14 @@ noinstr void do_machine_check(struct pt_regs *regs)

	this_cpu_inc(mce_exception_count);

	mce_gather_info(&m, regs);
	m.tsc = rdtsc();
	mce_gather_info(&err, regs);
	m = &err.m;
	m->tsc = rdtsc();

	final = this_cpu_ptr(&mces_seen);
	*final = m;
	final = this_cpu_ptr(&hw_errs_seen);
	*final = err;

	no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
	no_way_out = mce_no_way_out(&err, &msg, valid_banks, regs);

	barrier();

@@ -1521,15 +1551,15 @@ noinstr void do_machine_check(struct pt_regs *regs)
	 * Assume the worst for now, but if we find the
	 * severity is MCE_AR_SEVERITY we have other options.
	 */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
	if (!(m->mcgstatus & MCG_STATUS_RIPV))
		kill_current_task = 1;
	/*
	 * Check if this MCE is signaled to only this logical processor,
	 * on Intel, Zhaoxin only.
	 */
	if (m.cpuvendor == X86_VENDOR_INTEL ||
	    m.cpuvendor == X86_VENDOR_ZHAOXIN)
		lmce = m.mcgstatus & MCG_STATUS_LMCES;
	if (m->cpuvendor == X86_VENDOR_INTEL ||
	    m->cpuvendor == X86_VENDOR_ZHAOXIN)
		lmce = m->mcgstatus & MCG_STATUS_LMCES;

	/*
	 * Local machine check may already know that we have to panic.
@@ -1540,12 +1570,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
	 */
	if (lmce) {
		if (no_way_out)
			mce_panic("Fatal local machine check", &m, msg);
			mce_panic("Fatal local machine check", &err, msg);
	} else {
		order = mce_start(&no_way_out);
	}

	taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
	taint = __mc_scan_banks(&err, regs, final, toclear, valid_banks, no_way_out, &worst);

	if (!no_way_out)
		mce_clear_state(toclear);
@@ -1560,7 +1590,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
				no_way_out = worst >= MCE_PANIC_SEVERITY;

			if (no_way_out)
				mce_panic("Fatal machine check on current CPU", &m, msg);
				mce_panic("Fatal machine check on current CPU", &err, msg);
		}
	} else {
		/*
@@ -1572,8 +1602,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
		 * make sure we have the right "msg".
		 */
		if (worst >= MCE_PANIC_SEVERITY) {
			mce_severity(&m, regs, &msg, true);
			mce_panic("Local fatal machine check!", &m, msg);
			mce_severity(m, regs, &msg, true);
			mce_panic("Local fatal machine check!", &err, msg);
		}
	}

@@ -1591,16 +1621,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
		goto out;

	/* Fault was in user mode and we need to take some action */
	if ((m.cs & 3) == 3) {
	if ((m->cs & 3) == 3) {
		/* If this triggers there is no way to recover. Die hard. */
		BUG_ON(!on_thread_stack() || !user_mode(regs));

		if (!mce_usable_address(&m))
			queue_task_work(&m, msg, kill_me_now);
		if (!mce_usable_address(m))
			queue_task_work(&err, msg, kill_me_now);
		else
			queue_task_work(&m, msg, kill_me_maybe);
			queue_task_work(&err, msg, kill_me_maybe);

	} else if (m.mcgstatus & MCG_STATUS_SEAM_NR) {
	} else if (m->mcgstatus & MCG_STATUS_SEAM_NR) {
		/*
		 * Saved RIP on stack makes it look like the machine check
		 * was taken in the kernel on the instruction following
@@ -1612,8 +1642,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
		 * not occur there. Mark the page as poisoned so it won't
		 * be added to free list when the guest is terminated.
		 */
		if (mce_usable_address(&m)) {
			struct page *p = pfn_to_online_page(m.addr >> PAGE_SHIFT);
		if (mce_usable_address(m)) {
			struct page *p = pfn_to_online_page(m->addr >> PAGE_SHIFT);

			if (p)
				SetPageHWPoison(p);
@@ -1628,13 +1658,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
		 * corresponding exception handler which would do that is the
		 * proper one.
		 */
		if (m.kflags & MCE_IN_KERNEL_RECOV) {
		if (m->kflags & MCE_IN_KERNEL_RECOV) {
			if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
				mce_panic("Failed kernel mode recovery", &m, msg);
				mce_panic("Failed kernel mode recovery", &err, msg);
		}

		if (m.kflags & MCE_IN_KERNEL_COPYIN)
			queue_task_work(&m, msg, kill_me_never);
		if (m->kflags & MCE_IN_KERNEL_COPYIN)
			queue_task_work(&err, msg, kill_me_never);
	}

out:
Loading