Commit bdde3141 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ras_urgent_for_v6.16_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS fixes from Borislav Petkov:

 - Do not remove the MCE sysfs hierarchy if thresholding sysfs nodes
   init fails due to new/unknown banks present, which in itself is not
   fatal anyway; add default names for new banks

 - Make sure MCE polling settings are honored after CMCI storms

 - Make sure MCE threshold limit is reset after the thresholding
   interrupt has been serviced

 - Clean up properly and disable CMCI banks on shutdown so that a
   second/kexec-ed kernel can rediscover those banks again

* tag 'ras_urgent_for_v6.16_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Make sure CMCI banks are cleared during shutdown on Intel
  x86/mce/amd: Fix threshold limit reset
  x86/mce/amd: Add default names for MCA banks and blocks
  x86/mce: Ensure user polling settings are honored when restarting timer
  x86/mce: Don't remove sysfs if thresholding sysfs init fails
parents 45a3f125 30ad231a
Loading
Loading
Loading
Loading
+17 −11
Original line number Diff line number Diff line
@@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)

struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
@@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr)

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	/*
	 * Reset error count and overflow bit.
	 * This is done during init or after handling an interrupt.
	 */
	if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
		hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
		hi |= THRESHOLD_MAX - tr->b->threshold_limit;
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);
@@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
	}

	bank_type = smca_get_bank_type(cpu, bank);
	if (bank_type >= N_SMCA_BANK_TYPES)
		return NULL;

	if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
		if (b->block < ARRAY_SIZE(smca_umc_block_names))
			return smca_umc_block_names[b->block];
		return NULL;
	}

	if (b && b->block) {
		snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
		return buf_mcatype;
	}

	if (bank_type >= N_SMCA_BANK_TYPES) {
		snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
		return buf_mcatype;
	}

	if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
+11 −13
Original line number Diff line number Diff line
@@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void)

void (*mc_poll_banks)(void) = mc_poll_banks_default;

static bool should_enable_timer(unsigned long iv)
{
	return !mca_cfg.ignore_ce && iv;
}

static void mce_timer_fn(struct timer_list *t)
{
	struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t)

	if (mce_get_storm_mode()) {
		__start_timer(t, HZ);
	} else {
	} else if (should_enable_timer(iv)) {
		__this_cpu_write(mce_next_interval, iv);
		__start_timer(t, iv);
	}
@@ -2156,12 +2161,11 @@ static void mce_start_timer(struct timer_list *t)
{
	unsigned long iv = check_interval * HZ;

	if (mca_cfg.ignore_ce || !iv)
		return;

	if (should_enable_timer(iv)) {
		this_cpu_write(mce_next_interval, iv);
		__start_timer(t, iv);
	}
}

static void __mcheck_cpu_setup_timer(void)
{
@@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);
	int ret;

	mce_device_create(cpu);

	ret = mce_threshold_create_device(cpu);
	if (ret) {
		mce_device_remove(cpu);
		return ret;
	}
	mce_threshold_create_device(cpu);
	mce_reenable_cpu();
	mce_start_timer(t);
	return 0;
+1 −0
Original line number Diff line number Diff line
@@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
	cmci_clear();
}

bool intel_filter_mce(struct mce *m)