Commit d3504411 authored by Linus Torvalds
Browse files

Merge tag 'ras_core_for_v6.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Borislav Petkov:

 - Remove the shared threshold bank hack on AMD and streamline and
   simplify it

 - Cleanup and sanitize MCA code

* tag 'ras_core_for_v6.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce/amd: Remove shared threshold bank plumbing
  x86/mce: Remove the redundant mce_hygon_feature_init()
  x86/mce: Convert family/model mixed checks to VFM-based checks
  x86/mce: Break up __mcheck_cpu_apply_quirks()
  x86/mce: Make four functions return bool
  x86/mce/threshold: Remove the redundant this_cpu_dec_return()
  x86/mce: Make several functions return bool
parents 95ec54a4 d35fb312
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1188,7 +1188,7 @@ config X86_MCE_INTEL
config X86_MCE_AMD
	def_bool y
	prompt "AMD MCE features"
	depends on X86_MCE && X86_LOCAL_APIC && AMD_NB
	depends on X86_MCE && X86_LOCAL_APIC
	help
	  Additional support for AMD specific MCE features such as
	  the DRAM Error Threshold.
+0 −31
Original line number Diff line number Diff line
@@ -4,7 +4,6 @@

#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/refcount.h>

struct amd_nb_bus_dev_range {
	u8 bus;
@@ -29,41 +28,11 @@ struct amd_l3_cache {
	u8	 subcaches[4];
};

/*
 * Descriptor for a single threshold block of an MCA bank: where the block
 * lives (bank, block number, controlling CPU, MSR address), its interrupt
 * settings and threshold limit, plus the sysfs kobject and the list node
 * that chains it to the other blocks of the same bank.
 */
struct threshold_block {
	unsigned int	 block;			/* Number within bank */
	unsigned int	 bank;			/* MCA bank the block belongs to */
	unsigned int	 cpu;			/* CPU which controls MCA bank */
	u32		 address;		/* MSR address for the block */
	u16		 interrupt_enable;	/* Enable/Disable APIC interrupt */
	bool		 interrupt_capable;	/* Bank can generate an interrupt. */

	u16		 threshold_limit;	/*
						 * Value upon which threshold
						 * interrupt is generated.
						 */

	struct kobject	 kobj;			/* sysfs object */
	struct list_head miscj;			/*
						 * List of threshold blocks
						 * within a bank.
						 */
};

/*
 * Threshold descriptor for one MCA bank: the bank's kobject, its threshold
 * blocks and the bookkeeping used when the bank is shared between CPUs.
 */
struct threshold_bank {
	/* Parent kobject under which the block kobjects are added. */
	struct kobject		*kobj;
	/* A block of this bank; its miscj list head links the bank's other blocks. */
	struct threshold_block	*blocks;

	/* initialized to the number of CPUs on the node sharing this bank */
	refcount_t		cpus;
	/* Set to 1 when the bank is a shared bank (see is_shared_bank()). */
	unsigned int		shared;
};

/* State kept per AMD northbridge (presumably one per node — TODO confirm). */
struct amd_northbridge {
	struct pci_dev *root;
	struct pci_dev *misc;
	struct pci_dev *link;
	struct amd_l3_cache l3_cache;
	/*
	 * Threshold descriptor of the shared bank 4 on this node; NULL until a
	 * CPU of the node creates it and again once the last user removed it.
	 */
	struct threshold_bank *bank4;
};

struct amd_northbridge_info {
+2 −4
Original line number Diff line number Diff line
@@ -276,7 +276,7 @@ static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
#endif

int mce_available(struct cpuinfo_x86 *c);
bool mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
bool mce_usable_address(struct mce *m);
@@ -296,7 +296,7 @@ enum mcp_flags {

void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

int mce_notify_irq(void);
bool mce_notify_irq(void);

DECLARE_PER_CPU(struct mce, injectm);

@@ -386,8 +386,6 @@ static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)		{ }
#endif

static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_amd_feature_init(c); }

unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len);

#endif /* _ASM_X86_MCE_H */
+31 −106
Original line number Diff line number Diff line
@@ -4,8 +4,6 @@
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
@@ -20,7 +18,6 @@
#include <linux/smp.h>
#include <linux/string.h>

#include <asm/amd_nb.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
@@ -221,6 +218,32 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
#define MAX_MCATYPE_NAME_LEN	30
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];

/*
 * Descriptor for a single threshold block of an MCA bank: its position
 * (bank, block number, controlling CPU, MCA_MISC MSR address), its
 * interrupt settings and threshold limit, plus the sysfs kobject and the
 * list node that chains it to the other blocks of the same bank.
 */
struct threshold_block {
	/* This block's number within its bank. */
	unsigned int		block;
	/* MCA bank number that contains this block. */
	unsigned int		bank;
	/* CPU which controls this block's MCA bank. */
	unsigned int		cpu;
	/* MCA_MISC MSR address for this block. */
	u32			address;
	/* Enable/Disable APIC interrupt. */
	bool			interrupt_enable;
	/* Bank can generate an interrupt. */
	bool			interrupt_capable;
	/* Value upon which threshold interrupt is generated. */
	u16			threshold_limit;
	/* sysfs object */
	struct kobject		kobj;
	/* List of threshold blocks within this block's MCA bank. */
	struct list_head	miscj;
};

/* Threshold descriptor for one MCA bank. */
struct threshold_bank {
	/* sysfs object of the bank. */
	struct kobject		*kobj;
	/* A block of this bank; the remaining blocks hang off its miscj list. */
	struct threshold_block	*blocks;
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);

/*
@@ -333,19 +356,6 @@ struct thresh_restart {
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/*
	 * Bank 4 is the northbridge reporting bank and is therefore shared,
	 * except on Scalable MCA systems: there only one core has access to
	 * the MSRs of a shared bank, so no bank is treated as shared.
	 */
	return !mce_flags.smca && bank == 4;
}

static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
@@ -381,7 +391,7 @@ static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

@@ -389,7 +399,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
		return false;
	}

	if (apic != msr) {
@@ -399,15 +409,15 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
		 * was set is reserved. Return early here:
		 */
		if (mce_flags.smca)
			return 0;
			return false;

		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
		return false;
	}

	return 1;
	return true;
};

/* Reprogram MCx_MISC MSR behind this threshold bank. */
@@ -1198,35 +1208,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
	return err;
}

/*
 * Add the kobjects of the threshold blocks of bank @b below the bank's
 * kobject, using the name each block kobject already carries.
 *
 * Returns 0 on success, otherwise the error returned by kobject_add().
 */
static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	/* The block that b->blocks points to is added first ... */
	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	/* ... followed by every block linked on its miscj list. */
	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			/*
			 * NOTE(review): this rollback walks the whole list in
			 * reverse, not only the entries added so far, and it
			 * does not appear to undo the kobject_add() of
			 * b->blocks->kobj above — confirm this is intended.
			 */
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
				 unsigned int bank)
{
	struct device *dev = this_cpu_read(mce_device);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = get_name(cpu, bank, NULL);
	int err = 0;
@@ -1234,26 +1219,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
	if (!dev)
		return -ENODEV;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(topology_amd_node_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			bp[bank] = b;
			refcount_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
@@ -1267,17 +1232,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
		goto out_free;
	}

	if (is_shared_bank(bank)) {
		b->shared = 1;
		refcount_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
	if (err)
		goto out_kobj;
@@ -1310,40 +1264,11 @@ static void deallocate_threshold_blocks(struct threshold_bank *bank)
	kobject_put(&bank->blocks->kobj);
}

/*
 * Drop kobject references for bank @b: one on the bank kobject, then one
 * more per entry on the bank's block list.
 */
static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_put(b->kobj);

	/*
	 * NOTE(review): the loop body puts b->kobj again on every iteration
	 * and never touches pos->kobj — confirm this is intended rather than
	 * a kobject_put(&pos->kobj).
	 */
	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_put(b->kobj);
}

static void threshold_remove_bank(struct threshold_bank *bank)
{
	struct amd_northbridge *nb;

	if (!bank->blocks)
		goto out_free;

	if (!bank->shared)
		goto out_dealloc;

	if (!refcount_dec_and_test(&bank->cpus)) {
		__threshold_remove_blocks(bank);
		return;
	} else {
		/*
		 * The last CPU on this node using the shared bank is going
		 * away, remove that bank now.
		 */
		nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id()));
		nb->bank4 = NULL;
	}

out_dealloc:
	deallocate_threshold_blocks(bank);

out_free:
+112 −97
Original line number Diff line number Diff line
@@ -151,7 +151,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);

void mce_log(struct mce_hw_err *err)
{
	if (!mce_gen_pool_add(err))
	if (mce_gen_pool_add(err))
		irq_work_queue(&mce_irq_work);
}
EXPORT_SYMBOL_GPL(mce_log);
@@ -492,10 +492,10 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs
	}
}

int mce_available(struct cpuinfo_x86 *c)
bool mce_available(struct cpuinfo_x86 *c)
{
	if (mca_cfg.disabled)
		return 0;
		return false;
	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

@@ -1778,7 +1778,7 @@ static void mce_timer_delete_all(void)
 * Can be called from interrupt context, but not from machine check/NMI
 * context.
 */
int mce_notify_irq(void)
bool mce_notify_irq(void)
{
	/* Not more than two messages every minute */
	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
@@ -1789,9 +1789,9 @@ int mce_notify_irq(void)
		if (__ratelimit(&ratelimit))
			pr_info(HW_ERR "Machine check events logged\n");

		return 1;
		return true;
	}
	return 0;
	return false;
}
EXPORT_SYMBOL_GPL(mce_notify_irq);

@@ -1910,19 +1910,11 @@ static void __mcheck_cpu_check_banks(void)
	}
}

/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
static void apply_quirks_amd(struct cpuinfo_x86 *c)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
	struct mca_config *cfg = &mca_cfg;

	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
		pr_info("unknown CPU type - not enabling MCE support\n");
		return -EOPNOTSUPP;
	}

	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD) {
	if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
		/*
		 * disable GART TBL walk error reporting, which
@@ -1931,18 +1923,20 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
		 */
		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
	}
		if (c->x86 < 0x11 && cfg->bootlog < 0) {

	if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
		/*
		 * Lots of broken BIOS around that don't clear them
		 * by default and leave crap in there. Don't log:
		 */
			cfg->bootlog = 0;
		mca_cfg.bootlog = 0;
	}

	/*
	 * Various K7s with broken bank 0 around. Always disable
	 * by default.
	 */
		if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
	if (c->x86 == 6 && this_cpu_read(mce_num_banks))
		mce_banks[0].ctl = 0;

	/*
@@ -1954,10 +1948,16 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)

	if (c->x86 >= 0x17 && c->x86 <= 0x1A)
		mce_flags.zen_ifu_quirk = 1;

}

	if (c->x86_vendor == X86_VENDOR_INTEL) {
static void apply_quirks_intel(struct cpuinfo_x86 *c)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);

	/* Older CPUs (prior to family 6) don't need quirks. */
	if (c->x86_vfm < INTEL_PENTIUM_PRO)
		return;

	/*
	 * SDM documents that on family 6 bank 0 should not be written
	 * because it aliases to another special BIOS controlled
@@ -1966,24 +1966,22 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
	 * Don't ignore bank 0 completely because there could be a
	 * valid event later, merely don't write CTL0.
	 */

		if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
	if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
		mce_banks[0].init = false;

	/*
	 * All newer Intel systems support MCE broadcasting. Enable
	 * synchronization with a one second timeout.
	 */
		if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
			cfg->monarch_timeout < 0)
			cfg->monarch_timeout = USEC_PER_SEC;
	if (c->x86_vfm >= INTEL_CORE_YONAH && mca_cfg.monarch_timeout < 0)
		mca_cfg.monarch_timeout = USEC_PER_SEC;

	/*
	 * There are also broken BIOSes on some Pentium M and
	 * earlier systems:
	 */
		if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
			cfg->bootlog = 0;
	if (c->x86_vfm < INTEL_CORE_YONAH && mca_cfg.bootlog < 0)
		mca_cfg.bootlog = 0;

	if (c->x86_vfm == INTEL_SANDYBRIDGE_X)
		mce_flags.snb_ifu_quirk = 1;
@@ -1996,44 +1994,65 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
		mce_flags.skx_repmov_quirk = 1;
}

	if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
{
	/*
	 * All newer Zhaoxin CPUs support MCE broadcasting. Enable
	 * synchronization with a one second timeout.
	 */
	if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
			if (cfg->monarch_timeout < 0)
				cfg->monarch_timeout = USEC_PER_SEC;
		if (mca_cfg.monarch_timeout < 0)
			mca_cfg.monarch_timeout = USEC_PER_SEC;
	}
}

/* Add per CPU specific workarounds here */
static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
	struct mca_config *cfg = &mca_cfg;

	switch (c->x86_vendor) {
	case X86_VENDOR_UNKNOWN:
		pr_info("unknown CPU type - not enabling MCE support\n");
		return false;
	case X86_VENDOR_AMD:
		apply_quirks_amd(c);
		break;
	case X86_VENDOR_INTEL:
		apply_quirks_intel(c);
		break;
	case X86_VENDOR_ZHAOXIN:
		apply_quirks_zhaoxin(c);
		break;
	}

	if (cfg->monarch_timeout < 0)
		cfg->monarch_timeout = 0;
	if (cfg->bootlog != 0)
		cfg->panic_timeout = 30;

	return 0;
	return true;
}

static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
	if (c->x86 != 5)
		return 0;
		return false;

	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		intel_p5_mcheck_init(c);
		mce_flags.p5 = 1;
		return 1;
		return true;
	case X86_VENDOR_CENTAUR:
		winchip_mcheck_init(c);
		mce_flags.winchip = 1;
		return 1;
		return true;
	default:
		return 0;
		return false;
	}

	return 0;
	return false;
}

/*
@@ -2099,13 +2118,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
		mce_intel_feature_init(c);
		break;

	case X86_VENDOR_AMD: {
		mce_amd_feature_init(c);
		break;
		}

	case X86_VENDOR_AMD:
	case X86_VENDOR_HYGON:
		mce_hygon_feature_init(c);
		mce_amd_feature_init(c);
		break;

	case X86_VENDOR_CENTAUR:
@@ -2279,12 +2294,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)

	__mcheck_cpu_cap_init();

	if (__mcheck_cpu_apply_quirks(c) < 0) {
	if (!__mcheck_cpu_apply_quirks(c)) {
		mca_cfg.disabled = 1;
		return;
	}

	if (mce_gen_pool_init()) {
	if (!mce_gen_pool_init()) {
		mca_cfg.disabled = 1;
		pr_emerg("Couldn't allocate MCE records pool!\n");
		return;
Loading