Commit 4b753955 authored by Thomas Gleixner, committed by Borislav Petkov (AMD)
Browse files

x86/microcode: Add per CPU result state



The microcode rendezvous acts purely on global state, which does not
allow failures to be analyzed in a coherent way.

Introduce per CPU state into which the results are written, which allows
the return codes of the individual CPUs to be analyzed.

Initialize the state when walking the cpu_present_mask in the online
check to avoid another for_each_cpu() loop.

Enhance the result print out with that.

The structure is intentionally named ucode_ctrl as it will gain control
fields in subsequent changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20231017211723.632681010@linutronix.de
parent 0772b9aa
Loading
Loading
Loading
Loading
+67 −47
Original line number Diff line number Diff line
@@ -252,6 +252,11 @@ static struct platform_device *microcode_pdev;
 *   requirement can be relaxed in the future. Right now, this is conservative
 *   and good.
 */
/*
 * Per CPU record of the outcome of a late microcode load attempt.
 *
 * setup_cpus() resets it to an invalid value (-1) for every present CPU
 * which has been booted once, the stop_machine() callback stores the
 * outcome on each participating CPU, and load_late_stop_cpus() evaluates
 * the per CPU values afterwards.
 */
struct microcode_ctrl {
	/*
	 * Return value of apply_microcode() on this CPU, or UCODE_TIMEOUT
	 * when the CPU gave up waiting for the others to arrive.
	 */
	enum ucode_state	result;
};

static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
static atomic_t late_cpus_in, late_cpus_out;

static bool wait_for_cpus(atomic_t *cnt)
@@ -274,23 +279,19 @@ static bool wait_for_cpus(atomic_t *cnt)
	return false;
}

/*
 * Returns:
 * < 0 - on error
 *   0 - success (no update done or microcode was updated)
 */
static int __reload_late(void *info)
static int load_cpus_stopped(void *unused)
{
	int cpu = smp_processor_id();
	enum ucode_state err;
	int ret = 0;
	enum ucode_state ret;

	/*
	 * Wait for all CPUs to arrive. A load will not be attempted unless all
	 * CPUs show up.
	 * */
	if (!wait_for_cpus(&late_cpus_in))
		return -1;
	if (!wait_for_cpus(&late_cpus_in)) {
		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
		return 0;
	}

	/*
	 * On an SMT system, it suffices to load the microcode on one sibling of
@@ -299,17 +300,11 @@ static int __reload_late(void *info)
	 * loading attempts happen on multiple threads of an SMT core. See
	 * below.
	 */
	if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
		err = microcode_ops->apply_microcode(cpu);
	else
	if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
		goto wait_for_siblings;

	if (err >= UCODE_NFOUND) {
		if (err == UCODE_ERROR) {
			pr_warn("Error reloading microcode on CPU %d\n", cpu);
			ret = -1;
		}
	}
	ret = microcode_ops->apply_microcode(cpu);
	this_cpu_write(ucode_ctrl.result, ret);

wait_for_siblings:
	if (!wait_for_cpus(&late_cpus_out))
@@ -321,19 +316,18 @@ static int __reload_late(void *info)
	 * per-cpu cpuinfo can be updated with right microcode
	 * revision.
	 */
	if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
		err = microcode_ops->apply_microcode(cpu);
	if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
		return 0;

	return ret;
	ret = microcode_ops->apply_microcode(cpu);
	this_cpu_write(ucode_ctrl.result, ret);
	return 0;
}

/*
 * Reload microcode late on all CPUs. Wait for a sec until they
 * all gather together.
 */
static int microcode_reload_late(void)
static int load_late_stop_cpus(void)
{
	int old = boot_cpu_data.microcode, ret;
	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
	int old_rev = boot_cpu_data.microcode;
	struct cpuinfo_x86 prev_info;

	pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
@@ -348,25 +342,46 @@ static int microcode_reload_late(void)
	 */
	store_cpu_caps(&prev_info);

	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);

	/* Analyze the results */
	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
		switch (per_cpu(ucode_ctrl.result, cpu)) {
		case UCODE_UPDATED:	updated++; break;
		case UCODE_TIMEOUT:	timedout++; break;
		case UCODE_OK:		siblings++; break;
		default:		failed++; break;
		}
	}

	if (microcode_ops->finalize_late_load)
		microcode_ops->finalize_late_load(ret);
		microcode_ops->finalize_late_load(!updated);

	if (!updated) {
		/* Nothing changed. */
		if (!failed && !timedout)
			return 0;
		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
		       failed, timedout);
		return -EIO;
	}

	if (!ret) {
		pr_info("Reload succeeded, microcode revision: 0x%x -> 0x%x\n",
			old, boot_cpu_data.microcode);
		microcode_check(&prev_info);
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
	} else {
		pr_info("Reload failed, current microcode revision: 0x%x\n",
			boot_cpu_data.microcode);
	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
	if (failed || timedout) {
		pr_err("load incomplete. %u CPUs timed out or failed\n",
		       num_online_cpus() - (updated + siblings));
	}
	return ret;
	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
	microcode_check(&prev_info);

	return updated + siblings == num_online_cpus() ? 0 : -EIO;
}

/*
 *  Ensure that all required CPUs which are present and have been booted
 * This function does two things:
 *
 * 1) Ensure that all required CPUs which are present and have been booted
 *    once are online.
 *
 *    To pass this check, all primary threads must be online.
@@ -378,9 +393,12 @@ static int microcode_reload_late(void)
 *    behaviour is undefined. The default play_dead() implementation on
 *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
 *    against a microcode update which affects MWAIT.
 *
 * 2) Initialize the per CPU control structure
 */
static bool ensure_cpus_are_online(void)
static bool setup_cpus(void)
{
	struct microcode_ctrl ctrl = { .result = -1, };
	unsigned int cpu;

	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
@@ -390,18 +408,20 @@ static bool ensure_cpus_are_online(void)
				return false;
			}
		}
		/* Initialize the per CPU state */
		per_cpu(ucode_ctrl, cpu) = ctrl;
	}
	return true;
}

static int ucode_load_late_locked(void)
static int load_late_locked(void)
{
	if (!ensure_cpus_are_online())
	if (!setup_cpus())
		return -EBUSY;

	switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
	case UCODE_NEW:
		return microcode_reload_late();
		return load_late_stop_cpus();
	case UCODE_NFOUND:
		return -ENOENT;
	default:
@@ -421,7 +441,7 @@ static ssize_t reload_store(struct device *dev,
		return -EINVAL;

	cpus_read_lock();
	ret = ucode_load_late_locked();
	ret = load_late_locked();
	cpus_read_unlock();

	return ret ? : size;
+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ enum ucode_state {
	UCODE_UPDATED,
	UCODE_NFOUND,
	UCODE_ERROR,
	UCODE_TIMEOUT,
};

struct microcode_ops {