Commit 516e5bd0 authored by Dave Jiang's avatar Dave Jiang
Browse files

cxl: Add mce notifier to emit aliased address for extended linear cache

Below is an example of an extended linear cache configuration. The memory
layout is presented to the kernel as a single memory region of 256G,
consisting of 128G of DRAM and 128G of CXL memory. The kernel sees a region
with a total of 256G of system memory.

              128G DRAM                          128G CXL memory
|-----------------------------------|-------------------------------------|

Data resides in either DRAM or far memory (FM) with no replication. Hot
data is swapped into DRAM by the hardware behind the scenes. When an error
is detected in one location, it is possible that the error also resides in
the aliased location. Therefore, when a memory location that is flagged by
an MCE is part of the special region, the aliased memory location needs to
be offlined as well.

Add an mce notify callback to identify if the MCE address location is part
of an extended linear cache region and handle accordingly.

Add a symbol export for set_mce_nospec() in the x86 code so that it can be
called from the CXL MCE notify callback.

Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@dwillia2-xfh.jf.intel.com.notmuch/


Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20250226162224.3633792-5-dave.jiang@intel.com


Signed-off-by: Dave Jiang <dave.jiang@intel.com>
parent 8c520c5f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -2081,6 +2081,7 @@ int set_mce_nospec(unsigned long pfn)
		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
	return rc;
}
EXPORT_SYMBOL_GPL(set_mce_nospec);

/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
+4 −0
Original line number Diff line number Diff line
@@ -146,4 +146,8 @@ config CXL_REGION_INVALIDATION_TEST
	  If unsure, or if this kernel is meant for production environments,
	  say N.

# Promptless option that defaults to y whenever its dependencies are met;
# the CXL core Makefile keys off it to decide whether mce.o is built.
config CXL_MCE
	def_bool y
	depends on X86_MCE && MEMORY_FAILURE

endif
+1 −0
Original line number Diff line number Diff line
@@ -17,3 +17,4 @@ cxl_core-y += cdat.o
cxl_core-y += acpi.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
cxl_core-$(CONFIG_CXL_MCE) += mce.o
+6 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@

#include "core.h"
#include "trace.h"
#include "mce.h"

static bool cxl_raw_allow_all;

@@ -1444,6 +1445,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
{
	struct cxl_memdev_state *mds;
	int rc;

	mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
	if (!mds) {
@@ -1459,6 +1461,10 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
	mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
	mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;

	rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
	if (rc)
		return ERR_PTR(rc);

	return mds;
}
EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL");

drivers/cxl/core/mce.c

0 → 100644
+65 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/set_memory.h>
#include <asm/mce.h>
#include <cxlmem.h>
#include "mce.h"

/*
 * cxl_handle_mce() - MCE decode-chain callback for a CXL memory device
 * @nb: notifier block embedded in a struct cxl_memdev_state as ->mce_notifier
 * @val: notifier event value (unused)
 * @data: the struct mce describing the reported machine check
 *
 * If the address reported by the MCE has an aliased system physical address
 * according to cxl_port_get_spa_cache_alias(), attempt to offline the page
 * backing that alias as well.
 *
 * Return: NOTIFY_DONE when the event is not usable or has no alias for this
 * device, NOTIFY_OK once offlining of the aliased page has been attempted.
 */
static int cxl_handle_mce(struct notifier_block *nb, unsigned long val,
			  void *data)
{
	struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state,
						    mce_notifier);
	struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
	struct cxl_port *endpoint;
	struct mce *mce = data;
	u64 spa, spa_alias;
	unsigned long pfn;

	if (!mce || !mce_usable_address(mce))
		return NOTIFY_DONE;

	/*
	 * The notifier is registered when the memdev state is created, so an
	 * MCE can be delivered before a memdev has been associated with this
	 * state. Do not dereference a NULL cxlmd in that window.
	 */
	if (!cxlmd)
		return NOTIFY_DONE;

	endpoint = cxlmd->endpoint;
	if (!endpoint)
		return NOTIFY_DONE;

	spa = mce->addr & MCI_ADDR_PHYSADDR;

	/* Ignore addresses that are not backed by a valid page frame. */
	pfn = spa >> PAGE_SHIFT;
	if (!pfn_valid(pfn))
		return NOTIFY_DONE;

	/* ~0ULL is treated as "no alias for this address". */
	spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa);
	if (spa_alias == ~0ULL)
		return NOTIFY_DONE;

	pfn = spa_alias >> PAGE_SHIFT;

	/*
	 * Take down the aliased memory page. The original memory page flagged
	 * by the MCE will be taken care of by the standard MCE handler.
	 */
	dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n",
		  spa_alias);
	/* set_mce_nospec() is only applied when memory_failure() returns 0. */
	if (!memory_failure(pfn, 0))
		set_mce_nospec(pfn);

	return NOTIFY_OK;
}

/*
 * Release callback with a void * signature: @mce_notifier is the
 * struct notifier_block to remove from the MCE decode chain.
 */
static void cxl_unregister_mce_notifier(void *mce_notifier)
{
	struct notifier_block *nb = mce_notifier;

	mce_unregister_decode_chain(nb);
}

/*
 * devm_cxl_register_mce_notifier() - hook a notifier into the MCE decode chain
 * @dev: device that the unregister action is attached to
 * @mce_notifier: notifier block to initialize and register
 *
 * Sets cxl_handle_mce() as the callback at MCE_PRIO_UC priority, registers
 * the block on the MCE decode chain, and queues
 * cxl_unregister_mce_notifier() as a devm action on @dev.
 *
 * Return: the result of devm_add_action_or_reset().
 */
int devm_cxl_register_mce_notifier(struct device *dev,
				   struct notifier_block *mce_notifier)
{
	int rc;

	mce_notifier->priority = MCE_PRIO_UC;
	mce_notifier->notifier_call = cxl_handle_mce;

	mce_register_decode_chain(mce_notifier);

	rc = devm_add_action_or_reset(dev, cxl_unregister_mce_notifier,
				      mce_notifier);
	return rc;
}
Loading