Commit d9412f08 authored by Dave Jiang
Browse files

Merge branch 'for-6.18/cxl-poison-inject' into cxl-for-next

Add support to allow expert users to inject and clear poison for the CXL
subsystem by writing a System Physical Address (SPA) to a debugfs file.
parents 733c4e9b c3dd6768
Loading
Loading
Loading
Loading
+87 −0
Original line number Diff line number Diff line
@@ -19,6 +19,20 @@ Description:
		is returned to the user. The inject_poison attribute is only
		visible for devices supporting the capability.

		TEST-ONLY INTERFACE: This interface is intended for testing
		and validation purposes only. It is not a data repair mechanism
		and should never be used on production systems or live data.

		DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
		poison injection can result in permanent data loss. Injected
		poison may render data permanently inaccessible even after
		clearing, as the clear operation writes zeros and does not
		recover original data.

		SYSTEM STABILITY RISK: For volatile memory, poison injection
		can cause kernel crashes, system instability, or unpredictable
		behavior if the poisoned addresses are accessed by running code
		or critical kernel structures.

What:		/sys/kernel/debug/cxl/memX/clear_poison
Date:		April, 2023
@@ -35,6 +49,79 @@ Description:
		The clear_poison attribute is only visible for devices
		supporting the capability.

		TEST-ONLY INTERFACE: This interface is intended for testing
		and validation purposes only. It is not a data repair mechanism
		and should never be used on production systems or live data.

		CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
		specified address range and removes the address from the poison
		list. It does NOT recover or restore original data that may have
		been present before poison injection. Any original data at the
		cleared address is permanently lost and replaced with zeros.

		CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
		purposes only and should not be used as a data repair tool.
		Clearing poison is fundamentally different from data recovery
		or error correction.

What:		/sys/kernel/debug/cxl/regionX/inject_poison
Date:		August, 2025
Contact:	linux-cxl@vger.kernel.org
Description:
		(WO) When a Host Physical Address (HPA) is written to this
		attribute, the region driver translates it to a Device
		Physical Address (DPA) and identifies the corresponding
		memdev. It then sends an inject poison command to that memdev
		at the translated DPA. Refer to the memdev ABI entry at:
		/sys/kernel/debug/cxl/memX/inject_poison for the detailed
		behavior. This attribute is only visible if all memdevs
		participating in the region support both inject and clear
		poison commands.

		TEST-ONLY INTERFACE: This interface is intended for testing
		and validation purposes only. It is not a data repair mechanism
		and should never be used on production systems or live data.

		DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
		poison injection can result in permanent data loss. Injected
		poison may render data permanently inaccessible even after
		clearing, as the clear operation writes zeros and does not
		recover original data.

		SYSTEM STABILITY RISK: For volatile memory, poison injection
		can cause kernel crashes, system instability, or unpredictable
		behavior if the poisoned addresses are accessed by running code
		or critical kernel structures.

What:		/sys/kernel/debug/cxl/regionX/clear_poison
Date:		August, 2025
Contact:	linux-cxl@vger.kernel.org
Description:
		(WO) When a Host Physical Address (HPA) is written to this
		attribute, the region driver translates it to a Device
		Physical Address (DPA) and identifies the corresponding
		memdev. It then sends a clear poison command to that memdev
		at the translated DPA. Refer to the memdev ABI entry at:
		/sys/kernel/debug/cxl/memX/clear_poison for the detailed
		behavior. This attribute is only visible if all memdevs
		participating in the region support both inject and clear
		poison commands.

		TEST-ONLY INTERFACE: This interface is intended for testing
		and validation purposes only. It is not a data repair mechanism
		and should never be used on production systems or live data.

		CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
		specified address range and removes the address from the poison
		list. It does NOT recover or restore original data that may have
		been present before poison injection. Any original data at the
		cleared address is permanently lost and replaced with zeros.

		CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
		purposes only and should not be used as a data repair tool.
		Clearing poison is fundamentally different from data recovery
		or error correction.

What:		/sys/kernel/debug/cxl/einj_types
Date:		January, 2024
KernelVersion:	v6.9
+1 −1
Original line number Diff line number Diff line
@@ -173,7 +173,7 @@ Accelerator
User Flow Support
-----------------

* [0] Inject & clear poison by HPA
* [2] Inject & clear poison by region offset

Details
=======
+22 −13
Original line number Diff line number Diff line
@@ -20,8 +20,7 @@ static const guid_t acpi_cxl_qtg_id_guid =
	GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071,
		  0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52);


static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
{
	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
	int hbiw = cxlrd->cxlsd.nr_targets;
@@ -30,19 +29,23 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)

	/* No xormaps for host bridge interleave ways of 1 or 3 */
	if (hbiw == 1 || hbiw == 3)
		return hpa;
		return addr;

	/*
	 * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore
	 * the position bit to its value before the xormap was applied at
	 * HPA->DPA translation.
	 * In regions using XOR interleave arithmetic the CXL HPA may not
	 * be the same as the SPA. This helper performs the SPA->CXL HPA
	 * or the CXL HPA->SPA translation. Since XOR is self-inverting,
	 * so is this function.
	 *
	 * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) applying the
	 * xormaps will toggle a position bit.
	 *
	 * pos is the lowest set bit in an XORMAP
	 * val is the XORALLBITS(HPA & XORMAP)
	 * val is the XORALLBITS(addr & XORMAP)
	 *
	 * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS
	 * as an operation that outputs a single bit by XORing all the
	 * bits in the input (hpa & xormap). Implement XORALLBITS using
	 * bits in the input (addr & xormap). Implement XORALLBITS using
	 * hweight64(). If the hamming weight is even the XOR of those
	 * bits results in val==0, if odd the XOR result is val==1.
	 */
@@ -51,11 +54,11 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
		if (!cximsd->xormaps[i])
			continue;
		pos = __ffs(cximsd->xormaps[i]);
		val = (hweight64(hpa & cximsd->xormaps[i]) & 1);
		hpa = (hpa & ~(1ULL << pos)) | (val << pos);
		val = (hweight64(addr & cximsd->xormaps[i]) & 1);
		addr = (addr & ~(1ULL << pos)) | (val << pos);
	}

	return hpa;
	return addr;
}

struct cxl_cxims_context {
@@ -472,8 +475,14 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,

	cxlrd->qos_class = cfmws->qtg_id;

	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR)
		cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa;
	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
		cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL);
		if (!cxlrd->ops)
			return -ENOMEM;

		cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps;
		cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps;
	}

	rc = cxl_decoder_add(cxld, target_map);
	if (rc)
+4 −0
Original line number Diff line number Diff line
@@ -135,6 +135,10 @@ enum cxl_poison_trace_type {
	CXL_POISON_TRACE_CLEAR,
};

/*
 * Only the enum tag is declared here; its enumerators are not needed to
 * declare the prototype below.
 */
enum poison_cmd_enabled_bits;
/*
 * Report whether the poison command identified by @cmd is marked as enabled
 * for @cxlmd.
 */
bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd,
			       enum poison_cmd_enabled_bits cmd);

long cxl_pci_get_latency(struct pci_dev *pdev);
int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
+44 −16
Original line number Diff line number Diff line
@@ -200,6 +200,14 @@ static ssize_t security_erase_store(struct device *dev,
static struct device_attribute dev_attr_security_erase =
	__ATTR(erase, 0200, NULL, security_erase_store);

bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd,
			       enum poison_cmd_enabled_bits cmd)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);

	return test_bit(cmd, mds->poison.enabled_cmds);
}

static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -276,7 +284,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
	return 0;
}

int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_mbox_inject_poison inject;
@@ -288,13 +296,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
		return rc;

	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
		return rc;
	lockdep_assert_held(&cxl_rwsem.dpa);
	lockdep_assert_held(&cxl_rwsem.region);

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
@@ -324,9 +327,24 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)

	return 0;
}

/*
 * cxl_inject_poison() - take the CXL locks, then inject poison at a DPA
 * @cxlmd: memdev that receives the inject poison request
 * @dpa: device physical address handed to cxl_inject_poison_locked()
 *
 * Acquires cxl_rwsem.region first and cxl_rwsem.dpa second, both through the
 * rwsem_read_intr ACQUIRE() class, and then defers to
 * cxl_inject_poison_locked(), which asserts that both are held.
 *
 * Return: the non-zero ACQUIRE_ERR() value if either acquisition fails,
 * otherwise whatever cxl_inject_poison_locked() returns.
 */
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	int err;

	/* Lock order: region before dpa */
	ACQUIRE(rwsem_read_intr, region_guard)(&cxl_rwsem.region);
	err = ACQUIRE_ERR(rwsem_read_intr, &region_guard);
	if (err)
		return err;

	ACQUIRE(rwsem_read_intr, dpa_guard)(&cxl_rwsem.dpa);
	err = ACQUIRE_ERR(rwsem_read_intr, &dpa_guard);
	if (err)
		return err;

	return cxl_inject_poison_locked(cxlmd, dpa);
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL");

int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_mbox_clear_poison clear;
@@ -338,13 +356,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
		return rc;

	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
		return rc;
	lockdep_assert_held(&cxl_rwsem.dpa);
	lockdep_assert_held(&cxl_rwsem.region);

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
@@ -383,6 +396,21 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)

	return 0;
}

/*
 * cxl_clear_poison() - take the CXL locks, then clear poison at a DPA
 * @cxlmd: memdev that receives the clear poison request
 * @dpa: device physical address handed to cxl_clear_poison_locked()
 *
 * Acquires cxl_rwsem.region first and cxl_rwsem.dpa second, both through the
 * rwsem_read_intr ACQUIRE() class, and then defers to
 * cxl_clear_poison_locked(), which asserts that both are held.
 *
 * Return: the non-zero ACQUIRE_ERR() value if either acquisition fails,
 * otherwise whatever cxl_clear_poison_locked() returns.
 */
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	int err;

	/* Lock order: region before dpa */
	ACQUIRE(rwsem_read_intr, region_guard)(&cxl_rwsem.region);
	err = ACQUIRE_ERR(rwsem_read_intr, &region_guard);
	if (err)
		return err;

	ACQUIRE(rwsem_read_intr, dpa_guard)(&cxl_rwsem.dpa);
	err = ACQUIRE_ERR(rwsem_read_intr, &dpa_guard);
	if (err)
		return err;

	return cxl_clear_poison_locked(cxlmd, dpa);
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL");

static struct attribute *cxl_memdev_attributes[] = {
Loading