Commit 26fd9f7b authored by Linus Torvalds
Browse files

Merge tag 'cxl-fixes-6.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull Compute Express Link (CXL) fixes from Dave Jiang:
 "These fixes address a few issues in the CXL subsystem, including
  dealing with some bugs in the CXL EDAC and RAS drivers:

   - Fix return value of cxlctl_validate_set_features()

   - Fix miscalculation of a region's min_scrub_cycle and add additional
     documentation

   - Fix potential memory leak issues for CXL EDAC

   - Fix CPER handler device confusion for CXL RAS

   - Fix using wrong repair type to check DRAM event record"

* tag 'cxl-fixes-6.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/edac: Fix using wrong repair type to check dram event record
  cxl/ras: Fix CPER handler device confusion
  cxl/edac: Fix potential memory leak issues
  cxl/Documentation: Add more description about min/max scrub cycle
  cxl/edac: Fix the min_scrub_cycle of a region miscalculation
  cxl: fix return value in cxlctl_validate_set_features()
parents 5683cd63 0a46f60a
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -49,6 +49,12 @@ Description:
		(RO) Supported minimum scrub cycle duration in seconds
		by the memory scrubber.

		Device-based scrub: returns the minimum scrub cycle
		supported by the memory device.

		Region-based scrub: returns the max of minimum scrub cycles
		supported by individual memory devices that back the region.

What:		/sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration
Date:		March 2025
KernelVersion:	6.15
@@ -57,6 +63,16 @@ Description:
		(RO) Supported maximum scrub cycle duration in seconds
		by the memory scrubber.

		Device-based scrub: returns the maximum scrub cycle supported
		by the memory device.

		Region-based scrub: returns the min of maximum scrub cycles
		supported by individual memory devices that back the region.

		If the memory device does not provide maximum scrub cycle
		information, return the maximum supported value of the scrub
		cycle field.

What:		/sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration
Date:		March 2025
KernelVersion:	6.15
+13 −5
Original line number Diff line number Diff line
@@ -103,10 +103,10 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
{
	struct cxl_mailbox *cxl_mbox;
	u8 min_scrub_cycle = U8_MAX;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	u8 min_scrub_cycle = 0;
	int i, ret;

	if (!cxl_ps_ctx->cxlr) {
@@ -133,8 +133,12 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
		if (ret)
			return ret;

		/*
		 * The min_scrub_cycle of a region is the max of minimum scrub
		 * cycles supported by memdevs that back the region.
		 */
		if (min_cycle)
			min_scrub_cycle = min(*min_cycle, min_scrub_cycle);
			min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
	}

	if (min_cycle)
@@ -1099,8 +1103,10 @@ int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
	old_rec = xa_store(&array_rec->rec_gen_media,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec))
	if (xa_is_err(old_rec)) {
		kfree(rec);
		return xa_err(old_rec);
	}

	kfree(old_rec);

@@ -1127,8 +1133,10 @@ int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
	old_rec = xa_store(&array_rec->rec_dram,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec))
	if (xa_is_err(old_rec)) {
		kfree(rec);
		return xa_err(old_rec);
	}

	kfree(old_rec);

@@ -1315,7 +1323,7 @@ cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
		attrbs.bank = ctx->bank;
	break;
	case EDAC_REPAIR_RANK_SPARING:
		attrbs.repair_type = CXL_BANK_SPARING;
		attrbs.repair_type = CXL_RANK_SPARING;
		break;
	default:
		return NULL;
+1 −1
Original line number Diff line number Diff line
@@ -544,7 +544,7 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
	u32 flags;

	if (rpc_in->op_size < sizeof(uuid_t))
		return ERR_PTR(-EINVAL);
		return false;

	feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid);
	if (IS_ERR(feat))
+27 −20
Original line number Diff line number Diff line
@@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
					       ras_cap.header_log);
}

static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd,
					 struct cxl_ras_capability_regs ras_cap)
{
	u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
	struct cxl_dev_state *cxlds;

	cxlds = pci_get_drvdata(pdev);
	if (!cxlds)
		return;

	trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
	trace_cxl_aer_correctable_error(cxlmd, status);
}

static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
static void
cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd,
			       struct cxl_ras_capability_regs ras_cap)
{
	u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
	struct cxl_dev_state *cxlds;
	u32 fe;

	cxlds = pci_get_drvdata(pdev);
	if (!cxlds)
		return;

	if (hweight32(status) > 1)
		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
				   ras_cap.cap_control));
	else
		fe = status;

	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
	trace_cxl_aer_uncorrectable_error(cxlmd, status, fe,
					  ras_cap.header_log);
}

/*
 * Device match callback: report whether @dev is a CXL memdev whose parent
 * pointer is identical to @uport.
 *
 * Returns 1 on a match and 0 otherwise. The memdev type check is evaluated
 * first, so the parent comparison only happens for CXL memdevs.
 */
static int match_memdev_by_parent(struct device *dev, const void *uport)
{
	return is_cxl_memdev(dev) && dev->parent == uport;
}

static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
{
	unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
@@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
		pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
					    data->prot_err.agent_addr.bus,
					    devfn);
	struct cxl_memdev *cxlmd;
	int port_type;

	if (!pdev)
		return;

	guard(device)(&pdev->dev);

	port_type = pci_pcie_type(pdev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
	    port_type == PCI_EXP_TYPE_DOWNSTREAM ||
@@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
		return;
	}

	guard(device)(&pdev->dev);
	if (!pdev->dev.driver)
		return;

	struct device *mem_dev __free(put_device) = bus_find_device(
		&cxl_bus_type, NULL, pdev, match_memdev_by_parent);
	if (!mem_dev)
		return;

	cxlmd = to_cxl_memdev(mem_dev);
	if (data->severity == AER_CORRECTABLE)
		cxl_cper_trace_corr_prot_err(pdev, data->ras_cap);
		cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap);
	else
		cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap);
		cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
}

static void cxl_cper_prot_err_work_fn(struct work_struct *work)