Commit d781a452 authored by Dave Jiang's avatar Dave Jiang
Browse files

Merge branch 'for-6.15/dirty-shutdown' into cxl-for-next2

Add support for Global Persistent Flush (GPF) and dirty shutdown
accounting.
parents b6faa9c6 6eb52f63
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -604,3 +604,15 @@ Description:
		See Documentation/ABI/stable/sysfs-devices-node. access0 provides
		the number to the closest initiator and access1 provides the
		number to the closest CPU.


What:		/sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
Date:		Feb, 2025
KernelVersion:	v6.15
Contact:	linux-cxl@vger.kernel.org
Description:
		(RO) The device dirty shutdown count value, which is the number
		of times the device could have incurred potential data loss.
		The count is persistent across power loss and wraps back to 0
		upon overflow. If this file is not present, the device does not
		have the necessary support for dirty tracking.
+1 −1
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ Mailbox commands
* [0] Switch CCI
* [3] Timestamp
* [1] PMEM labels
* [0] PMEM GPF / Dirty Shutdown
* [3] PMEM GPF / Dirty Shutdown
* [0] Scan Media

PMU
+1 −0
Original line number Diff line number Diff line
@@ -117,5 +117,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,

int cxl_ras_init(void);
void cxl_ras_exit(void);
int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);

#endif /* __CXL_CORE_H__ */
+39 −0
Original line number Diff line number Diff line
@@ -1282,6 +1282,45 @@ int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");

/*
 * Read the device's dirty shutdown count by issuing a Get Health Info
 * mailbox command.
 *
 * @mds: memdev state whose mailbox is used to send the command
 * @count: on success, receives the dirty shutdown count converted from
 *	   little-endian to CPU byte order; not written on failure
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * cxl_internal_send_cmd().
 */
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
{
	struct cxl_mbox_get_health_info_out health;
	struct cxl_mbox_cmd cmd = {
		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
		.size_out = sizeof(health),
		.payload_out = &health,
	};
	int err;

	err = cxl_internal_send_cmd(&mds->cxlds.cxl_mbox, &cmd);
	if (err)
		return err;

	*count = le32_to_cpu(health.dirty_shutdown_cnt);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");

int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_mbox_set_shutdown_state_in in = {
		.state = 1
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
		.size_in = sizeof(in),
		.payload_in = &in,
	};

	return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL");

int cxl_set_timestamp(struct cxl_memdev_state *mds)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+97 −0
Original line number Diff line number Diff line
@@ -1054,3 +1054,100 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)

	return 0;
}

/*
 * Set max timeout such that platforms will optimize GPF flow to avoid
 * the implied worst-case scenario delays. On a sane platform, all
 * devices should always complete GPF within the energy budget of
 * the GPF flow. The kernel does not have enough information to pick
 * anything better than "maximize timeouts and hope it works".
 *
 * A misbehaving device could block forward progress of GPF for all
 * the other devices, exhausting the energy budget of the platform.
 * However, the spec seems to assume that moving on from slow-to-respond
 * devices is a virtue. It is not possible to know whether, in actuality,
 * the slow-to-respond device is *the* most critical device in the
 * system to wait for.
 *
 * These two values are programmed into the timeout base and timeout
 * scale fields of the Port GPF phase 1 and phase 2 control registers.
 * With a scale unit of 10 seconds and a base of 2, the resulting
 * timeout is reported by the debug message as "20 secs".
 */
#define GPF_TIMEOUT_BASE_MAX 2
#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */

/*
 * Locate the CXL GPF DVSEC in the PCI config space of @dev.
 *
 * @dev: device to search; anything that is not a PCI device yields 0
 * @is_port: true to look for the port GPF DVSEC, false for the device
 *	     GPF DVSEC
 *
 * Returns the value reported by pci_find_dvsec_capability(), or 0 when
 * @dev is not a PCI device. A warning is logged when the lookup on a
 * PCI device returns 0.
 */
u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port)
{
	struct pci_dev *pdev;
	const char *kind;
	u16 id, pos;

	if (!dev_is_pci(dev))
		return 0;

	if (is_port) {
		id = CXL_DVSEC_PORT_GPF;
		kind = "Port";
	} else {
		id = CXL_DVSEC_DEVICE_GPF;
		kind = "Device";
	}

	pdev = to_pci_dev(dev);
	pos = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, id);
	if (!pos)
		dev_warn(dev, "%s GPF DVSEC not present\n", kind);

	return pos;
}
EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");

/*
 * Program the maximum GPF timeout into one of the Port GPF phase control
 * registers.
 *
 * @pdev: PCI device that carries the Port GPF DVSEC
 * @dvsec: config space offset of the Port GPF DVSEC in @pdev
 * @phase: GPF phase to update, 1 or 2
 *
 * The register is left alone if it already holds GPF_TIMEOUT_BASE_MAX
 * and GPF_TIMEOUT_SCALE_MAX. Otherwise only the timeout base and scale
 * fields are rewritten; every other bit is written back as it was read.
 *
 * Returns 0 on success or when no update was needed, -EINVAL for an
 * unknown @phase, otherwise the non-zero value returned by the PCI
 * config space accessor that failed.
 */
static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
{
	u64 base, scale;
	int rc, offset;
	u16 ctrl;

	switch (phase) {
	case 1:
		offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
		base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
		scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
		break;
	case 2:
		offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
		base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
		scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
		break;
	default:
		return -EINVAL;
	}

	rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
	if (rc)
		return rc;

	/* Nothing to do if the maximum timeout is already programmed. */
	if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
	    FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
		return 0;

	/*
	 * Read-modify-write: clear only the two timeout fields so that the
	 * remaining (reserved) bits of the control word are written back
	 * with the value that was read rather than being forced to zero.
	 */
	ctrl &= ~(base | scale);
	ctrl |= FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
	ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);

	rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
	if (!rc)
		pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
			phase, GPF_TIMEOUT_BASE_MAX);

	return rc;
}

/*
 * Program the maximum GPF phase 1 and phase 2 timeouts on a downstream
 * port device.
 *
 * @dport_dev: downstream port device expected to carry the Port GPF DVSEC
 * @port: CXL port that @dport_dev belongs to; the DVSEC offset that was
 *	  found is recorded in port->gpf_dvsec
 *
 * The DVSEC offset is looked up on @dport_dev on every call. An offset
 * found on one device is not guaranteed to be valid in the config space
 * of another, so a value previously stored in port->gpf_dvsec is never
 * reused for a different @dport_dev. The lookup also rejects non-PCI
 * devices before to_pci_dev() is applied.
 *
 * Returns 0 once the DVSEC has been located, -EINVAL if @port is NULL or
 * @dport_dev has no Port GPF DVSEC (including when it is not a PCI
 * device).
 */
int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port)
{
	struct pci_dev *pdev;
	int dvsec;

	if (!port)
		return -EINVAL;

	dvsec = cxl_gpf_get_dvsec(dport_dev, true);
	if (!dvsec)
		return -EINVAL;

	port->gpf_dvsec = dvsec;

	pdev = to_pci_dev(dport_dev);
	/*
	 * NOTE(review): failures to program the timeouts are ignored here,
	 * as in the original code; this looks like deliberate best-effort
	 * behavior - confirm before propagating the errors to callers.
	 */
	update_gpf_port_dvsec(pdev, dvsec, 1);
	update_gpf_port_dvsec(pdev, dvsec, 2);

	return 0;
}
Loading