Merge branch 'for-6.15/dirty-shutdown' into cxl-for-next2 (d781a452) · Commits · git / linux-net

Documentation/ABI/testing/sysfs-bus-cxl

+12 −0

Original line number	Diff line number	Diff line
		@@ -604,3 +604,15 @@ Description:
		See Documentation/ABI/stable/sysfs-devices-node. access0 provides
		the number to the closest initiator and access1 provides the
		number to the closest CPU.


		What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
		Date: Feb, 2025
		KernelVersion: v6.15
		Contact: linux-cxl@vger.kernel.org
		Description:
		(RO) The device dirty shutdown count value, which is the number
		of times the device could have incurred potential data loss.
		The count is persistent across power loss and wraps back to 0
		upon overflow. If this file is not present, the device does not
		have the necessary support for dirty tracking.

Documentation/driver-api/cxl/maturity-map.rst

+1 −1

Original line number	Diff line number	Diff line
		@@ -130,7 +130,7 @@ Mailbox commands
		* [0] Switch CCI
		* [3] Timestamp
		* [1] PMEM labels
		* [0] PMEM GPF / Dirty Shutdown
		* [3] PMEM GPF / Dirty Shutdown
		* [0] Scan Media

		PMU

drivers/cxl/core/core.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -117,5 +117,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,

		int cxl_ras_init(void);
		void cxl_ras_exit(void);
		/*
		 * Both arguments are pointers: the definition NULL-checks @port and
		 * dereferences it to reach its gpf_dvsec field.
		 */
		int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);

		#endif /* __CXL_CORE_H__ */

drivers/cxl/core/mbox.c

+39 −0

Original line number	Diff line number	Diff line
		@@ -1282,6 +1282,45 @@ int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
		}
		EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");

		/**
		 * cxl_get_dirty_count() - Read the dirty shutdown count from the device
		 * @mds: memdev state whose mailbox is used to send the command
		 * @count: output location; written only when the command succeeds
		 *
		 * Sends CXL_MBOX_OP_GET_HEALTH_INFO and converts the little-endian
		 * dirty_shutdown_cnt field of the returned payload to CPU byte order.
		 *
		 * Return: the result of cxl_internal_send_cmd(). @count is stored only
		 * when that result is 0; otherwise it is left untouched.
		 */
		int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
		{
			struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
			struct cxl_mbox_get_health_info_out hi;
			struct cxl_mbox_cmd mbox_cmd;
			int rc;

			mbox_cmd = (struct cxl_mbox_cmd) {
				.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
				.size_out = sizeof(hi),
				.payload_out = &hi,
			};

			rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
			if (!rc)
				*count = le32_to_cpu(hi.dirty_shutdown_cnt);

			return rc;
		}
		EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");

		int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
		{
		struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
		struct cxl_mbox_cmd mbox_cmd;
		struct cxl_mbox_set_shutdown_state_in in = {
		.state = 1
		};

		mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
		.size_in = sizeof(in),
		.payload_in = &in,
		};

		return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
		}
		EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL");

		int cxl_set_timestamp(struct cxl_memdev_state *mds)
		{
		struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;

drivers/cxl/core/pci.c

+97 −0

Original line number	Diff line number	Diff line
		@@ -1054,3 +1054,100 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)

		return 0;
		}

		/*
		 * Set max timeout such that platforms will optimize GPF flow to avoid
		 * the implied worst-case scenario delays. On a sane platform, all
		 * devices should always complete GPF within the energy budget of
		 * the GPF flow. The kernel does not have enough information to pick
		 * anything better than "maximize timeouts and hope it works".
		 *
		 * A misbehaving device could block forward progress of GPF for all
		 * the other devices, exhausting the energy budget of the platform.
		 * However, the spec seems to assume that moving on from slow to respond
		 * devices is a virtue. It is not possible to know that, in actuality,
		 * the slow to respond device is the most critical device in the
		 * system to wait for.
		 *
		 * The two values below are what update_gpf_port_dvsec() programs into
		 * the timeout base and timeout scale fields of the port GPF phase 1
		 * and phase 2 control words.
		 */
		#define GPF_TIMEOUT_BASE_MAX 2
		#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */

		/**
		 * cxl_gpf_get_dvsec() - Locate the CXL GPF DVSEC of a PCI device
		 * @dev: device to inspect; anything that is not a PCI device yields 0
		 * @is_port: true to look up CXL_DVSEC_PORT_GPF, false to look up
		 *           CXL_DVSEC_DEVICE_GPF
		 *
		 * Emits a dev_warn() when @dev is a PCI device but the requested DVSEC
		 * is not found. No warning is issued for non-PCI devices.
		 *
		 * Return: the value from pci_find_dvsec_capability(), or 0 when @dev is
		 * not a PCI device or the capability is absent.
		 */
		u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port)
		{
			const char *kind = is_port ? "Port" : "Device";
			u16 dvsec_id = is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF;
			u16 pos;

			if (!dev_is_pci(dev))
				return 0;

			pos = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL,
							dvsec_id);
			if (pos)
				return pos;

			dev_warn(dev, "%s GPF DVSEC not present\n", kind);
			return 0;
		}
		EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");

		/*
		 * Program the maximum GPF timeout for one phase of a port's GPF DVSEC.
		 *
		 * @pdev:  PCI device that carries the port GPF DVSEC
		 * @dvsec: config-space offset of that DVSEC
		 * @phase: GPF phase to configure, 1 or 2
		 *
		 * Reads the phase's control word and returns early when base and scale
		 * already equal GPF_TIMEOUT_BASE_MAX / GPF_TIMEOUT_SCALE_MAX. Otherwise a
		 * new control word is written that contains only those two fields; any
		 * other bits that were set in the word read back are not carried over.
		 *
		 * Returns -EINVAL for an unknown @phase, otherwise the value returned by
		 * the PCI config accessor that ran last (0 on success).
		 */
		static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
		{
			u64 base, scale;
			int rc, offset;
			u16 ctrl;

			switch (phase) {
			case 1:
				offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
				base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
				scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
				break;
			case 2:
				offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
				base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
				scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
				break;
			default:
				return -EINVAL;
			}

			rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
			if (rc)
				return rc;

			/* Nothing to do when the register already holds the max timeout. */
			if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
			    FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
				return 0;

			ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
			ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);

			rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
			if (!rc)
				pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
					phase, GPF_TIMEOUT_BASE_MAX);

			return rc;
		}

		int cxl_gpf_port_setup(struct device dport_dev, struct cxl_port port)
		{
		struct pci_dev *pdev;

		if (!port)
		return -EINVAL;

		if (!port->gpf_dvsec) {
		int dvsec;

		dvsec = cxl_gpf_get_dvsec(dport_dev, true);
		if (!dvsec)
		return -EINVAL;

		port->gpf_dvsec = dvsec;
		}

		pdev = to_pci_dev(dport_dev);
		update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1);
		update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2);

		return 0;
		}