Loading Documentation/ABI/testing/sysfs-bus-cxl +12 −0 Original line number Diff line number Diff line Loading @@ -604,3 +604,15 @@ Description: See Documentation/ABI/stable/sysfs-devices-node. access0 provides the number to the closest initiator and access1 provides the number to the closest CPU. What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown Date: Feb, 2025 KernelVersion: v6.15 Contact: linux-cxl@vger.kernel.org Description: (RO) The device dirty shutdown count value, which is the number of times the device could have incurred in potential data loss. The count is persistent across power loss and wraps back to 0 upon overflow. If this file is not present, the device does not have the necessary support for dirty tracking. Documentation/driver-api/cxl/maturity-map.rst +1 −1 Original line number Diff line number Diff line Loading @@ -130,7 +130,7 @@ Mailbox commands * [0] Switch CCI * [3] Timestamp * [1] PMEM labels * [0] PMEM GPF / Dirty Shutdown * [3] PMEM GPF / Dirty Shutdown * [0] Scan Media PMU Loading drivers/cxl/core/core.h +1 −0 Original line number Diff line number Diff line Loading @@ -117,5 +117,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); #endif /* __CXL_CORE_H__ */ drivers/cxl/core/mbox.c +39 −0 Original line number Diff line number Diff line Loading @@ -1282,6 +1282,45 @@ int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) } EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_health_info_out hi; struct cxl_mbox_cmd mbox_cmd; int rc; mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_GET_HEALTH_INFO, .size_out = sizeof(hi), .payload_out = &hi, }; rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (!rc) *count = 
le32_to_cpu(hi.dirty_shutdown_cnt); return rc; } EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL"); int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd; struct cxl_mbox_set_shutdown_state_in in = { .state = 1 }; mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE, .size_in = sizeof(in), .payload_in = &in, }; return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL"); int cxl_set_timestamp(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; Loading drivers/cxl/core/pci.c +97 −0 Original line number Diff line number Diff line Loading @@ -1054,3 +1054,100 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) return 0; } /* * Set max timeout such that platforms will optimize GPF flow to avoid * the implied worst-case scenario delays. On a sane platform, all * devices should always complete GPF within the energy budget of * the GPF flow. The kernel does not have enough information to pick * anything better than "maximize timeouts and hope it works". * * A misbehaving device could block forward progress of GPF for all * the other devices, exhausting the energy budget of the platform. * However, the spec seems to assume that moving on from slow to respond * devices is a virtue. It is not possible to know that, in actuality, * the slow to respond device is *the* most critical device in the * system to wait. */ #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) { u16 dvsec; if (!dev_is_pci(dev)) return 0; dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? 
"Port" : "Device"); return dvsec; } EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL"); static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) { u64 base, scale; int rc, offset; u16 ctrl; switch (phase) { case 1: offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; break; case 2: offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; break; default: return -EINVAL; } rc = pci_read_config_word(pdev, dvsec + offset, &ctrl); if (rc) return rc; if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX && FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX) return 0; ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX); ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX); rc = pci_write_config_word(pdev, dvsec + offset, ctrl); if (!rc) pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n", phase, GPF_TIMEOUT_BASE_MAX); return rc; } int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) { struct pci_dev *pdev; if (!port) return -EINVAL; if (!port->gpf_dvsec) { int dvsec; dvsec = cxl_gpf_get_dvsec(dport_dev, true); if (!dvsec) return -EINVAL; port->gpf_dvsec = dvsec; } pdev = to_pci_dev(dport_dev); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); return 0; } Loading
Documentation/ABI/testing/sysfs-bus-cxl +12 −0 Original line number Diff line number Diff line Loading @@ -604,3 +604,15 @@ Description: See Documentation/ABI/stable/sysfs-devices-node. access0 provides the number to the closest initiator and access1 provides the number to the closest CPU. What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown Date: Feb, 2025 KernelVersion: v6.15 Contact: linux-cxl@vger.kernel.org Description: (RO) The device dirty shutdown count value, which is the number of times the device could have incurred potential data loss. The count is persistent across power loss and wraps back to 0 upon overflow. If this file is not present, the device does not have the necessary support for dirty tracking.
Documentation/driver-api/cxl/maturity-map.rst +1 −1 Original line number Diff line number Diff line Loading @@ -130,7 +130,7 @@ Mailbox commands * [0] Switch CCI * [3] Timestamp * [1] PMEM labels * [0] PMEM GPF / Dirty Shutdown * [3] PMEM GPF / Dirty Shutdown * [0] Scan Media PMU Loading
drivers/cxl/core/core.h +1 −0 Original line number Diff line number Diff line Loading @@ -117,5 +117,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
int cxl_ras_init(void);
void cxl_ras_exit(void);
/*
 * Defined in core/pci.c: programs the phase 1 and phase 2 timeouts in the
 * port GPF DVSEC of @dport_dev. Returns -EINVAL when @port is NULL or the
 * DVSEC lookup fails, 0 otherwise.
 */
int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);

#endif /* __CXL_CORE_H__ */
drivers/cxl/core/mbox.c +39 −0 Original line number Diff line number Diff line Loading @@ -1282,6 +1282,45 @@ int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) } EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_health_info_out hi; struct cxl_mbox_cmd mbox_cmd; int rc; mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_GET_HEALTH_INFO, .size_out = sizeof(hi), .payload_out = &hi, }; rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (!rc) *count = le32_to_cpu(hi.dirty_shutdown_cnt); return rc; } EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL"); int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd; struct cxl_mbox_set_shutdown_state_in in = { .state = 1 }; mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE, .size_in = sizeof(in), .payload_in = &in, }; return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL"); int cxl_set_timestamp(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; Loading
drivers/cxl/core/pci.c +97 −0 Original line number Diff line number Diff line Loading @@ -1054,3 +1054,100 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) return 0; } /* * Set max timeout such that platforms will optimize GPF flow to avoid * the implied worst-case scenario delays. On a sane platform, all * devices should always complete GPF within the energy budget of * the GPF flow. The kernel does not have enough information to pick * anything better than "maximize timeouts and hope it works". * * A misbehaving device could block forward progress of GPF for all * the other devices, exhausting the energy budget of the platform. * However, the spec seems to assume that moving on from slow to respond * devices is a virtue. It is not possible to know that, in actuality, * the slow to respond device is *the* most critical device in the * system to wait. */ #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) { u16 dvsec; if (!dev_is_pci(dev)) return 0; dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? 
"Port" : "Device"); return dvsec; } EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL"); static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) { u64 base, scale; int rc, offset; u16 ctrl; switch (phase) { case 1: offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; break; case 2: offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; break; default: return -EINVAL; } rc = pci_read_config_word(pdev, dvsec + offset, &ctrl); if (rc) return rc; if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX && FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX) return 0; ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX); ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX); rc = pci_write_config_word(pdev, dvsec + offset, ctrl); if (!rc) pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n", phase, GPF_TIMEOUT_BASE_MAX); return rc; } int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) { struct pci_dev *pdev; if (!port) return -EINVAL; if (!port->gpf_dvsec) { int dvsec; dvsec = cxl_gpf_get_dvsec(dport_dev, true); if (!dvsec) return -EINVAL; port->gpf_dvsec = dvsec; } pdev = to_pci_dev(dport_dev); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); return 0; }