Commit 95350eff authored by Fabio M. De Francesco's avatar Fabio M. De Francesco Committed by Rafael J. Wysocki
Browse files

ACPI: extlog: Trace CPER CXL Protocol Error Section



When Firmware First is enabled, BIOS handles errors first and then it
makes them available to the kernel via the Common Platform Error Record
(CPER) sections (UEFI 2.11 Appendix N.2.13). Linux parses the CPER
sections via one of two similar paths, either ELOG or GHES. The errors
managed by ELOG are signaled to the BIOS by the I/O Machine Check
Architecture (I/O MCA).

Currently, ELOG and GHES show some inconsistencies in how they report to
userspace via trace events.

Therefore, make the two mentioned paths act similarly by tracing the CPER
CXL Protocol Error Section.

Reviewed-by: default avatarDave Jiang <dave.jiang@intel.com>
Reviewed-by: default avatarJonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: default avatarKuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: default avatarFabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Link: https://patch.msgid.link/20260114101543.85926-6-fabio.m.de.francesco@linux.intel.com


Signed-off-by: default avatarRafael J. Wysocki <rafael.j.wysocki@intel.com>
parent ba8af8e1
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -494,6 +494,8 @@ config ACPI_EXTLOG
	tristate "Extended Error Log support"
	depends on X86_MCE && X86_LOCAL_APIC && EDAC
	select UEFI_CPER
	select ACPI_APEI
	select ACPI_APEI_GHES
	help
	  Certain usages such as Predictive Failure Analysis (PFA) require
	  more information about the error than what can be described in
+24 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/ratelimit.h>
#include <linux/edac.h>
#include <linux/ras.h>
#include <cxl/event.h>
#include <acpi/ghes.h>
#include <asm/cpu.h>
#include <asm/mce.h>
@@ -162,6 +163,23 @@ static void extlog_print_pcie(struct cper_sec_pcie *pcie_err,
#endif
}

static void
extlog_cxl_cper_handle_prot_err(struct cxl_cper_sec_prot_err *prot_err,
				int severity)
{
#ifdef ACPI_APEI_PCIEAER
	struct cxl_cper_prot_err_work_data wd;

	if (cxl_cper_sec_prot_err_valid(prot_err))
		return;

	if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity))
		return;

	cxl_cper_handle_prot_err(&wd);
#endif
}

static int extlog_print(struct notifier_block *nb, unsigned long val,
			void *data)
{
@@ -213,6 +231,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
			if (gdata->error_data_length >= sizeof(*mem))
				trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
						       (u8)gdata->error_severity);
		} else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
			struct cxl_cper_sec_prot_err *prot_err =
				acpi_hest_get_payload(gdata);

			extlog_cxl_cper_handle_prot_err(prot_err,
							gdata->error_severity);
		} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
			struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

+2 −1
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ static int match_memdev_by_parent(struct device *dev, const void *uport)
	return 0;
}

static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
{
	unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
				       data->prot_err.agent_addr.function);
@@ -104,6 +104,7 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
	else
		cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
}
EXPORT_SYMBOL_GPL(cxl_cper_handle_prot_err);

static void cxl_cper_prot_err_work_fn(struct work_struct *work)
{
+2 −0
Original line number Diff line number Diff line
@@ -340,4 +340,6 @@ cxl_cper_setup_prot_err_work_data(struct cxl_cper_prot_err_work_data *wd,
}
#endif

void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *wd);

#endif /* _LINUX_CXL_EVENT_H */