Commit e6afcd60 authored by Jakub Kicinski, committed by Paolo Abeni
Browse files

eth: fbnic: add OTP health reporter



OTP memory ("fuses") is used for secure boot and anti-rollback
protection. The OTP memory is ECC protected. Check its health
periodically to notice when the chip is starting to go bad.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250916231420.1693955-10-kuba@kernel.org


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent 6da8344f
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -81,6 +81,13 @@ happened since power cycle - a snapshot of the FW memory. Diagnose callback
shows FW uptime based on the most recently received heartbeat message
(the crashes are detected by checking if uptime goes down).

otp reporter
~~~~~~~~~~~~

OTP memory ("fuses") is used for secure boot and anti-rollback
protection. The OTP memory is ECC protected; ECC errors indicate
either a manufacturing defect or a part deteriorating with age.

Statistics
----------

+2 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ struct fbnic_dev {
	struct dentry *dbg_fbd;
	struct device *hwmon;
	struct devlink_health_reporter *fw_reporter;
	struct devlink_health_reporter *otp_reporter;

	u32 __iomem *uc_addr0;
	u32 __iomem *uc_addr4;
@@ -166,6 +167,7 @@ void fbnic_devlink_register(struct fbnic_dev *fbd);
void fbnic_devlink_unregister(struct fbnic_dev *fbd);
void __printf(2, 3)
fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...);
void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg);

int fbnic_fw_request_mbx(struct fbnic_dev *fbd);
void fbnic_fw_free_mbx(struct fbnic_dev *fbd);
+18 −0
Original line number Diff line number Diff line
@@ -1178,4 +1178,22 @@ enum {
#define FBNIC_IPC_MBX_DESC_FW_CMPL	DESC_BIT(1)
#define FBNIC_IPC_MBX_DESC_HOST_CMPL	DESC_BIT(0)

/* OTP Registers
 * These registers are accessible via bar4 offset and are written by CMRT
 * on boot. For the write status, the register is broken up in half with OTP
 * Write Data Status occupying the top 16 bits and the ECC status occupying the
 * bottom 16 bits.
 */
/* OTP status word; the driver treats any non-zero value as reportable */
#define FBNIC_NS_OTP_STATUS		0x0021d
/* Combined OTP write status word, split by the two masks below */
#define FBNIC_NS_OTP_WRITE_STATUS	0x0021e

/* Fields of FBNIC_NS_OTP_WRITE_STATUS: write data status in the top half,
 * ECC status in the bottom half.
 */
#define FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK	CSR_GENMASK(31, 16)
#define FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK	CSR_GENMASK(15, 0)

/* Bit-field masks for a version (bits 31:16) and a HW type (bits 15:8).
 * NOTE(review): the word these masks apply to is not visible here -
 * presumably the register dump version; confirm against the users.
 */
#define FBNIC_REGS_VERSION			CSR_GENMASK(31, 16)
#define FBNIC_REGS_HW_TYPE			CSR_GENMASK(15, 8)

/* Known CSR version identifiers */
enum {
	FBNIC_CSR_VERSION_V1_0_ASIC = 1,
};

#endif /* _FBNIC_CSR_H_ */
+65 −0
Original line number Diff line number Diff line
@@ -534,6 +534,60 @@ static const struct devlink_health_reporter_ops fbnic_fw_ops = {
	.diagnose = fbnic_fw_reporter_diagnose,
};

/* Fetch the current OTP status word (FBNIC_NS_OTP_STATUS) through the FW
 * register accessor. Callers treat a zero result as "nothing to report".
 */
static u32 fbnic_read_otp_status(struct fbnic_dev *fbd)
{
	u32 status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_STATUS);

	return status;
}

/* devlink health "dump" callback for the OTP reporter.
 *
 * Reads the OTP status and OTP write status registers and emits them as a
 * nested "OTP" object holding three u32 pairs: "Status" (raw status word),
 * "Data" (upper 16 bits of the write status) and "ECC" (lower 16 bits of
 * the write status).
 *
 * @reporter: reporter whose private data is the owning struct fbnic_dev
 * @fmsg: formatted message the dump is written into
 * @priv_ctx: unused
 * @extack: unused
 *
 * Return: always 0.
 */
static int
fbnic_otp_reporter_dump(struct devlink_health_reporter *reporter,
			struct devlink_fmsg *fmsg, void *priv_ctx,
			struct netlink_ext_ack *extack)
{
	struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter);
	u32 status, wr_status, wr_data, wr_ecc;

	status = fbnic_read_otp_status(fbd);
	wr_status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_WRITE_STATUS);

	/* FIELD_GET() needs a compile-time constant mask, so hand it the
	 * mask macros directly rather than going through a local variable.
	 */
	wr_data = FIELD_GET(FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK, wr_status);
	wr_ecc = FIELD_GET(FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK, wr_status);

	/* Dump OTP status */
	devlink_fmsg_pair_nest_start(fmsg, "OTP");
	devlink_fmsg_obj_nest_start(fmsg);

	devlink_fmsg_u32_pair_put(fmsg, "Status", status);
	devlink_fmsg_u32_pair_put(fmsg, "Data", wr_data);
	devlink_fmsg_u32_pair_put(fmsg, "ECC", wr_ecc);

	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);

	return 0;
}

void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg)
{
	/* Check if there is anything to report */
	if (!fbnic_read_otp_status(fbd))
		return;

	devlink_health_report(fbd->otp_reporter, msg, fbd);
	if (fbnic_fw_log_ready(fbd))
		fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg);
}

/* Ops for the "otp" devlink health reporter. Only a dump callback is
 * provided; no other callbacks are set.
 */
static const struct devlink_health_reporter_ops fbnic_otp_ops = {
	.name = "otp",
	.dump = fbnic_otp_reporter_dump,
};

int fbnic_devlink_health_create(struct fbnic_dev *fbd)
{
	fbd->fw_reporter = devlink_health_reporter_create(priv_to_devlink(fbd),
@@ -545,11 +599,22 @@ int fbnic_devlink_health_create(struct fbnic_dev *fbd)
		return PTR_ERR(fbd->fw_reporter);
	}

	fbd->otp_reporter = devlink_health_reporter_create(priv_to_devlink(fbd),
							   &fbnic_otp_ops, fbd);
	if (IS_ERR(fbd->otp_reporter)) {
		devlink_health_reporter_destroy(fbd->fw_reporter);
		dev_warn(fbd->dev,
			 "Failed to create OTP fault reporter: %pe\n",
			 fbd->otp_reporter);
		return PTR_ERR(fbd->otp_reporter);
	}

	return 0;
}

/* Destroy the devlink health reporters of @fbd. The OTP reporter is
 * destroyed first, then the FW reporter, i.e. the reverse of the order in
 * which fbnic_devlink_health_create() creates them.
 */
void fbnic_devlink_health_destroy(struct fbnic_dev *fbd)
{
	devlink_health_reporter_destroy(fbd->otp_reporter);
	devlink_health_reporter_destroy(fbd->fw_reporter);
}

+5 −0
Original line number Diff line number Diff line
@@ -197,6 +197,7 @@ static void fbnic_health_check(struct fbnic_dev *fbd)
		return;

	fbnic_devlink_fw_report(fbd, "Firmware crashed detected!");
	fbnic_devlink_otp_check(fbd, "error detected after firmware recovery");

	if (fbnic_fw_config_after_crash(fbd))
		dev_err(fbd->dev, "Firmware recovery failed after crash\n");
@@ -321,6 +322,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
			 err);

	fbnic_devlink_register(fbd);
	fbnic_devlink_otp_check(fbd, "error detected during probe");
	fbnic_dbg_fbd_init(fbd);

	/* Capture snapshot of hardware stats so netdev can calculate delta */
@@ -474,6 +476,9 @@ static int __fbnic_pm_resume(struct device *dev)
	 */
	fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries));

	/* Since the FW should be up, check if it reported OTP errors */
	fbnic_devlink_otp_check(fbd, "error detected after PM resume");

	/* No netdev means there isn't a network interface to bring up */
	if (fbnic_init_failure(fbd))
		return 0;