Commit a19b4801 authored by Kalesh AP's avatar Kalesh AP Committed by David S. Miller
Browse files

bnxt_en: Event handler for Thermal event

Newer FW will send a new async event when it detects that
the chip's temperature has crossed the configured threshold value.
The driver will now notify hwmon and will log a warning message.

Link: https://lore.kernel.org/netdev/20230815045658.80494-13-michael.chan@broadcom.com/


Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-hwmon@vger.kernel.org
Signed-off-by: default avatarKalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: default avatarMichael Chan <michael.chan@broadcom.com>
Acked-by: default avatarGuenter Roeck <linux@roeck-us.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 3d9cf962
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
@@ -2129,6 +2129,24 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
	return INVALID_HW_RING_ID;
}

#define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2)				\
	((data2) &							\
	  ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK)

#define BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2)			\
	(((data2) &							\
	  ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK) >>\
	 ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT)

#define EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1)			\
	((data1) &							\
	 ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK)

#define EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1)		\
	(((data1) &							\
	  ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR) ==\
	 ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING)

static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
{
	u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1);
@@ -2144,6 +2162,40 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
	case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD:
		netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n");
		break;
	case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD: {
		u32 type = EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1);
		char *threshold_type;
		char *dir_str;

		switch (type) {
		case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
			threshold_type = "warning";
			break;
		case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
			threshold_type = "critical";
			break;
		case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
			threshold_type = "fatal";
			break;
		case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
			threshold_type = "shutdown";
			break;
		default:
			netdev_err(bp->dev, "Unknown Thermal threshold type event\n");
			return;
		}
		if (EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1))
			dir_str = "above";
		else
			dir_str = "below";
		netdev_warn(bp->dev, "Chip temperature has gone %s the %s thermal threshold!\n",
			    dir_str, threshold_type);
		netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n",
			    BNXT_EVENT_THERMAL_CURRENT_TEMP(data2),
			    BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2));
		bnxt_hwmon_notify_event(bp, type);
		break;
	}
	default:
		netdev_err(bp->dev, "FW reported unknown error type %u\n",
			   err_type);
+25 −0
Original line number Diff line number Diff line
@@ -18,6 +18,31 @@
#include "bnxt_hwrm.h"
#include "bnxt_hwmon.h"

void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
{
	u32 attr;

	if (!bp->hwmon_dev)
		return;

	switch (type) {
	case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
		attr = hwmon_temp_max_alarm;
		break;
	case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
		attr = hwmon_temp_crit_alarm;
		break;
	case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
	case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
		attr = hwmon_temp_emergency_alarm;
		break;
	default:
		return;
	}

	hwmon_notify_event(&bp->pdev->dev, hwmon_temp, attr, 0);
}

static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp)
{
	struct hwrm_temp_monitor_query_output *resp;
+5 −0
Original line number Diff line number Diff line
@@ -11,9 +11,14 @@
#define BNXT_HWMON_H

#ifdef CONFIG_BNXT_HWMON
void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type);
void bnxt_hwmon_uninit(struct bnxt *bp);
void bnxt_hwmon_init(struct bnxt *bp);
#else
static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
{
}

static inline void bnxt_hwmon_uninit(struct bnxt *bp)
{
}