Unverified commit c332fba8, authored by Karthik Poosa and committed by Rodrigo Vivi
Browse files

drm/xe/hwmon: Expose temperature limits



Read temperature limits using pcode mailbox and expose shutdown
temperature limit as tempX_emergency, critical temperature limit as
tempX_crit and GPU max temperature limit as temp2_max.

Update Xe hwmon documentation with above entries.

v2:
 - Resolve a documentation warning.
 - Address below review comments from Raag.
 - Update date and kernel version in Xe hwmon documentation.
 - Remove explicit disable of has_mbx_thermal_info for unsupported
   platforms.
 - Remove unnecessary default case in switches.
 - Remove obvious comments.
 - Use TEMP_LIMIT_MAX to compute number of dwords needed in
   xe_hwmon_thermal_info.
 - Remove THERMAL_LIMITS_DWORDS macro.
 - Use has_mbx_thermal_info for checking thermal mailbox support.

v3:
 - Address below minor comments. (Raag)
 - Group new temperature attributes with existing temperature attributes
   as per channel index in Xe hwmon documentation.
 - Rename enums of xe_temp_limit to improve clarity.
 - Use DIV_ROUND_UP to calculate dwords needed for temperature limits.
 - Use return instead of breaks in xe_hwmon_temp_read.
 - Minor aesthetic refinements.

v4:
 - Remove a redundant break. (Raag)
 - Change drm_dbg to drm_warn so the user is informed when the thermal
   mailbox is unavailable on platforms that are expected to support it.

Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Link: https://patch.msgid.link/20260112203521.1014388-2-karthik.poosa@intel.com


Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent b1dcec9b
Loading
Loading
Loading
Loading
+40 −0
Original line number Diff line number Diff line
@@ -109,6 +109,22 @@ Description: RO. Package current voltage in millivolt.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_crit
Date:		January 2026
KernelVersion:	7.0
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Package critical temperature in millidegree Celsius.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_emergency
Date:		January 2026
KernelVersion:	7.0
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Package shutdown temperature in millidegree Celsius.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input
Date:		March 2025
KernelVersion:	6.15
@@ -117,6 +133,30 @@ Description: RO. Package temperature in millidegree Celsius.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_max
Date:		January 2026
KernelVersion:	7.0
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Package maximum temperature limit in millidegree Celsius.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_crit
Date:		January 2026
KernelVersion:	7.0
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. VRAM critical temperature in millidegree Celsius.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_emergency
Date:		January 2026
KernelVersion:	7.0
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. VRAM shutdown temperature in millidegree Celsius.

		Only supported for particular Intel Xe graphics platforms.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input
Date:		March 2025
KernelVersion:	6.15
+2 −0
Original line number Diff line number Diff line
@@ -341,6 +341,8 @@ struct xe_device {
		 * pcode mailbox commands.
		 */
		u8 has_mbx_power_limits:1;
		/** @info.has_mbx_thermal_info: Device supports thermal mailbox commands */
		u8 has_mbx_thermal_info:1;
		/** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */
		u8 has_mem_copy_instr:1;
		/** @info.has_mert: Device has standalone MERT */
+99 −3
Original line number Diff line number Diff line
@@ -53,6 +53,15 @@ enum xe_fan_channel {
	FAN_MAX,
};

/*
 * Slot numbers for the byte array xe_hwmon_thermal_info.limit[]. A slot that
 * holds zero is treated as "limit not available" when attribute visibility
 * is decided in xe_hwmon_temp_is_visible().
 */
enum xe_temp_limit {
	/* Package shutdown limit, exposed as temp2_emergency */
	TEMP_LIMIT_PKG_SHUTDOWN = 0,
	/* Package critical limit, exposed as temp2_crit */
	TEMP_LIMIT_PKG_CRIT = 1,
	/* VRAM shutdown limit, exposed as temp3_emergency */
	TEMP_LIMIT_MEM_SHUTDOWN = 2,
	/* Package maximum limit, exposed as temp2_max */
	TEMP_LIMIT_PKG_MAX = 3,
	/* VRAM critical limit, exposed as temp3_crit */
	TEMP_LIMIT_MEM_CRIT = 4,
	/* Slot count; keep last, it sizes xe_hwmon_thermal_info.limit[] */
	TEMP_LIMIT_MAX
};

/* Attribute index for powerX_xxx_interval sysfs entries */
enum sensor_attr_power {
	SENSOR_INDEX_PSYS_PL1,
@@ -111,6 +120,18 @@ struct xe_hwmon_fan_info {
	u64 time_prev;
};

/**
 * struct xe_hwmon_thermal_info - to store temperature data
 *
 * The anonymous union provides two views of the same storage: @data is the
 * dword view that xe_hwmon_pcode_read_thermal_info() passes to
 * xe_pcode_read(), and @limit is the byte view indexed by
 * &enum xe_temp_limit. Values in @limit are multiplied by
 * MILLIDEGREE_PER_DEGREE before being reported through hwmon, and a zero
 * entry hides the corresponding sysfs attribute.
 *
 * NOTE(review): which byte of @data lands in which @limit slot depends on CPU
 * byte order; this presumably assumes little-endian - confirm.
 */
struct xe_hwmon_thermal_info {
	union {
		/** @limit: temperature limits, one byte per &enum xe_temp_limit slot */
		u8 limit[TEMP_LIMIT_MAX];
		/** @data: temperature limits in dwords, rounded up to cover all of @limit */
		u32 data[DIV_ROUND_UP(TEMP_LIMIT_MAX, sizeof(u32))];
	};
};

/**
 * struct xe_hwmon - xe hwmon data structure
 */
@@ -137,7 +158,8 @@ struct xe_hwmon {
	u32 pl1_on_boot[CHANNEL_MAX];
	/** @pl2_on_boot: power limit PL2 on boot */
	u32 pl2_on_boot[CHANNEL_MAX];

	/** @temp: Temperature info */
	struct xe_hwmon_thermal_info temp;
};

static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel,
@@ -677,8 +699,11 @@ static const struct attribute_group *hwmon_groups[] = {
};

static const struct hwmon_channel_info * const hwmon_info[] = {
	HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_INPUT | HWMON_T_LABEL),
	HWMON_CHANNEL_INFO(temp,
			   HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL |
			   HWMON_T_MAX,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL),
	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
			   HWMON_P_CAP,
			   HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP),
@@ -689,6 +714,19 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
	NULL
};

/*
 * Read the thermal limits through the pcode mailbox into hwmon->temp.
 *
 * Issues PCODE_THERMAL_INFO / READ_THERMAL_LIMITS on the root tile and stores
 * the two returned dwords in hwmon->temp.data[], which aliases the per-limit
 * byte array hwmon->temp.limit[].
 *
 * Returns 0 on success or the error code from xe_pcode_read(). On failure the
 * cached limits are cleared: attribute visibility treats any non-zero limit
 * as "available", so stale or partially written values must not survive a
 * failed read.
 */
static int xe_hwmon_pcode_read_thermal_info(struct xe_hwmon *hwmon)
{
	struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
	int ret;

	ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_LIMITS, 0),
			    &hwmon->temp.data[0], &hwmon->temp.data[1]);
	if (ret) {
		/*
		 * NOTE(review): xe_pcode_read() may have touched the output
		 * dwords before reporting the error - not verifiable from
		 * here, so reset them unconditionally.
		 */
		hwmon->temp.data[0] = 0;
		hwmon->temp.data[1] = 0;
		return ret;
	}

	/* Only log the values when they are known to come from a good read. */
	drm_dbg(&hwmon->xe->drm, "thermal info read val 0x%x val1 0x%x\n",
		hwmon->temp.data[0], hwmon->temp.data[1]);

	return 0;
}

/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval)
{
@@ -787,6 +825,31 @@ static umode_t
xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
	switch (attr) {
	case hwmon_temp_emergency:
		switch (channel) {
		case CHANNEL_PKG:
			return hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] ? 0444 : 0;
		case CHANNEL_VRAM:
			return hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] ? 0444 : 0;
		default:
			return 0;
		}
	case hwmon_temp_crit:
		switch (channel) {
		case CHANNEL_PKG:
			return hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] ? 0444 : 0;
		case CHANNEL_VRAM:
			return hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] ? 0444 : 0;
		default:
			return 0;
		}
	case hwmon_temp_max:
		switch (channel) {
		case CHANNEL_PKG:
			return hwmon->temp.limit[TEMP_LIMIT_PKG_MAX] ? 0444 : 0;
		default:
			return 0;
		}
	case hwmon_temp_input:
	case hwmon_temp_label:
		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP, channel)) ? 0444 : 0;
@@ -808,6 +871,36 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
		/* HW register value is in degrees Celsius, convert to millidegrees. */
		*val = REG_FIELD_GET(TEMP_MASK, reg_val) * MILLIDEGREE_PER_DEGREE;
		return 0;
	case hwmon_temp_emergency:
		switch (channel) {
		case CHANNEL_PKG:
			*val = hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] * MILLIDEGREE_PER_DEGREE;
			return 0;
		case CHANNEL_VRAM:
			*val = hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] * MILLIDEGREE_PER_DEGREE;
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	case hwmon_temp_crit:
		switch (channel) {
		case CHANNEL_PKG:
			*val = hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] * MILLIDEGREE_PER_DEGREE;
			return 0;
		case CHANNEL_VRAM:
			*val = hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] * MILLIDEGREE_PER_DEGREE;
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	case hwmon_temp_max:
		switch (channel) {
		case CHANNEL_PKG:
			*val = hwmon->temp.limit[TEMP_LIMIT_PKG_MAX] * MILLIDEGREE_PER_DEGREE;
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	default:
		return -EOPNOTSUPP;
	}
@@ -1263,6 +1356,9 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
	for (channel = 0; channel < FAN_MAX; channel++)
		if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel))
			xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);

	if (hwmon->xe->info.has_mbx_thermal_info && xe_hwmon_pcode_read_thermal_info(hwmon))
		drm_warn(&hwmon->xe->drm, "Thermal mailbox not supported by card firmware\n");
}

int xe_hwmon_register(struct xe_device *xe)
+3 −0
Original line number Diff line number Diff line
@@ -366,6 +366,7 @@ static const struct xe_device_desc bmg_desc = {
	.has_fan_control = true,
	.has_flat_ccs = 1,
	.has_mbx_power_limits = true,
	.has_mbx_thermal_info = true,
	.has_gsc_nvm = 1,
	.has_heci_cscfi = 1,
	.has_i2c = true,
@@ -422,6 +423,7 @@ static const struct xe_device_desc cri_desc = {
	.has_gsc_nvm = 1,
	.has_i2c = true,
	.has_mbx_power_limits = true,
	.has_mbx_thermal_info = true,
	.has_mert = true,
	.has_pre_prod_wa = 1,
	.has_soc_remapper_sysctrl = true,
@@ -687,6 +689,7 @@ static int xe_info_init_early(struct xe_device *xe,
	/* runtime fusing may force flat_ccs to disabled later */
	xe->info.has_flat_ccs = desc->has_flat_ccs;
	xe->info.has_mbx_power_limits = desc->has_mbx_power_limits;
	xe->info.has_mbx_thermal_info = desc->has_mbx_thermal_info;
	xe->info.has_gsc_nvm = desc->has_gsc_nvm;
	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
	xe->info.has_heci_cscfi = desc->has_heci_cscfi;
+1 −0
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@ struct xe_device_desc {
	u8 has_late_bind:1;
	u8 has_llc:1;
	u8 has_mbx_power_limits:1;
	u8 has_mbx_thermal_info:1;
	u8 has_mem_copy_instr:1;
	u8 has_mert:1;
	u8 has_pre_prod_wa:1;
Loading