Unverified Commit d1f51a4f authored by Riana Tauro's avatar Riana Tauro Committed by Rodrigo Vivi
Browse files

drm/xe/xe_hw_error: Add fault injection to trigger csc error handler



Add a debugfs fault handler to trigger csc error handler that
wedges the device and enables runtime survivability mode.

v2: add debugfs only for bmg (Umesh)
v3: do not use csc_fault attribute if debugfs is not enabled
v4: rebase

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: default avatarRiana Tauro <riana.tauro@intel.com>
Reviewed-by: default avatarRaag Jadav <raag.jadav@intel.com>
Link: https://lore.kernel.org/r/20250826063419.3022216-11-riana.tauro@intel.com


Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
parent a7df563b
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#endif

DECLARE_FAULT_ATTR(gt_reset_failure);
DECLARE_FAULT_ATTR(inject_csc_hw_error);

static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio,
				   u32 offset, char *name, struct drm_printer *p)
@@ -361,10 +362,13 @@ void xe_debugfs_register(struct xe_device *xe)
				 ARRAY_SIZE(debugfs_list),
				 root, minor);

	if (xe->info.platform == XE_BATTLEMAGE)
	if (xe->info.platform == XE_BATTLEMAGE) {
		drm_debugfs_create_files(debugfs_residencies,
					 ARRAY_SIZE(debugfs_residencies),
					 root, minor);
		fault_create_debugfs_attr("inject_csc_hw_error", root,
					  &inject_csc_hw_error);
	}

	debugfs_create_file("forcewake_all", 0400, root, xe,
			    &forcewake_all_fops);
+11 −0
Original line number Diff line number Diff line
@@ -3,6 +3,8 @@
 * Copyright © 2025 Intel Corporation
 */

#include <linux/fault-inject.h>

#include "regs/xe_gsc_regs.h"
#include "regs/xe_hw_error_regs.h"
#include "regs/xe_irq_regs.h"
@@ -13,6 +15,7 @@
#include "xe_survivability_mode.h"

#define  HEC_UNCORR_FW_ERR_BITS 4
extern struct fault_attr inject_csc_hw_error;

/* Error categories reported by hardware */
enum hardware_error {
@@ -43,6 +46,11 @@ static const char *hw_error_to_str(const enum hardware_error hw_err)
	}
}

static bool fault_inject_csc_hw_error(void)
{
	return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1);
}

static void csc_hw_error_work(struct work_struct *work)
{
	struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work);
@@ -130,6 +138,9 @@ void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl)
{
	enum hardware_error hw_err;

	if (fault_inject_csc_hw_error())
		schedule_work(&tile->csc_hw_error_work);

	for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++)
		if (master_ctl & ERROR_IRQ(hw_err))
			hw_error_source_handler(tile, hw_err);