Commit f62e6edf authored by Matthew Brost
Browse files

drm/xe: Add exec queue param to devcoredump



During capture time, the target job may be unavailable (e.g., if it's in
LR mode). However, the associated exec queue will be available
regardless, so add an exec queue param for such cases.

v2:
 - Reword commit message (Jonathan)

Cc: Zhanjun Dong <zhanjun.dong@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-5-matthew.brost@intel.com
parent 9a1fce9d
Loading
Loading
Loading
Loading
+9 −6
Original line number Diff line number Diff line
@@ -238,10 +238,10 @@ static void xe_devcoredump_free(void *data)
}

static void devcoredump_snapshot(struct xe_devcoredump *coredump,
				 struct xe_exec_queue *q,
				 struct xe_sched_job *job)
{
	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
@@ -278,9 +278,11 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
	ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
	ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
	ss->ge = xe_guc_exec_queue_snapshot_capture(q);
	if (job)
		ss->job = xe_sched_job_snapshot_capture(job);
	ss->vm = xe_vm_snapshot_capture(q->vm);

	if (job)
		xe_engine_snapshot_capture_for_job(job);

	queue_work(system_unbound_wq, &ss->work);
@@ -291,15 +293,16 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,

/**
 * xe_devcoredump - Take the required snapshots and initialize coredump device.
 * @q: The faulty xe_exec_queue, where the issue was detected.
 * @job: The faulty xe_sched_job, where the issue was detected.
 *
 * This function should be called at the crash time within the serialized
 * gt_reset. It is skipped if we still have the core dump device available
 * with the information of the 'first' snapshot.
 */
void xe_devcoredump(struct xe_sched_job *job)
void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_device *xe = gt_to_xe(job->q->gt);
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_devcoredump *coredump = &xe->devcoredump;

	if (coredump->captured) {
@@ -308,7 +311,7 @@ void xe_devcoredump(struct xe_sched_job *job)
	}

	coredump->captured = true;
	devcoredump_snapshot(coredump, job);
	devcoredump_snapshot(coredump, q, job);

	drm_info(&xe->drm, "Xe device coredump has been created\n");
	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+4 −2
Original line number Diff line number Diff line
@@ -10,13 +10,15 @@

struct drm_printer;
struct xe_device;
struct xe_exec_queue;
struct xe_sched_job;

#ifdef CONFIG_DEV_COREDUMP
void xe_devcoredump(struct xe_sched_job *job);
void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job);
int xe_devcoredump_init(struct xe_device *xe);
#else
static inline void xe_devcoredump(struct xe_sched_job *job)
static inline void xe_devcoredump(struct xe_exec_queue *q,
				  struct xe_sched_job *job)
{
}

+1 −1
Original line number Diff line number Diff line
@@ -1154,7 +1154,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(job);
		xe_devcoredump(q, job);

	/*
	 * Kernel jobs should never fail, nor should VM jobs if they do