Commit a54b0de7 authored by Matthew Brost
Browse files

drm/xe: Change xe_engine_snapshot_capture_for_job to be for queue



During capture time, the target job may be unavailable (e.g., if it's in
LR mode). However, the associated exec queue will be available
regardless, so change xe_engine_snapshot_capture_for_job to take a queue
argument and rename it to xe_engine_snapshot_capture_for_queue.

v2:
 - Reword commit message (Jonathan)
 - Remove redundant queue check (Zhanjun)
 - Remove devcoredump job member (Zhanjun)

Cc: Zhanjun Dong <zhanjun.dong@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-7-matthew.brost@intel.com
parent 990c29c2
Loading
Loading
Loading
Loading
+1 −4
Original line number Diff line number Diff line
@@ -232,7 +232,6 @@ static void xe_devcoredump_free(void *data)
	/* To prevent stale data on next snapshot, clear everything */
	memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
	coredump->captured = false;
	coredump->job = NULL;
	drm_info(&coredump_to_xe(coredump)->drm,
		 "Xe device coredump has been deleted.\n");
}
@@ -259,7 +258,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
	strscpy(ss->process_name, process_name);

	ss->gt = q->gt;
	coredump->job = job;
	INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);

	cookie = dma_fence_begin_signalling();
@@ -282,8 +280,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
		ss->job = xe_sched_job_snapshot_capture(job);
	ss->vm = xe_vm_snapshot_capture(q->vm);

	if (job)
		xe_engine_snapshot_capture_for_job(job);
	xe_engine_snapshot_capture_for_queue(q);

	queue_work(system_unbound_wq, &ss->work);

+0 −2
Original line number Diff line number Diff line
@@ -80,8 +80,6 @@ struct xe_devcoredump {
	bool captured;
	/** @snapshot: Snapshot is captured at time of the first crash */
	struct xe_devcoredump_snapshot snapshot;
	/** @job: Point to the faulting job */
	struct xe_sched_job *job;
};

#endif
+12 −17
Original line number Diff line number Diff line
@@ -1793,29 +1793,24 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
}

/**
 * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job.
 * @job: The job object.
 * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue.
 * @q: The exec queue object
 *
 * Search within the capture outlist for the job, could be used for check if
 * GuC capture is ready for the job.
 * Search within the capture outlist for the queue, could be used for check if
 * GuC capture is ready for the queue.
 * If found, the locked boolean of the node will be flagged.
 *
 * Returns: found guc-capture node ptr else NULL
 */
struct __guc_capture_parsed_output *
xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_exec_queue *q;
	struct xe_device *xe;
	u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
	struct xe_devcoredump_snapshot *ss;

	if (!job)
		return NULL;

	q = job->q;
	if (!q || !q->gt)
		return NULL;

@@ -1827,7 +1822,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
	if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
		return ss->matched_node;

	/* Find hwe for the job */
	/* Find hwe for the queue */
	for_each_hw_engine(hwe, q->gt, id) {
		if (hwe != q->hwe)
			continue;
@@ -1859,17 +1854,16 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
}

/**
 * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
 * @job: The job object
 * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine
 * @q: The exec queue object
 *
 * Take snapshot of associated HW Engine
 *
 * Returns: None.
 */
void
xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
{
	struct xe_exec_queue *q = job->q;
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_devcoredump *coredump = &xe->devcoredump;
	struct xe_hw_engine *hwe;
@@ -1887,11 +1881,12 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
		}

		if (!coredump->snapshot.hwe[id]) {
			coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
			coredump->snapshot.hwe[id] =
				xe_hw_engine_snapshot_capture(hwe, q);
		} else {
			struct __guc_capture_parsed_output *new;

			new = xe_guc_capture_get_matching_and_lock(job);
			new = xe_guc_capture_get_matching_and_lock(q);
			if (new) {
				struct xe_guc *guc =  &q->gt->uc.guc;

+3 −3
Original line number Diff line number Diff line
@@ -11,10 +11,10 @@
#include "xe_guc.h"
#include "xe_guc_fwif.h"

struct xe_exec_queue;
struct xe_guc;
struct xe_hw_engine;
struct xe_hw_engine_snapshot;
struct xe_sched_job;

static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class)
{
@@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc);
const struct __guc_mmio_reg_descr_group *
xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
				 enum guc_capture_list_class_type capture_class, bool is_ext);
struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q);
void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot);
void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q);
void xe_guc_capture_steered_list_init(struct xe_guc *guc);
void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
int xe_guc_capture_init(struct xe_guc *guc);
+2 −2
Original line number Diff line number Diff line
@@ -1061,13 +1061,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(job)) {
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_job(job);
		xe_engine_snapshot_capture_for_queue(q);

		xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
	}
Loading