Commit bb9343f1 authored by Niranjana Vishwanathapura
Browse files

drm/xe/multi_queue: Reset GT upon CGP_SYNC failure



If GuC doesn't respond to the CGP_SYNC message, trigger a
GT reset and clean up all the queues of the multi-queue
group.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251211010249.1647839-31-niranjana.vishwanathapura@intel.com
parent c85285b3
Loading
Loading
Loading
Loading
+38 −0
Original line number Diff line number Diff line
@@ -593,6 +593,23 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
@@ -618,6 +635,23 @@ static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
	}
}

static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/*
 * Read @field_ of struct guc_submit_parallel_scratch at offset 0 of the
 * mapping @map_ by way of xe_map_rd_field(). @map_ is parenthesized before
 * its address is taken so that any lvalue expression can be passed safely.
 */
#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &(map_), 0, struct guc_submit_parallel_scratch, \
			field_)
@@ -677,7 +711,11 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}