Commit 61e7649a authored by Michal Wajdeczko
Browse files

drm/xe/vf: Improve getting clean NULL context



There is a small risk that, when fetching a NULL context image, the
VF may get a tweaked context image prepared by another VF that was
running on the engine before the GuC scheduler switched the VFs.

To avoid that risk, without forcing the GuC scheduler to trigger a costly
engine reset on every VF switch, use a watchdog mechanism that, when
configured with an impossible condition, triggers an interrupt, which
the GuC will handle by doing an engine reset. Also adjust the job size to
account for the additional dwords needed for the watchdog setup.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://patch.msgid.link/20260303201354.17948-4-michal.wajdeczko@intel.com
parent d95fda29
Loading
Loading
Loading
Loading
+6 −3
Original line number Diff line number Diff line
@@ -171,7 +171,7 @@ static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
static void gt_reset_worker(struct work_struct *w);

static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
			 long timeout_jiffies)
			 long timeout_jiffies, bool force_reset)
{
	struct xe_sched_job *job;
	struct dma_fence *fence;
@@ -181,6 +181,8 @@ static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
	if (IS_ERR(job))
		return PTR_ERR(job);

	job->ring_ops_force_reset = force_reset;

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);
@@ -204,7 +206,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	ret = emit_job_sync(q, bb, HZ);
	ret = emit_job_sync(q, bb, HZ, false);
	xe_bb_free(bb, NULL);

	return ret;
@@ -369,7 +371,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)

	bb->len = cs - bb->cs;

	ret = emit_job_sync(q, bb, HZ);
	/* only VFs need to trigger reset to get a clean NULL context */
	ret = emit_job_sync(q, bb, HZ, IS_SRIOV_VF(gt_to_xe(gt)));

	xe_bb_free(bb, NULL);

+37 −0
Original line number Diff line number Diff line
@@ -256,6 +256,32 @@ static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc,
	return i;
}

/*
 * Append a "fake watchdog" command sequence to @dw, starting at dword
 * index @i, and return the index just past the last dword written.
 *
 * The watchdog is set up with a condition that can never be met, so it
 * always raises a hardware interrupt, which is meant to force the GuC
 * into resetting the engine.
 */
static int emit_fake_watchdog(struct xe_lrc *lrc, u32 *dw, int i)
{
	u32 *cs = dw + i;

	/* program a small threshold, then issue the START logic op */
	*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | MI_LRI_LRM_CS_MMIO;
	*cs++ = PR_CTR_THRSH(0).addr;
	*cs++ = 2;
	*cs++ = PR_CTR_CTRL(0).addr;
	*cs++ = CTR_LOGIC_OP(START);

	/* poll the LRC GGTT address for a value that should never be seen */
	*cs++ = MI_SEMAPHORE_WAIT | MI_SEMW_GGTT | MI_SEMW_POLL |
		MI_SEMW_COMPARE(SAD_EQ_SDD);
	*cs++ = 0xdead;
	*cs++ = lower_32_bits(xe_lrc_ggtt_addr(lrc));
	*cs++ = upper_32_bits(xe_lrc_ggtt_addr(lrc));
	*cs++ = 0; /* token dword, not used here */

	/* issue the STOP logic op */
	*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_LRM_CS_MMIO;
	*cs++ = PR_CTR_CTRL(0).addr;
	*cs++ = CTR_LOGIC_OP(STOP);

	return cs - dw;
}

/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
				    u64 batch_addr, u32 *head, u32 seqno)
@@ -266,6 +292,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc

	*head = lrc->ring.tail;

	if (job->ring_ops_force_reset)
		i = emit_fake_watchdog(lrc, dw, i);

	i = emit_copy_timestamp(gt_to_xe(gt), lrc, dw, i);

	if (job->ring_ops_flush_tlb) {
@@ -324,6 +353,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,

	*head = lrc->ring.tail;

	if (job->ring_ops_force_reset)
		i = emit_fake_watchdog(lrc, dw, i);

	i = emit_copy_timestamp(xe, lrc, dw, i);

	dw[i++] = preparser_disable(true);
@@ -381,6 +413,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,

	*head = lrc->ring.tail;

	if (job->ring_ops_force_reset)
		i = emit_fake_watchdog(lrc, dw, i);

	i = emit_copy_timestamp(xe, lrc, dw, i);

	dw[i++] = preparser_disable(true);
@@ -433,6 +468,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,

	*head = lrc->ring.tail;

	xe_gt_assert(gt, !job->ring_ops_force_reset);

	i = emit_copy_timestamp(xe, lrc, dw, i);

	i = emit_store_imm_ggtt(saddr, seqno, dw, i);
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@

struct xe_sched_job;

#define MAX_JOB_SIZE_DW 58
#define MAX_JOB_SIZE_DW 72
#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)

/**
+2 −0
Original line number Diff line number Diff line
@@ -63,6 +63,8 @@ struct xe_sched_job {
	u64 sample_timestamp;
	/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
	bool ring_ops_flush_tlb;
	/** @ring_ops_force_reset: The ring ops need to trigger a reset before payload. */
	bool ring_ops_force_reset;
	/** @ggtt: mapped in ggtt. */
	bool ggtt;
	/** @restore_replay: job being replayed for restore */