Unverified commit fd4c1eea, authored by Tvrtko Ursulin and committed by Rodrigo Vivi
Browse files

drm/xe: Move aux table invalidation to ring ops



Implement the suggestion of moving the aux invalidation from a helper to a
ring ops vfunc, together with the suggestion to split the vfunc table of
video decode and video enhance engines.

With this done the LRC code will be able to access the functionality via
the newly added ring ops vfunc.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260324084018.20353-7-tvrtko.ursulin@igalia.com


Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent cd1a5162
Loading
Loading
Loading
Loading
+77 −28
Original line number Diff line number Diff line
@@ -48,21 +48,47 @@ static u32 preparser_disable(bool state)
	return MI_ARB_CHECK | BIT(8) | state;
}

static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
			      u32 *dw, int i)
static u32 *
__emit_aux_table_inv(u32 *cmd, const struct xe_reg reg, u32 adj_offset)
{
	dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
	dw[i++] = reg.addr + gt->mmio.adj_offset;
	dw[i++] = AUX_INV;
	dw[i++] = MI_SEMAPHORE_WAIT_TOKEN |
		  MI_SEMAPHORE_REGISTER_POLL |
		  MI_SEMAPHORE_POLL |
		  MI_SEMAPHORE_SAD_EQ_SDD;
	dw[i++] = 0;
	dw[i++] = reg.addr + gt->mmio.adj_offset;
	dw[i++] = 0;
	dw[i++] = 0;
	*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) |
		 MI_LRI_MMIO_REMAP_EN;
	*cmd++ = reg.addr + adj_offset;
	*cmd++ = AUX_INV;
	*cmd++ = MI_SEMAPHORE_WAIT_TOKEN | MI_SEMAPHORE_REGISTER_POLL |
		 MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD;
	*cmd++ = 0;
	*cmd++ = reg.addr + adj_offset;
	*cmd++ = 0;
	*cmd++ = 0;

	return cmd;
}

/*
 * Aux table invalidation for render and compute engines: emits the
 * invalidation sequence against CCS_AUX_INV, using the GT's MMIO offset
 * adjustment. Returns whatever __emit_aux_table_inv() returns for @cmd.
 */
static u32 *emit_aux_table_inv_render_compute(struct xe_gt *gt, u32 *cmd)
{
	const u32 adj_offset = gt->mmio.adj_offset;

	return __emit_aux_table_inv(cmd, CCS_AUX_INV, adj_offset);
}

/*
 * Aux table invalidation for video decode engines: emits the invalidation
 * sequence against VD0_AUX_INV, using the GT's MMIO offset adjustment.
 * Returns whatever __emit_aux_table_inv() returns for @cmd.
 */
static u32 *emit_aux_table_inv_video_decode(struct xe_gt *gt, u32 *cmd)
{
	const u32 adj_offset = gt->mmio.adj_offset;

	return __emit_aux_table_inv(cmd, VD0_AUX_INV, adj_offset);
}

/*
 * Aux table invalidation for video enhance engines: emits the invalidation
 * sequence against VE0_AUX_INV, using the GT's MMIO offset adjustment.
 * Returns whatever __emit_aux_table_inv() returns for @cmd.
 */
static u32 *emit_aux_table_inv_video_enhance(struct xe_gt *gt, u32 *cmd)
{
	const u32 adj_offset = gt->mmio.adj_offset;

	return __emit_aux_table_inv(cmd, VE0_AUX_INV, adj_offset);
}

/*
 * Emit the aux table invalidation for @hwe into @dw starting at index @i,
 * by dispatching to the emit_aux_table_inv hook of the ring ops registered
 * for the engine's class on its GT.
 *
 * Returns the index following the emitted commands, or @i unchanged when
 * the ring ops provide no such hook.
 */
static int emit_aux_table_inv(struct xe_hw_engine *hwe, u32 *dw, int i)
{
	struct xe_gt *gt = hwe->gt;
	const struct xe_ring_ops *ops = gt->ring_ops[hwe->class];

	/* The hook is optional; nothing to emit when it is not set. */
	if (!ops->emit_aux_table_inv)
		return i;

	/* The hook returns a pointer; convert it back into an index into @dw. */
	return ops->emit_aux_table_inv(gt, dw + i) - dw;
}

@@ -356,7 +382,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
	u32 ppgtt_flag = get_ppgtt_flag(job);
	struct xe_gt *gt = job->q->gt;
	struct xe_device *xe = gt_to_xe(gt);
	bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;

	*head = lrc->ring.tail;

@@ -368,12 +393,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
	dw[i++] = preparser_disable(true);

	/* hsdes: 1809175790 */
	if (has_aux_ccs(xe)) {
		if (decode)
			i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
		else
			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
	}
	i = emit_aux_table_inv(job->q->hwe, dw, i);

	if (job->ring_ops_flush_tlb)
		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
@@ -416,7 +436,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
	struct xe_gt *gt = job->q->gt;
	struct xe_device *xe = gt_to_xe(gt);
	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
	const bool aux_ccs = has_aux_ccs(xe);
	u32 mask_flags = 0;

	*head = lrc->ring.tail;
@@ -430,7 +449,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
	 * On AuxCCS platforms the invalidation of the Aux table requires
	 * quiescing the memory traffic beforehand.
	 */
	if (aux_ccs)
	if (has_aux_ccs(xe))
		i = emit_render_cache_flush(job, dw, i);

	dw[i++] = preparser_disable(true);
@@ -443,8 +462,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
	i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i);

	/* hsdes: 1809175790 */
	if (aux_ccs)
		i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);
	i = emit_aux_table_inv(job->q->hwe, dw, i);

	dw[i++] = preparser_disable(false);

@@ -571,7 +589,11 @@ static const struct xe_ring_ops ring_ops_gen12_copy = {
	.emit_job = emit_job_gen12_copy,
};

static const struct xe_ring_ops ring_ops_gen12_video = {
/*
 * Ring ops for video decode engines without an aux table invalidation hook;
 * xe_ring_ops_get() returns this when has_aux_ccs() is false.
 */
static const struct xe_ring_ops ring_ops_gen12_video_decode = {
	.emit_job = emit_job_gen12_video,
};

/*
 * Ring ops for video enhance engines without an aux table invalidation hook;
 * xe_ring_ops_get() returns this when has_aux_ccs() is false.
 */
static const struct xe_ring_ops ring_ops_gen12_video_enhance = {
	.emit_job = emit_job_gen12_video,
};

@@ -579,19 +601,46 @@ static const struct xe_ring_ops ring_ops_gen12_render_compute = {
	.emit_job = emit_job_gen12_render_compute,
};

/*
 * Ring ops for video decode engines when has_aux_ccs() is true: same job
 * emission as the non-AuxCCS table, plus the VD0 aux table invalidation hook.
 */
static const struct xe_ring_ops auxccs_ring_ops_gen12_video_decode = {
	.emit_job = emit_job_gen12_video,
	.emit_aux_table_inv = emit_aux_table_inv_video_decode,
};

/*
 * Ring ops for video enhance engines when has_aux_ccs() is true: same job
 * emission as the non-AuxCCS table, plus the VE0 aux table invalidation hook.
 */
static const struct xe_ring_ops auxccs_ring_ops_gen12_video_enhance = {
	.emit_job = emit_job_gen12_video,
	.emit_aux_table_inv = emit_aux_table_inv_video_enhance,
};

/*
 * Ring ops for render and compute engines when has_aux_ccs() is true: same
 * job emission as the non-AuxCCS table, plus the CCS aux table invalidation
 * hook.
 */
static const struct xe_ring_ops auxccs_ring_ops_gen12_render_compute = {
	.emit_job = emit_job_gen12_render_compute,
	.emit_aux_table_inv = emit_aux_table_inv_render_compute,
};

const struct xe_ring_ops *
xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);

	switch (class) {
	case XE_ENGINE_CLASS_OTHER:
		return &ring_ops_gen12_gsc;
	case XE_ENGINE_CLASS_COPY:
		return &ring_ops_gen12_copy;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		if (has_aux_ccs(xe))
			return &auxccs_ring_ops_gen12_video_decode;
		else
			return &ring_ops_gen12_video_decode;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return &ring_ops_gen12_video;
		if (has_aux_ccs(xe))
			return &auxccs_ring_ops_gen12_video_enhance;
		else
			return &ring_ops_gen12_video_enhance;
	case XE_ENGINE_CLASS_RENDER:
	case XE_ENGINE_CLASS_COMPUTE:
		if (has_aux_ccs(xe))
			return &auxccs_ring_ops_gen12_render_compute;
		else
			return &ring_ops_gen12_render_compute;
	default:
		return NULL;
+6 −0
Original line number Diff line number Diff line
@@ -6,6 +6,9 @@
#ifndef _XE_RING_OPS_TYPES_H_
#define _XE_RING_OPS_TYPES_H_

#include <linux/types.h>

struct xe_gt;
struct xe_sched_job;

#define MAX_JOB_SIZE_DW 74
@@ -17,6 +20,9 @@ struct xe_sched_job;
struct xe_ring_ops {
	/** @emit_job: Write job to ring */
	void (*emit_job)(struct xe_sched_job *job);

	/**
	 * @emit_aux_table_inv: Emit aux table invalidation to the ring
	 *
	 * Optional; left NULL by ring ops that have no aux table invalidation
	 * to emit. Writes commands starting at @cmd and returns the location
	 * just past the last dword written.
	 */
	u32 *(*emit_aux_table_inv)(struct xe_gt *gt, u32 *cmd);
};

#endif