Commit 1b5d39e6 authored by Niranjana Vishwanathapura
Browse files

drm/xe/multi_queue: Set QUEUE_DRAIN_MODE for Multi Queue batches



To properly support soft light restore between batches
being arbitrated at the CFEG, PIPE_CONTROL instructions
have a new bit in the first DW, QUEUE_DRAIN_MODE. When
set, this indicates to the CFEG that it should only
drain the current queue.

Additionally we no longer want to set the CS_STALL bit
for these multi queue queues as this causes the entire
pipeline to stall waiting for completion of the prior
batch, preventing this soft light restore from occurring
between queues in a queue group.

v4: Assert !multi_queue where applicable (Matt Roper)

Bspec: 56551
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patch.msgid.link/20251211010249.1647839-29-niranjana.vishwanathapura@intel.com
parent d716a508
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@

#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))

#define   PIPE_CONTROL0_QUEUE_DRAIN_MODE		BIT(12)
#define	  PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE	BIT(10)	/* gen12 */
#define	  PIPE_CONTROL0_HDC_PIPELINE_FLUSH		BIT(9)	/* gen12 */

+41 −23
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_exec_queue_types.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_lrc.h"
#include "xe_macros.h"
@@ -135,12 +135,11 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset,
	return i;
}

static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
				int i)
static int emit_pipe_invalidate(struct xe_exec_queue *q, u32 mask_flags,
				bool invalidate_tlb, u32 *dw, int i)
{
	u32 flags0 = 0;
	u32 flags1 = PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
	u32 flags1 = PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
		PIPE_CONTROL_VF_CACHE_INVALIDATE |
@@ -152,6 +151,11 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
	if (invalidate_tlb)
		flags1 |= PIPE_CONTROL_TLB_INVALIDATE;

	if (xe_exec_queue_is_multi_queue(q))
		flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
	else
		flags1 |= PIPE_CONTROL_CS_STALL;

	flags1 &= ~mask_flags;

	if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE)
@@ -175,37 +179,47 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,

static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
{
	struct xe_gt *gt = job->q->gt;
	struct xe_exec_queue *q = job->q;
	struct xe_gt *gt = q->gt;
	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
	u32 flags;
	u32 flags0, flags1;

	if (XE_GT_WA(gt, 14016712196))
		i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH,
				      LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);

	flags = (PIPE_CONTROL_CS_STALL |
		 PIPE_CONTROL_TILE_CACHE_FLUSH |
	flags0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
	flags1 = (PIPE_CONTROL_TILE_CACHE_FLUSH |
		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_FLUSH_ENABLE);

	if (XE_GT_WA(gt, 1409600907))
		flags |= PIPE_CONTROL_DEPTH_STALL;
		flags1 |= PIPE_CONTROL_DEPTH_STALL;

	if (lacks_render)
		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		flags1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
		flags1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

	if (xe_exec_queue_is_multi_queue(q))
		flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
	else
		flags1 |= PIPE_CONTROL_CS_STALL;

	return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0);
	return emit_pipe_control(dw, i, flags0, flags1, 0, 0);
}

static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
static int emit_pipe_control_to_ring_end(struct xe_exec_queue *q, u32 *dw, int i)
{
	struct xe_hw_engine *hwe = q->hwe;

	if (hwe->class != XE_ENGINE_CLASS_RENDER)
		return i;

	xe_gt_assert(q->gt, !xe_exec_queue_is_multi_queue(q));

	if (XE_GT_WA(hwe->gt, 16020292621))
		i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC,
				      RING_NOPID(hwe->mmio_base).addr, 0);
@@ -213,16 +227,20 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int
	return i;
}

static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
			      int i)
static int emit_pipe_imm_ggtt(struct xe_exec_queue *q, u32 addr, u32 value,
			      bool stall_only, u32 *dw, int i)
{
	u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB |
		    PIPE_CONTROL_QW_WRITE;
	u32 flags0 = 0, flags1 = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;

	if (!stall_only)
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		flags1 |= PIPE_CONTROL_FLUSH_ENABLE;

	if (xe_exec_queue_is_multi_queue(q))
		flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
	else
		flags1 |= PIPE_CONTROL_CS_STALL;

	return emit_pipe_control(dw, i, 0, flags, addr, value);
	return emit_pipe_control(dw, i, flags0, flags1, addr, value);
}

static u32 get_ppgtt_flag(struct xe_sched_job *job)
@@ -371,7 +389,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;

	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
	i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
	i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i);

	/* hsdes: 1809175790 */
	if (has_aux_ccs(xe))
@@ -391,11 +409,11 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
						job->user_fence.value,
						dw, i);

	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
	i = emit_pipe_imm_ggtt(job->q, xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);

	i = emit_user_interrupt(dw, i);

	i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i);
	i = emit_pipe_control_to_ring_end(job->q, dw, i);

	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);