Commit 3a1edef8 authored by Matthew Brost
Browse files

drm/xe: Make WA BB part of LRC BO



No idea why, but without this change, GuC context switches randomly fail
when running IGTs in a loop. We need to follow up on why this fixes the
aforementioned issue, but we can live with a stable driver for now.

Fixes: 617d824c ("drm/xe: Add WA BB to capture active context utilization")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Tested-by: Shuicheng Lin <shuicheng.lin@intel.com>
Link: https://lore.kernel.org/r/20250612031925.4009701-1-matthew.brost@intel.com
parent 0fccfb63
Loading
Loading
Loading
Loading
+18 −18
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@

#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
/*
 * Bytes reserved for the WA batch buffer. The LRC BO is allocated with this
 * much extra space, and the WA BB occupies the final LRC_WA_BB_SIZE bytes.
 */
#define LRC_WA_BB_SIZE				SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
@@ -910,7 +911,6 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_unpin_map_no_vm(lrc->bo);
	xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
}

/*
@@ -973,22 +973,27 @@ struct wa_bb_setup {
			 u32 *batch, size_t max_size);
};

/*
 * Byte offset of the WA batch buffer inside the LRC BO, i.e. the start of
 * the last LRC_WA_BB_SIZE bytes of lrc->bo.
 */
static size_t wa_bb_offset(struct xe_lrc *lrc)
{
	const size_t bo_size = lrc->bo->size;

	return bo_size - LRC_WA_BB_SIZE;
}

static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
	const size_t max_size = lrc->bb_per_ctx_bo->size;
	const size_t max_size = LRC_WA_BB_SIZE;
	static const struct wa_bb_setup funcs[] = {
		{ .setup = wa_bb_setup_utilization },
	};
	ssize_t remain;
	u32 *cmd, *buf = NULL;

	if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
	if (lrc->bo->vmap.is_iomem) {
		buf = kmalloc(max_size, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		cmd = buf;
	} else {
		cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
		cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
	}

	remain = max_size / sizeof(*cmd);
@@ -1011,13 +1016,14 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
	*cmd++ = MI_BATCH_BUFFER_END;

	if (buf) {
		xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0,
				 buf, (cmd - buf) * sizeof(*cmd));
		xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
				 wa_bb_offset(lrc), buf,
				 (cmd - buf) * sizeof(*cmd));
		kfree(buf);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
			     xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
			     wa_bb_offset(lrc) + 1);

	return 0;

@@ -1059,20 +1065,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size,
	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
				       lrc_size + LRC_WA_BB_SIZE,
				       ttm_bo_type_kernel,
				       bo_flags);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
						  ttm_bo_type_kernel,
						  bo_flags);
	if (IS_ERR(lrc->bb_per_ctx_bo)) {
		err = PTR_ERR(lrc->bb_per_ctx_bo);
		goto err_lrc_finish;
	}

	lrc->size = lrc_size;
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
@@ -1860,7 +1859,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
		LRC_WA_BB_SIZE;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
+0 −3
Original line number Diff line number Diff line
@@ -53,9 +53,6 @@ struct xe_lrc {

	/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
	u64 ctx_timestamp;

	/** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
	struct xe_bo *bb_per_ctx_bo;
};

struct xe_lrc_snapshot;