Commit 168b5867 authored by Tomasz Lis, committed by Michał Winiarski
Browse files

drm/xe/vf: Refresh utilization buffer during migration recovery



The WA buffer we use to capture context utilization contains GGTT
references. This means its instructions have to be either fixed or
re-emitted during VF post-migration recovery.

This patch adds re-emitting content of the utilization WA BB during
the recovery.

The way we write to VRAM requires a scratch buffer to be used before
the whole block is memcopied. We are re-using the scratch buffer
introduced in an earlier part of the recovery. This is not a performance
optimization, but a necessity to avoid creating dependencies between
locks.

v2: Notable rebase after "Prepare WA BB setup for more users" patch
v3: Added error propagation

Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Reviewed-by: Michal Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20250802031045.1127138-8-tomasz.lis@intel.com


Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
parent a0dda25d
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -1082,15 +1082,23 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
 * within all LRCs of a queue.
 * @q: the &xe_exec_queue struct instance containing target LRCs
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure
 */
void xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
{
	int idx;

	/*
	 * Visit each LRC of the queue in order. The register updates are
	 * applied first, then the WA BB is set up again using @scratch;
	 * the first WA BB failure ends the walk and is reported to the caller.
	 */
	for (idx = 0; idx < q->width; idx++) {
		int ret;

		xe_lrc_update_memirq_regs_with_address(q->lrc[idx], q->hwe, scratch);
		xe_lrc_update_hwctx_regs_with_address(q->lrc[idx]);

		ret = xe_lrc_setup_wa_bb_with_scratch(q->lrc[idx], q->hwe, scratch);
		if (ret)
			return ret;
	}

	/* Every LRC was processed (or the queue has no LRCs at all). */
	return 0;
}

/**
+1 −1
Original line number Diff line number Diff line
@@ -90,7 +90,7 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
				      struct xe_vm *vm);
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);

void xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);

void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q);

+11 −3
Original line number Diff line number Diff line
@@ -2535,14 +2535,22 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
 * exec queues registered to given GuC.
 * @guc: the &xe_guc struct instance
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure.
 */
void xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
{
	struct xe_exec_queue *q;
	unsigned long index;
	int err = 0;

	/* The exec queue lookup table is walked with the submission state lock held. */
	mutex_lock(&guc->submission_state.lock);
	/*
	 * NOTE(review): this listing is a diff rendered without +/- markers.
	 * The unbraced loop directly below ignores the callee's return value
	 * and looks like the pre-patch form that the braced loop after it
	 * replaces - confirm against the actual file before relying on it.
	 */
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_exec_queue_contexts_hwsp_rebase(q, scratch);
	/* Rebase each registered queue; stop at the first failure and keep its error. */
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
		if (err)
			break;
	}
	mutex_unlock(&guc->submission_state.lock);

	/* Zero when every queue succeeded, otherwise the first error seen. */
	return err;
}
+1 −1
Original line number Diff line number Diff line
@@ -48,6 +48,6 @@ xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapsh
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type);

void xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);

#endif
+32 −7
Original line number Diff line number Diff line
@@ -41,7 +41,6 @@
#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_CTX_BO_SIZE		SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
#define LRC_WA_BB_SIZE				SZ_4K

/*
 * Layout of the LRC and associated data allocated as
@@ -1149,13 +1148,11 @@ static int setup_bo(struct bo_setup_state *state)
	ssize_t remain;

	if (state->lrc->bo->vmap.is_iomem) {
		state->buffer = kmalloc(state->max_size, GFP_KERNEL);
		if (!state->buffer)
			return -ENOMEM;
		state->ptr = state->buffer;
	} else {
		state->ptr = state->lrc->bo->vmap.vaddr + state->offset;
		state->buffer = NULL;
	}

	remain = state->max_size / sizeof(u32);
@@ -1180,7 +1177,6 @@ static int setup_bo(struct bo_setup_state *state)
	return 0;

fail:
	kfree(state->buffer);
	return -ENOSPC;
}

@@ -1192,10 +1188,16 @@ static void finish_bo(struct bo_setup_state *state)
	xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap,
			 state->offset, state->buffer,
			 state->written * sizeof(u32));
	kfree(state->buffer);
}

static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
/**
 * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks.
 * @lrc: the &xe_lrc struct instance
 * @hwe: the &xe_hw_engine struct instance
 * @scratch: preallocated scratch buffer for temporary storage
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch)
{
	static const struct bo_setup funcs[] = {
		{ .setup = setup_timestamp_wa },
@@ -1206,6 +1208,7 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
		.lrc = lrc,
		.hwe = hwe,
		.max_size = LRC_WA_BB_SIZE,
		.buffer = scratch,
		.reserve_dw = 1,
		.offset = __xe_lrc_wa_bb_offset(lrc),
		.funcs = funcs,
@@ -1228,6 +1231,21 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
	return 0;
}

static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
	u32 *buf = NULL;
	int ret;

	if (lrc->bo->vmap.is_iomem)
		buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL);

	ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf);

	kfree(buf);

	return ret;
}

static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
@@ -1238,6 +1256,7 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
		.lrc = lrc,
		.hwe = hwe,
		.max_size = (63 * 64) /* max 63 cachelines */,
		.buffer = NULL,
		.offset = __xe_lrc_indirect_ctx_offset(lrc),
	};
	int ret;
@@ -1254,9 +1273,14 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
	if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
		return 0;

	if (lrc->bo->vmap.is_iomem)
		state.buffer = kmalloc(state.max_size, GFP_KERNEL);

	ret = setup_bo(&state);
	if (ret)
	if (ret) {
		kfree(state.buffer);
		return ret;
	}

	/*
	 * Align to 64B cacheline so there's no garbage at the end for CS to
@@ -1268,6 +1292,7 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
	}

	finish_bo(&state);
	kfree(state.buffer);

	xe_lrc_write_ctx_reg(lrc,
			     CTX_CS_INDIRECT_CTX,
Loading