Commit f1029b9d authored by Matthew Brost
Browse files

drm/xe/vf: Don't allow GT reset to be queued during VF post migration recovery



With well-behaved software, a GT reset should never occur, nor should it
happen during VF post-migration recovery. If it does, trigger a warning
but suppress the GT reset, as VF post-migration recovery is expected to
bring the VF back to a working state.

v3:
 - Better commit message (Tomasz)
v5:
 - Use xe_gt_WARN_ON (Michal)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://lore.kernel.org/r/20251008214532.3442967-17-matthew.brost@intel.com
parent b47c0c07
Loading
Loading
Loading
Loading
+0 −9
Original line number Diff line number Diff line
@@ -803,11 +803,6 @@ static int do_gt_restart(struct xe_gt *gt)
	return 0;
}

/*
 * Thin GT-level wrapper: calls xe_guc_wait_reset_unblock() on this GT's GuC
 * and passes its return value through unchanged.
 */
static int gt_wait_reset_unblock(struct xe_gt *gt)
{
	return xe_guc_wait_reset_unblock(&gt->uc.guc);
}

static int gt_reset(struct xe_gt *gt)
{
	unsigned int fw_ref;
@@ -822,10 +817,6 @@ static int gt_reset(struct xe_gt *gt)

	xe_gt_info(gt, "reset started\n");

	err = gt_wait_reset_unblock(gt);
	if (!err)
		xe_gt_warn(gt, "reset block failed to get lifted");

	xe_pm_runtime_get(gt_to_xe(gt));

	if (xe_fault_inject_gt_reset()) {
+0 −7
Original line number Diff line number Diff line
@@ -1097,17 +1097,11 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)

/*
 * Shutdown step of VF post-migration recovery: clears the recovery_queued
 * flag, pauses GuC submission and sets the GuC reset-block flag.
 */
static void vf_post_migration_shutdown(struct xe_gt *gt)
{
	int ret = 0;

	/* Clear the queued flag under the migration lock, with IRQs disabled. */
	spin_lock_irq(&gt->sriov.vf.migration.lock);
	gt->sriov.vf.migration.recovery_queued = false;
	spin_unlock_irq(&gt->sriov.vf.migration.lock);

	xe_guc_submit_pause(&gt->uc.guc);
	/* A non-zero result is only logged below; it does not abort the shutdown. */
	ret |= xe_guc_submit_reset_block(&gt->uc.guc);

	if (ret)
		xe_gt_sriov_info(gt, "migration recovery encountered ongoing reset\n");
}

static size_t post_migration_scratch_size(struct xe_device *xe)
@@ -1142,7 +1136,6 @@ static void vf_post_migration_kickstart(struct xe_gt *gt)
	 */
	xe_irq_resume(gt_to_xe(gt));

	xe_guc_submit_reset_unblock(&gt->uc.guc);
	xe_guc_submit_unpause(&gt->uc.guc);
}

+5 −37
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
@@ -1900,47 +1901,14 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
	}
}

/**
 * xe_guc_submit_reset_block - Disallow reset calls on given GuC.
 * @guc: the &xe_guc struct instance
 *
 * Atomically ORs 1 into the submission_state.reset_blocked flag.
 *
 * Return: the value of the flag before this call; non-zero means the flag
 * was already set.
 */
int xe_guc_submit_reset_block(struct xe_guc *guc)
{
	return atomic_fetch_or(1, &guc->submission_state.reset_blocked);
}

/**
 * xe_guc_submit_reset_unblock - Allow back reset calls on given GuC.
 * @guc: the &xe_guc struct instance
 *
 * Clears the submission_state.reset_blocked flag with release ordering and
 * then wakes every waiter on the GuC CT wait queue.
 */
void xe_guc_submit_reset_unblock(struct xe_guc *guc)
{
	atomic_set_release(&guc->submission_state.reset_blocked, 0);
	/* Wake waiters only after the flag is cleared so they re-check it. */
	wake_up_all(&guc->ct.wq);
}

/*
 * Read the submission_state.reset_blocked flag with acquire ordering.
 * Returns the raw flag value: non-zero while blocked, 0 otherwise.
 */
static int guc_submit_reset_is_blocked(struct xe_guc *guc)
{
	return atomic_read_acquire(&guc->submission_state.reset_blocked);
}

/* Maximum time of blocking reset */
#define RESET_BLOCK_PERIOD_MAX (HZ * 5)

/**
 * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout.
 * @guc: the &xe_guc struct instance
 *
 * Sleeps on the GuC CT wait queue until the reset-block flag reads as clear,
 * for at most RESET_BLOCK_PERIOD_MAX jiffies.
 *
 * Return: 0 if the flag was still set when the timeout elapsed, otherwise a
 * non-zero value (the remaining jiffies, at least 1), as returned by
 * wait_event_timeout().
 */
int xe_guc_wait_reset_unblock(struct xe_guc *guc)
{
	return wait_event_timeout(guc->ct.wq,
				  !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX);
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	if (xe_gt_WARN_ON(guc_to_gt(guc),
			  xe_gt_sriov_vf_recovery_pending(guc_to_gt(guc))))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

+0 −3
Original line number Diff line number Diff line
@@ -22,9 +22,6 @@ void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
void xe_guc_submit_pause(struct xe_guc *guc);
void xe_guc_submit_unpause(struct xe_guc *guc);
int xe_guc_submit_reset_block(struct xe_guc *guc);
void xe_guc_submit_reset_unblock(struct xe_guc *guc);
int xe_guc_wait_reset_unblock(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);

int xe_guc_read_stopped(struct xe_guc *guc);