Commit f99c7cca authored by Dave Airlie
Browse files

Merge tag 'drm-xe-fixes-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes



Driver Changes:
- Fix missing HPD interrupt enabling, bringing one PM refactor with it
  (Imre / Maarten)
- Workaround LNL GGTT invalidation not being visible to GuC
  (Matthew Brost)
- Avoid getting jobs stuck without a protecting timeout (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tsbftadm7owyizzdaqnqu7u4tqggxgeqeztlfvmj5fryxlfomi@5m5bfv2zvzmw
parents 42736071 fe05cee4
Loading
Loading
Loading
Loading
+43 −22
Original line number Diff line number Diff line
@@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
}

/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
	if (!xe->info.probe_display)
		return;

	if (xe->d3cold.allowed)
		xe_display_pm_suspend(xe, true);

	intel_hpd_poll_enable(xe);
}

void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
	struct intel_display *display = &xe->display;
	bool s2idle = suspend_to_idle();
@@ -353,28 +342,38 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
	intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);

	intel_dmc_suspend(xe);

	if (runtime && has_display(xe))
		intel_hpd_poll_enable(xe);
}

void xe_display_pm_suspend_late(struct xe_device *xe)
/*
 * Display suspend entry point for the non-runtime path.
 *
 * Thin wrapper that forwards to __xe_display_pm_suspend() with
 * runtime == false, so the helper's runtime-only HPD poll enabling
 * (guarded by "runtime && has_display(xe)") is not performed.
 */
void xe_display_pm_suspend(struct xe_device *xe)
{
	__xe_display_pm_suspend(xe, false);
}

void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
	bool s2idle = suspend_to_idle();
	if (!xe->info.probe_display)
		return;

	intel_power_domains_suspend(xe, s2idle);
	if (xe->d3cold.allowed) {
		__xe_display_pm_suspend(xe, true);
		return;
	}

	intel_display_power_suspend_late(xe);
	intel_hpd_poll_enable(xe);
}

void xe_display_pm_runtime_resume(struct xe_device *xe)
void xe_display_pm_suspend_late(struct xe_device *xe)
{
	bool s2idle = suspend_to_idle();
	if (!xe->info.probe_display)
		return;

	intel_hpd_poll_disable(xe);
	intel_power_domains_suspend(xe, s2idle);

	if (xe->d3cold.allowed)
		xe_display_pm_resume(xe, true);
	intel_display_power_suspend_late(xe);
}

void xe_display_pm_resume_early(struct xe_device *xe)
@@ -387,7 +386,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
	intel_power_domains_resume(xe);
}

void xe_display_pm_resume(struct xe_device *xe, bool runtime)
static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
{
	struct intel_display *display = &xe->display;

@@ -411,9 +410,11 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
		intel_display_driver_resume(xe);
		drm_kms_helper_poll_enable(&xe->drm);
		intel_display_driver_enable_user_access(xe);
		intel_hpd_poll_disable(xe);
	}

	if (has_display(xe))
		intel_hpd_poll_disable(xe);

	intel_opregion_resume(display);

	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
@@ -421,6 +422,26 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
	intel_power_domains_enable(xe);
}

/*
 * Display resume entry point for the non-runtime path.
 *
 * Thin wrapper that forwards to __xe_display_pm_resume() with
 * runtime == false. Callers no longer pass the runtime flag themselves.
 */
void xe_display_pm_resume(struct xe_device *xe)
{
	__xe_display_pm_resume(xe, false);
}

/*
 * Display hook for runtime resume.
 *
 * Returns immediately when display probing is disabled
 * (xe->info.probe_display is false). When xe->d3cold.allowed is set, the
 * resume sequence in __xe_display_pm_resume() is run with runtime == true
 * and nothing else is done here. Otherwise only intel_hpd_init() and
 * intel_hpd_poll_disable() are called, in that order.
 */
void xe_display_pm_runtime_resume(struct xe_device *xe)
{
	/* Nothing to do when display probing is disabled. */
	if (!xe->info.probe_display)
		return;

	/* D3cold was allowed: go through the resume helper instead. */
	if (xe->d3cold.allowed) {
		__xe_display_pm_resume(xe, true);
		return;
	}

	/*
	 * NOTE(review): presumably intel_hpd_init() is the "missing HPD
	 * interrupt enabling" this change set adds for the non-D3cold
	 * path -- confirm against the intel_hpd_init() implementation.
	 */
	intel_hpd_init(xe);
	intel_hpd_poll_disable(xe);
}


static void display_device_remove(struct drm_device *dev, void *arg)
{
	struct xe_device *xe = arg;
+4 −4
Original line number Diff line number Diff line
@@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
void xe_display_irq_reset(struct xe_device *xe);
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);

void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
void xe_display_pm_suspend(struct xe_device *xe);
void xe_display_pm_suspend_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
void xe_display_pm_resume(struct xe_device *xe, bool runtime);
void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
void xe_display_pm_runtime_resume(struct xe_device *xe);

@@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
static inline void xe_display_irq_reset(struct xe_device *xe) {}
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}

static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
static inline void xe_display_pm_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}

+10 −0
Original line number Diff line number Diff line
@@ -397,6 +397,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);

	/*
	 * XXX: Barrier for GGTT pages. It is unclear exactly why this is
	 * required, but without it LNL has issues with the GuC reading the
	 * scratch page instead of the correct GGTT page. This is not a
	 * particularly hot code path, so blindly do an MMIO read here, which
	 * results in the GuC reading the correct GGTT page.
	 */
	xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);

	/* Each GT in a tile has its own TLB to cache GGTT lookups */
	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
+12 −6
Original line number Diff line number Diff line
@@ -916,12 +916,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
	u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
	 * possible overflows with a high timeout.
@@ -1049,10 +1059,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
		exec_queue_killed_or_banned_or_wedged(q) ||
		exec_queue_destroyed(q);

	/* Job hasn't started, can't be timed out */
	if (!skip_timeout_check && !xe_sched_job_started(job))
		goto rearm;

	/*
	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
	 * modify scheduling state to read timestamp. We could read the
+3 −3
Original line number Diff line number Diff line
@@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe)
	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe, false);
	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
@@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe)
	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err) {
			xe_display_pm_resume(xe, false);
			xe_display_pm_resume(xe);
			goto err;
		}
	}
@@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe)
	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_resume(xe, false);
	xe_display_pm_resume(xe);

	err = xe_bo_restore_user(xe);
	if (err)