Unverified Commit 8d490e01 authored by Rodrigo Vivi
Browse files

drm/xe: Stop checking for power_lost on D3Cold



The GuC reset status is not reliable for this purpose: once in a
while the device really does enter D3Cold while power_lost is
reported as false, and without the proper memory restoration
the GuC reload and Display will fail to come back from D3Cold.

So, let's do a full restoration of everything if we have a risk
of losing power, without further optimizations.

v2: also remove the xe_guc_in_reset function (Anshuman)

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240522170105.327472-6-rodrigo.vivi@intel.com


Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent e7b180b2
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -431,9 +431,6 @@ struct xe_device {
		/** @d3cold.allowed: Indicates if d3cold is a valid device state */
		bool allowed;

		/** @d3cold.power_lost: Indicates if card has really lost power. */
		bool power_lost;

		/**
		 * @d3cold.vram_threshold:
		 *
+0 −27
Original line number Diff line number Diff line
@@ -1023,30 +1023,3 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
	xe_guc_ct_print(&guc->ct, p, false);
	xe_guc_submit_print(guc, p);
}

/**
 * xe_guc_in_reset() - Report whether the GuC MIA is held in reset.
 * @guc: The GuC object
 *
 * Samples GUC_STATUS under GT forcewake and tests the GS_MIA_IN_RESET bit.
 * This is used to detect a runtime resume from d3cold, since the GuC does
 * not get reset during d3hot. It is strictly meant to be called from the
 * RPM resume handler.
 *
 * Return: true if forcewake could not be acquired or the GuC MIA is in
 * reset, otherwise false.
 */
bool xe_guc_in_reset(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u32 guc_status;

	/* A forcewake failure is reported the same way as "in reset". */
	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
		return true;

	guc_status = xe_mmio_read32(gt, GUC_STATUS);
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);

	return (guc_status & GS_MIA_IN_RESET) != 0;
}
+0 −1
Original line number Diff line number Diff line
@@ -37,7 +37,6 @@ void xe_guc_reset_wait(struct xe_guc *guc);
void xe_guc_stop_prepare(struct xe_guc *guc);
void xe_guc_stop(struct xe_guc *guc);
int xe_guc_start(struct xe_guc *guc);
bool xe_guc_in_reset(struct xe_guc *guc);

static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
{
+2 −10
Original line number Diff line number Diff line
@@ -404,15 +404,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)

	lock_map_acquire(&xe_pm_runtime_lockdep_map);

	/*
	 * It can be possible that xe has allowed d3cold but other pcie devices
	 * in gfx card soc would have blocked d3cold, therefore card has not
	 * really lost power. Detecting primary Gt power is sufficient.
	 */
	gt = xe_device_get_gt(xe, 0);
	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
	if (xe->d3cold.allowed) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;
@@ -433,7 +425,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
	if (xe->d3cold.allowed) {
		xe_display_pm_resume(xe, true);
		err = xe_bo_restore_user(xe);
		if (err)