Unverified Commit 56b7432b authored by Matthew Brost, committed by Rodrigo Vivi
Browse files

drm/xe: Avoid memory allocations in xe_device_declare_wedged()



xe_device_declare_wedged() runs in the DMA-fence signaling path, where
GFP_KERNEL memory allocations are not allowed. However, registering
xe_device_wedged_fini via drmm_add_action_or_reset() triggers a
GFP_KERNEL allocation.

Fix this by deferring the registration of xe_device_wedged_fini until
late in the driver load sequence. Additionally, drop the wedged PM
reference only if the device is actually wedged in
xe_device_wedged_fini.

Fixes: 452bca0e ("drm/xe: Don't suspend device upon wedge")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/20260326210116.202585-2-matthew.brost@intel.com


(cherry picked from commit b08ceb443866808b881b12d4183008d214d816c1)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent bce7cd6d
Loading
Loading
Loading
Loading
+13 −14
Original line number Diff line number Diff line
@@ -837,6 +837,14 @@ static void detect_preproduction_hw(struct xe_device *xe)
	}
}

/*
 * xe_device_wedged_fini() - drm-managed release action for the wedged state
 * @drm: DRM device the action is attached to (unused here)
 * @arg: opaque pointer supplied at registration; the struct xe_device
 *
 * Drops one runtime PM reference, but only when the device's wedged flag is
 * set. If the flag is clear there is nothing to release and the function
 * returns without touching runtime PM.
 */
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	/* Never wedged: no reference to give back. */
	if (!atomic_read(&xe->wedged.flag))
		return;

	xe_pm_runtime_put(xe);
}

int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
@@ -1013,6 +1021,10 @@ int xe_device_probe(struct xe_device *xe)

	detect_preproduction_hw(xe);

	err = drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe);
	if (err)
		goto err_unregister_display;

	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);

err_unregister_display:
@@ -1216,13 +1228,6 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
	return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}

/*
 * drm-managed release action: unconditionally drops one runtime PM reference
 * on the xe device passed in as @arg. @drm is not used.
 */
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
	/* @arg is the struct xe_device handed over at registration time. */
	xe_pm_runtime_put(arg);
}

/**
 * DOC: Xe Device Wedging
 *
@@ -1300,15 +1305,9 @@ void xe_device_declare_wedged(struct xe_device *xe)
		return;
	}

	xe_pm_runtime_get_noresume(xe);

	if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
		return;
	}

	if (!atomic_xchg(&xe->wedged.flag, 1)) {
		xe->needs_flr_on_fini = true;
		xe_pm_runtime_get_noresume(xe);
		drm_err(&xe->drm,
			"CRITICAL: Xe has declared device %s as wedged.\n"
			"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"