Commit ec62d37d authored by Boris Brezillon
Browse files

drm/panthor: Fix the fast-reset logic



If we do a GPU soft-reset, that's no longer a fast reset. This also means
the slow reset fallback doesn't work because the MCU state is only reset
after a GPU soft-reset.

Let's move the retry logic to panthor_device_resume() to issue a
soft-reset between the fast and slow attempts, and patch
panthor_gpu_suspend() to only power-off the L2 when a fast reset is
requested.

v3:
- No changes

v2:
- Add R-b

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211075419.2333731-6-boris.brezillon@collabora.com
parent 303e9e98
Loading
Loading
Loading
Loading
+24 −8
Original line number Diff line number Diff line
@@ -435,6 +435,22 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *
	return 0;
}

/**
 * panthor_device_resume_hw_components() - Resume the GPU, MMU and FW blocks
 * @ptdev: Device.
 *
 * Resumes the GPU and the MMU, then the FW. If resuming the FW fails, the
 * MMU and the GPU are suspended again before the error is reported.
 *
 * Return: 0 on success, the error returned by panthor_fw_resume() otherwise.
 */
static int panthor_device_resume_hw_components(struct panthor_device *ptdev)
{
	int err;

	panthor_gpu_resume(ptdev);
	panthor_mmu_resume(ptdev);

	err = panthor_fw_resume(ptdev);
	if (err) {
		/* Roll back what was resumed above, in reverse order. */
		panthor_mmu_suspend(ptdev);
		panthor_gpu_suspend(ptdev);
	}

	return err;
}

int panthor_device_resume(struct device *dev)
{
	struct panthor_device *ptdev = dev_get_drvdata(dev);
@@ -461,16 +477,16 @@ int panthor_device_resume(struct device *dev)

	if (panthor_device_is_initialized(ptdev) &&
	    drm_dev_enter(&ptdev->base, &cookie)) {
		panthor_gpu_resume(ptdev);
		panthor_mmu_resume(ptdev);
		ret = panthor_fw_resume(ptdev);
		if (!drm_WARN_ON(&ptdev->base, ret)) {
			panthor_sched_resume(ptdev);
		} else {
			panthor_mmu_suspend(ptdev);
			panthor_gpu_suspend(ptdev);
		ret = panthor_device_resume_hw_components(ptdev);
		if (ret && ptdev->reset.fast) {
			drm_err(&ptdev->base, "Fast reset failed, trying a slow reset");
			ptdev->reset.fast = false;
			ret = panthor_device_resume_hw_components(ptdev);
		}

		if (!ret)
			panthor_sched_resume(ptdev);

		drm_dev_exit(cookie);

		if (ret)
+11 −0
Original line number Diff line number Diff line
@@ -157,6 +157,17 @@ struct panthor_device {

		/** @pending: Set to true if a reset is pending. */
		atomic_t pending;

		/**
		 * @fast: True if the post_reset logic can proceed with a fast reset.
		 *
		 * A fast reset is just a reset where the driver doesn't reload the FW sections.
		 *
		 * Any time the firmware is properly suspended, a fast reset can take place.
		 * On the other hand, if the halt operation failed, the driver will reload
		 * all FW sections to make sure we start from a fresh state.
		 */
		bool fast;
	} reset;

	/** @pm: Power management related data. */
+12 −42
Original line number Diff line number Diff line
@@ -263,17 +263,6 @@ struct panthor_fw {
	/** @booted: True if the FW is booted */
	bool booted;

	/**
	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
	 *
	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
	 *
	 * Any time the firmware is properly suspended, a fast reset can take place.
	 * On the other hand, if the halt operation failed, the driver will reload
	 * all sections to make sure we start from a fresh state.
	 */
	bool fast_reset;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};
@@ -1090,7 +1079,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->fw->fast_reset = false;
	ptdev->reset.fast = false;

	if (!on_hang) {
		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
@@ -1100,7 +1089,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
					status == MCU_STATUS_HALT, 10, 100000)) {
			ptdev->fw->fast_reset = true;
			ptdev->reset.fast = true;
		} else {
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		}
@@ -1125,49 +1114,30 @@ int panthor_fw_post_reset(struct panthor_device *ptdev)
	if (ret)
		return ret;

	/* If this is a fast reset, try to start the MCU without reloading
	 * the FW sections. If it fails, go for a full reset.
	if (!ptdev->reset.fast) {
		/* On a slow reset, reload all sections, including RO ones.
		 * We're not supposed to end up here anyway, let's just assume
		 * the overhead of reloading everything is acceptable.
		 */
	if (ptdev->fw->fast_reset) {
		panthor_reload_fw_sections(ptdev, true);
	} else {
		/* The FW detects 0 -> 1 transitions. Make sure we reset
		 * the HALT bit before the FW is rebooted.
		 * This is not needed on a slow reset because FW sections are
		 * re-initialized.
		 */
		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);

		ret = panthor_fw_start(ptdev);
		if (!ret)
			goto out;

		/* Forcibly reset the MCU and force a slow reset, so we get a
		 * fresh boot on the next panthor_fw_start() call.
		 */
		panthor_fw_stop(ptdev);
		ptdev->fw->fast_reset = false;
		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");

		ret = panthor_vm_flush_all(ptdev->fw->vm);
		if (ret) {
			drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
			return ret;
		}
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
	}

	/* Reload all sections, including RO ones. We're not supposed
	 * to end up here anyway, let's just assume the overhead of
	 * reloading everything is acceptable.
	 */
	panthor_reload_fw_sections(ptdev, true);

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )");
		drm_err(&ptdev->base, "FW %s reset failed",
			ptdev->reset.fast ?  "fast" : "slow");
		return ret;
	}

out:
	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
+6 −5
Original line number Diff line number Diff line
@@ -470,11 +470,12 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
 */
void panthor_gpu_suspend(struct panthor_device *ptdev)
{
	/*
	 * A fast reset only needs the L2 to be powered down. In every other
	 * case, go through a GPU soft-reset.
	 */
	if (ptdev->reset.fast)
		panthor_gpu_power_off(ptdev, L2, 1, 20000);
	else
		panthor_gpu_soft_reset(ptdev);

	panthor_gpu_irq_suspend(&ptdev->gpu->irq);
}