Commit 27d19268 authored by Jacek Lawrynowicz
Browse files

accel/ivpu: Improve recovery and reset support



  - Synchronize job submission with reset/recovery using reset_lock
  - Always print recovery reason and call diagnose_failure()
  - Don't allow autosuspend during recovery
  - Prevent immediate autosuspend after reset/recovery
  - Prevent force_recovery from issuing TDR when device is suspended
  - Reset VPU instead of triggering recovery after changing debugfs params

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Wachowski, Karol <karol.wachowski@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-4-jacek.lawrynowicz@linux.intel.com
parent 264b271d
Loading
Loading
Loading
Loading
+16 −4
Original line number Diff line number Diff line
@@ -102,7 +102,7 @@ static int reset_pending_show(struct seq_file *s, void *v)
{
	struct ivpu_device *vdev = seq_to_ivpu(s);

	seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
	seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending));
	return 0;
}

@@ -130,7 +130,9 @@ dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size

	fw->dvfs_mode = dvfs_mode;

	ivpu_pm_schedule_recovery(vdev);
	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
	if (ret)
		return ret;

	return size;
}
@@ -190,7 +192,10 @@ fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
		return ret;

	ivpu_hw_profiling_freq_drive(vdev, enable);
	ivpu_pm_schedule_recovery(vdev);

	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
	if (ret)
		return ret;

	return size;
}
@@ -301,11 +306,18 @@ static ssize_t
ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
	struct ivpu_device *vdev = file->private_data;
	int ret;

	if (!size)
		return -EINVAL;

	ivpu_pm_schedule_recovery(vdev);
	ret = ivpu_rpm_get(vdev);
	if (ret)
		return ret;

	ivpu_pm_trigger_recovery(vdev, "debugfs");
	flush_work(&vdev->pm->recovery_work);
	ivpu_rpm_put(vdev);
	return size;
}

+4 −10
Original line number Diff line number Diff line
@@ -875,24 +875,18 @@ static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev)

static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
{
	ivpu_err_ratelimited(vdev, "WDT NCE irq\n");

	ivpu_pm_schedule_recovery(vdev);
	ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
}

static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
{
	ivpu_err_ratelimited(vdev, "WDT MSS irq\n");

	ivpu_hw_wdt_disable(vdev);
	ivpu_pm_schedule_recovery(vdev);
	ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
}

static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
{
	ivpu_err_ratelimited(vdev, "NOC Firewall irq\n");

	ivpu_pm_schedule_recovery(vdev);
	ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
}

/* Handler for IRQs from VPU core (irqV) */
@@ -970,7 +964,7 @@ static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
		REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status);

	if (schedule_recovery)
		ivpu_pm_schedule_recovery(vdev);
		ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");

	return true;
}
+4 −4
Original line number Diff line number Diff line
@@ -1049,18 +1049,18 @@ static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev)
static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
{
	/* TODO: For LNN hang consider engine reset instead of full recovery */
	ivpu_pm_schedule_recovery(vdev);
	ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
}

static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
{
	ivpu_hw_wdt_disable(vdev);
	ivpu_pm_schedule_recovery(vdev);
	ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
}

static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
{
	ivpu_pm_schedule_recovery(vdev);
	ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
}

/* Handler for IRQs from VPU core (irqV) */
@@ -1154,7 +1154,7 @@ static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
	REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);

	if (schedule_recovery)
		ivpu_pm_schedule_recovery(vdev);
		ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");

	return true;
}
+2 −4
Original line number Diff line number Diff line
@@ -343,10 +343,8 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
	hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
						&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
						vdev->timeout.jsm);
	if (hb_ret == -ETIMEDOUT) {
		ivpu_hw_diagnose_failure(vdev);
		ivpu_pm_schedule_recovery(vdev);
	}
	if (hb_ret == -ETIMEDOUT)
		ivpu_pm_trigger_recovery(vdev, "IPC timeout");

	return ret;
}
+2 −0
Original line number Diff line number Diff line
@@ -515,7 +515,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
		goto err_destroy_job;
	}

	down_read(&vdev->pm->reset_lock);
	ret = ivpu_job_submit(job);
	up_read(&vdev->pm->reset_lock);
	if (ret)
		goto err_signal_fence;

Loading