Commit 5e162f87 authored by Tomasz Rusinowicz, committed by Jacek Lawrynowicz
Browse files

accel/ivpu: Add FW state dump on TDR



Send a JSM state dump message at the beginning of the TDR handler. This
allows the FW to collect debug info in the FW log before the state of the
NPU is lost, making it possible to analyze the cause of a TDR.

Wait for a predefined timeout (10 ms) so the FW has a chance to write debug
logs. We cannot wait for a JSM response at this point because IRQs are
already disabled before the TDR handler is invoked.

Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-9-jacek.lawrynowicz@linux.intel.com


Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
parent bade0340
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -152,6 +152,7 @@ struct ivpu_device {
		int tdr;
		int autosuspend;
		int d0i3_entry_msg;
		int state_dump_msg;
	} timeout;
};

+3 −0
Original line number Diff line number Diff line
@@ -89,12 +89,14 @@ static void timeouts_init(struct ivpu_device *vdev)
		vdev->timeout.tdr = 2000000;
		vdev->timeout.autosuspend = -1;
		vdev->timeout.d0i3_entry_msg = 500;
		vdev->timeout.state_dump_msg = 10;
	} else if (ivpu_is_simics(vdev)) {
		vdev->timeout.boot = 50;
		vdev->timeout.jsm = 500;
		vdev->timeout.tdr = 10000;
		vdev->timeout.autosuspend = -1;
		vdev->timeout.d0i3_entry_msg = 100;
		vdev->timeout.state_dump_msg = 10;
	} else {
		vdev->timeout.boot = 1000;
		vdev->timeout.jsm = 500;
@@ -104,6 +106,7 @@ static void timeouts_init(struct ivpu_device *vdev)
		else
			vdev->timeout.autosuspend = 100;
		vdev->timeout.d0i3_entry_msg = 5;
		vdev->timeout.state_dump_msg = 10;
	}
}

+26 −0
Original line number Diff line number Diff line
@@ -364,6 +364,32 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
	return ret;
}

/**
 * ivpu_ipc_send_and_wait() - Send an IPC request, then sleep for a fixed time.
 * @vdev: Device the request is sent to.
 * @req: JSM message handed to ivpu_ipc_send().
 * @channel: IPC channel the temporary consumer is registered on.
 * @timeout_ms: Time to sleep, in milliseconds, after a successful send.
 *
 * No response is read back by this function; after a successful send it
 * only sleeps for @timeout_ms and then tears the consumer down.
 *
 * Return: the negative value from ivpu_rpm_get() if that call fails,
 * otherwise the result of ivpu_ipc_send().
 */
int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
			   u32 channel, unsigned long timeout_ms)
{
	struct ivpu_ipc_consumer consumer;
	int err;

	/* NOTE(review): presumably takes a runtime PM reference - confirm. */
	err = ivpu_rpm_get(vdev);
	if (err < 0)
		return err;

	/* The consumer is registered with a NULL last argument. */
	ivpu_ipc_consumer_add(vdev, &consumer, channel, NULL);

	err = ivpu_ipc_send(vdev, &consumer, req);
	if (!err) {
		/* Fixed delay instead of waiting for a reply. */
		msleep(timeout_ms);
	} else {
		ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", err);
	}

	/* Cleanup is the same on the success and the failure path. */
	ivpu_ipc_consumer_del(vdev, &consumer);
	ivpu_rpm_put(vdev);
	return err;
}

static bool
ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
			struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+2 −0
Original line number Diff line number Diff line
@@ -108,5 +108,7 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
			  enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
			  u32 channel, unsigned long timeout_ms);
/*
 * Send @req on @channel, taking a delay of @timeout_ms (milliseconds).
 * Unlike ivpu_ipc_send_receive() above, this takes no expected response
 * type and no response buffer.
 */
int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
			   u32 channel, unsigned long timeout_ms);

#endif /* __IVPU_IPC_H__ */
+8 −0
Original line number Diff line number Diff line
@@ -559,3 +559,11 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev)
					    &resp, VPU_IPC_CHAN_ASYNC_CMD,
					    vdev->timeout.jsm);
}

int ivpu_jsm_state_dump(struct ivpu_device *vdev)
{
	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP };

	return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD,
				      vdev->timeout.state_dump_msg);
}
Loading