Commit a86e0c0e authored by Lijo Lazar, committed by Alex Deucher
Browse files

drm/amdgpu: Add init level for post reset reinit



When a device needs to be reset before initialization, not all IPs have to
be initialized before the reset. In such cases, the driver needs to
identify whether an IP/feature is being initialized for the first time or
reinitialized after a reset.

Add a RESET_RECOVERY init level to identify the post-reset reinitialization
phase. This only provides device-level identification; IPs/features may
also choose to track their state independently.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Acked-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6719ab82
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -330,6 +330,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
	}

	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
		amdgpu_set_init_level(tmp_adev,
				AMDGPU_INIT_LEVEL_RESET_RECOVERY);
		dev_info(tmp_adev->dev,
			 "GPU reset succeeded, trying to resume\n");
		r = aldebaran_mode2_restore_ip(tmp_adev);
@@ -375,6 +377,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
							tmp_adev);

		if (!r) {
			amdgpu_set_init_level(tmp_adev,
					      AMDGPU_INIT_LEVEL_DEFAULT);
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);

			r = amdgpu_ib_ring_tests(tmp_adev);
+1 −0
Original line number Diff line number Diff line
@@ -839,6 +839,7 @@ struct amdgpu_mqd {
/*
 * Device-level initialization levels. A level is applied to a device with
 * amdgpu_set_init_level().
 */
enum amdgpu_init_lvl_id {
	/* Default level; also restored once post-reset IP init has completed. */
	AMDGPU_INIT_LEVEL_DEFAULT,
	/* Minimal init used before an XGMI hive is reset (e.g. reset on init). */
	AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	/* Marks the post-reset reinitialization phase of the device. */
	AMDGPU_INIT_LEVEL_RESET_RECOVERY,
};

struct amdgpu_init_level {
+22 −3
Original line number Diff line number Diff line
@@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = {
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Init level descriptor used while a device is being reinitialized after a
 * reset. It selects all IP blocks for hardware init, the same mask as the
 * default level; only the level id differs, so code can tell a post-reset
 * reinit apart from other init levels.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
@@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
		adev->init_lvl = &amdgpu_init_minimal_xgmi;
		break;
	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
		adev->init_lvl = &amdgpu_init_recovery;
		break;
	case AMDGPU_INIT_LEVEL_DEFAULT:
		fallthrough;
	default:
@@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
	struct list_head *device_list_handle;
	bool full_reset, vram_lost = false;
	struct amdgpu_device *tmp_adev;
	int r;
	int r, init_level;

	device_list_handle = reset_context->reset_device_list;

@@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)

	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/*
	 * For reset-on-init, bring the devices back up at the default init
	 * level; for every other reset method, use the recovery level during
	 * reinitialization.
	 */
	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
	else
			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;

	r = 0;
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* After reset, it's default init level */
		amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
		amdgpu_set_init_level(tmp_adev, init_level);
		if (full_reset) {
			/* post card */
			amdgpu_ras_set_fed(tmp_adev, false);
@@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)

out:
		if (!r) {
			/* IP init is complete now, set level as default */
			amdgpu_set_init_level(tmp_adev,
					      AMDGPU_INIT_LEVEL_DEFAULT);
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
+5 −0
Original line number Diff line number Diff line
@@ -342,3 +342,8 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
		strscpy(buf, "unknown", len);
	}
}

/**
 * amdgpu_reset_in_recovery - check for the post-reset recovery init level
 * @adev: device whose current init level is inspected
 *
 * Return: true if the device's current init level is
 * AMDGPU_INIT_LEVEL_RESET_RECOVERY, false for any other level.
 */
bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
{
	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_RESET_RECOVERY)
		return false;

	return true;
}
+2 −0
Original line number Diff line number Diff line
@@ -158,4 +158,6 @@ extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
int amdgpu_reset_do_xgmi_reset_on_init(
	struct amdgpu_reset_context *reset_context);

/* True when adev's current init level is AMDGPU_INIT_LEVEL_RESET_RECOVERY. */
bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);

#endif
Loading