Commit bac38ca8 authored by Jonathan Kim, committed by Alex Deucher
Browse files

drm/amdkfd: implement per queue sdma reset for gfx 9.4+



To reset hung SDMA queues on GFX 9.4+ in the GFX9 family, a soft reset
must be issued through the SMU.  Because a soft reset resets the entire
SDMA engine, use a common KGD call to perform it; the KGD takes care of
avoiding a reset of in-flight GFX and paging queues on that engine.

In addition, create a common call for all reset types to simplify
the handling of module parameter settings that block gpu resets.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Harish Kasiviswanathan <harish.kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 057fef20
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -193,4 +193,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
	.hqd_reset = kgd_gfx_v9_hqd_reset,
	.hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
+2 −1
Original line number Diff line number Diff line
@@ -419,5 +419,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
	.hqd_reset = kgd_gfx_v9_hqd_reset
	.hqd_reset = kgd_gfx_v9_hqd_reset,
	.hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
+13 −1
Original line number Diff line number Diff line
@@ -509,6 +509,17 @@ static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
	return 0;
}

/*
 * Report the doorbell value programmed for one SDMA RLC queue.
 *
 * The queue's CONTEXT_STATUS and DOORBELL_OFFSET registers are read relative
 * to the offset returned by get_sdma_rlc_reg_offset() for (engine, queue).
 *
 * Returns the DOORBELL_OFFSET register value shifted right by two when the
 * SELECTED field of CONTEXT_STATUS is set, and 0 otherwise.
 */
static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
						     int engine, int queue)
{
	uint32_t rlc_base, ctx_status, db_reg;

	rlc_base = get_sdma_rlc_reg_offset(adev, engine, queue);

	/* Both registers are always read, status first, whatever the result. */
	ctx_status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + rlc_base);
	db_reg = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + rlc_base);

	/* A queue whose context is not SELECTED reports no doorbell. */
	if (!REG_GET_FIELD(ctx_status, SDMA_RLC0_CONTEXT_STATUS, SELECTED))
		return 0;

	/*
	 * NOTE(review): the >> 2 presumably turns a byte offset into a dword
	 * index -- confirm against the register specification.
	 */
	return db_reg >> 2;
}

const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
@@ -543,5 +554,6 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
	.set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
	.clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
	.hqd_reset = kgd_gfx_v9_hqd_reset
	.hqd_reset = kgd_gfx_v9_hqd_reset,
	.hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell
};
+8 −1
Original line number Diff line number Diff line
@@ -1084,6 +1084,12 @@ uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
	return 0;
}

/*
 * SDMA doorbell query for gfx v10.
 *
 * This implementation performs no register access and ignores adev, engine
 * and queue; it unconditionally returns 0.
 */
uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
					   int engine, int queue)
{
	const uint32_t no_doorbell = 0;

	return no_doorbell;
}

const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -1112,5 +1118,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
	.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
	.program_trap_handler_settings = program_trap_handler_settings,
	.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
	.hqd_reset = kgd_gfx_v10_hqd_reset
	.hqd_reset = kgd_gfx_v10_hqd_reset,
	.hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
+2 −0
Original line number Diff line number Diff line
@@ -65,3 +65,5 @@ uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
			      uint32_t queue_id,
			      uint32_t inst,
			      unsigned int utimeout);
/*
 * SDMA doorbell query for gfx v10; returns a 32-bit value.
 * NOTE(review): engine/queue presumably select the SDMA engine and the queue
 * within it -- confirm against the definition.
 */
uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
					   int engine, int queue);
Loading