Commit 0c3c2e33 authored by Alex Deucher
Browse files

drm/amdgpu/sdma: allow caller to handle kernel rings in engine reset



Add a parameter to amdgpu_sdma_reset_engine() to let the
caller handle the kernel rings.  This allows the kernel
rings to back up their unprocessed state if the reset comes in
via the drm scheduler rather than KFD.

Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f8410a17
Loading
Loading
Loading
Loading
+26 −18
Original line number Diff line number Diff line
@@ -545,10 +545,13 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
 * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
 * @adev: Pointer to the AMDGPU device
 * @instance_id: Logical ID of the SDMA engine instance to reset
 * @caller_handles_kernel_queues: Skip kernel queue processing. Caller
 * will handle it.
 *
 * Returns: 0 on success, or a negative error code on failure.
 */
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
			     bool caller_handles_kernel_queues)
{
	int ret = 0;
	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
@@ -556,6 +559,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
	struct amdgpu_ring *page_ring = &sdma_instance->page;

	mutex_lock(&sdma_instance->engine_reset_mutex);

	if (!caller_handles_kernel_queues) {
		/* Stop the scheduler's work queue for the GFX and page rings if they are running.
		 * This ensures that no new tasks are submitted to the queues while
		 * the reset is in progress.
@@ -564,6 +569,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)

		if (adev->sdma.has_page_queue)
			drm_sched_wqueue_stop(&page_ring->sched);
	}

	if (sdma_instance->funcs->stop_kernel_queue) {
		sdma_instance->funcs->stop_kernel_queue(gfx_ring);
@@ -585,6 +591,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
	}

exit:
	if (!caller_handles_kernel_queues) {
		/* Restart the scheduler's work queue for the GFX and page rings
		 * if they were stopped by this function. This allows new tasks
		 * to be submitted to the queues after the reset is complete.
@@ -597,6 +604,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
				drm_sched_wqueue_start(&page_ring->sched);
			}
		}
	}
	mutex_unlock(&sdma_instance->engine_reset_mutex);

	return ret;
+2 −1
Original line number Diff line number Diff line
@@ -172,7 +172,8 @@ struct amdgpu_buffer_funcs {
				 uint32_t byte_count);
};

int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id);
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
			     bool caller_handles_kernel_queues);

#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
+1 −1
Original line number Diff line number Diff line
@@ -1668,7 +1668,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
		return -EOPNOTSUPP;

	amdgpu_amdkfd_suspend(adev, true);
	r = amdgpu_sdma_reset_engine(adev, id);
	r = amdgpu_sdma_reset_engine(adev, id, false);
	amdgpu_amdkfd_resume(adev, true);
	return r;
}
+1 −1
Original line number Diff line number Diff line
@@ -1548,7 +1548,7 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
	int r;

	amdgpu_amdkfd_suspend(adev, true);
	r = amdgpu_sdma_reset_engine(adev, inst_id);
	r = amdgpu_sdma_reset_engine(adev, inst_id, false);
	amdgpu_amdkfd_resume(adev, true);

	return r;
+1 −1
Original line number Diff line number Diff line
@@ -1461,7 +1461,7 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
	int r;

	amdgpu_amdkfd_suspend(adev, true);
	r = amdgpu_sdma_reset_engine(adev, inst_id);
	r = amdgpu_sdma_reset_engine(adev, inst_id, false);
	amdgpu_amdkfd_resume(adev, true);

	return r;
Loading