drm/amdgpu: cleanup GPU recovery check a bit (v2)

Check if we should call the function instead of providing the forced
flag.

v2: rebase on KFD changes (Alex)

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König
2018-08-21 10:45:29 +02:00
committed by Alex Deucher
parent 1849e73748
commit 12938fad23
8 changed files with 38 additions and 22 deletions

View File

@@ -3243,32 +3243,44 @@ error:
return r;
}
/**
* amdgpu_device_should_recover_gpu - check if we should try GPU recovery
*
* @adev: amdgpu device pointer
*
* Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
* a hung GPU.
*/
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
if (!amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("Timeout, but no hardware hang detected.\n");
return false;
}
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
!amdgpu_sriov_vf(adev))) {
DRM_INFO("GPU recovery disabled.\n");
return false;
}
return true;
}
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
* @force: forces reset regardless of amdgpu_gpu_recovery
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force)
struct amdgpu_job *job)
{
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
return 0;
}
if (!force && (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
DRM_INFO("GPU recovery disabled.\n");
return 0;
}
dev_info(adev->dev, "GPU reset begin!\n");
mutex_lock(&adev->lock_reset);