drm/amdgpu: Add gpu_recovery parameter

Add new parameter to control GPU recovery procedure.

v2:
Add auto logic where reset is disabled for bare metal and enabled
for SR-IOV.
Allow forced reset from debugfs.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Andrey Grodzovsky
2017-12-12 14:09:30 -05:00
committed by Alex Deucher
parent 3e98d829ad
commit dcebf026e6
8 changed files with 19 additions and 7 deletions

View File

@@ -3009,11 +3009,12 @@ error:
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
* @force forces reset regardless of amdgpu_gpu_recovery
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
*/
int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force)
{
struct drm_atomic_state *state = NULL;
uint64_t reset_flags = 0;
@@ -3024,6 +3025,12 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
return 0;
}
if (!force && (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
DRM_INFO("GPU recovery disabled.\n");
return 0;
}
dev_info(adev->dev, "GPU reset begin!\n");
mutex_lock(&adev->lock_reset);