drm/amdgpu: add new INFO ioctl query for the last GPU page fault

Add an interface to query the last GPU page fault for the process.
Useful for debugging context lost errors.

v2: split vmhub representation between kernel and userspace
v3: add locking when fetching fault info in INFO IOCTL

Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
libdrm MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238

Cc: samuel.pitoiset@gmail.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher
2020-10-06 16:54:35 -04:00
parent 9cff0879ae
commit 7a41ed8b59
5 changed files with 63 additions and 5 deletions

View File

@@ -1224,6 +1224,26 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, max_ibs,
min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
}
case AMDGPU_INFO_GPUVM_FAULT: {
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
unsigned long flags;
if (!vm)
return -EINVAL;
memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
xa_lock_irqsave(&adev->vm_manager.pasids, flags);
gpuvm_fault.addr = vm->fault_info.addr;
gpuvm_fault.status = vm->fault_info.status;
gpuvm_fault.vmhub = vm->fault_info.vmhub;
xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
return copy_to_user(out, &gpuvm_fault,
min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
}
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;