drm/amdgpu: Introduce reset domain

Define a reset_domain struct so that all
entities that go through a reset together
are serialized against one another. This is
done for both the single-device and XGMI
hive cases.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Suggested-by: Christian König <ckoenig.leichtzumerken@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/amd-gfx/msg74111.html
This commit is contained in:
Andrey Grodzovsky
2021-11-30 16:19:03 -05:00
parent b21a142fd2
commit a4c63cafa5
4 changed files with 35 additions and 1 deletions

View File

@@ -2398,9 +2398,27 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
if (adev->gmc.xgmi.num_physical_nodes > 1)
if (adev->gmc.xgmi.num_physical_nodes > 1) {
struct amdgpu_hive_info *hive;
amdgpu_xgmi_add_device(adev);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive || !hive->reset_domain.wq) {
DRM_ERROR("Failed to obtain reset domain info for XGMI hive:%llx", hive->hive_id);
r = -EINVAL;
goto init_failed;
}
adev->reset_domain.wq = hive->reset_domain.wq;
} else {
adev->reset_domain.wq = alloc_ordered_workqueue("amdgpu-reset-dev", 0);
if (!adev->reset_domain.wq) {
r = -ENOMEM;
goto init_failed;
}
}
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);