drm/amdgpu: nuke the VM PD/PT shadow handling

This was only used as a workaround for recovering the page tables after
VRAM was lost and is no longer necessary after the function
amdgpu_vm_bo_reset_state_machine() started to do the same.

Compute never used shadows either, so the only problematic case left is
SVM and that is most likely not recoverable in any way when VRAM is
lost.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König
2024-08-27 16:12:11 +02:00
committed by Alex Deucher
parent c1de938fb7
commit 7181faaa47
7 changed files with 6 additions and 265 deletions

View File

@@ -4107,9 +4107,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->mm_stats.lock);
spin_lock_init(&adev->wb.lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list);
@@ -5029,80 +5026,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
return 0;
}
/**
* amdgpu_device_recover_vram - Recover some VRAM contents
*
* @adev: amdgpu_device pointer
*
* Restores the contents of VRAM buffers from the shadows in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
*
* Walks adev->shadow_list while holding adev->shadow_list_lock and calls
* amdgpu_bo_restore_shadow() for every entry that has a shadow currently
* placed in TTM_PL_TT whose parent is placed in TTM_PL_VRAM. The restores
* are pipelined: the fence of the previous restore is only waited on after
* the next restore has been issued.
*
* A single timeout value (8000 ms when amdgpu_sriov_runtime() is true,
* 100 ms otherwise) is shared by all waits: the value returned by each
* dma_fence_wait_timeout() call is passed as the timeout of the next one.
*
* Returns:
* 0 on success, -EIO if r ended up negative (restore error or a failed/timed
* out wait inside the loop) or if the last wait returned a value <= 0.
*/
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
/* fence: restore currently in flight; next: fence of the restore just issued */
struct dma_fence *fence = NULL, *next = NULL;
struct amdgpu_bo *shadow;
struct amdgpu_bo_vm *vmbo;
/* r starts positive so an empty/fully skipped list is not treated as a failure below */
long r = 1, tmo;
/* Pick the overall wait budget; SR-IOV runtime gets a much longer one */
if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000);
else
tmo = msecs_to_jiffies(100);
dev_info(adev->dev, "recover vram bo from shadow start\n");
/* The shadow list is only walked with its lock held */
mutex_lock(&adev->shadow_list_lock);
list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
/* If vm is compute context or adev is APU, shadow will be NULL */
if (!vmbo->shadow)
continue;
shadow = vmbo->shadow;
/*
* No need to recover an evicted BO: skip unless the shadow has a
* resource in TTM_PL_TT with a valid start offset and the parent
* BO currently resides in TTM_PL_VRAM.
*/
if (!shadow->tbo.resource ||
shadow->tbo.resource->mem_type != TTM_PL_TT ||
shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
continue;
/*
* Issue the restore; presumably the fence tracking it is returned
* through &next - confirm against amdgpu_bo_restore_shadow().
* NOTE(review): any non-zero r aborts the loop, but only r < 0 is
* reported as a failure at the end of the function.
*/
r = amdgpu_bo_restore_shadow(shadow, &next);
if (r)
break;
if (fence) {
/* Wait for the previous restore now that the next one is queued */
tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
/* Hand over to the new fence before bailing out so it is still waited on and released below */
fence = next;
if (tmo == 0) {
/* Budget exhausted */
r = -ETIMEDOUT;
break;
} else if (tmo < 0) {
/* The wait itself failed; propagate its error code */
r = tmo;
break;
}
} else {
/* First restore issued: nothing to wait on yet */
fence = next;
}
}
mutex_unlock(&adev->shadow_list_lock);
/*
* Wait for the last outstanding restore with whatever budget is left.
* NOTE(review): when the loop was left with tmo < 0, that negative value
* is passed as the timeout here - confirm dma_fence_wait_timeout()
* handles that as intended.
*/
if (fence)
tmo = dma_fence_wait_timeout(fence, false, tmo);
/* fence may still be NULL here; presumably dma_fence_put() tolerates that - TODO confirm */
dma_fence_put(fence);
/* Every failure is collapsed into -EIO; the details are only logged */
if (r < 0 || tmo <= 0) {
dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
return -EIO;
}
dev_info(adev->dev, "recover vram bo from shadow done\n");
return 0;
}
/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
@@ -5165,12 +5088,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
if (r)
return r;
/* need to be called during full access so we can't do it later like
* bare-metal does.
@@ -5569,9 +5488,7 @@ out:
}
}
if (!r)
r = amdgpu_device_recover_vram(tmp_adev);
else
if (r)
tmp_adev->asic_reset_res = r;
}