mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
synced 2026-04-23 05:56:14 -04:00
drm/amdgpu: avoid ras error injection for retired page
check whether a page is bad page before umc error injection, bad page should not be accessed again Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -71,6 +71,9 @@ const char *ras_block_string[] = {
|
||||
|
||||
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
|
||||
|
||||
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
|
||||
uint64_t addr);
|
||||
|
||||
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
|
||||
size_t size, loff_t *pos)
|
||||
{
|
||||
@@ -291,6 +294,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
|
||||
break;
|
||||
}
|
||||
|
||||
/* umc ce/ue error injection for a bad page is not allowed */
|
||||
if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
|
||||
amdgpu_ras_check_bad_page(adev, data.inject.address)) {
|
||||
DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
|
||||
data.inject.address);
|
||||
break;
|
||||
}
|
||||
|
||||
/* data.inject.address is offset instead of absolute gpu address */
|
||||
ret = amdgpu_ras_error_inject(adev, &data.inject);
|
||||
break;
|
||||
@@ -1431,6 +1442,39 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* check if an address belongs to bad page
|
||||
*
|
||||
* Note: this check is only for umc block
|
||||
*/
|
||||
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
|
||||
uint64_t addr)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_err_handler_data *data;
|
||||
int i;
|
||||
bool ret = false;
|
||||
|
||||
if (!con || !con->eh_data)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&con->recovery_lock);
|
||||
data = con->eh_data;
|
||||
if (!data)
|
||||
goto out;
|
||||
|
||||
addr >>= AMDGPU_GPU_PAGE_SHIFT;
|
||||
for (i = 0; i < data->count; i++)
|
||||
if (addr == data->bps[i].retired_page) {
|
||||
ret = true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&con->recovery_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* called in gpu recovery/init */
|
||||
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user