mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
synced 2026-04-23 05:56:14 -04:00
drm/amdgpu: Refine bad page adding
Bad page adding can be made simpler by using the NPS info. Signed-off-by: ganglxie <ganglxie@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -2799,20 +2799,100 @@ static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
|
||||
struct eeprom_table_record *bps, int count)
|
||||
{
|
||||
int j;
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_err_handler_data *data = con->eh_data;
|
||||
|
||||
for (j = 0; j < count; j++) {
|
||||
if (amdgpu_ras_check_bad_page_unlock(con,
|
||||
bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
|
||||
continue;
|
||||
|
||||
if (!data->space_left &&
|
||||
amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
amdgpu_ras_reserve_page(adev, bps[j].retired_page);
|
||||
|
||||
memcpy(&data->bps[data->count], &(bps[j]),
|
||||
sizeof(struct eeprom_table_record));
|
||||
data->count++;
|
||||
data->space_left--;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
|
||||
struct eeprom_table_record *bps, struct ras_err_data *err_data,
|
||||
enum amdgpu_memory_partition nps)
|
||||
{
|
||||
int i = 0;
|
||||
enum amdgpu_memory_partition save_nps;
|
||||
|
||||
save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
|
||||
|
||||
for (i = 0; i < adev->umc.retire_unit; i++)
|
||||
bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
|
||||
|
||||
if (save_nps) {
|
||||
if (save_nps == nps) {
|
||||
if (amdgpu_umc_pages_in_a_row(adev, err_data,
|
||||
bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
|
||||
if (nps == AMDGPU_NPS1_PARTITION_MODE)
|
||||
memcpy(err_data->err_addr, bps,
|
||||
sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
|
||||
else
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
|
||||
}
|
||||
|
||||
static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
|
||||
struct eeprom_table_record *bps, struct ras_err_data *err_data,
|
||||
enum amdgpu_memory_partition nps)
|
||||
{
|
||||
enum amdgpu_memory_partition save_nps;
|
||||
|
||||
save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
|
||||
bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
|
||||
|
||||
if (save_nps == nps) {
|
||||
if (amdgpu_umc_pages_in_a_row(adev, err_data,
|
||||
bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
|
||||
return -EINVAL;
|
||||
}
|
||||
return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
|
||||
adev->umc.retire_unit);
|
||||
}
|
||||
|
||||
/* It deals with VRAM only. */
|
||||
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||
struct eeprom_table_record *bps, int pages, bool from_rom)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_err_handler_data *data;
|
||||
struct ras_err_data err_data;
|
||||
struct eeprom_table_record *err_rec;
|
||||
struct amdgpu_ras_eeprom_control *control =
|
||||
&adev->psp.ras_context.ras->eeprom_control;
|
||||
enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
|
||||
int ret = 0;
|
||||
uint32_t i, j, loop_cnt = 1;
|
||||
bool find_pages_per_pa = false;
|
||||
uint32_t i;
|
||||
|
||||
if (!con || !con->eh_data || !bps || pages <= 0)
|
||||
return 0;
|
||||
@@ -2823,114 +2903,46 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||
sizeof(struct eeprom_table_record), GFP_KERNEL);
|
||||
if (!err_data.err_addr) {
|
||||
dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
err_rec = err_data.err_addr;
|
||||
loop_cnt = adev->umc.retire_unit;
|
||||
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
|
||||
nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
|
||||
}
|
||||
|
||||
mutex_lock(&con->recovery_lock);
|
||||
data = con->eh_data;
|
||||
if (!data) {
|
||||
/* Returning 0 as the absence of eh_data is acceptable */
|
||||
goto free;
|
||||
}
|
||||
|
||||
for (i = 0; i < pages; i++) {
|
||||
if (from_rom &&
|
||||
control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) {
|
||||
if (!find_pages_per_pa) {
|
||||
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
|
||||
if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) {
|
||||
/* may use old RAS TA, use PA to find pages in
|
||||
* one row
|
||||
*/
|
||||
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
|
||||
bps[i].retired_page <<
|
||||
AMDGPU_GPU_PAGE_SHIFT)) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
} else {
|
||||
find_pages_per_pa = true;
|
||||
}
|
||||
} else {
|
||||
/* unsupported cases */
|
||||
ret = -EOPNOTSUPP;
|
||||
if (from_rom) {
|
||||
for (i = 0; i < pages; i++) {
|
||||
if (control->ras_num_recs - i >= adev->umc.retire_unit) {
|
||||
if ((bps[i].address == bps[i + 1].address) &&
|
||||
(bps[i].mem_channel == bps[i + 1].mem_channel)) {
|
||||
//deal with retire_unit records a time
|
||||
ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
|
||||
&bps[i], &err_data, nps);
|
||||
if (ret)
|
||||
goto free;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
|
||||
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (from_rom && !find_pages_per_pa) {
|
||||
if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
|
||||
/* bad page in any NPS mode in eeprom */
|
||||
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
}
|
||||
} else {
|
||||
/* legacy bad page in eeprom, generated only in
|
||||
* NPS1 mode
|
||||
*/
|
||||
if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) {
|
||||
/* old RAS TA or ASICs which don't support to
|
||||
* convert addrss via mca address
|
||||
*/
|
||||
if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) {
|
||||
find_pages_per_pa = true;
|
||||
err_rec = &bps[i];
|
||||
loop_cnt = 1;
|
||||
} else {
|
||||
/* non-nps1 mode, old RAS TA
|
||||
* can't support it
|
||||
*/
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!find_pages_per_pa)
|
||||
i += (adev->umc.retire_unit - 1);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
err_rec = &bps[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < loop_cnt; j++) {
|
||||
if (amdgpu_ras_check_bad_page_unlock(con,
|
||||
err_rec[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
|
||||
continue;
|
||||
|
||||
if (!data->space_left &&
|
||||
amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
|
||||
ret = -ENOMEM;
|
||||
for (; i < pages; i++) {
|
||||
ret = __amdgpu_ras_convert_rec_from_rom(adev,
|
||||
&bps[i], &err_data, nps);
|
||||
if (ret)
|
||||
goto free;
|
||||
}
|
||||
|
||||
amdgpu_ras_reserve_page(adev, err_rec[j].retired_page);
|
||||
|
||||
memcpy(&data->bps[data->count], &(err_rec[j]),
|
||||
sizeof(struct eeprom_table_record));
|
||||
data->count++;
|
||||
data->space_left--;
|
||||
}
|
||||
} else {
|
||||
ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
|
||||
}
|
||||
|
||||
free:
|
||||
if (from_rom)
|
||||
kfree(err_data.err_addr);
|
||||
out:
|
||||
mutex_unlock(&con->recovery_lock);
|
||||
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user