mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
synced 2026-04-23 05:56:14 -04:00
drm/amdgpu: break GPU recovery once it's in bad state(v4)
When GPU executes recovery and retriving bad GPU tag
from external eerpom device, the recovery will be broken
and error message is printed as well for user's awareness.
v2: Refine warning message in threshold reaching case, and
fix spelling typo.
v3: Fix explicit calling of bad gpu.
v4: Rename function names.
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
9c06f91ff2
commit
e8fbaf0342
@@ -386,6 +386,46 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
|
||||
return curr_address;
|
||||
}
|
||||
|
||||
int amdgpu_ras_eeprom_check_err_threshold(
|
||||
struct amdgpu_ras_eeprom_control *control,
|
||||
bool *exceed_err_limit)
|
||||
{
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
unsigned char buff[EEPROM_ADDRESS_SIZE +
|
||||
EEPROM_TABLE_HEADER_SIZE] = { 0 };
|
||||
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
|
||||
struct i2c_msg msg = {
|
||||
.addr = control->i2c_address,
|
||||
.flags = I2C_M_RD,
|
||||
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
|
||||
.buf = buff,
|
||||
};
|
||||
int ret;
|
||||
|
||||
*exceed_err_limit = false;
|
||||
|
||||
/* read EEPROM table header */
|
||||
mutex_lock(&control->tbl_mutex);
|
||||
ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
|
||||
if (ret < 1) {
|
||||
dev_err(adev->dev, "Failed to read EEPROM table header.\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
__decode_table_header_from_buff(hdr, &buff[2]);
|
||||
|
||||
if (hdr->header == EEPROM_TABLE_HDR_BAD) {
|
||||
dev_warn(adev->dev, "This GPU is in BAD status.");
|
||||
dev_warn(adev->dev, "Please retire it or setting one bigger "
|
||||
"threshold value when reloading driver.\n");
|
||||
*exceed_err_limit = true;
|
||||
}
|
||||
|
||||
err:
|
||||
mutex_unlock(&control->tbl_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
|
||||
struct eeprom_table_record *records,
|
||||
bool write,
|
||||
|
||||
Reference in New Issue
Block a user