Commit 660261df authored by ganglxie's avatar ganglxie Committed by Alex Deucher
Browse files

drm/amdgpu: refine eeprom data check



Add an EEPROM data checksum check before driver unload. If the check
fails, reset the EEPROM and save the correct data back to it.

Signed-off-by: ganglxie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 340231cd
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -2512,6 +2512,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);

	amdgpu_ras_eeprom_check_and_recover(adev);
	amdgpu_xcp_dev_unplug(adev);
	amdgpu_gmc_prepare_nps_mode_change(adev);
	drm_dev_unplug(dev);
+28 −0
Original line number Diff line number Diff line
@@ -1531,3 +1531,31 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)

	return res < 0 ? res : 0;
}

/**
 * amdgpu_ras_eeprom_check_and_recover - verify the RAS table checksum and
 * try to rebuild the table when verification fails
 * @adev: amdgpu device whose RAS EEPROM table is checked
 *
 * Does nothing when RAS EEPROM is not supported, when there is no RAS
 * context, or when the EEPROM is already marked invalid.
 *
 * When __verify_ras_table_checksum() returns non-zero, recovery is attempted
 * in three steps, each treated as successful when it returns zero: reset the
 * table, save the bad pages again, then re-verify the checksum. If any step
 * fails, the remaining steps are not run and control->is_eeprom_valid is
 * cleared.
 */
void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ras_eeprom_control *ctl;
	int err;

	if (!__is_ras_eeprom_supported(adev) || !con)
		return;

	ctl = &con->eeprom_control;
	if (!ctl->is_eeprom_valid)
		return;

	err = __verify_ras_table_checksum(ctl);
	if (!err)
		return;

	dev_warn(adev->dev,
		"RAS table incorrect checksum or error:%d, try to recover\n",
		err);

	/* Short-circuit evaluation: a failing step skips the ones after it. */
	if (!amdgpu_ras_eeprom_reset_table(ctl) &&
	    !amdgpu_ras_save_bad_pages(adev, NULL) &&
	    !__verify_ras_table_checksum(ctl)) {
		dev_info(adev->dev, "RAS table recovery succeed\n");
		return;
	}

	dev_err(adev->dev, "RAS table recovery failed\n");
	ctl->is_eeprom_valid = false;
}
 No newline at end of file
+2 −0
Original line number Diff line number Diff line
@@ -161,6 +161,8 @@ void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);

int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);

/*
 * Verify the RAS table checksum and, on failure, attempt to reset the table
 * and save the bad pages again; marks the EEPROM invalid if recovery fails.
 */
void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);

extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;