Commit 2b17c240, authored by Tao Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: add range check for RAS bad page address



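Exclude invalid bad pages: the address range check moves into
amdgpu_ras_check_bad_page_unlock(), which now returns -EINVAL for an
out-of-range address, 1 for an address already recorded as a bad page,
and 0 otherwise.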

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a813437c
Loading
Loading
Loading
Loading
+28 −30
Original line number Diff line number Diff line
@@ -139,9 +139,9 @@ enum amdgpu_ras_retire_page_reservation {

atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);

static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
				uint64_t addr);
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
				uint64_t addr);
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
@@ -172,15 +172,13 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
	struct eeprom_table_record err_rec;
	int ret;

	if ((address >= adev->gmc.mc_vram_size) ||
	    (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
	ret = amdgpu_ras_check_bad_page(adev, address);
	if (ret == -EINVAL) {
		dev_warn(adev->dev,
			"RAS WARN: input address 0x%llx is invalid.\n",
			address);
		return -EINVAL;
	}

	if (amdgpu_ras_check_bad_page(adev, address)) {
	} else if (ret == 1) {
		dev_warn(adev->dev,
			"RAS WARN: 0x%llx has already been marked as bad page!\n",
			address);
@@ -573,21 +571,15 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
		break;
	case 2:
		if ((data.inject.address >= adev->gmc.mc_vram_size &&
		    adev->gmc.mc_vram_size) ||
		    (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
			dev_warn(adev->dev, "RAS WARN: input address "
					"0x%llx is invalid.",
		/* umc ce/ue error injection for a bad page is not allowed */
		if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
			ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
		if (ret == -EINVAL) {
			dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
					data.inject.address);
			ret = -EINVAL;
			break;
		}

		/* umc ce/ue error injection for a bad page is not allowed */
		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
			dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
				 "already been marked as bad!\n",
		} else if (ret == 1) {
			dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
					data.inject.address);
			break;
		}
@@ -3194,18 +3186,24 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
	return ret;
}

/*
 * amdgpu_ras_check_bad_page_unlock - look up addr in the recorded bad pages.
 *
 * This hunk is shown without its +/- markers, so the removed (bool,
 * true/false) and the added (int, 1/0, range check) lines appear together.
 * The description below is for the added, int-returning version:
 *
 *   -EINVAL  addr is at or beyond RAS_UMC_INJECT_ADDR_LIMIT, or at or beyond
 *            adev->gmc.mc_vram_size when that size is non-zero
 *   1        addr's page matches a retired_page entry in con->eh_data->bps
 *   0        addr is in range and not in the list
 *
 * con->eh_data is dereferenced without a NULL check here.
 * NOTE(review): the "_unlock" suffix suggests the caller is expected to
 * handle locking around this call - confirm against the callers.
 */
static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
				uint64_t addr)
{
	struct ras_err_handler_data *data = con->eh_data;
	struct amdgpu_device *adev = con->adev;
	int i;

	/* The VRAM-size bound is skipped when mc_vram_size is 0. */
	if ((addr >= adev->gmc.mc_vram_size &&
	    adev->gmc.mc_vram_size) ||
	    (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
		return -EINVAL;

	/* Shift addr down so it can be compared with the retired_page entries. */
	addr >>= AMDGPU_GPU_PAGE_SHIFT;
	for (i = 0; i < data->count; i++)
		if (addr == data->bps[i].retired_page)
			return true;
			return 1;

	return false;
	return 0;
}

/*
@@ -3213,11 +3211,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
 *
 * Note: this check is only for umc block
 */
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
				uint64_t addr)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	bool ret = false;
	int ret = 0;

	if (!con || !con->eh_data)
		return ret;