Commit a8f921a1 authored by ganglxie's avatar ganglxie Committed by Alex Deucher
Browse files

drm/amdgpu: Change page/record number calculation based on nps



Save only one record to save EEPROM space, and calculate the bad page count as
bad_page_num = pa_rec_num + mca_rec_num * 16

Signed-off-by: ganglxie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 0153d276
Loading
Loading
Loading
Loading
+22 −27
Original line number Diff line number Diff line
@@ -2981,14 +2981,6 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,

	/* only new entries are saved */
	if (save_count > 0) {
		if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) {
			if (amdgpu_ras_eeprom_append(control,
						     &data->bps[control->ras_num_recs],
						     save_count)) {
				dev_err(adev->dev, "Failed to save EEPROM table data!");
				return -EIO;
			}
		} else {
		for (i = 0; i < unit_num; i++) {
			if (amdgpu_ras_eeprom_append(control,
					&data->bps[bad_page_num + i * adev->umc.retire_unit],
@@ -2997,8 +2989,6 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
				return -EIO;
			}
		}
		}

		dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
	}

@@ -3014,7 +3004,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
	struct amdgpu_ras_eeprom_control *control =
		&adev->psp.ras_context.ras->eeprom_control;
	struct eeprom_table_record *bps;
	int ret;
	int ret, i = 0;

	/* no bad page record, skip eeprom access */
	if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
@@ -3028,13 +3018,23 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
	if (ret) {
		dev_err(adev->dev, "Failed to load EEPROM table records!");
	} else {
		if (control->ras_num_recs > 1 &&
		    adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
			if ((bps[0].address == bps[1].address) &&
			    (bps[0].mem_channel == bps[1].mem_channel))
				control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;
			else
				control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
		if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
			for (i = 0; i < control->ras_num_recs; i++) {
				if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
					if ((bps[i].address == bps[i + 1].address) &&
						(bps[i].mem_channel == bps[i + 1].mem_channel)) {
						control->ras_num_pa_recs += adev->umc.retire_unit;
						i += (adev->umc.retire_unit - 1);
					} else {
						control->ras_num_mca_recs +=
									(control->ras_num_recs - i);
						break;
					}
				} else {
					control->ras_num_mca_recs += (control->ras_num_recs - i);
					break;
				}
			}
		}

		ret = amdgpu_ras_eeprom_check(control);
@@ -3440,12 +3440,7 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
		return ret;

	if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
		control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;

	/* default status is MCA storage */
	if (control->ras_num_recs <= 1 &&
	    adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
		control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
		control->ras_num_pa_recs = control->ras_num_recs;

	if (control->ras_num_recs) {
		ret = amdgpu_ras_load_bad_pages(adev);
+7 −10
Original line number Diff line number Diff line
@@ -727,11 +727,9 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
				     - control->ras_fri)
		% control->ras_max_record_count;

	if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
		control->ras_num_bad_pages = control->ras_num_recs;
	else
		control->ras_num_bad_pages =
			control->ras_num_recs * adev->umc.retire_unit;
	control->ras_num_mca_recs += num;
	control->ras_num_bad_pages += num * adev->umc.retire_unit;

Out:
	kfree(buf);
	return res;
@@ -1396,6 +1394,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
	}
	control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);

	control->ras_num_mca_recs = 0;
	control->ras_num_pa_recs = 0;
	return 0;
}

@@ -1416,11 +1416,8 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
	if (!__get_eeprom_i2c_addr(adev, control))
		return -EINVAL;

	if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
		control->ras_num_bad_pages = control->ras_num_recs;
	else
		control->ras_num_bad_pages =
			control->ras_num_recs * adev->umc.retire_unit;
	control->ras_num_bad_pages = control->ras_num_pa_recs +
			control->ras_num_mca_recs * adev->umc.retire_unit;

	if (hdr->header == RAS_TABLE_HDR_VAL) {
		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
+6 −14
Original line number Diff line number Diff line
@@ -43,19 +43,6 @@ enum amdgpu_ras_eeprom_err_type {
	AMDGPU_RAS_EEPROM_ERR_COUNT,
};

/*
 * One UMC MCA address can map to multiple physical addresses (PAs),
 * e.g. 1:16. eeprom_table_record.address is used to store the MCA
 * address and eeprom_table_record.retired_page is used to save the PA.
 *
 * AMDGPU_RAS_EEPROM_REC_PA: one record stores one PA
 * AMDGPU_RAS_EEPROM_REC_MCA: one record stores one MCA address
 */
enum amdgpu_ras_eeprom_rec_type {
	AMDGPU_RAS_EEPROM_REC_PA,
	AMDGPU_RAS_EEPROM_REC_MCA,
};

struct amdgpu_ras_eeprom_table_header {
	uint32_t header;
	uint32_t version;
@@ -100,6 +87,12 @@ struct amdgpu_ras_eeprom_control {
	 */
	u32 ras_num_bad_pages;

	/* Number of records that store an MCA address */
	u32 ras_num_mca_recs;

	/* Number of records that store a physical address */
	u32 ras_num_pa_recs;

	/* First record index to read, 0-based.
	 * Range is [0, num_recs-1]. This is
	 * an absolute index, starting right after
@@ -120,7 +113,6 @@ struct amdgpu_ras_eeprom_control {
	/* Record channel info which occurred bad pages
	 */
	u32 bad_channel_bitmap;
	enum amdgpu_ras_eeprom_rec_type rec_type;
};

/*