Commit 27cdf8c3 authored by YiPeng Chai's avatar YiPeng Chai Committed by Alex Deucher
Browse files

drm/amdgpu: optimize umc v12 address conversion function



Split into 3 parts:
1. Convert soc physical address via ras ta.
2. Expand bad pages from soc physical address.
3. Dump bad address info.

Signed-off-by: default avatarYiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent a9ed2f05
Loading
Loading
Loading
Loading
+77 −39
Original line number Diff line number Diff line
@@ -225,26 +225,16 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
	}
}

static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
				struct ta_ras_query_address_input *addr_in,
				uint64_t *pfns, int len)
static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev,
				struct ta_ras_query_address_output *addr_out,
				uint64_t err_addr)
{
	uint32_t col, row, row_xor, bank, channel_index;
	uint64_t soc_pa, retired_page, column, err_addr;
	struct ta_ras_query_address_output addr_out;
	uint32_t pos = 0;

	err_addr = addr_in->ma.err_addr;
	addr_in->addr_type = TA_RAS_MCA_TO_PA;
	if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) {
		dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
			err_addr);
		return 0;
	}
	uint64_t soc_pa, retired_page, column;

	soc_pa = addr_out.pa.pa;
	bank = addr_out.pa.bank;
	channel_index = addr_out.pa.channel_idx;
	soc_pa = addr_out->pa.pa;
	bank = addr_out->pa.bank;
	channel_index = addr_out->pa.channel_idx;

	col = (err_addr >> 1) & 0x1fULL;
	row = (err_addr >> 10) & 0x3fffULL;
@@ -258,11 +248,6 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
	for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
		retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
		retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);

		if (pos >= len)
			return 0;
		pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;

		/* include column bit 0 and 1 */
		col &= 0x3;
		col |= (column << 2);
@@ -270,6 +255,35 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
			"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
			retired_page, row, col, bank, channel_index);

		/* shift R13 bit */
		retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
		dev_info(adev->dev,
			"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
			retired_page, row_xor, col, bank, channel_index);
	}
}

static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
			uint64_t pa_addr, uint64_t *pfns, int len)
{
	uint64_t soc_pa, retired_page, column;
	uint32_t pos = 0;

	soc_pa = pa_addr;
	/* clear [C3 C2] in soc physical address */
	soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
	/* clear [C4] in soc physical address */
	soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);

	/* loop for all possibilities of [C4 C3 C2] */
	for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
		retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
		retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);

		if (pos >= len)
			return 0;
		pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;

		/* shift R13 bit */
		retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);

@@ -277,14 +291,40 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
			return 0;
		pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;

		dev_info(adev->dev,
			"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
			retired_page, row_xor, col, bank, channel_index);
	}

	return pos;
}

static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev,
			uint64_t err_addr, uint32_t ch, uint32_t umc,
			uint32_t node, uint32_t socket,
			uint64_t *addr, bool dump_addr)
{
	struct ta_ras_query_address_input addr_in;
	struct ta_ras_query_address_output addr_out;

	memset(&addr_in, 0, sizeof(addr_in));
	addr_in.ma.err_addr = err_addr;
	addr_in.ma.ch_inst = ch;
	addr_in.ma.umc_inst = umc;
	addr_in.ma.node_inst = node;
	addr_in.ma.socket_id = socket;
	addr_in.addr_type = TA_RAS_MCA_TO_PA;
	if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) {
		dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
			err_addr);
		return -EINVAL;
	}

	if (dump_addr)
		umc_v12_0_dump_addr_info(adev, &addr_out, err_addr);

	*addr = addr_out.pa.pa;

	return 0;
}

static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
					uint32_t node_inst, uint32_t umc_inst,
					uint32_t ch_inst, void *data)
@@ -483,12 +523,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	uint16_t hwid, mcatype;
	struct ta_ras_query_address_input addr_in;
	uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
	uint64_t err_addr, hash_val = 0;
	uint64_t err_addr, hash_val = 0, pa_addr = 0;
	struct ras_ecc_err *ecc_err;
	int count;
	int ret;
	int count, ret;

	hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
	mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
@@ -514,17 +552,17 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
		MCA_IPID_2_UMC_CH(ipid),
		err_addr);

	memset(page_pfn, 0, sizeof(page_pfn));

	memset(&addr_in, 0, sizeof(addr_in));
	addr_in.ma.err_addr = err_addr;
	addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid);
	addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid);
	addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid);
	addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid);
	ret = umc_v12_0_convert_mca_to_addr(adev,
			err_addr, MCA_IPID_2_UMC_CH(ipid),
			MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
			MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true);
	if (ret)
		return ret;

	count = umc_v12_0_convert_err_addr(adev,
				&addr_in, page_pfn, ARRAY_SIZE(page_pfn));
	memset(page_pfn, 0, sizeof(page_pfn));
	count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
				pa_addr,
				page_pfn, ARRAY_SIZE(page_pfn));
	if (count <= 0) {
		dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count);
		return 0;