Commit fcb600b0 authored by Tao Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: add interface to get die id from memory address



And implement it for UMC v12_0. The die id is calculated from the IPID
register in the bad page retirement flow, but it is not stored in the
eeprom; it can also be derived from the physical address.

v2: get PA_C4 and PA_R13 from the MCA address since they may be cleared
in the retired page.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2206daa1
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -91,6 +91,8 @@ struct amdgpu_umc_ras {
			struct ta_ras_query_address_input *addr_in,
			struct ta_ras_query_address_output *addr_out,
			bool dump_addr);
	uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
			uint64_t mca_addr, uint64_t retired_page);
};

struct amdgpu_umc_funcs {
+26 −0
Original line number Diff line number Diff line
@@ -619,6 +619,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
	mutex_unlock(&con->umc_ecc_log.lock);
}

/*
 * Compute a 2-bit die id by XOR-folding selected address bits.
 *
 * @adev:         device handle (not used by this calculation)
 * @mca_addr:     MCA address; supplies bits 5 and 23 for die id bit 1
 * @retired_page: retired page address; supplies all remaining input bits
 *
 * Returns a value in the range 0..3.
 */
static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
		uint64_t mca_addr, uint64_t retired_page)
{
	uint64_t id_bit0;
	uint64_t id_bit1;

	/* we only calculate die id for nps1 mode right now */

	/* die id bit 0: parity of retired_page bits 12, 20, 27, 34 and 41 */
	id_bit0 = (retired_page >> 12) ^
		  (retired_page >> 20) ^
		  (retired_page >> 27) ^
		  (retired_page >> 34) ^
		  (retired_page >> 41);
	id_bit0 &= 0x1ULL;

	/*
	 * die id bit 1: parity of retired_page bits 13, 28 and 42 together
	 * with mca_addr bits 5 and 23. The original PA_C4 and PA_R13 may be
	 * cleared in retired_page, so those two are taken from mca_addr.
	 */
	id_bit1 = (retired_page >> 13) ^
		  (mca_addr >> 5) ^
		  (retired_page >> 28) ^
		  (mca_addr >> 23) ^
		  (retired_page >> 42);
	id_bit1 &= 0x1ULL;

	return (uint32_t)((id_bit1 << 1) | id_bit0);
}

struct amdgpu_umc_ras umc_v12_0_ras = {
	.ras_block = {
		.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -630,5 +655,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
	.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
	.update_ecc_status = umc_v12_0_update_ecc_status,
	.convert_ras_err_addr = umc_v12_0_convert_error_address,
	.get_die_id_from_pa = umc_v12_0_get_die_id,
};