Commit 8e8e08c8 authored by Xiang Liu's avatar Xiang Liu Committed by Alex Deucher
Browse files

drm/amdgpu: Skip poison aca bank from UE channel



Avoid GFX poison consumption errors logged when fatal error occurs.

Signed-off-by: default avatarXiang Liu <xiang.liu@amd.com>
Reviewed-by: default avatarTao Zhou <tao.zhou1@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 4d22db6d
Loading
Loading
Loading
Loading
+30 −21
Original line number Diff line number Diff line
@@ -130,6 +130,27 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
		RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
}

static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
{

	struct aca_hwip *hwip;
	int hwid, mcatype;
	u64 ipid;

	if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
		return false;

	hwip = &aca_hwid_mcatypes[type];
	if (!hwip->hwid)
		return false;

	ipid = bank->regs[ACA_REG_IDX_IPID];
	hwid = ACA_REG__IPID__HARDWAREID(ipid);
	mcatype = ACA_REG__IPID__MCATYPE(ipid);

	return hwip->hwid == hwid && hwip->mcatype == mcatype;
}

static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
				       int start, int count,
				       struct aca_banks *banks, struct ras_query_context *qctx)
@@ -168,6 +189,15 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_

		bank.smu_err_type = type;

		/*
		 * Poison being consumed when injecting a UE while running background workloads,
		 * which are unexpected.
		 */
		if (type == ACA_SMU_TYPE_UE &&
		    ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
		    !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
			continue;

		aca_smu_bank_dump(adev, i, count, &bank, qctx);

		ret = aca_banks_add_bank(banks, &bank);
@@ -178,27 +208,6 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
	return 0;
}

static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
{

	struct aca_hwip *hwip;
	int hwid, mcatype;
	u64 ipid;

	if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
		return false;

	hwip = &aca_hwid_mcatypes[type];
	if (!hwip->hwid)
		return false;

	ipid = bank->regs[ACA_REG_IDX_IPID];
	hwid = ACA_REG__IPID__HARDWAREID(ipid);
	mcatype = ACA_REG__IPID__MCATYPE(ipid);

	return hwip->hwid == hwid && hwip->mcatype == mcatype;
}

static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
{
	const struct aca_bank_ops *bank_ops = handle->bank_ops;