Commit 301661b0 authored by Lijo Lazar's avatar Lijo Lazar Committed by Alex Deucher
Browse files

drm/amd/pm: Add special handling for RAS messages



When a RAS fatal error is detected, PMFW will only process priority
messages. Other messages won't be taken up for processing and therefore
won't get any response in such a state.

Add logic to filter out non-priority messages when RAS error is
detected. Also, don't poll response response status register before
sending priority messages. Use firmware capability flag to determine
whether to filter priority messages.

Signed-off-by: default avatarLijo Lazar <lijo.lazar@amd.com>
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: default avatarAsad Kamal <asad.kamal@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 7b8081ea
Loading
Loading
Loading
Loading
+59 −6
Original line number Diff line number Diff line
@@ -235,6 +235,50 @@ static void __smu_cmn_send_msg(struct smu_context *smu,
	WREG32(smu->msg_reg, msg);
}

static inline uint32_t __smu_cmn_get_msg_flags(struct smu_context *smu,
					       enum smu_message_type msg)
{
	return smu->message_map[msg].flags;
}

static int __smu_cmn_ras_filter_msg(struct smu_context *smu,
				    enum smu_message_type msg, bool *poll)
{
	struct amdgpu_device *adev = smu->adev;
	uint32_t flags, resp;
	bool fed_status;

	flags = __smu_cmn_get_msg_flags(smu, msg);
	*poll = true;

	/* When there is RAS fatal error, FW won't process non-RAS priority
	 * messages. Don't allow any messages other than RAS priority messages.
	 */
	fed_status = amdgpu_ras_get_fed_status(adev);
	if (fed_status) {
		if (!(flags & SMU_MSG_RAS_PRI)) {
			dev_dbg(adev->dev,
				"RAS error detected, skip sending %s",
				smu_get_message_name(smu, msg));
			return -EACCES;
		}

		/* FW will ignore non-priority messages when a RAS fatal error
		 * is detected. Hence it is possible that a previous message
		 * wouldn't have got response. Allow to continue without polling
		 * for response status for priority messages.
		 */
		resp = RREG32(smu->resp_reg);
		dev_dbg(adev->dev,
			"Sending RAS priority message %s response status: %x",
			smu_get_message_name(smu, msg), resp);
		if (resp == 0)
			*poll = false;
	}

	return 0;
}

static int __smu_cmn_send_debug_msg(struct smu_context *smu,
			       u32 msg,
			       u32 param)
@@ -354,6 +398,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
{
	struct amdgpu_device *adev = smu->adev;
	int res, index;
	bool poll = true;
	u32 reg;

	if (adev->no_hw_access)
@@ -366,13 +411,21 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
		return index == -EACCES ? 0 : index;

	mutex_lock(&smu->message_lock);

	if (smu->smc_fw_caps & SMU_FW_CAP_RAS_PRI) {
		res = __smu_cmn_ras_filter_msg(smu, msg, &poll);
		if (res)
			goto Out;
	}

	if (poll) {
		reg = __smu_cmn_poll_stat(smu);
		res = __smu_cmn_reg2errno(smu, reg);
	if (reg == SMU_RESP_NONE ||
	    res == -EREMOTEIO) {
		if (reg == SMU_RESP_NONE || res == -EREMOTEIO) {
			__smu_cmn_reg_print_error(smu, reg, index, param, msg);
			goto Out;
		}
	}
	__smu_cmn_send_msg(smu, (uint16_t) index, param);
	reg = __smu_cmn_poll_stat(smu);
	res = __smu_cmn_reg2errno(smu, reg);