Commit d9443ac4 authored by Tao Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: drop status query/reset for GCEA 9.4.3 and MMEA 1.8



PMFW will be responsible for them.

v2: remove query interfaces.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 626121fc
Loading
Loading
Loading
Loading
+0 −60
Original line number Diff line number Diff line
@@ -3754,10 +3754,6 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
	    AMDGPU_GFX_LDS_MEM, 4},
};

/*
 * Register descriptor for GCEA_ERR_STATUS.
 *
 * The GCEA query/reset helpers in this file use its .se_num and .instance
 * members as loop bounds. The trailing positional values (0, 1, 16)
 * presumably fill reg_value, se_num and instance in that order - TODO confirm
 * against the definition of struct soc15_reg_entry.
 */
static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = {
	SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
};

static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
					void *ras_error_status, int xcc_id)
{
@@ -3846,39 +3842,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev,
					int xcc_id)
{
	uint32_t i, j;
	uint32_t reg_value;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
			gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
			reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
					regGCEA_ERR_STATUS);
			if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
			    REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
			    REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
				dev_warn(adev->dev,
					"GCEA err detected at instance: %d, status: 0x%x!\n",
					j, reg_value);
			}
			/* clear after read */
			reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
						  CLEAR_ERROR_STATUS, 0x1);
			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS,
					reg_value);
		}
	}

	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
			xcc_id);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
					int xcc_id)
{
@@ -3983,7 +3946,6 @@ static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
/*
 * Query all per-XCC RAS error status sources handled by this file.
 *
 * Calls the GCEA, UTC and SQ-timeout status query helpers, in that order,
 * for the given XCC.
 *
 * @adev:             device to query
 * @ras_error_status: not used by this function
 * @xcc_id:           XCC index forwarded to each helper
 */
static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
					void *ras_error_status, int xcc_id)
{
	gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id);
	gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
	gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
}
@@ -3996,27 +3958,6 @@ static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
}

static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev,
					int xcc_id)
{
	uint32_t i, j;
	uint32_t value;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
			gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
			value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS);
			value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
						CLEAR_ERROR_STATUS, 0x1);
			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value);
		}
	}
	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
			xcc_id);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
					int xcc_id)
{
@@ -4042,7 +3983,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
					void *ras_error_status, int xcc_id)
{
	gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
	gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
	gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
}

+0 −143
Original line number Diff line number Diff line
@@ -700,152 +700,9 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
		mmhub_v1_8_inst_reset_ras_error_count(adev, i);
}

/*
 * ERR_STATUS registers of the five MMEA instances (MMEA0..MMEA4).
 *
 * The query/reset helpers visible in this file only take ARRAY_SIZE() of this
 * table to get the number of MMEA instances; the actual register offsets are
 * derived from regMMEA0_* plus a per-instance stride. __maybe_unused keeps
 * the compiler quiet about the element values never being read.
 */
static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
	regMMEA0_ERR_STATUS,
	regMMEA1_ERR_STATUS,
	regMMEA2_ERR_STATUS,
	regMMEA3_ERR_STATUS,
	regMMEA4_ERR_STATUS,
};

/*
 * Log any latched MMEA / MM_CANE error status of one MMHUB instance.
 *
 * Reads MMEAn_ERR_STATUS for every MMEA instance (addressed as
 * regMMEA0_ERR_STATUS plus n times the MMEA0->MMEA1 register distance) and
 * then MM_CANE_ERR_STATUS. A warning is emitted for each register in which a
 * read-response, write-response or read-data-parity field is non-zero.
 * Nothing is written; the status is not cleared here.
 *
 * @adev:       device whose registers are read
 * @mmhub_inst: MMHUB instance index used for register access
 */
static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
						 uint32_t mmhub_inst)
{
	const uint32_t mmea_stride = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
	uint32_t status;
	uint32_t mmea;

	/* per-MMEA error status */
	for (mmea = 0; mmea < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); mmea++) {
		status = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
					     regMMEA0_ERR_STATUS,
					     mmea * mmea_stride);

		if (!REG_GET_FIELD(status, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) &&
		    !REG_GET_FIELD(status, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) &&
		    !REG_GET_FIELD(status, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR))
			continue;

		dev_warn(adev->dev,
			 "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
			 mmea, mmhub_inst, status);
	}

	/* MM_CANE error status */
	status = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);

	if (REG_GET_FIELD(status, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
	    REG_GET_FIELD(status, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
	    REG_GET_FIELD(status, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR))
		dev_warn(adev->dev,
			 "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
			 mmhub_inst, status);
}

/*
 * Query the RAS error status of every MMHUB instance.
 *
 * Warns and returns early when RAS is not supported for the MMHUB block;
 * otherwise runs the per-instance status query for each bit set in
 * adev->aid_mask.
 */
static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
{
	uint32_t aid_mask;
	uint32_t inst;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
		dev_warn(adev->dev, "MMHUB RAS is not supported\n");
		return;
	}

	aid_mask = adev->aid_mask;

	for_each_inst(inst, aid_mask)
		mmhub_v1_8_inst_query_ras_err_status(adev, inst);
}

/*
 * Clear the latched MMEA and MM_CANE error status of one MMHUB instance.
 *
 * For every MMEA instance, and then once for MM_CANE, the same three-step
 * sequence is performed with read-modify-write accesses:
 *   1. set the clock soft-override bit (SOFT_OVERRIDE_RETURN for MMEA,
 *      SOFT_OVERRIDE_ATRET for MM_CANE),
 *   2. set CLEAR_ERROR_STATUS in the corresponding ERR_STATUS register,
 *   3. clear the soft-override bit again.
 * The order of these steps must be preserved.
 *
 * MMEA registers are addressed as the MMEA0 register plus the instance index
 * times the MMEA0->MMEA1 register distance.
 *
 * @adev:       device whose registers are accessed
 * @mmhub_inst: MMHUB instance index used for register access
 */
static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
						 uint32_t mmhub_inst)
{
	uint32_t mmea_cgtt_clk_cntl_addr_dist;
	uint32_t mmea_err_status_addr_dist;
	uint32_t reg_value;
	uint32_t i;

	/* reset mmea ras err status */
	mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
	mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
	/* the table is only used for its element count (number of MMEAs) */
	for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
		/* force clk branch on for response path
		 * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
		 */
		reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
						regMMEA0_CGTT_CLK_CTRL,
						i * mmea_cgtt_clk_cntl_addr_dist);
		reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
					  SOFT_OVERRIDE_RETURN, 1);
		WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
				    regMMEA0_CGTT_CLK_CTRL,
				    i * mmea_cgtt_clk_cntl_addr_dist,
				    reg_value);

		/* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
		reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
						regMMEA0_ERR_STATUS,
						i * mmea_err_status_addr_dist);
		reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
					  CLEAR_ERROR_STATUS, 1);
		WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
				    regMMEA0_ERR_STATUS,
				    i * mmea_err_status_addr_dist,
				    reg_value);

		/* release the override again:
		 * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0
		 */
		reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
						regMMEA0_CGTT_CLK_CTRL,
						i * mmea_cgtt_clk_cntl_addr_dist);
		reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
					  SOFT_OVERRIDE_RETURN, 0);
		WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
				    regMMEA0_CGTT_CLK_CTRL,
				    i * mmea_cgtt_clk_cntl_addr_dist,
				    reg_value);
	}

	/* reset mm_cane ras err status
	 * force clk branch on for response path
	 * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
	 */
	reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
	reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
				  SOFT_OVERRIDE_ATRET, 1);
	WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);

	/* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
	reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
	reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
				  CLEAR_ERROR_STATUS, 1);
	WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);

	/* release the override again:
	 * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0
	 */
	reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
	reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
				  SOFT_OVERRIDE_ATRET, 0);
	WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
}

/*
 * Clear the RAS error status of every MMHUB instance.
 *
 * Warns and returns early when RAS is not supported for the MMHUB block;
 * otherwise runs the per-instance status reset for each bit set in
 * adev->aid_mask.
 */
static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
{
	uint32_t aid_mask;
	uint32_t inst;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
		dev_warn(adev->dev, "MMHUB RAS is not supported\n");
		return;
	}

	aid_mask = adev->aid_mask;

	for_each_inst(inst, aid_mask)
		mmhub_v1_8_inst_reset_ras_err_status(adev, inst);
}

/*
 * RAS hardware callbacks for MMHUB v1.8: error-count query/reset and
 * error-status query/reset, each wired to the device-wide entry point
 * defined in this file.
 */
static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
	.query_ras_error_count = mmhub_v1_8_query_ras_error_count,
	.reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
	.query_ras_error_status = mmhub_v1_8_query_ras_error_status,
	.reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
};

struct amdgpu_mmhub_ras mmhub_v1_8_ras = {