Commit 719a9b33 authored by Hawking Zhang, committed by Alex Deucher
Browse files

drm/amdgpu: split gfx callbacks into ras and non-ras ones



gfx ras is only available in certain ip generations.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8bc7b360
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -677,8 +677,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.funcs->query_ras_error_count)
			adev->gfx.funcs->query_ras_error_count(adev, err_data);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->query_ras_error_count)
			adev->gfx.ras_funcs->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
+15 −7
Original line number Diff line number Diff line
@@ -205,6 +205,19 @@ struct amdgpu_cu_info {
	uint32_t bitmap[4][4];
};

/*
 * GFX RAS callback table, kept separate from struct amdgpu_gfx_funcs and
 * reached through adev->gfx.ras_funcs.
 *
 * The visible call sites check both the table pointer and the individual
 * member for NULL before calling, so the table and each member may be
 * left unset.
 */
struct amdgpu_gfx_ras_funcs {
	/* Called from gfx_v9_0_ecc_late_init(); a non-zero return is propagated. */
	int (*ras_late_init)(struct amdgpu_device *adev);
	/* Called from gfx_v9_0_sw_fini(). */
	void (*ras_fini)(struct amdgpu_device *adev);
	/*
	 * Called from amdgpu_ras_error_inject() for AMDGPU_RAS_BLOCK__GFX;
	 * the return value becomes that function's ret. When the callback
	 * is missing the caller uses -EINVAL instead.
	 */
	int (*ras_error_inject)(struct amdgpu_device *adev,
				void *inject_if);
	/*
	 * ras_error_status is an untyped pointer; the gfx_v9_4
	 * implementation casts it to struct ras_err_data *.
	 */
	int (*query_ras_error_count)(struct amdgpu_device *adev,
				     void *ras_error_status);
	void (*reset_ras_error_count)(struct amdgpu_device *adev);
	void (*query_ras_error_status)(struct amdgpu_device *adev);
	void (*reset_ras_error_status)(struct amdgpu_device *adev);
	/* Called at the end of gfx_v9_0_ecc_late_init(), after ras_late_init. */
	void (*enable_watchdog_timer)(struct amdgpu_device *adev);
};

struct amdgpu_gfx_funcs {
	/* get the gpu clock counter */
	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
@@ -220,14 +233,8 @@ struct amdgpu_gfx_funcs {
				uint32_t *dst);
	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
				 u32 queue, u32 vmid);
	int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
	int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
	void (*reset_ras_error_count) (struct amdgpu_device *adev);
	void (*init_spm_golden)(struct amdgpu_device *adev);
	void (*query_ras_error_status) (struct amdgpu_device *adev);
	void (*reset_ras_error_status) (struct amdgpu_device *adev);
	void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
	void (*enable_watchdog_timer)(struct amdgpu_device *adev);
};

struct sq_work {
@@ -331,6 +338,7 @@ struct amdgpu_gfx {

	/*ras */
	struct ras_common_if			*ras_if;
	const struct amdgpu_gfx_ras_funcs	*ras_funcs;
};

#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+18 −12
Original line number Diff line number Diff line
@@ -792,11 +792,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
		}
		break;
	case AMDGPU_RAS_BLOCK__GFX:
		if (adev->gfx.funcs->query_ras_error_count)
			adev->gfx.funcs->query_ras_error_count(adev, &err_data);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->query_ras_error_count)
			adev->gfx.ras_funcs->query_ras_error_count(adev, &err_data);

		if (adev->gfx.funcs->query_ras_error_status)
			adev->gfx.funcs->query_ras_error_status(adev);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->query_ras_error_status)
			adev->gfx.ras_funcs->query_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (adev->mmhub.ras_funcs &&
@@ -852,11 +854,13 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,

	switch (block) {
	case AMDGPU_RAS_BLOCK__GFX:
		if (adev->gfx.funcs->reset_ras_error_count)
			adev->gfx.funcs->reset_ras_error_count(adev);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->reset_ras_error_count)
			adev->gfx.ras_funcs->reset_ras_error_count(adev);

		if (adev->gfx.funcs->reset_ras_error_status)
			adev->gfx.funcs->reset_ras_error_status(adev);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->reset_ras_error_status)
			adev->gfx.ras_funcs->reset_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (adev->mmhub.ras_funcs &&
@@ -926,8 +930,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,

	switch (info->head.block) {
	case AMDGPU_RAS_BLOCK__GFX:
		if (adev->gfx.funcs->ras_error_inject)
			ret = adev->gfx.funcs->ras_error_inject(adev, info);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->ras_error_inject)
			ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
		else
			ret = -EINVAL;
		break;
@@ -1514,8 +1519,9 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
	 */
	switch (info->head.block) {
	case AMDGPU_RAS_BLOCK__GFX:
		if (adev->gfx.funcs->query_ras_error_status)
			adev->gfx.funcs->query_ras_error_status(adev);
		if (adev->gfx.ras_funcs &&
		    adev->gfx.ras_funcs->query_ras_error_status)
			adev->gfx.ras_funcs->query_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (adev->mmhub.ras_funcs &&
+26 −42
Original line number Diff line number Diff line
@@ -2109,39 +2109,16 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
        .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
        .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
        .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
};

/*
 * RAS callback table for GFX v9.0. gfx_v9_0_gpu_early_init() installs it
 * on adev->gfx.ras_funcs in its CHIP_VEGA20 case. Members not listed here
 * stay NULL; the callers test each pointer before invoking it.
 */
static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
	/* error injection and error counters: gfx_v9_0 implementations */
	.ras_error_inject	= gfx_v9_0_ras_error_inject,
	.query_ras_error_count	= gfx_v9_0_query_ras_error_count,
	.reset_ras_error_count	= gfx_v9_0_reset_ras_error_count,
	/* late init / teardown: the amdgpu_gfx_* helpers */
	.ras_late_init		= amdgpu_gfx_ras_late_init,
	.ras_fini		= amdgpu_gfx_ras_fini,
};

static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
	.ras_error_inject = &gfx_v9_4_ras_error_inject,
	.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
	.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
	.query_ras_error_status = &gfx_v9_4_query_ras_error_status,
};

static const struct amdgpu_gfx_funcs gfx_v9_4_2_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
	.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
	.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
	.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
	.query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
	.reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
	.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
};

static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
@@ -2168,6 +2145,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2193,7 +2171,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_ARCTURUS:
		adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2214,7 +2192,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
		gb_addr_config |= 0x22010042;
		break;
	case CHIP_ALDEBARAN:
		adev->gfx.funcs = &gfx_v9_4_2_gfx_funcs;
		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -2447,7 +2425,9 @@ static int gfx_v9_0_sw_fini(void *handle)
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_gfx_ras_fini(adev);
	if (adev->gfx.ras_funcs &&
	    adev->gfx.ras_funcs->ras_fini)
		adev->gfx.ras_funcs->ras_fini(adev);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4766,12 +4746,16 @@ static int gfx_v9_0_ecc_late_init(void *handle)
	if (r)
		return r;

	r = amdgpu_gfx_ras_late_init(adev);
	if (adev->gfx.ras_funcs &&
	    adev->gfx.ras_funcs->ras_late_init) {
		r = adev->gfx.ras_funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	if (adev->gfx.funcs->enable_watchdog_timer)
		adev->gfx.funcs->enable_watchdog_timer(adev);
	if (adev->gfx.ras_funcs &&
	    adev->gfx.ras_funcs->enable_watchdog_timer)
		adev->gfx.ras_funcs->enable_watchdog_timer(adev);

	return 0;
}
+15 −5
Original line number Diff line number Diff line
@@ -863,7 +863,7 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
	return 0;
}

int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -906,7 +906,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
	return 0;
}

void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
{
	int i, j, k;

@@ -971,7 +971,8 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
}

int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	int ret;
@@ -996,7 +997,7 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs =
	{ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };

void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
{
	uint32_t i, j;
	uint32_t reg_value;
@@ -1021,3 +1022,12 @@ void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
	gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/*
 * RAS callback table for GFX v9.4. It has external linkage;
 * gfx_v9_0_gpu_early_init() assigns it to adev->gfx.ras_funcs in its
 * CHIP_ARCTURUS case. reset_ras_error_status and enable_watchdog_timer
 * are not provided and therefore stay NULL.
 */
const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = {
	/* error injection, counters and status: gfx_v9_4 implementations */
	.ras_error_inject	= gfx_v9_4_ras_error_inject,
	.query_ras_error_count	= gfx_v9_4_query_ras_error_count,
	.reset_ras_error_count	= gfx_v9_4_reset_ras_error_count,
	.query_ras_error_status	= gfx_v9_4_query_ras_error_status,
	/* late init / teardown: the amdgpu_gfx_* helpers */
	.ras_late_init		= amdgpu_gfx_ras_late_init,
	.ras_fini		= amdgpu_gfx_ras_fini,
};
Loading