Commit 5e67bba3 authored by yipechai, committed by Alex Deucher
Browse files

drm/amdgpu: Modify mmhub block to fit for the unified ras block data and ops



1.Modify mmhub block to fit for the unified ras block data and ops.
2.Change amdgpu_mmhub_ras_funcs to amdgpu_mmhub_ras, and remove the _funcs suffix from the corresponding variable name.
3.Remove the const flag of the mmhub ras variable so that the mmhub ras block can be inserted into the amdgpu device ras block linked list.
4.Invoke the amdgpu_ras_register_ras_block function to register the mmhub ras block into the amdgpu device ras block linked list.
5.Remove the redundant code about mmhub in amdgpu_ras.c after using the unified ras block.
6.Fill the unified ras block .name, .block, .ras_late_init and .ras_fini for all mmhub versions. If .ras_late_init and .ras_fini are defined by the selected mmhub version, the defined functions take effect; if not, they default to amdgpu_mmhub_ras_late_init and amdgpu_mmhub_ras_fini.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6d76e904
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -3307,9 +3307,9 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
		if (adev->asic_reset_res)
			goto fail;

		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->reset_ras_error_count)
			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
	} else {

		task_barrier_full(&hive->tb);
@@ -4656,9 +4656,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,

	if (!r && amdgpu_ras_intr_triggered()) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			if (tmp_adev->mmhub.ras_funcs &&
			    tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
				tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
		}

		amdgpu_ras_intr_cleared();
+4 −6
Original line number Diff line number Diff line
@@ -447,9 +447,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
			return r;
	}

	if (adev->mmhub.ras_funcs &&
	    adev->mmhub.ras_funcs->ras_late_init) {
		r = adev->mmhub.ras_funcs->ras_late_init(adev);
	if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
		r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL);
		if (r)
			return r;
	}
@@ -501,9 +500,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
	    adev->umc.ras_funcs->ras_fini)
		adev->umc.ras_funcs->ras_fini(adev);

	if (adev->mmhub.ras_funcs &&
	    adev->mmhub.ras_funcs->ras_fini)
		adev->mmhub.ras_funcs->ras_fini(adev);
	if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
		adev->mmhub.ras->ras_block.ras_fini(adev);

	if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
		adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
+1 −1
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"

int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info)
{
	int r;
	struct ras_ih_if ih_info = {
+4 −10
Original line number Diff line number Diff line
@@ -21,14 +21,8 @@
#ifndef __AMDGPU_MMHUB_H__
#define __AMDGPU_MMHUB_H__

struct amdgpu_mmhub_ras_funcs {
	int (*ras_late_init)(struct amdgpu_device *adev);
	void (*ras_fini)(struct amdgpu_device *adev);
	void (*query_ras_error_count)(struct amdgpu_device *adev,
				      void *ras_error_status);
	void (*query_ras_error_status)(struct amdgpu_device *adev);
	void (*reset_ras_error_count)(struct amdgpu_device *adev);
	void (*reset_ras_error_status)(struct amdgpu_device *adev);
struct amdgpu_mmhub_ras {
	struct amdgpu_ras_block_object ras_block;
};

struct amdgpu_mmhub_funcs {
@@ -50,10 +44,10 @@ struct amdgpu_mmhub_funcs {
struct amdgpu_mmhub {
	struct ras_common_if *ras_if;
	const struct amdgpu_mmhub_funcs *funcs;
	const struct amdgpu_mmhub_ras_funcs *ras_funcs;
	struct amdgpu_mmhub_ras  *ras;
};

int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info);
void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
#endif
+13 −34
Original line number Diff line number Diff line
@@ -986,6 +986,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
		}
		break;
	case AMDGPU_RAS_BLOCK__GFX:
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (!block_obj || !block_obj->hw_ops)   {
			dev_info(adev->dev, "%s doesn't config ras function \n",
						get_ras_block_str(&info->head));
@@ -997,15 +998,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
		if (block_obj->hw_ops->query_ras_error_status)
			block_obj->hw_ops->query_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->query_ras_error_count)
			adev->mmhub.ras_funcs->query_ras_error_count(adev, &err_data);

		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->query_ras_error_status)
			adev->mmhub.ras_funcs->query_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__PCIE_BIF:
		if (adev->nbio.ras_funcs &&
		    adev->nbio.ras_funcs->query_ras_error_count)
@@ -1089,6 +1081,7 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,

	switch (block) {
	case AMDGPU_RAS_BLOCK__GFX:
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (!block_obj || !block_obj->hw_ops)   {
			dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block));
			return -EINVAL;
@@ -1100,15 +1093,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
		if (block_obj->hw_ops->reset_ras_error_status)
			block_obj->hw_ops->reset_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->reset_ras_error_count)
			adev->mmhub.ras_funcs->reset_ras_error_count(adev);

		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->reset_ras_error_status)
			adev->mmhub.ras_funcs->reset_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__SDMA:
		if (adev->sdma.funcs->reset_ras_error_count)
			adev->sdma.funcs->reset_ras_error_count(adev);
@@ -1825,8 +1809,11 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
	 * Only two block need to query read/write
	 * RspStatus at current state
	 */
	switch (info->head.block) {
	case AMDGPU_RAS_BLOCK__GFX:
	if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
		(info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
		return ;

	block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index);
	if (!block_obj || !block_obj->hw_ops) {
		dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
		return ;
@@ -1834,15 +1821,7 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,

	if (block_obj->hw_ops->query_ras_error_status)
	block_obj->hw_ops->query_ras_error_status(adev);
		break;
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (adev->mmhub.ras_funcs &&
		    adev->mmhub.ras_funcs->query_ras_error_status)
			adev->mmhub.ras_funcs->query_ras_error_status(adev);
		break;
	default:
		break;
	}

}

static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
Loading