Commit 4e2965bd authored by Hawking Zhang's avatar Hawking Zhang Committed by Alex Deucher
Browse files

drm/amdgpu: Centralize ras cap query to amdgpu_ras_check_supported



Move ras capablity check to amdgpu_ras_check_supported.
Driver will query ras capablity through psp interace, or
vbios interface, or specific ip callbacks.

Signed-off-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: default avatarTao Zhou <tao.zhou1@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 0e14eb0c
Loading
Loading
Loading
Loading
+93 −77
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include "nbio_v7_9.h"
#include "atom.h"
#include "amdgpu_reset.h"
#include "amdgpu_psp.h"

#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -2811,23 +2812,10 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
}

/*
 * check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initializtion and
 * forbid some ras operations from IP.
 * if software itself, say boot parameter, limit the ras ability. We still
 * need allow IP do some limited operations, like disable. In such case,
 * we have to initialize ras as normal. but need check if operation is
 * allowed or not in each function.
 */
static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
/* Query ras capablity via atomfirmware interface */
static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
{
	adev->ras_hw_enabled = adev->ras_enabled = 0;

	if (!amdgpu_ras_asic_supported(adev))
		return;

	if (!adev->gmc.xgmi.connected_to_cpu &&	!adev->gmc.is_app_apu) {
	/* mem_ecc cap */
	if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
		dev_info(adev->dev, "MEM ECC is active.\n");
		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
@@ -2836,6 +2824,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
		dev_info(adev->dev, "MEM ECC is not presented.\n");
	}

	/* sram_ecc cap */
	if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
		dev_info(adev->dev, "SRAM ECC is active.\n");
		if (!amdgpu_sriov_vf(adev))
@@ -2846,15 +2835,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
						 1 << AMDGPU_RAS_BLOCK__SDMA |
						 1 << AMDGPU_RAS_BLOCK__GFX);

			/* VCN/JPEG RAS can be supported on both bare metal and
		/*
		 * VCN/JPEG RAS can be supported on both bare metal and
		 * SRIOV environment
		 */
			if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
				    IP_VERSION(2, 6, 0) ||
			    amdgpu_ip_version(adev, VCN_HWIP, 0) ==
				    IP_VERSION(4, 0, 0) ||
			    amdgpu_ip_version(adev, VCN_HWIP, 0) ==
				    IP_VERSION(4, 0, 3))
		if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
		    amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
		    amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3))
			adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
						 1 << AMDGPU_RAS_BLOCK__JPEG);
		else
@@ -2870,6 +2857,65 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
	} else {
		dev_info(adev->dev, "SRAM ECC is not presented.\n");
	}
}

/* Query poison mode from umc/df IP callbacks */
static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	bool df_poison, umc_poison;

	/* poison setting is useless on SRIOV guest */
	if (amdgpu_sriov_vf(adev) || !con)
		return;

	/* Init poison supported flag, the default value is false */
	if (adev->gmc.xgmi.connected_to_cpu ||
	    adev->gmc.is_app_apu) {
		/* enabled by default when GPU is connected to CPU */
		con->poison_supported = true;
	} else if (adev->df.funcs &&
	    adev->df.funcs->query_ras_poison_mode &&
	    adev->umc.ras &&
	    adev->umc.ras->query_ras_poison_mode) {
		df_poison =
			adev->df.funcs->query_ras_poison_mode(adev);
		umc_poison =
			adev->umc.ras->query_ras_poison_mode(adev);

		/* Only poison is set in both DF and UMC, we can support it */
		if (df_poison && umc_poison)
			con->poison_supported = true;
		else if (df_poison != umc_poison)
			dev_warn(adev->dev,
				"Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
				df_poison, umc_poison);
	}
}

/*
 * check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initializtion and
 * forbid some ras operations from IP.
 * if software itself, say boot parameter, limit the ras ability. We still
 * need allow IP do some limited operations, like disable. In such case,
 * we have to initialize ras as normal. but need check if operation is
 * allowed or not in each function.
 */
static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
	adev->ras_hw_enabled = adev->ras_enabled = 0;

	if (!amdgpu_ras_asic_supported(adev))
		return;

	/* query ras capability from psp */
	if (amdgpu_psp_get_ras_capability(&adev->psp))
		goto init_ras_enabled_flag;

	/* query ras capablity from bios */
	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
		amdgpu_ras_query_ras_capablity_from_vbios(adev);
	} else {
		/* driver only manages a few IP blocks RAS feature
		 * when GPU is connected cpu through XGMI */
@@ -2878,8 +2924,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
					   1 << AMDGPU_RAS_BLOCK__MMHUB);
	}

	/* apply asic specific settings (vega20 only for now) */
	amdgpu_ras_get_quirks(adev);

	/* query poison mode from umc/df ip callback */
	amdgpu_ras_query_poison_mode(adev);

init_ras_enabled_flag:
	/* hw_supported needs to be aligned with RAS block mask. */
	adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;

@@ -2915,39 +2966,6 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
	pm_runtime_put_autosuspend(dev->dev);
}

static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	bool df_poison, umc_poison;

	/* poison setting is useless on SRIOV guest */
	if (amdgpu_sriov_vf(adev) || !con)
		return;

	/* Init poison supported flag, the default value is false */
	if (adev->gmc.xgmi.connected_to_cpu ||
	    adev->gmc.is_app_apu) {
		/* enabled by default when GPU is connected to CPU */
		con->poison_supported = true;
	} else if (adev->df.funcs &&
	    adev->df.funcs->query_ras_poison_mode &&
	    adev->umc.ras &&
	    adev->umc.ras->query_ras_poison_mode) {
		df_poison =
			adev->df.funcs->query_ras_poison_mode(adev);
		umc_poison =
			adev->umc.ras->query_ras_poison_mode(adev);

		/* Only poison is set in both DF and UMC, we can support it */
		if (df_poison && umc_poison)
			con->poison_supported = true;
		else if (df_poison != umc_poison)
			dev_warn(adev->dev,
				"Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
				df_poison, umc_poison);
	}
}

static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
{
	return  amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 |
@@ -3052,8 +3070,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
			goto release_con;
	}

	amdgpu_ras_query_poison_mode(adev);

	/* Packed socket_id to ras feature mask bits[31:29] */
	if (adev->smuio.funcs &&
	    adev->smuio.funcs->get_socket_id)