Commit 60c58d72, authored by Victor Skvortsov, committed by Alex Deucher
Browse files

drm/amdgpu: Update SRIOV Exchange Headers for RAS Telemetry Support



The SRIOV PF/VF data exchange region is extended by 64KB to hold VF RAS telemetry data.
Add host RAS telemetry enable capability bitfields.
Add a new VF message, REQ_RAS_ERROR_COUNT; the host's response data is populated
in the RAS telemetry region.

Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent acbbbd23
Loading
Loading
Loading
Loading
+112 −19
Original line number Diff line number Diff line
@@ -28,17 +28,21 @@
/*
 * Base sizes and offsets of the PF/VF exchange regions.
 * Every value carrying the _KB suffix is expressed in kilobytes.
 */
/* The VBIOS image occupies the first 64KB. */
#define AMD_SRIOV_MSG_VBIOS_SIZE_KB	     64
/* The data exchange area starts immediately after the VBIOS image. */
#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
/* NOTE(review): 4KB matches PF2VF (1KB) + VF2PF (1KB) + bad page (2KB); the RAS telemetry region does not appear to be counted here - confirm. */
#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB   4

#define AMD_SRIOV_MSG_TMR_OFFSET_KB	     2048
/* Size of the bad page region. */
#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB	     2
/* Size of the RAS telemetry region. */
#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB	     64
/*
 * layout
 * 0           64KB        65KB        66KB
 * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | ...
 * |   64KB    |   1KB     |   1KB     |
 * 0           64KB        65KB        66KB           68KB                   132KB
 * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
 * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
 */

/*
 * Offsets (in KB) of the individual exchange regions. Each offset is derived
 * from the previous region's offset plus that region's size, so the regions
 * are laid out back to back.
 */
/* Size of one message block; used for both the PF2VF and the VF2PF block. */
#define AMD_SRIOV_MSG_SIZE_KB                   1
/* PF2VF block sits at the start of the data exchange area. */
#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB           AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
/* VF2PF block follows the PF2VF block. */
#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB           (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
/* Bad page region follows the VF2PF block. */
#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB        (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
/* RAS telemetry region follows the bad page region. */
#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB   (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)

/*
 * PF2VF history log:
@@ -94,8 +98,10 @@ union amd_sriov_msg_feature_flags {
		uint32_t reg_indirect_acc	: 1;
		uint32_t av1_support		: 1;
		uint32_t vcn_rb_decouple 	: 1;
		uint32_t mes_info_enable   : 1;
		uint32_t reserved          : 23;
		uint32_t mes_info_dump_enable	: 1;
		uint32_t ras_caps		: 1;
		uint32_t ras_telemetry		: 1;
		uint32_t reserved		: 21;
	} flags;
	uint32_t all;
};
@@ -110,6 +116,33 @@ union amd_sriov_reg_access_flags {
	uint32_t all;
};

/*
 * RAS capability mask.
 *
 * 'bits' exposes one single-bit flag per block plus a poison propagation
 * mode flag; 'all' overlays the same storage as one 64-bit word so the whole
 * mask can be read, written or cleared at once.
 *
 * The block_* flags are declared in the same order as the values of
 * enum amd_sriov_ras_telemetry_gpu_block (UMC first, MPIO last).
 *
 * The declared widths add up to 64 bits (19 block flags + 1 mode flag +
 * 44 reserved). Do not reorder or resize members: the declaration order
 * determines the bit layout.
 */
union amd_sriov_ras_caps {
	struct {
		/* per-block flags */
		uint64_t block_umc : 1;
		uint64_t block_sdma : 1;
		uint64_t block_gfx : 1;
		uint64_t block_mmhub : 1;
		uint64_t block_athub : 1;
		uint64_t block_pcie_bif : 1;
		uint64_t block_hdp : 1;
		uint64_t block_xgmi_wafl : 1;
		uint64_t block_df : 1;
		uint64_t block_smn : 1;
		uint64_t block_sem : 1;
		uint64_t block_mp0 : 1;
		uint64_t block_mp1 : 1;
		uint64_t block_fuse : 1;
		uint64_t block_mca : 1;
		uint64_t block_vcn : 1;
		uint64_t block_jpeg : 1;
		uint64_t block_ih : 1;
		uint64_t block_mpio : 1;

		/* not tied to a single block */
		uint64_t poison_propogation_mode : 1;

		/* pads the flag word out to 64 bits */
		uint64_t reserved : 44;
	} bits;
	uint64_t all;
};

union amd_sriov_msg_os_info {
	struct {
		uint32_t windows  : 1;
@@ -158,7 +191,7 @@ struct amd_sriov_msg_pf2vf_info_header {
	uint32_t reserved[2];
};

#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49)
#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
struct amd_sriov_msg_pf2vf_info {
	/* header contains size and version */
	struct amd_sriov_msg_pf2vf_info_header header;
@@ -211,6 +244,12 @@ struct amd_sriov_msg_pf2vf_info {
	uint32_t pcie_atomic_ops_support_flags;
	/* Portion of GPU memory occupied by VF.  MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
	uint32_t gpu_capacity;
	/* vf bdf on host pci tree for debug only */
	uint32_t bdf_on_host;
	uint32_t more_bp;	//Reserved for future use.
	union amd_sriov_ras_caps ras_en_caps;
	union amd_sriov_ras_caps ras_telemetry_en_caps;

	/* reserved */
	uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
} __packed;
@@ -283,8 +322,12 @@ enum amd_sriov_mailbox_request_message {
	MB_REQ_MSG_REL_GPU_FINI_ACCESS,
	MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
	MB_REQ_MSG_REQ_GPU_INIT_DATA,
	MB_REQ_MSG_PSP_VF_CMD_RELAY,

	MB_REQ_MSG_LOG_VF_ERROR = 200,
	MB_REQ_MSG_READY_TO_RESET = 201,
	MB_REQ_MSG_RAS_POISON = 202,
	MB_REQ_RAS_ERROR_COUNT = 203,
};

/* mailbox message send from host to guest  */
@@ -297,10 +340,60 @@ enum amd_sriov_mailbox_response_message {
	MB_RES_MSG_FAIL,
	MB_RES_MSG_QUERY_ALIVE,
	MB_RES_MSG_GPU_INIT_DATA_READY,
	MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,

	MB_RES_MSG_TEXT_MESSAGE = 255
};

/*
 * Identifiers of the blocks that RAS telemetry data is reported for.
 *
 * Values are spelled out explicitly and are contiguous starting at 0, so an
 * identifier can be used directly as an array index.
 * RAS_TELEMETRY_GPU_BLOCK_COUNT is not a block: it is the number of entries
 * and is used to size per-block arrays.
 */
enum amd_sriov_ras_telemetry_gpu_block {
	RAS_TELEMETRY_GPU_BLOCK_UMC = 0,
	RAS_TELEMETRY_GPU_BLOCK_SDMA = 1,
	RAS_TELEMETRY_GPU_BLOCK_GFX = 2,
	RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3,
	RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4,
	RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5,
	RAS_TELEMETRY_GPU_BLOCK_HDP = 6,
	RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
	RAS_TELEMETRY_GPU_BLOCK_DF = 8,
	RAS_TELEMETRY_GPU_BLOCK_SMN = 9,
	RAS_TELEMETRY_GPU_BLOCK_SEM = 10,
	RAS_TELEMETRY_GPU_BLOCK_MP0 = 11,
	RAS_TELEMETRY_GPU_BLOCK_MP1 = 12,
	RAS_TELEMETRY_GPU_BLOCK_FUSE = 13,
	RAS_TELEMETRY_GPU_BLOCK_MCA = 14,
	RAS_TELEMETRY_GPU_BLOCK_VCN = 15,
	RAS_TELEMETRY_GPU_BLOCK_JPEG = 16,
	RAS_TELEMETRY_GPU_BLOCK_IH = 17,
	RAS_TELEMETRY_GPU_BLOCK_MPIO = 18,

	/* number of blocks above; keep last */
	RAS_TELEMETRY_GPU_BLOCK_COUNT = 19,
};

/*
 * Fixed-size header placed in front of the RAS telemetry payload.
 * Four 32-bit words in total.
 */
struct amd_sriov_ras_telemetry_header {
	/* NOTE(review): the checksum algorithm and the range it covers are not visible here - confirm against the producer */
	uint32_t checksum;
	/* NOTE(review): presumably the number of valid payload bytes - confirm units and whether the header is included */
	uint32_t used_size;
	/* unused words */
	uint32_t reserved[2];
};

/*
 * Error counters, one record per block.
 *
 * block[] has RAS_TELEMETRY_GPU_BLOCK_COUNT entries, so it can be indexed by
 * an enum amd_sriov_ras_telemetry_gpu_block value. Each record is twelve
 * 32-bit words: six counters followed by six reserved words.
 */
struct amd_sriov_ras_telemetry_error_count {
	struct {
		/* NOTE(review): ce/ue/de presumably stand for correctable, uncorrectable and deferred errors - confirm */
		uint32_t ce_count;
		uint32_t ue_count;
		uint32_t de_count;

		/* overflow counters matching the three counters above */
		uint32_t ce_overflow_count;
		uint32_t ue_overflow_count;
		uint32_t de_overflow_count;

		/* unused words */
		uint32_t reserved[6];
	} block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
};

/*
 * RAS telemetry data: a header followed by the payload.
 *
 * The payload is wrapped in a union that currently has a single member,
 * the per-block error counts.
 */
struct amdsriov_ras_telemetry {
	/* checksum / used_size header */
	struct amd_sriov_ras_telemetry_header header;

	/* payload */
	union {
		struct amd_sriov_ras_telemetry_error_count error_count;
	} body;
};

/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
enum amd_sriov_gpu_init_data_version {
	GPU_INIT_DATA_READY_V1 = 1,
+3 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ enum idh_request {
	IDH_LOG_VF_ERROR	= 200,
	IDH_READY_TO_RESET 	= 201,
	IDH_RAS_POISON	= 202,
	IDH_REQ_RAS_ERROR_COUNT = 203,
};

enum idh_event {
@@ -54,6 +55,8 @@ enum idh_event {
	IDH_RAS_POISON_READY,
	IDH_PF_SOFT_FLR_NOTIFICATION,
	IDH_RAS_ERROR_DETECTED,
	IDH_RAS_ERROR_COUNT_READY = 11,

	IDH_TEXT_MESSAGE = 255,
};