Commit 07009df6 authored by Ellen Pan, committed by Alex Deucher
Browse files

drm/amdgpu: Introduce SRIOV critical regions v2 during VF init



    1. Introduced amdgpu_virt_init_critical_region during VF init.
     - VFs use init_data_header_offset and init_data_header_size_kb
            transmitted via the PF2VF mailbox to locate the init data
            header in VRAM, read the critical regions' offsets/sizes from
            it, and save them to adev->virt.crit_regn and
            adev->virt.crit_regn_tbl.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6d2191d2
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -2782,6 +2782,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;

		r = amdgpu_virt_init_critical_region(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
+174 −0
Original line number Diff line number Diff line
@@ -44,6 +44,18 @@
		vf2pf_info->ucode_info[ucode].version = ver; \
	} while (0)

/*
 * Register read with RREG32() in amdgpu_virt_init_critical_region() to obtain
 * the VRAM size. The value read is shifted left by 20 before being used as a
 * byte count (presumably the register reports the size in MB - confirm).
 */
#define mmRCC_CONFIG_MEMSIZE    0xde3

/*
 * Printable names of the dynamic critical region tables, used when reporting
 * an invalid table size. The array is indexed by table ID (for example
 * AMD_SRIOV_MSG_IPD_TABLE_ID), so the order of the strings must stay in sync
 * with the table ID enum.
 */
const char *amdgpu_virt_dynamic_crit_table_name[] = {
	"IP DISCOVERY",
	"VBIOS IMG",
	"RAS TELEMETRY",
	"DATA EXCHANGE",
	"BAD PAGE INFO",
	"INIT HEADER",
	"LAST",
};

bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
	/* By now all MMIO pages except mailbox are blocked */
@@ -843,6 +855,168 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
	adev->virt.ras.cper_rptr = 0;
}

/*
 * Compute the checksum of the bytes in [buf_start, buf_end).
 *
 * All bytes are added into a 32-bit accumulator, the accumulator is
 * subtracted from 0xffffffff, and the result is truncated to its low 8 bits
 * by the return type.
 *
 * Returns 0 when the range is empty or inverted (buf_start >= buf_end).
 */
static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
{
	uint32_t acc = 0;
	uint8_t *cur;

	/* Nothing to sum for an empty or reversed range. */
	if (buf_end <= buf_start)
		return 0;

	cur = buf_start;
	while (cur < buf_end) {
		acc += *cur;
		cur++;
	}

	/* Implicit narrowing to uint8_t keeps only the low byte. */
	return 0xffffffff - acc;
}

/**
 * amdgpu_virt_init_critical_region - parse the SR-IOV v2 init data header
 * @adev: amdgpu device. adev->virt.init_data_header supplies the header
 *        offset and size; the parsed layout is stored back into adev->virt.
 *
 * Does nothing unless adev->virt.req_init_data_ver is GPU_CRIT_REGION_V2.
 * Otherwise a struct amd_sriov_msg_init_data_header is fetched from VRAM at
 * the init data header offset, its signature and checksum are validated, and
 * the offset/size of the whole critical region and of every table flagged in
 * valid_tables are recorded in adev->virt.crit_regn and
 * adev->virt.crit_regn_tbl[]. On success
 * adev->virt.is_dynamic_crit_regn_enabled is set to true.
 *
 * Return: 0 on success or when v2 is not in use, -ENOMEM if the temporary
 * header buffer cannot be allocated, -EINVAL on any validation failure.
 */
int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
{
	struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
	/*
	 * Offset and size are kept in 64 bits: both size_kb << 10 and
	 * offset + size can exceed 32 bits, and a wrapped value would slip
	 * past the VRAM bounds check below.
	 */
	uint64_t init_hdr_offset = adev->virt.init_data_header.offset;
	uint64_t init_hdr_size = (uint64_t)adev->virt.init_data_header.size_kb << 10;
	uint64_t hdr_span;
	uint64_t vram_size;
	int r = 0;
	uint8_t checksum = 0;

	/* Skip below init if critical region version != v2 */
	if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
		return 0;

	/* A zero or all-ones read is treated as an unusable VRAM size. */
	vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
	if (!vram_size || vram_size == U32_MAX)
		return -EINVAL;
	vram_size <<= 20;

	/*
	 * The range that must lie inside VRAM is the advertised header size,
	 * but never less than the number of bytes actually fetched below.
	 */
	hdr_span = init_hdr_size;
	if (hdr_span < sizeof(*init_data_hdr))
		hdr_span = sizeof(*init_data_hdr);

	/* Subtraction form: cannot wrap whatever the offset value is. */
	if (init_hdr_offset > vram_size ||
	    hdr_span > vram_size - init_hdr_offset) {
		dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
		return -EINVAL;
	}

	/* Allocate for init_data_hdr */
	init_data_hdr = kzalloc(sizeof(*init_data_hdr), GFP_KERNEL);
	if (!init_data_hdr)
		return -ENOMEM;

	/* Last argument false: the buffer is filled from VRAM, not written to it. */
	amdgpu_device_vram_access(adev, init_hdr_offset, (uint32_t *)init_data_hdr,
					sizeof(*init_data_hdr), false);

	/* Table validation */
	if (strncmp(init_data_hdr->signature,
				AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
				AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
		dev_err(adev->dev, "Invalid init data signature: %.4s\n",
			init_data_hdr->signature);
		r = -EINVAL;
		goto out;
	}

	/*
	 * The checksum covers everything from initdata_offset to the end of
	 * the header. The computed value is 8 bits wide while the header
	 * field is 32 bits, so they only match when the upper 24 bits of the
	 * field are zero.
	 */
	checksum = amdgpu_virt_crit_region_calc_checksum(
			(uint8_t *)&init_data_hdr->initdata_offset,
			(uint8_t *)init_data_hdr + sizeof(*init_data_hdr));
	if (checksum != init_data_hdr->checksum) {
		dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
				checksum, init_data_hdr->checksum);
		r = -EINVAL;
		goto out;
	}

	/* Start from a clean layout before recording the new one. */
	memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
	memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));

	adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
	adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;

	/* Validation and initialization for each table entry */
	if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
		/*
		 * NOTE(review): the size here is in KB; confirm that
		 * DISCOVERY_TMR_SIZE is expressed in the same unit.
		 */
		if (!init_data_hdr->ip_discovery_size_in_kb ||
				init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
			dev_err(adev->dev, "Invalid %s size: 0x%x\n",
				amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
				init_data_hdr->ip_discovery_size_in_kb);
			r = -EINVAL;
			goto out;
		}

		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
			init_data_hdr->ip_discovery_offset;
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
			init_data_hdr->ip_discovery_size_in_kb;
	}

	if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
		if (!init_data_hdr->vbios_img_size_in_kb) {
			dev_err(adev->dev, "Invalid %s size: 0x%x\n",
				amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
				init_data_hdr->vbios_img_size_in_kb);
			r = -EINVAL;
			goto out;
		}

		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
			init_data_hdr->vbios_img_offset;
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
			init_data_hdr->vbios_img_size_in_kb;
	}

	if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
		if (!init_data_hdr->ras_tele_info_size_in_kb) {
			dev_err(adev->dev, "Invalid %s size: 0x%x\n",
				amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
				init_data_hdr->ras_tele_info_size_in_kb);
			r = -EINVAL;
			goto out;
		}

		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
			init_data_hdr->ras_tele_info_offset;
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
			init_data_hdr->ras_tele_info_size_in_kb;
	}

	if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
		if (!init_data_hdr->dataexchange_size_in_kb) {
			dev_err(adev->dev, "Invalid %s size: 0x%x\n",
				amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
				init_data_hdr->dataexchange_size_in_kb);
			r = -EINVAL;
			goto out;
		}

		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
			init_data_hdr->dataexchange_offset;
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
			init_data_hdr->dataexchange_size_in_kb;
	}

	if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
		if (!init_data_hdr->bad_page_size_in_kb) {
			dev_err(adev->dev, "Invalid %s size: 0x%x\n",
				amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
				init_data_hdr->bad_page_size_in_kb);
			r = -EINVAL;
			goto out;
		}

		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
			init_data_hdr->bad_page_info_offset;
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
			init_data_hdr->bad_page_size_in_kb;
	}

	adev->virt.is_dynamic_crit_regn_enabled = true;

out:
	/* The header copy is only needed while parsing. */
	kfree(init_data_hdr);

	return r;
}

void amdgpu_virt_init(struct amdgpu_device *adev)
{
	bool is_sriov = false;
+11 −0
Original line number Diff line number Diff line
@@ -54,6 +54,12 @@

#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2

/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN   4

#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id)))

enum amdgpu_sriov_vf_mode {
	SRIOV_VF_MODE_BARE_METAL = 0,
	SRIOV_VF_MODE_ONE_VF,
@@ -296,6 +302,9 @@ struct amdgpu_virt {

	/* dynamic(v2) critical regions */
	struct amdgpu_virt_region init_data_header;
	struct amdgpu_virt_region crit_regn;
	struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
	bool is_dynamic_crit_regn_enabled;

	/* vf2pf message */
	struct delayed_work vf2pf_work;
@@ -432,6 +441,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_init(struct amdgpu_device *adev);

/*
 * Parse the SR-IOV v2 init data header from VRAM and record the critical
 * region layout in adev->virt. Returns 0 on success or when the v2 critical
 * region is not in use, -ENOMEM or -EINVAL on failure.
 */
int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);

bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
+31 −0
Original line number Diff line number Diff line
@@ -71,6 +71,37 @@ enum amd_sriov_crit_region_version {
	GPU_CRIT_REGION_V2 = 2,
};

/*
 * Table IDs of the v2 critical region layout, in order of allocation.
 * Each ID is also the bit position of the table in the init data header's
 * valid_tables mask, so the numeric values are spelled out explicitly.
 */
enum amd_sriov_msg_table_id_enum {
	AMD_SRIOV_MSG_IPD_TABLE_ID           = 0,	/* IP discovery */
	AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID     = 1,	/* VBIOS image */
	AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID = 2,	/* RAS telemetry */
	AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID  = 3,	/* data exchange */
	AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID = 4,	/* bad page info */
	AMD_SRIOV_MSG_INITD_H_TABLE_ID       = 5,	/* init data header */
	AMD_SRIOV_MSG_MAX_TABLE_ID,			/* number of table IDs */
};

/*
 * Header describing the v2 (dynamic) critical region layout. The VF reads it
 * from VRAM in amdgpu_virt_init_critical_region(). Field order and sizes
 * define the in-memory layout and must not be changed.
 *
 * Every *_offset / *_size_in_kb pair describes one table; a pair is only
 * consumed when the matching table ID bit is set in valid_tables.
 */
struct amd_sriov_msg_init_data_header {
	char     signature[4];  /* "INDA", not NUL-terminated */
	uint32_t version;
	/*
	 * Compared against an 8-bit checksum computed over the bytes from
	 * initdata_offset to the end of this struct.
	 */
	uint32_t checksum;
	uint32_t initdata_offset; /* 0 (presumably the typical value - confirm) */
	uint32_t initdata_size_in_kb; /* 5MB (presumably the typical size - confirm) */
	uint32_t valid_tables; /* bitmask: bit N set => table with ID N is present */
	uint32_t vbios_img_offset;
	uint32_t vbios_img_size_in_kb;
	uint32_t dataexchange_offset;
	uint32_t dataexchange_size_in_kb;
	uint32_t ras_tele_info_offset;
	uint32_t ras_tele_info_size_in_kb;
	uint32_t ip_discovery_offset;
	uint32_t ip_discovery_size_in_kb;
	uint32_t bad_page_info_offset;
	uint32_t bad_page_size_in_kb;
	uint32_t reserved[8];
};

/*
 * PF2VF history log:
 * v1 defined in amdgim