Commit c3abed53 authored by Shane Xiao, committed by Alex Deucher
Browse files

drm/amdkfd: Add rec SDMA engines support with limited XGMI



This patch adds recommended SDMA engine support for devices with a
limited number of XGMI SDMA engines. This optimization helps improve
overall performance for device-to-device copies.

v2: Fix formatting issues and update data types

Signed-off-by: Shane Xiao <shane.xiao@amd.com>
Suggested-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 083a0c8d
Loading
Loading
Loading
Loading
+24 −17
Original line number Diff line number Diff line
@@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
{
	struct kfd_node *gpu = outbound_link->gpu;
	struct amdgpu_device *adev = gpu->adev;
	int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
	unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
	unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
	unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
	uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
	uint32_t xgmi_sdma_eng_id_mask =
			((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;

	bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
		adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
		kfd_get_num_xgmi_sdma_engines(gpu) >= 14 &&
		(!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8);
		num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
		num_xgmi_nodes == 8);

	if (support_rec_eng) {
		int src_socket_id = adev->gmc.xgmi.physical_node_id;
		int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
		unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;

		outbound_link->rec_sdma_eng_id_mask =
			1 << rec_sdma_eng_map[src_socket_id][dst_socket_id];
			1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
		inbound_link->rec_sdma_eng_id_mask =
			1 << rec_sdma_eng_map[dst_socket_id][src_socket_id];
	} else {
		int num_sdma_eng = kfd_get_num_sdma_engines(gpu);
		int i, eng_offset = 0;
			1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);

		if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
		    kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) {
			eng_offset = num_sdma_eng;
			num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu);
		}
		/* If recommended engine is out of range, need to reset the mask */
		if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
			outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
		if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
			inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;

		for (i = 0; i < num_sdma_eng; i++) {
			outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
			inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
		}
	} else {
		uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
				num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
				sdma_eng_id_mask;

		outbound_link->rec_sdma_eng_id_mask = engine_mask;
		inbound_link->rec_sdma_eng_id_mask = engine_mask;
	}
}