Commit 91689b5a authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher
Browse files

drm/amdkfd: Update CWSR area calculations for GFX 12.1



Update the SGPR, VGPR, HWREG size and number of waves supported
for GFX 12.1 CWSR memory limits. The CU calculation changed in
topology, as a result, the values need to be updated.

Signed-off-by: default avatarMukul Joshi <mukul.joshi@amd.com>
Reviewed-by: default avatarFeifei Xu <Feifei.Xu@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent db9ca58e
Loading
Loading
Loading
Loading
+54 −9
Original line number Diff line number Diff line
@@ -392,12 +392,20 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
	return 0;
}

#define SGPR_SIZE_PER_CU	0x4000
#define LDS_SIZE_PER_CU		0x10000
#define HWREG_SIZE_PER_CU	0x1000
#define DEBUGGER_BYTES_ALIGN	64
#define DEBUGGER_BYTES_PER_WAVE	32

static u32 kfd_get_sgpr_size_per_cu(u32 gfxv)
{
	u32 sgpr_size = 0x4000;

	if (gfxv == 120500 ||
	    gfxv == 120501)
		sgpr_size = 0x8000;

	return sgpr_size;
}

static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
{
	u32 vgpr_size = 0x40000;
@@ -413,14 +421,53 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
		 gfxv == 120000 ||		/* GFX_VERSION_GFX1200 */
		 gfxv == 120001)		/* GFX_VERSION_GFX1201 */
		vgpr_size = 0x60000;
	else if (gfxv == 120500 ||		/* GFX_VERSION_GFX1250 */
		 gfxv == 120501)		/* GFX_VERSION_GFX1251 */
		vgpr_size = 0x80000;

	return vgpr_size;
}

static u32 kfd_get_hwreg_size_per_cu(u32 gfxv)
{
	u32 hwreg_size = 0x1000;

	if (gfxv == 120500 || gfxv == 120501)
		hwreg_size = 0x8000;

	return hwreg_size;
}

static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props)
{
	u32 lds_size = 0x10000;

	if (gfxv == 90500 || gfxv == 120500 || gfxv == 120501)
		lds_size = props->lds_size_in_kb << 10;

	return lds_size;
}

static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num)
{
	u32 wave_num = 0;

	if (gfxv < 100100)
		wave_num = min(cu_num * 40,
				props->array_count / props->simd_arrays_per_engine * 512);
	else if (gfxv < 120500)
		wave_num = cu_num * 32;
	else if (gfxv <= 120501)
		wave_num = cu_num * 64;

	WARN_ON(wave_num == 0);

	return wave_num;
}

#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props)	\
	(kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
	 (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
	 HWREG_SIZE_PER_CU)
	(kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\
	 kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv))

#define CNTL_STACK_BYTES_PER_WAVE(gfxv)	\
	((gfxv) >= 100100 ? 12 : 8)	/* GFX_VERSION_NAVI10*/
@@ -440,9 +487,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
		return;

	cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
	wave_num = (gfxv < 100100) ?	/* GFX_VERSION_NAVI10 */
		    min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
		    : cu_num * 32;
	wave_num = get_num_waves(props, gfxv, cu_num);

	wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
	ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;