Commit 6ae7ec86 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-fixes-6.19-2025-12-11' of...

Merge tag 'amd-drm-fixes-6.19-2025-12-11' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-fixes-6.19-2025-12-11:

amdgpu:
- SI fix
- DC reduce stack usage
- HDMI fixes
- VCN 4.0.5 fix
- DP MST fix
- DC memory allocation fix

amdkfd:
- SVM fix
- Trap handler fix
- VGPR fixes for GC 11.5

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251211195600.1641924-1-alexander.deucher@amd.com
parents 685f27c1 72e24456
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -1069,7 +1069,9 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
	}

	/* Prepare a TLB flush fence to be attached to PTs */
	if (!params->unlocked) {
	if (!params->unlocked &&
	    /* SI doesn't support pasid or KIQ/MES */
	    params->adev->family > AMDGPU_FAMILY_SI) {
		amdgpu_vm_tlb_fence_create(params->adev, vm, fence);

		/* Makes sure no PD/PT is freed before the flush */
+2 −0
Original line number Diff line number Diff line
@@ -265,6 +265,8 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);

	amdgpu_vcn_sysfs_reset_mask_fini(adev);

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		r = amdgpu_vcn_suspend(adev, i);
		if (r)
+36 −26
Original line number Diff line number Diff line
@@ -3644,14 +3644,18 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
};

static const uint32_t cwsr_trap_gfx12_hex[] = {
	0xbfa00001, 0xbfa002a2,
	0xb0804009, 0xb8f8f804,
	0xbfa00001, 0xbfa002b2,
	0xb0804009, 0xb8eef81a,
	0xbf880000, 0xb980081a,
	0x00000000, 0xb8f8f804,
	0x9177ff77, 0x0c000000,
	0x846e9a6e, 0x8c776e77,
	0x9178ff78, 0x00008c00,
	0xb8fbf811, 0x8b6eff78,
	0x00004000, 0xbfa10008,
	0x8b6eff7b, 0x00000080,
	0xbfa20018, 0x8b6ea07b,
	0xbfa20042, 0xbf830010,
	0xbfa2004a, 0xbf830010,
	0xb8fbf811, 0xbfa0fffb,
	0x8b6eff7b, 0x00000bd0,
	0xbfa20010, 0xb8eef812,
@@ -3662,28 +3666,32 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
	0xf0000000, 0xbfa20005,
	0x8b6fff6f, 0x00000200,
	0xbfa20002, 0x8b6ea07b,
	0xbfa2002c, 0xbefa4d82,
	0xbfa20034, 0xbefa4d82,
	0xbf8a0000, 0x84fa887a,
	0xbf0d8f7b, 0xbfa10002,
	0x8c7bff7b, 0xffff0000,
	0xf4601bbd, 0xf8000010,
	0xbf8a0000, 0x846e976e,
	0x9177ff77, 0x00800000,
	0x8c776e77, 0xf4603bbd,
	0xf8000000, 0xbf8a0000,
	0xf4603ebd, 0xf8000008,
	0xbf8a0000, 0x8bee6e6e,
	0xbfa10001, 0xbe80486e,
	0x8b6eff6d, 0xf0000000,
	0xbfa20009, 0xb8eef811,
	0x8b6eff6e, 0x00000080,
	0xbfa20007, 0x8c78ff78,
	0x00004000, 0x80ec886c,
	0x82ed806d, 0xbfa00002,
	0x806c846c, 0x826d806d,
	0x8b6dff6d, 0x0000ffff,
	0x8bfe7e7e, 0x8bea6a6a,
	0x85788978, 0xb9783244,
	0x8b6eff77, 0x0c000000,
	0x916dff6d, 0x0c000000,
	0x8c6d6e6d, 0xf4601bbd,
	0xf8000010, 0xbf8a0000,
	0x846e976e, 0x9177ff77,
	0x00800000, 0x8c776e77,
	0xf4603bbd, 0xf8000000,
	0xbf8a0000, 0xf4603ebd,
	0xf8000008, 0xbf8a0000,
	0x8bee6e6e, 0xbfa10001,
	0xbe80486e, 0x8b6eff6d,
	0xf0000000, 0xbfa20009,
	0xb8eef811, 0x8b6eff6e,
	0x00000080, 0xbfa20007,
	0x8c78ff78, 0x00004000,
	0x80ec886c, 0x82ed806d,
	0xbfa00002, 0x806c846c,
	0x826d806d, 0x8b6dff6d,
	0x0000ffff, 0x8bfe7e7e,
	0x8bea6a6a, 0x85788978,
	0x936eff77, 0x0002001a,
	0xb96ef81a, 0xb9783244,
	0xbe804a6c, 0xb8faf802,
	0xbf0d987a, 0xbfa10001,
	0xbfb00000, 0x8b6dff6d,
@@ -3981,7 +3989,7 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
	0x008ce800, 0x00000000,
	0x807d817d, 0x8070ff70,
	0x00000080, 0xbf0a7b7d,
	0xbfa2fff7, 0xbfa0016e,
	0xbfa2fff7, 0xbfa00171,
	0xbef4007e, 0x8b75ff7f,
	0x0000ffff, 0x8c75ff75,
	0x00040000, 0xbef60080,
@@ -4163,12 +4171,14 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
	0xf8000074, 0xbf8a0000,
	0x8b6dff6d, 0x0000ffff,
	0x8bfe7e7e, 0x8bea6a6a,
	0xb97af804, 0xbe804ec2,
	0xbf94fffe, 0xbe804a6c,
	0x936eff77, 0x0002001a,
	0xb96ef81a, 0xb97af804,
	0xbe804ec2, 0xbf94fffe,
	0xbfb10000, 0xbf9f0000,
	0xbe804a6c, 0xbe804ec2,
	0xbf94fffe, 0xbfb10000,
	0xbf9f0000, 0xbf9f0000,
	0xbf9f0000, 0xbf9f0000,
	0xbf9f0000, 0x00000000,
};

static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+37 −0
Original line number Diff line number Diff line
@@ -78,9 +78,16 @@ var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL
var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE	= SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT	= SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE	= 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT

var SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT		= 0
var SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE		= 2

var BARRIER_STATE_SIGNAL_OFFSET			= 16
var BARRIER_STATE_VALID_OFFSET			= 0

var TTMP11_SCHED_MODE_SHIFT			= 26
var TTMP11_SCHED_MODE_SIZE			= 2
var TTMP11_SCHED_MODE_MASK			= 0xC000000
var TTMP11_DEBUG_TRAP_ENABLED_SHIFT		= 23
var TTMP11_DEBUG_TRAP_ENABLED_MASK		= 0x800000

@@ -160,8 +167,19 @@ L_JUMP_TO_RESTORE:
	s_branch	L_RESTORE

L_SKIP_RESTORE:
	// Assume most relaxed scheduling mode is set. Save and revert to normal mode.
	s_getreg_b32	ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE)
	s_wait_alu	0
	s_setreg_imm32_b32	hwreg(HW_REG_WAVE_SCHED_MODE, \
		SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0

	s_getreg_b32	s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV)	//save STATUS since we will change SCC

	// Save SCHED_MODE[1:0] into ttmp11[27:26].
	s_andn2_b32	ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK
	s_lshl_b32	ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT
	s_or_b32	ttmp11, ttmp11, ttmp2

	// Clear SPI_PRIO: do not save with elevated priority.
	// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
	s_andn2_b32	s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
@@ -238,6 +256,13 @@ L_FETCH_2ND_TRAP:
	s_cbranch_scc0	L_NO_SIGN_EXTEND_TMA
	s_or_b32	ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
#if ASIC_FAMILY == CHIP_GFX12
	// Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI).
	// The second-level trap will restore from ttmp1 for backwards compatibility.
	s_and_b32	ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK
	s_andn2_b32	ttmp1, ttmp1, TTMP11_SCHED_MODE_MASK
	s_or_b32	ttmp1, ttmp1, ttmp2
#endif

	s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS		// debug trap enabled flag
	s_wait_idle
@@ -287,6 +312,10 @@ L_EXIT_TRAP:
	// STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
	// Only restore fields which the trap handler changes.
	s_lshr_b32	s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT

	// Assume relaxed scheduling mode after this point.
	restore_sched_mode(ttmp2)

	s_setreg_b32	hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
		SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv

@@ -1043,6 +1072,9 @@ L_SKIP_BARRIER_RESTORE:
	s_and_b64	exec, exec, exec					// Restore STATUS.EXECZ, not writable by s_setreg_b32
	s_and_b64	vcc, vcc, vcc						// Restore STATUS.VCCZ, not writable by s_setreg_b32

	// Assume relaxed scheduling mode after this point.
	restore_sched_mode(s_restore_tmp)

	s_setreg_b32	hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv	// SCC is included, which is changed by previous salu

	// Make barrier and LDS state visible to all waves in the group.
@@ -1134,3 +1166,8 @@ function valu_sgpr_hazard
	end
#endif
end

// Restore the wave's scheduling mode from the copy kept in ttmp11
// (field at TTMP11_SCHED_MODE_SHIFT, TTMP11_SCHED_MODE_SIZE bits wide).
//
// s_tmp: scratch scalar register; overwritten with the extracted field.
//
// NOTE(review): s_bfe_u32 is a scalar ALU op and presumably modifies SCC;
// the visible call sites invoke this before they write STATE_PRIV - confirm
// that ordering is kept for any new caller.
function restore_sched_mode(s_tmp)
	// Extract the saved SCHED_MODE bits from ttmp11. The operand packs the
	// field offset in the low bits and the field width starting at bit 16.
	s_bfe_u32	s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10))
	// Write the extracted value to the SCHED_MODE hardware register.
	s_setreg_b32	hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp
end
+1 −0
Original line number Diff line number Diff line
@@ -409,6 +409,7 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
		vgpr_size = 0x80000;
	else if (gfxv == 110000 ||		/* GFX_VERSION_PLUM_BONITO */
		 gfxv == 110001 ||		/* GFX_VERSION_WHEAT_NAS */
		 gfxv == 110501 ||		/* GFX_VERSION_GFX1151 */
		 gfxv == 120000 ||		/* GFX_VERSION_GFX1200 */
		 gfxv == 120001)		/* GFX_VERSION_GFX1201 */
		vgpr_size = 0x60000;
Loading