Commit bbcad5a8, authored by Jay Cornwall, committed by Alex Deucher
Browse files

drm/amdkfd: gfx12.1 trap handler support for expert scheduling mode



- Leave DEP_MODE unchanged as it is ignored in the trap handler
- Save/restore SCHED_MODE (gfx12.0 saves in ttmp11)

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Cc: Vladimir Indic <vladimir.indic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 29b703d7
Loading
Loading
Loading
Loading
+184 −188
Original line number Diff line number Diff line
@@ -4587,18 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
};

static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
	0xbfa00001, 0xbfa003b4,
	0xb0804009, 0xb8eef81a,
	0xbf880000, 0xb980081a,
	0x00000000, 0xb8f8f804,
	0x9177ff77, 0x0c000000,
	0x846e9a6e, 0x8c776e77,
	0xbfa00001, 0xbfa003ac,
	0xb0804009, 0xb8f8f804,
	0x9178ff78, 0x00008c00,
	0xb8fbf811, 0x8b6eff78,
	0x00004000, 0xbfa10008,
	0x8b6eff7b, 0x00000080,
	0xbfa20018, 0x8b6ea07b,
	0xbfa200d4, 0xbf830010,
	0xbfa200d1, 0xbf830010,
	0xb8fbf811, 0xbfa0fffb,
	0x8b6eff7b, 0x00000bd0,
	0xbfa20010, 0xb8eef812,
@@ -4609,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
	0xf0000000, 0xbfa20005,
	0x8b6fff6f, 0x00000200,
	0xbfa20002, 0x8b6ea07b,
	0xbfa200be, 0x9177ff77,
	0xbfa200bb, 0x9177ff77,
	0x007fc000, 0xb8fa04a1,
	0x847a967a, 0x8c777a77,
	0xb8fa0421, 0x847a957a,
@@ -4702,189 +4698,189 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
	0xb97a0421, 0x857a8e77,
	0xb97a3021, 0x8bfe7e7e,
	0x8bea6a6a, 0x85788978,
	0x936eff77, 0x0002001a,
	0xb96ef81a, 0xb9783244,
	0xbe804a6c, 0xb8faf802,
	0xbf0d987a, 0xbfa10001,
	0xbfb00000, 0x8b6dff6d,
	0x01ffffff, 0xbefa0080,
	0xb97a0151, 0x9177ff77,
	0x007fc000, 0xb8fa04a1,
	0x847a967a, 0x8c777a77,
	0xb8fa0421, 0x847a957a,
	0x8c777a77, 0xb8fa3021,
	0x847a8e7a, 0x8c777a77,
	0xb980f821, 0x00000000,
	0xbf0d847b, 0xbfa20078,
	0xf4003eb6, 0xf8000000,
	0xbfc70000, 0xf4003bb6,
	0xf8000008, 0x8b76ff7a,
	0x80000000, 0xbfa20027,
	0x9376ff7a, 0x00060019,
	0x81f9a376, 0xbf0b8179,
	0xbfa20068, 0x81f9ac76,
	0xbf0b8179, 0xbfa20062,
	0x81f9b776, 0xbf0b8179,
	0xbfa2005f, 0x8b76ff7a,
	0x000001ff, 0xbf06ff76,
	0x000000fe, 0xbfa2005d,
	0xbf06ff76, 0x000000ff,
	0xbfa20057, 0xbf06ff76,
	0x000000fa, 0xbfa20054,
	0x81f9ff76, 0x000000e9,
	0xbf0b8179, 0xbfa20050,
	0x8b76ff7b, 0xffff0000,
	0xbf06ff76, 0xbf860000,
	0xbfa10051, 0x9376ff7b,
	0x0002000e, 0x8b79ff7b,
	0x00003f00, 0x85798679,
	0x8c767976, 0xb9763b01,
	0xbfa00049, 0x8b76ff7a,
	0xfc000000, 0xbf06ff76,
	0xd4000000, 0xbfa20013,
	0xbf06ff76, 0xc8000000,
	0xbfa20027, 0x8b76ff7a,
	0xff000000, 0xbf06ff76,
	0xcf000000, 0xbfa20039,
	0x8b79ff7a, 0xffff0000,
	0xbf06ff79, 0xcc350000,
	0xbfa20037, 0xbf06ff79,
	0xcc3a0000, 0xbfa20034,
	0xbf06ff76, 0xcc000000,
	0xbfa10031, 0x8b76ff7b,
	0x000001ff, 0xbf06ff76,
	0x000000ff, 0xbfa20029,
	0xbf06ff76, 0x000000fa,
	0xbfa20026, 0x81f6ff76,
	0x000000e9, 0xbf0b8176,
	0xbfa20022, 0x8b76ff7b,
	0x0003fe00, 0xbf06ff76,
	0x0001fe00, 0xbfa2001d,
	0x8b76ff7b, 0x07fc0000,
	0xbf06ff76, 0x03fc0000,
	0xbfa20018, 0xbfa00014,
	0x9376ff7a, 0x00040016,
	0x81f68176, 0xbf0b8176,
	0xbfa20012, 0x9376ff7a,
	0x00050011, 0x81f68176,
	0xbf0b8176, 0xbfa2000d,
	0xb9783244, 0xbe804a6c,
	0xb8faf802, 0xbf0d987a,
	0xbfa10001, 0xbfb00000,
	0x8b6dff6d, 0x01ffffff,
	0xbefa0080, 0xb97a0151,
	0x9177ff77, 0x007fc000,
	0xb8fa04a1, 0x847a967a,
	0x8c777a77, 0xb8fa0421,
	0x847a957a, 0x8c777a77,
	0xb8fa3021, 0x847a8e7a,
	0x8c777a77, 0xb980f821,
	0x00000000, 0xbf0d847b,
	0xbfa20078, 0xf4003eb6,
	0xf8000000, 0xbfc70000,
	0xf4003bb6, 0xf8000008,
	0x8b76ff7a, 0x80000000,
	0xbfa20027, 0x9376ff7a,
	0x00060019, 0x81f9a376,
	0xbf0b8179, 0xbfa20068,
	0x81f9ac76, 0xbf0b8179,
	0xbfa20062, 0x81f9b776,
	0xbf0b8179, 0xbfa2005f,
	0x8b76ff7a, 0x000001ff,
	0xbf06ff76, 0x000000fe,
	0xbfa2005d, 0xbf06ff76,
	0x000000ff, 0xbfa20057,
	0xbf06ff76, 0x000000fa,
	0xbfa20054, 0x81f9ff76,
	0x000000e9, 0xbf0b8179,
	0xbfa20050, 0x8b76ff7b,
	0xffff0000, 0xbf06ff76,
	0xbf860000, 0xbfa10051,
	0x9376ff7b, 0x0002000e,
	0x8b79ff7b, 0x00003f00,
	0x85798679, 0x8c767976,
	0xb9763b01, 0xbfa00049,
	0x8b76ff7a, 0xfc000000,
	0xbf06ff76, 0xd4000000,
	0xbfa20013, 0xbf06ff76,
	0xc8000000, 0xbfa20027,
	0x8b76ff7a, 0xff000000,
	0xbf06ff76, 0xcf000000,
	0xbfa20039, 0x8b79ff7a,
	0xffff0000, 0xbf06ff79,
	0xcc350000, 0xbfa20037,
	0xbf06ff79, 0xcc3a0000,
	0xbfa20034, 0xbf06ff76,
	0xcc000000, 0xbfa10031,
	0x8b76ff7b, 0x000001ff,
	0xbf06ff76, 0x000000ff,
	0xbfa20008, 0x8b76ff7b,
	0xbfa20029, 0xbf06ff76,
	0x000000fa, 0xbfa20026,
	0x81f6ff76, 0x000000e9,
	0xbf0b8176, 0xbfa20022,
	0x8b76ff7b, 0x0003fe00,
	0xbf06ff76, 0x0001fe00,
	0xbfa2001d, 0x8b76ff7b,
	0x07fc0000, 0xbf06ff76,
	0x03fc0000, 0xbfa20018,
	0xbfa00014, 0x9376ff7a,
	0x00040016, 0x81f68176,
	0xbf0b8176, 0xbfa20012,
	0x9376ff7a, 0x00050011,
	0x81f68176, 0xbf0b8176,
	0xbfa2000d, 0x8b76ff7a,
	0x000001ff, 0xbf06ff76,
	0x000000ff, 0xbfa20003,
	0xbfc70000, 0xbefb006e,
	0xbfa0ffad, 0xbfc70000,
	0xbefb006f, 0xbfa0ffaa,
	0xbfc70000, 0xbeee007e,
	0xbeef007f, 0xbefe0180,
	0xbefe4d84, 0xbf8a0000,
	0x8b7aff7f, 0x04000000,
	0x847a857a, 0x8c6d7a6d,
	0xb8eff822, 0xb980f822,
	0x00000000, 0xb8fa2b01,
	0x847a997a, 0x8c6d7a6d,
	0xbefa0080, 0xb97a2b01,
	0xbefa007e, 0x8b7bff7f,
	0x01ffffff, 0xbefe00c1,
	0xbeff00c1, 0xee0a407a,
	0x000c0000, 0x00000000,
	0x7e000280, 0xbefe007a,
	0xbeff007b, 0xb8fb0742,
	0x847b997b, 0xb8fa3b05,
	0x807a817a, 0xbf0d997b,
	0xbfa20002, 0x847a897a,
	0xbfa00001, 0x847a8a7a,
	0x000000ff, 0xbfa20008,
	0x8b76ff7b, 0x000001ff,
	0xbf06ff76, 0x000000ff,
	0xbfa20003, 0xbfc70000,
	0xbefb006e, 0xbfa0ffad,
	0xbfc70000, 0xbefb006f,
	0xbfa0ffaa, 0xbfc70000,
	0xbeee007e, 0xbeef007f,
	0xbefe0180, 0xbefe4d84,
	0xbf8a0000, 0x8b7aff7f,
	0x04000000, 0x847a857a,
	0x8c6d7a6d, 0xb8eff822,
	0xb980f822, 0x00000000,
	0xb8fa2b01, 0x847a997a,
	0x8c6d7a6d, 0xbefa0080,
	0xb97a2b01, 0xbefa007e,
	0x8b7bff7f, 0x01ffffff,
	0x807aff7a, 0x000001c0,
	0x807a7e7a, 0x827b807b,
	0xd7610000, 0x00010870,
	0xd7610000, 0x00010a71,
	0xd7610000, 0x00010c72,
	0xd7610000, 0x00010e73,
	0xd7610000, 0x00011074,
	0xd7610000, 0x00011275,
	0xd7610000, 0x00011476,
	0xd7610000, 0x00011677,
	0xd7610000, 0x00011a79,
	0xd7610000, 0x00011c7e,
	0xd7610000, 0x00011e7f,
	0xbefe00ff, 0x00003fff,
	0xbeff0080, 0xee0a407a,
	0x000c0000, 0x00000000,
	0xd760007a, 0x00011d00,
	0xd760007b, 0x00011f00,
	0xbefe00c1, 0xbeff00c1,
	0xee0a407a, 0x000c0000,
	0x00000000, 0x7e000280,
	0xbefe007a, 0xbeff007b,
	0xbef4007e, 0x8b75ff7f,
	0x01ffffff, 0xbef1007d,
	0xb8f30742, 0x84739973,
	0xbefe00c1, 0x857d9973,
	0x8b7d817d, 0xbf06817d,
	0xbfa20002, 0xbeff0080,
	0xbfa00002, 0xbeff00c1,
	0xbfa0000a, 0xee0a4074,
	0x008c0000, 0x00008000,
	0xee0a4074, 0x010c0000,
	0xb8fb0742, 0x847b997b,
	0xb8fa3b05, 0x807a817a,
	0xbf0d997b, 0xbfa20002,
	0x847a897a, 0xbfa00001,
	0x847a8a7a, 0x8b7bff7f,
	0x01ffffff, 0x807aff7a,
	0x000001c0, 0x807a7e7a,
	0x827b807b, 0xd7610000,
	0x00010870, 0xd7610000,
	0x00010a71, 0xd7610000,
	0x00010c72, 0xd7610000,
	0x00010e73, 0xd7610000,
	0x00011074, 0xd7610000,
	0x00011275, 0xd7610000,
	0x00011476, 0xd7610000,
	0x00011677, 0xd7610000,
	0x00011a79, 0xd7610000,
	0x00011c7e, 0xd7610000,
	0x00011e7f, 0xbefe00ff,
	0x00003fff, 0xbeff0080,
	0xee0a407a, 0x000c0000,
	0x00000000, 0xd760007a,
	0x00011d00, 0xd760007b,
	0x00011f00, 0xbefe007a,
	0xbeff007b, 0xbef4007e,
	0x8b75ff7f, 0x01ffffff,
	0xbef1007d, 0xb8f30742,
	0x84739973, 0xbefe00c1,
	0x857d9973, 0x8b7d817d,
	0xbf06817d, 0xbfa20002,
	0xbeff0080, 0xbfa00002,
	0xbeff00c1, 0xbfa0000a,
	0xee0a4074, 0x008c0000,
	0x00008000, 0xee0a4074,
	0x010c0000, 0x00010000,
	0xee0a4074, 0x018c0000,
	0x00018000, 0xbfa00009,
	0xee0a4074, 0x008c0000,
	0x00010000, 0xee0a4074,
	0x018c0000, 0x00018000,
	0xbfa00009, 0xee0a4074,
	0x008c0000, 0x00010000,
	0xee0a4074, 0x010c0000,
	0x00020000, 0xee0a4074,
	0x018c0000, 0x00030000,
	0xb8f03b05, 0x80708170,
	0xbf0d9973, 0xbfa20002,
	0x84708970, 0xbfa00001,
	0x84708a70, 0x8070ff70,
	0x00000200, 0x7e000280,
	0x7e020280, 0x7e040280,
	0xbefd0080, 0xd7610002,
	0x0000fa71, 0x807d817d,
	0xb8faf802, 0xbf0c8b7a,
	0xbfa20003, 0xbe804fc2,
	0xbf94fffe, 0xbfa10001,
	0xbe804ec4, 0xbf94fffc,
	0xbefa4c88, 0xbfc70000,
	0xbf0c807a, 0xbfa20006,
	0x9371ff7a, 0x00070004,
	0x937aff7a, 0x00070010,
	0xbf06717a, 0xbfa2fff6,
	0xb8faf804, 0x8b7aff7a,
	0x0001000c, 0x9178ff78,
	0x0001000c, 0x8c787a78,
	0xd7610002, 0x0000fa6c,
	0x807d817d, 0x917aff6d,
	0x80000000, 0xd7610002,
	0x010c0000, 0x00020000,
	0xee0a4074, 0x018c0000,
	0x00030000, 0xb8f03b05,
	0x80708170, 0xbf0d9973,
	0xbfa20002, 0x84708970,
	0xbfa00001, 0x84708a70,
	0x8070ff70, 0x00000200,
	0x7e000280, 0x7e020280,
	0x7e040280, 0xbefd0080,
	0xd7610002, 0x0000fa71,
	0x807d817d, 0xb8faf802,
	0xbf0c8b7a, 0xbfa20003,
	0xbe804fc2, 0xbf94fffe,
	0xbfa10001, 0xbe804ec4,
	0xbf94fffc, 0xbefa4c88,
	0xbfc70000, 0xbf0c807a,
	0xbfa20006, 0x9371ff7a,
	0x00070004, 0x937aff7a,
	0x00070010, 0xbf06717a,
	0xbfa2fff6, 0xb8faf804,
	0x8b7aff7a, 0x0001000c,
	0x9178ff78, 0x0001000c,
	0x8c787a78, 0xd7610002,
	0x0000fa6c, 0x807d817d,
	0x917aff6d, 0x80000000,
	0xd7610002, 0x0000fa7a,
	0x807d817d, 0xd7610002,
	0x0000fa6e, 0x807d817d,
	0xbefa0080, 0xd7610002,
	0x0000fa7a, 0x807d817d,
	0xd7610002, 0x0000fa6e,
	0x807d817d, 0xbefa0080,
	0xd7610002, 0x0000fa78,
	0x807d817d, 0xb8faf811,
	0xd7610002, 0x0000fa7a,
	0x807d817d, 0xd7610002,
	0x0000fa78, 0x807d817d,
	0xb8faf811, 0xd7610002,
	0x0000fa6f, 0x807d817d,
	0xb8f1f801, 0x937aff6d,
	0x00060019, 0x847a8c7a,
	0x8c717a71, 0xd7610002,
	0x0000fa71, 0x807d817d,
	0xb8f1f814, 0xd7610002,
	0x0000fa71, 0x807d817d,
	0xb8f1f815, 0xd7610002,
	0x0000fa71, 0x807d817d,
	0xb8f1f812, 0xd7610002,
	0x0000fa71, 0x807d817d,
	0xb8f1f813, 0xd7610002,
	0x0000fa71, 0x807d817d,
	0xb8faf802, 0xd7610002,
	0x0000fa7a, 0x807d817d,
	0xd7610002, 0x0000fa6f,
	0x807d817d, 0xb8f1f801,
	0x937aff6d, 0x00060019,
	0x847a8c7a, 0x8c717a71,
	0xd7610002, 0x0000fa71,
	0x807d817d, 0xb8f1f814,
	0xd7610002, 0x0000fa71,
	0x807d817d, 0xb8f1f815,
	0xd7610002, 0x0000fa71,
	0x807d817d, 0xb8f1f812,
	0xd7610002, 0x0000fa71,
	0x807d817d, 0xb8f1f813,
	0xd7610002, 0x0000fa71,
	0x807d817d, 0xb8faf802,
	0xbefa50c1, 0xbfc70000,
	0xd7610002, 0x0000fa7a,
	0x807d817d, 0xbefa50c1,
	0x807d817d, 0xbefa4c88,
	0xbfc70000, 0xd7610002,
	0x0000fa7a, 0x807d817d,
	0xbefa4c88, 0xbfc70000,
	0xd7610002, 0x0000fa7a,
	0x807d817d, 0xbefe00ff,
	0x0000ffff, 0xbeff0080,
	0xb8faf81a, 0xd7610002,
	0x0000fa7a, 0x807d817d,
	0xbefe00c1, 0xbeff0080,
	0x80767074, 0x82778075,
	0xee0a4076, 0x010c0000,
	0x00000000, 0xbefe00c1,
@@ -5061,7 +5057,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
	0x018c0000, 0x00030000,
	0x807d847d, 0x8070ff70,
	0x00000400, 0xbf0a7b7d,
	0xbfa2ffe9, 0xbfa00183,
	0xbfa2ffe9, 0xbfa00184,
	0xbef4007e, 0x8b75ff7f,
	0x01ffffff, 0xbef1007f,
	0xb8f20742, 0x84729972,
@@ -5229,6 +5225,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
	0x856e906e, 0x8b6e6e6e,
	0xbfa10003, 0xbe804ec3,
	0x816ec16e, 0xbfa0fffb,
	0xf4601bbb, 0xf8000040,
	0xbfc70000, 0xb96ef81a,
	0xbefd006f, 0xbefe0070,
	0xbeff0071, 0xb979f822,
	0xb97b2011, 0x857b867b,
@@ -5248,19 +5246,17 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
	0x856e8e77, 0xb96e3021,
	0x8b6dff6d, 0x01ffffff,
	0x8bfe7e7e, 0x8bea6a6a,
	0x936eff77, 0x0002001a,
	0xb96ef81a, 0xb97af804,
	0xb97af804, 0xb8eef802,
	0xbf0c8b6e, 0xbfa20003,
	0xbe804fc2, 0xbf94fffe,
	0xbfa10001, 0xbe804ec4,
	0xbf94fffc, 0x857a897a,
	0xb97a0244, 0xbe804a6c,
	0xb8eef802, 0xbf0c8b6e,
	0xbfa20003, 0xbe804fc2,
	0xbf94fffe, 0xbfa10001,
	0xbe804ec4, 0xbf94fffc,
	0x857a897a, 0xb97a0244,
	0xbe804a6c, 0xb8eef802,
	0xbf0c8b6e, 0xbfa20003,
	0xbe804fc2, 0xbf94fffe,
	0xbfa10001, 0xbe804ec4,
	0xbf94fffc, 0xbfb10000,
	0xbfb10000, 0xbf9f0000,
	0xbf9f0000, 0xbf9f0000,
	0xbf9f0000, 0xbf9f0000,
	0xbf9f0000, 0x00000000,
};
+34 −1
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#define NUM_NAMED_BARRIERS (ASIC_FAMILY == CHIP_GC_12_0_3 ? 0x10 : 0)
#define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
#define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
#define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)

#define SINGLE_STEP_MISSED_WORKAROUND 1	//workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
@@ -110,6 +111,12 @@ var BARRIER_STATE_MEMBER_OFFSET = 4
var BARRIER_STATE_MEMBER_SIZE			= 7
var BARRIER_STATE_VALID_OFFSET			= 0

#if RELAXED_SCHEDULING_IN_TRAP
var TTMP11_SCHED_MODE_SHIFT			= 26
var TTMP11_SCHED_MODE_SIZE			= 2
var TTMP11_SCHED_MODE_MASK			= 0xC000000
#endif

var NAMED_BARRIERS_SR_OFFSET_FROM_HWREG		= 0x80
var S_BARRIER_INIT_MEMBERCNT_MASK		= 0x7F0000
var S_BARRIER_INIT_MEMBERCNT_SHIFT		= 0x10
@@ -222,18 +229,22 @@ L_JUMP_TO_RESTORE:
	s_branch	L_RESTORE

L_SKIP_RESTORE:
#if RELAXED_SCHEDULING_IN_TRAP
	// Assume most relaxed scheduling mode is set. Save and revert to normal mode.
	s_getreg_b32	ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE)
	s_wait_alu	0
	s_setreg_imm32_b32	hwreg(HW_REG_WAVE_SCHED_MODE, \
		SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0
#endif

	s_getreg_b32	s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV)	//save STATUS since we will change SCC

#if RELAXED_SCHEDULING_IN_TRAP
	// Save SCHED_MODE[1:0] into ttmp11[27:26].
	s_andn2_b32	ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK
	s_lshl_b32	ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT
	s_or_b32	ttmp11, ttmp11, ttmp2
#endif

	// Clear SPI_PRIO: do not save with elevated priority.
	// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
@@ -315,7 +326,7 @@ L_FETCH_2ND_TRAP:
	s_cbranch_scc0	L_NO_SIGN_EXTEND_TMA
	s_or_b32	ttmp15, ttmp15, ~ADDRESS_HI32_MASK
L_NO_SIGN_EXTEND_TMA:
#if ASIC_FAMILY == CHIP_GFX12
#if RELAXED_SCHEDULING_IN_TRAP
	// Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI).
	// The second-level trap will restore from ttmp1 for backwards compatibility.
	s_and_b32	ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK
@@ -381,8 +392,10 @@ L_EXIT_TRAP:
	// Only restore fields which the trap handler changes.
	s_lshr_b32	s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT

#if RELAXED_SCHEDULING_IN_TRAP
	// Assume relaxed scheduling mode after this point.
	restore_sched_mode(ttmp2)
#endif

	s_setreg_b32	hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
		SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
@@ -590,8 +603,18 @@ L_SAVE_HWREG:
	write_hwreg_to_v2(s_save_tmp)
#endif

#if ASIC_FAMILY >= CHIP_GC_12_0_3
	s_getreg_b32	s_save_tmp, hwreg(HW_REG_WAVE_SCHED_MODE)
	write_hwreg_to_v2(s_save_tmp)
#endif

#if ! SAVE_TTMPS_IN_SGPR_BLOCK
	// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
	s_mov_b32       exec_lo, 0xFFFF
#else
	// All 128 bytes are available for HWREGs.
	s_mov_b32       exec_lo, 0xFFFFFFFF
#endif
	s_mov_b32	exec_hi, 0x0
	s_add_u32	s_save_addr_lo, s_save_base_addr_lo, s_save_mem_offset
	s_addc_u32	s_save_addr_hi, s_save_base_addr_hi, 0x0
@@ -1154,6 +1177,12 @@ L_SKIP_TRAP_CLUSTER_BARRIER_SIGNAL:
L_SKIP_CLUSTER_BARRIER_RESTORE:
#endif

#if ASIC_FAMILY >= CHIP_GC_12_0_3
	s_load_b32	s_restore_tmp, [s_restore_addr_lo, s_restore_addr_hi], null scope:SCOPE_SYS offset:0x40
	s_wait_kmcnt	0
	s_setreg_b32	hwreg(HW_REG_WAVE_SCHED_MODE), s_restore_tmp
#endif

	s_mov_b32	m0, s_restore_m0
	s_mov_b32	exec_lo, s_restore_exec_lo
	s_mov_b32	exec_hi, s_restore_exec_hi
@@ -1193,8 +1222,10 @@ L_SKIP_CLUSTER_BARRIER_RESTORE:
	s_and_b64	exec, exec, exec					// Restore STATUS.EXECZ, not writable by s_setreg_b32
	s_and_b64	vcc, vcc, vcc						// Restore STATUS.VCCZ, not writable by s_setreg_b32

#if RELAXED_SCHEDULING_IN_TRAP
	// Assume relaxed scheduling mode after this point.
	restore_sched_mode(s_restore_tmp)
#endif

	s_setreg_b32	hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv	// SCC is included, which is changed by previous salu

@@ -1346,10 +1377,12 @@ L_NOT_IN_CLUSTER:
#endif
end

#if RELAXED_SCHEDULING_IN_TRAP
// Restore the wave's SCHED_MODE hardware register from the copy kept in ttmp11.
//
// The trap entry path stashes SCHED_MODE[1:0] in ttmp11 at bit offset
// TTMP11_SCHED_MODE_SHIFT (width TTMP11_SCHED_MODE_SIZE); this helper reverses
// that step.
//
// s_tmp: scratch SGPR supplied by the caller; its previous contents are
//        overwritten with the extracted field.
function restore_sched_mode(s_tmp)
	// Bitfield-extract operand: offset in the low bits, field width shifted up by 0x10.
	s_bfe_u32	s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10))
	// Write the extracted value back to the scheduling-mode register.
	s_setreg_b32	hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp
end
#endif

function restore_barrier_signal_count(barrier_id)
	// extract the saved signal count from s_restore_tmp