Commit 05c899ea authored by Jay Cornwall, committed by Alex Deucher

drm/amdkfd: Sign-extend TMA address in trap handler

SMEM instructions can reach addresses above 47 bits but require
bit 47 to be sign-extended through bits [63:48].

This allows the TMA to be relocated in a following patch.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 96c211f1
...@@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { ...@@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf820001, 0xbf820254, 0xbf820001, 0xbf820258,
0xb8f8f802, 0x8978ff78, 0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803, 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000, 0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d, 0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e, 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400, 0x866eff7b, 0x00000400,
0xbf850051, 0xbf8e0010, 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa, 0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900, 0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b, 0xbf850015, 0x866eff7b,
...@@ -294,13 +294,15 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { ...@@ -294,13 +294,15 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf850007, 0xb8eef801, 0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800, 0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b, 0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036, 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a, 0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a, 0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000, 0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807, 0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812, 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a, 0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010, 0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e, 0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000, 0x8977ff77, 0x00800000,
...@@ -676,14 +678,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { ...@@ -676,14 +678,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
}; };
static const uint32_t cwsr_trap_nv1x_hex[] = { static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf820001, 0xbf8201f1, 0xbf820001, 0xbf8201f5,
0xb0804004, 0xb978f802, 0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006, 0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78, 0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009, 0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000, 0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b, 0xbf85001e, 0x876eff7b,
0x00000400, 0xbf850057, 0x00000400, 0xbf85005b,
0xbf8e0010, 0xb97bf803, 0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b, 0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015, 0x00000900, 0xbf850015,
...@@ -697,7 +699,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { ...@@ -697,7 +699,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb96ef801, 0x876eff6e, 0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003, 0x00000800, 0xbf850003,
0x876eff7b, 0x00000400, 0x876eff7b, 0x00000400,
0xbf85003c, 0x8a77ff77, 0xbf850040, 0x8a77ff77,
0xff000000, 0xb97af807, 0xff000000, 0xb97af807,
0x877bff7a, 0x02000000, 0x877bff7a, 0x02000000,
0x8f7b867b, 0x88777b77, 0x8f7b867b, 0x88777b77,
...@@ -706,6 +708,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { ...@@ -706,6 +708,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x8a7aff7a, 0x023f8000, 0x8a7aff7a, 0x023f8000,
0xb9faf807, 0xb97af812, 0xb9faf807, 0xb97af812,
0xb97bf813, 0x8ffa887a, 0xb97bf813, 0x8ffa887a,
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010, 0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e, 0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000, 0x8a77ff77, 0x00800000,
...@@ -1094,14 +1098,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { ...@@ -1094,14 +1098,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
}; };
static const uint32_t cwsr_trap_arcturus_hex[] = { static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf820001, 0xbf8202d0, 0xbf820001, 0xbf8202d4,
0xb8f8f802, 0x8978ff78, 0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803, 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000, 0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d, 0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e, 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400, 0x866eff7b, 0x00000400,
0xbf850051, 0xbf8e0010, 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa, 0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900, 0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b, 0xbf850015, 0x866eff7b,
...@@ -1114,13 +1118,15 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { ...@@ -1114,13 +1118,15 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf850007, 0xb8eef801, 0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800, 0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b, 0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036, 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a, 0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a, 0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000, 0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807, 0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812, 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a, 0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010, 0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e, 0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000, 0x8977ff77, 0x00800000,
...@@ -1572,14 +1578,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { ...@@ -1572,14 +1578,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
}; };
static const uint32_t cwsr_trap_aldebaran_hex[] = { static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf820001, 0xbf8202db, 0xbf820001, 0xbf8202df,
0xb8f8f802, 0x8978ff78, 0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803, 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000, 0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d, 0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e, 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400, 0x866eff7b, 0x00000400,
0xbf850051, 0xbf8e0010, 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa, 0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900, 0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b, 0xbf850015, 0x866eff7b,
...@@ -1592,13 +1598,15 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { ...@@ -1592,13 +1598,15 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf850007, 0xb8eef801, 0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800, 0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b, 0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036, 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a, 0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a, 0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000, 0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807, 0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812, 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a, 0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010, 0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e, 0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000, 0x8977ff77, 0x00800000,
...@@ -2061,14 +2069,14 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { ...@@ -2061,14 +2069,14 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
}; };
static const uint32_t cwsr_trap_gfx10_hex[] = { static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf820001, 0xbf82021c, 0xbf820001, 0xbf820220,
0xb0804004, 0xb978f802, 0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006, 0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78, 0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009, 0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000, 0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b, 0xbf85001e, 0x876eff7b,
0x00000400, 0xbf850041, 0x00000400, 0xbf850045,
0xbf8e0010, 0xb97bf803, 0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b, 0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015, 0x00000900, 0xbf850015,
...@@ -2082,8 +2090,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { ...@@ -2082,8 +2090,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb96ef801, 0x876eff6e, 0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003, 0x00000800, 0xbf850003,
0x876eff7b, 0x00000400, 0x876eff7b, 0x00000400,
0xbf850026, 0xb97af812, 0xbf85002a, 0xb97af812,
0xb97bf813, 0x8ffa887a, 0xb97bf813, 0x8ffa887a,
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010, 0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e, 0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000, 0x8a77ff77, 0x00800000,
...@@ -2496,7 +2506,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { ...@@ -2496,7 +2506,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
}; };
static const uint32_t cwsr_trap_gfx11_hex[] = { static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa00001, 0xbfa00221, 0xbfa00001, 0xbfa00225,
0xb0804006, 0xb8f8f802, 0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006, 0x9178ff78, 0x00020006,
0xb8fbf803, 0xbf0d9e6d, 0xb8fbf803, 0xbf0d9e6d,
...@@ -2506,7 +2516,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { ...@@ -2506,7 +2516,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa10009, 0x8b6eff6d, 0xbfa10009, 0x8b6eff6d,
0x00ff0000, 0xbfa2001e, 0x00ff0000, 0xbfa2001e,
0x8b6eff7b, 0x00000400, 0x8b6eff7b, 0x00000400,
0xbfa20041, 0xbf830010, 0xbfa20045, 0xbf830010,
0xb8fbf803, 0xbfa0fffa, 0xb8fbf803, 0xbfa0fffa,
0x8b6eff7b, 0x00000900, 0x8b6eff7b, 0x00000900,
0xbfa20015, 0x8b6eff7b, 0xbfa20015, 0x8b6eff7b,
...@@ -2519,9 +2529,11 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { ...@@ -2519,9 +2529,11 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa20007, 0xb8eef801, 0xbfa20007, 0xb8eef801,
0x8b6eff6e, 0x00000800, 0x8b6eff6e, 0x00000800,
0xbfa20003, 0x8b6eff7b, 0xbfa20003, 0x8b6eff7b,
0x00000400, 0xbfa20026, 0x00000400, 0xbfa2002a,
0xbefa4d82, 0xbf89fc07, 0xbefa4d82, 0xbf89fc07,
0x84fa887a, 0xf4005bbd, 0x84fa887a, 0xbf0d8f7b,
0xbfa10002, 0x8c7bff7b,
0xffff0000, 0xf4005bbd,
0xf8000010, 0xbf89fc07, 0xf8000010, 0xbf89fc07,
0x846e976e, 0x9177ff77, 0x846e976e, 0x9177ff77,
0x00800000, 0x8c776e77, 0x00800000, 0x8c776e77,
...@@ -2939,14 +2951,14 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { ...@@ -2939,14 +2951,14 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
}; };
static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbf820001, 0xbf8202d7, 0xbf820001, 0xbf8202db,
0xb8f8f802, 0x8978ff78, 0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803, 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000, 0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d, 0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001a, 0x00ff0000, 0xbf85001a,
0x866eff7b, 0x00000400, 0x866eff7b, 0x00000400,
0xbf85004d, 0xbf8e0010, 0xbf850051, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa, 0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900, 0x866eff7b, 0x03c00900,
0xbf850011, 0x866eff7b, 0xbf850011, 0x866eff7b,
...@@ -2957,13 +2969,15 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { ...@@ -2957,13 +2969,15 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x866e6f6e, 0xbf850006, 0x866e6f6e, 0xbf850006,
0x866eff6d, 0x00ff0000, 0x866eff6d, 0x00ff0000,
0xbf850003, 0x866eff7b, 0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036, 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a, 0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a, 0x001f8000, 0x8e7a8b7a,
0x8979ff79, 0xfc000000, 0x8979ff79, 0xfc000000,
0x87797a79, 0xba7ff807, 0x87797a79, 0xba7ff807,
0x00000000, 0xb8faf812, 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a, 0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010, 0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e, 0xbf8cc07f, 0x8e6e976e,
0x8979ff79, 0x00800000, 0x8979ff79, 0x00800000,
......
...@@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP: ...@@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP:
#endif #endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
s_bitcmp1_b32 ttmp15, 0xF
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
......
...@@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP: ...@@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
s_bitcmp1_b32 ttmp15, 0xF
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment