Commit f77c9aff authored by Huang Rui's avatar Huang Rui Committed by Alex Deucher

drm/amdgpu: Fix per-IB secure flag GFX hang

Since commit "Move to a per-IB secure flag (TMZ)",
we've been seeing hangs in GFX. We need to send
FRAME CONTROL stop/start back-to-back, every time
we flip the TMZ flag. That is, when we transition
from TMZ to non-TMZ we have to send a stop with
TMZ followed by a start with non-TMZ, and
similarly for transitioning from non-TMZ into TMZ.

This patch implements this, thus fixing the GFX
hang.

v1 -> v2:
As suggested by Luben, and accept part of implemetation from this patch:
- Put "secure" closed to the loop and use optimization
- Change "secure" to bool again, and move "secure == -1" out of loop.
v3: Small fixes/optimizations.
Reported-and-Tested-by: default avatarPierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: default avatarHuang Rui <ray.huang@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarLuben Tuikov <luben.tuikov@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent b71a564e
...@@ -218,7 +218,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -218,7 +218,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
amdgpu_ring_emit_cntxcntl(ring, status); amdgpu_ring_emit_cntxcntl(ring, status);
} }
/* Setup initial TMZiness and send it off.
*/
secure = false; secure = false;
if (job && ring->funcs->emit_frame_cntl) {
secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
amdgpu_ring_emit_frame_cntl(ring, true, secure);
}
for (i = 0; i < num_ibs; ++i) { for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i]; ib = &ibs[i];
...@@ -230,27 +237,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -230,27 +237,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue; continue;
/* If this IB is TMZ, add frame TMZ start packet, if (job && ring->funcs->emit_frame_cntl) {
* else, turn off TMZ. if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
*/ amdgpu_ring_emit_frame_cntl(ring, false, secure);
if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) { secure = !secure;
if (!secure) { amdgpu_ring_emit_frame_cntl(ring, true, secure);
secure = true;
amdgpu_ring_emit_tmz(ring, true);
} }
} else if (secure) {
secure = false;
amdgpu_ring_emit_tmz(ring, false);
} }
amdgpu_ring_emit_ib(ring, job, ib, status); amdgpu_ring_emit_ib(ring, job, ib, status);
status &= ~AMDGPU_HAVE_CTX_SWITCH; status &= ~AMDGPU_HAVE_CTX_SWITCH;
} }
if (secure) { if (job && ring->funcs->emit_frame_cntl)
secure = false; amdgpu_ring_emit_frame_cntl(ring, false, secure);
amdgpu_ring_emit_tmz(ring, false);
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
if (!(adev->flags & AMD_IS_APU)) if (!(adev->flags & AMD_IS_APU))
......
...@@ -177,7 +177,8 @@ struct amdgpu_ring_funcs { ...@@ -177,7 +177,8 @@ struct amdgpu_ring_funcs {
void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t reg1, uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask); uint32_t ref, uint32_t mask);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start); void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
bool secure);
/* Try to soft recover the ring to make the fence signal */ /* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
int (*preempt_ib)(struct amdgpu_ring *ring); int (*preempt_ib)(struct amdgpu_ring *ring);
...@@ -256,7 +257,7 @@ struct amdgpu_ring { ...@@ -256,7 +257,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
......
...@@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); ...@@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{ {
...@@ -7599,12 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) ...@@ -7599,12 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
sizeof(de_payload) >> 2); sizeof(de_payload) >> 2);
} }
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
bool secure)
{ {
if (amdgpu_is_tmz(ring->adev)) { uint32_t v = secure ? FRAME_TMZ : 0;
amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
} amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
} }
static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
...@@ -8058,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { ...@@ -8058,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib, .preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_tmz = gfx_v10_0_ring_emit_tmz, .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
......
...@@ -5442,12 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) ...@@ -5442,12 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
} }
static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
bool secure)
{ {
if (amdgpu_is_tmz(ring->adev)) { uint32_t v = secure ? FRAME_TMZ : 0;
amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
} amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
} }
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
...@@ -6699,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { ...@@ -6699,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_tmz = gfx_v9_0_ring_emit_tmz, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment