Commit bd4bea5a authored by Alex Deucher

drm/amdgpu/gfx9.4.3: Enable bad opcode interrupt

A bad opcode in the command stream would otherwise cause a CP/ME hang.
The firmware prevents the ME side from hanging by raising a bad opcode interrupt,
and the driver needs to perform a VMID reset when it receives that interrupt.
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 238352b4
...@@ -901,6 +901,13 @@ static int gfx_v9_4_3_sw_init(void *handle) ...@@ -901,6 +901,13 @@ static int gfx_v9_4_3_sw_init(void *handle)
if (r) if (r)
return r; return r;
/* Bad opcode Event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
&adev->gfx.bad_op_irq);
if (r)
return r;
/* Privileged reg */ /* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq); &adev->gfx.priv_reg_irq);
...@@ -2162,6 +2169,7 @@ static int gfx_v9_4_3_hw_fini(void *handle) ...@@ -2162,6 +2169,7 @@ static int gfx_v9_4_3_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
num_xcc = NUM_XCC(adev->gfx.xcc_mask); num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < num_xcc; i++) { for (i = 0; i < num_xcc; i++) {
...@@ -2327,6 +2335,10 @@ static int gfx_v9_4_3_late_init(void *handle) ...@@ -2327,6 +2335,10 @@ static int gfx_v9_4_3_late_init(void *handle)
if (r) if (r)
return r; return r;
r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
if (r)
return r;
if (adev->gfx.ras && if (adev->gfx.ras &&
adev->gfx.ras->enable_watchdog_timer) adev->gfx.ras->enable_watchdog_timer)
adev->gfx.ras->enable_watchdog_timer(adev); adev->gfx.ras->enable_watchdog_timer(adev);
...@@ -2964,6 +2976,46 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, ...@@ -2964,6 +2976,46 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0; return 0;
} }
/*
 * Enable or disable the opcode-error ("bad opcode") interrupt.
 *
 * For each of the NUM_XCC(adev->gfx.xcc_mask) XCC instances this writes the
 * OPCODE_ERROR_INT_ENABLE field of CP_INT_CNTL_RING0, then performs a
 * read-modify-write of the same field in every per-MEC/per-pipe interrupt
 * control register returned by gfx_v9_4_3_get_cpc_int_cntl().
 *
 * @adev:   device whose interrupt enables are updated
 * @source: interrupt source (not used by this function)
 * @type:   interrupt type (not used by this function)
 * @state:  AMDGPU_IRQ_STATE_ENABLE sets the field to 1,
 *          AMDGPU_IRQ_STATE_DISABLE sets it to 0; any other value leaves
 *          the registers untouched
 *
 * Always returns 0.
 */
static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	u32 enable, cntl_reg, cntl_val;
	int xcc_idx, mec_idx, pipe_idx, xcc_count;

	/* Only the two explicit states program the hardware. */
	if (state != AMDGPU_IRQ_STATE_ENABLE &&
	    state != AMDGPU_IRQ_STATE_DISABLE)
		return 0;

	enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
	xcc_count = NUM_XCC(adev->gfx.xcc_mask);

	for (xcc_idx = 0; xcc_idx < xcc_count; xcc_idx++) {
		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_idx), CP_INT_CNTL_RING0,
				      OPCODE_ERROR_INT_ENABLE, enable);

		for (mec_idx = 0; mec_idx < adev->gfx.mec.num_mec; mec_idx++) {
			for (pipe_idx = 0; pipe_idx < adev->gfx.mec.num_pipe_per_mec; pipe_idx++) {
				/* MECs start at 1 */
				cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, xcc_idx,
								       mec_idx + 1, pipe_idx);
				/*
				 * Skip when the lookup returns 0 (presumably no
				 * control register for this MEC/pipe - confirm
				 * against gfx_v9_4_3_get_cpc_int_cntl()).
				 */
				if (!cntl_reg)
					continue;

				cntl_val = RREG32_XCC(cntl_reg, xcc_idx);
				cntl_val = REG_SET_FIELD(cntl_val, CP_ME1_PIPE0_INT_CNTL,
							 OPCODE_ERROR_INT_ENABLE, enable);
				WREG32_XCC(cntl_reg, cntl_val, xcc_idx);
			}
		}
	}

	return 0;
}
static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
unsigned type, unsigned type,
...@@ -3116,6 +3168,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, ...@@ -3116,6 +3168,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
/*
 * Handler for the bad-opcode interrupt source.
 *
 * Logs an error message and then hands the interrupt vector entry to
 * gfx_v9_4_3_fault(). @source is not used. Always returns 0.
 */
static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	/* Log first so the message precedes anything the fault helper emits. */
	DRM_ERROR("Illegal opcode in command stream\n");
	gfx_v9_4_3_fault(adev, entry);

	return 0;
}
static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
...@@ -4228,6 +4289,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { ...@@ -4228,6 +4289,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = {
.process = gfx_v9_4_3_priv_reg_irq, .process = gfx_v9_4_3_priv_reg_irq,
}; };
/*
 * Callback table for the bad-opcode interrupt source: .set toggles the
 * interrupt enable bits, .process handles a delivered interrupt.
 */
static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = {
	.set		= gfx_v9_4_3_set_bad_op_fault_state,
	.process	= gfx_v9_4_3_bad_op_irq,
};
static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = {
.set = gfx_v9_4_3_set_priv_inst_fault_state, .set = gfx_v9_4_3_set_priv_inst_fault_state,
.process = gfx_v9_4_3_priv_inst_irq, .process = gfx_v9_4_3_priv_inst_irq,
...@@ -4241,6 +4307,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) ...@@ -4241,6 +4307,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;
adev->gfx.bad_op_irq.num_types = 1;
adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs;
adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment