Commit 13d8850a authored by Sunil Khatri's avatar Sunil Khatri Committed by Alex Deucher

drm/amdgpu: trigger ip dump before suspend of IP's

Problem:
IP dump right now is done post suspend of all
IP's which for some IP's could change power
state and software state too which we do not want
to reflect in the dump as it might not be same at
the time of hang.

Solution:
IP should be dumped as close to the HW state when
the GPU was in hung state without trying to reinitialize
any resource.
Acked-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarSunil Khatri <sunil.khatri@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent a1526878
...@@ -5277,11 +5277,29 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) ...@@ -5277,11 +5277,29 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
return ret; return ret;
} }
static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
{
int i;
lockdep_assert_held(&adev->reset_domain->sem);
for (i = 0; i < adev->reset_info.num_regs; i++) {
adev->reset_info.reset_dump_reg_value[i] =
RREG32(adev->reset_info.reset_dump_reg_list[i]);
trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
adev->reset_info.reset_dump_reg_value[i]);
}
return 0;
}
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context) struct amdgpu_reset_context *reset_context)
{ {
int i, r = 0; int i, r = 0;
struct amdgpu_job *job = NULL; struct amdgpu_job *job = NULL;
struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
bool need_full_reset = bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
...@@ -5340,6 +5358,18 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, ...@@ -5340,6 +5358,18 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
} }
} }
if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
amdgpu_reset_reg_dumps(tmp_adev);
dev_info(tmp_adev->dev, "Dumping IP State\n");
/* Trigger ip dump before we reset the asic */
for (i = 0; i < tmp_adev->num_ip_blocks; i++)
if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
tmp_adev->ip_blocks[i].version->funcs
->dump_ip_state((void *)tmp_adev);
dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
}
if (need_full_reset) if (need_full_reset)
r = amdgpu_device_ip_suspend(adev); r = amdgpu_device_ip_suspend(adev);
if (need_full_reset) if (need_full_reset)
...@@ -5352,47 +5382,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, ...@@ -5352,47 +5382,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
return r; return r;
} }
static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
{
int i;
lockdep_assert_held(&adev->reset_domain->sem);
for (i = 0; i < adev->reset_info.num_regs; i++) {
adev->reset_info.reset_dump_reg_value[i] =
RREG32(adev->reset_info.reset_dump_reg_list[i]);
trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
adev->reset_info.reset_dump_reg_value[i]);
}
return 0;
}
int amdgpu_do_asic_reset(struct list_head *device_list_handle, int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context) struct amdgpu_reset_context *reset_context)
{ {
struct amdgpu_device *tmp_adev = NULL; struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset, skip_hw_reset, vram_lost = false; bool need_full_reset, skip_hw_reset, vram_lost = false;
int r = 0; int r = 0;
uint32_t i;
/* Try reset handler method first */ /* Try reset handler method first */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list); reset_list);
if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
amdgpu_reset_reg_dumps(tmp_adev);
dev_info(tmp_adev->dev, "Dumping IP State\n");
/* Trigger ip dump before we reset the asic */
for (i = 0; i < tmp_adev->num_ip_blocks; i++)
if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
tmp_adev->ip_blocks[i].version->funcs
->dump_ip_state((void *)tmp_adev);
dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
}
reset_context->reset_device_list = device_list_handle; reset_context->reset_device_list = device_list_handle;
r = amdgpu_reset_perform_reset(tmp_adev, reset_context); r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
/* If reset handler not implemented, continue; otherwise return */ /* If reset handler not implemented, continue; otherwise return */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment