Commit 194eb174 authored by Victor Zhao, committed by Alex Deucher

drm/amdgpu: reduce reset time

In the multi-container use case, reset time is important, so skip the ring
tests and the CP halt wait while suspending IPs for a reset, as they are
going to fail and cost extra time during the reset.

v2: add a hang flag to indicate that the reset comes from a job timeout,
and skip the ring test and CP halt wait in that case

v3: move hang flag to adev
Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 72fadb13
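
For readers unfamiliar with the pattern, here is a minimal userspace sketch of the flag protocol this patch introduces: the timeout handler raises the hang flag before kicking off recovery and clears it on every exit path, and teardown code consults the flag to skip ring tests and waits that cannot succeed on a hung GPU. All names in the sketch are illustrative stand-ins, not the driver's real symbols.

/*
 * Minimal sketch of the job_hang flag protocol (illustrative names,
 * not the amdgpu driver's real API).
 */
#include <stdbool.h>
#include <stdio.h>

struct dev_ctx {
	bool job_hang;			/* mirrors adev->job_hang */
};

/* Stand-in for amdgpu_ring_test_helper(): pointless on a hung GPU. */
static int ring_test(struct dev_ctx *dev)
{
	if (dev->job_hang)
		return 0;		/* skip: the test would only time out */
	printf("ring test submitted and fenced\n");
	return 0;
}

/* Stand-in for the IP suspend path invoked during a reset. */
static int suspend_for_reset(struct dev_ctx *dev)
{
	return ring_test(dev);
}

/* Stand-in for amdgpu_job_timedout(). */
static void job_timedout(struct dev_ctx *dev)
{
	dev->job_hang = true;		/* this reset comes from a job timeout */
	suspend_for_reset(dev);		/* teardown sees the flag and skips */
	dev->job_hang = false;		/* cleared on every exit path */
}

int main(void)
{
	struct dev_ctx dev = { .job_hang = false };

	job_timedout(&dev);		/* ring test skipped */
	suspend_for_reset(&dev);	/* normal path: ring test runs */
	return 0;
}

The patch itself follows this shape: adev->job_hang brackets the recovery work in amdgpu_job_timedout(), and the hunks below check it before submitting a KIQ ring test or polling CP_STAT for the CP halt.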
@@ -1065,6 +1065,7 @@ struct amdgpu_device {
 	struct work_struct		reset_work;
 
 	uint32_t			amdgpu_reset_level_mask;
+	bool				job_hang;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
...
@@ -477,7 +477,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
 		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
 					   RESET_QUEUES, 0, 0);
 
-	if (adev->gfx.kiq.ring.sched.ready)
+	if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
 		r = amdgpu_ring_test_helper(kiq_ring);
 	spin_unlock(&adev->gfx.kiq.ring_lock);
...
@@ -49,6 +49,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 	}
 
 	memset(&ti, 0, sizeof(struct amdgpu_task_info));
+	adev->job_hang = true;
 
 	if (amdgpu_gpu_recovery &&
 	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
@@ -83,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 	}
 
 exit:
+	adev->job_hang = false;
 	drm_dev_exit(idx);
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 }
...
@@ -5971,6 +5971,9 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 		WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
 	}
 
+	if (adev->job_hang && !enable)
+		return 0;
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
 			break;
@@ -7569,8 +7572,10 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
 					   PREEMPT_QUEUES, 0, 0);
 
-	return amdgpu_ring_test_helper(kiq_ring);
+	if (!adev->job_hang)
+		return amdgpu_ring_test_helper(kiq_ring);
+	else
+		return 0;
 }
 #endif
...