Commit 71182665 authored by Monk Liu, committed by Alex Deucher

drm/amdgpu: stop all rings before doing gpu recover

We found that recover_vram_from_shadow sometimes gets executed
in parallel with the SDMA scheduler; all schedulers should be
stopped before doing a GPU reset/recover.
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent d869ae09
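The fix relies on the kthread park contract: kthread_park() does not return until the target thread has actually entered its parked state, so once every ring's scheduler thread is parked, no scheduler (SDMA included) can still be mid-way through pushing a job while the reset and recover_vram_from_shadow run. The module below is a hypothetical, self-contained illustration of that contract only; it is not part of this patch, and names such as park_demo_init and worker_fn are made up for the example.

/* Hypothetical demo of the kthread park/unpark contract (not part of
 * this patch): kthread_park() returns only after the worker has parked
 * itself, so afterwards the caller knows the worker is not in the
 * middle of an iteration.
 */
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>

static struct task_struct *worker;

static int worker_fn(void *data)
{
	while (!kthread_should_stop()) {
		if (kthread_should_park())
			kthread_parkme();	/* sleeps here until unparked */

		/* ... one unit of work per iteration ... */
		msleep(10);
	}
	return 0;
}

static int __init park_demo_init(void)
{
	worker = kthread_run(worker_fn, NULL, "park-demo");
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	kthread_park(worker);	/* blocks until worker_fn has parked */
	/* worker is now guaranteed idle; safe to touch shared state */
	kthread_unpark(worker);	/* let it run again */

	return 0;
}

static void __exit park_demo_exit(void)
{
	kthread_stop(worker);
}

module_init(park_demo_init);
module_exit(park_demo_exit);
MODULE_LICENSE("GPL");

The same guarantee is what the first hunk below buys by parking every ring's scheduler thread before touching the hardware.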
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2648,22 +2648,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
 	/* store modesetting */
 	if (amdgpu_device_has_dc_support(adev))
 		state = drm_atomic_helper_suspend(adev->ddev);
 
-	/* block scheduler */
+	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		/* only focus on the ring hit timeout if &job not NULL */
+		kthread_park(ring->sched.thread);
+
 		if (job && job->ring->idx != i)
 			continue;
 
-		kthread_park(ring->sched.thread);
 		drm_sched_hw_job_reset(&ring->sched, &job->base);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2706,6 +2707,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			}
 			dma_fence_put(fence);
 		}
+	}
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -2713,26 +2715,14 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		/* only focus on the ring hit timeout if &job not NULL */
-		if (job && job->ring->idx != i)
-			continue;
-
-		drm_sched_job_recovery(&ring->sched);
-		kthread_unpark(ring->sched.thread);
-	}
-	} else {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !ring->sched.thread)
-				continue;
-
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
-
-			kthread_unpark(adev->rings[i]->sched.thread);
-		}
+		/* only need recovery sched of the given job's ring
+		 * or all rings (in the case @job is NULL)
+		 * after above amdgpu_reset accomplished
+		 */
+		if ((!job || job->ring->idx == i) && !r)
+			drm_sched_job_recovery(&ring->sched);
+
+		kthread_unpark(ring->sched.thread);
 	}
 
 	if (amdgpu_device_has_dc_support(adev)) {
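Read together, the three hunks reorder recovery: every scheduler thread is parked first, hardware-job reset and fence force-completion still apply only to the ring that hit the timeout (when @job is non-NULL), and all rings are unparked at the end, with job recovery additionally gated on a successful reset. The sketch below is a simplified, non-verbatim rendering of that flow; the helper name gpu_recover_flow_sketch and the bare r/job parameters are illustrative stand-ins for the real amdgpu_device_gpu_recover() logic, and the SR-IOV path, VRAM shadow recovery details, and DC suspend/resume handling are elided.

#include <linux/kthread.h>
#include <drm/gpu_scheduler.h>
#include "amdgpu.h"	/* driver-internal header; the sketch assumes amdgpu context */

/* Simplified sketch (not the verbatim function) of the post-patch ordering:
 *   1. park every scheduler thread,
 *   2. reset hw jobs only on the offending ring,
 *   3. do the ASIC reset and recover_vram_from_shadow with all schedulers quiescent,
 *   4. recover the affected ring(s) and unpark everything.
 */
static void gpu_recover_flow_sketch(struct amdgpu_device *adev,
				    struct amdgpu_job *job, int r)
{
	int i;

	/* 1 + 2: block all schedulers and reset the given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		kthread_park(ring->sched.thread);

		if (job && job->ring->idx != i)
			continue;

		drm_sched_hw_job_reset(&ring->sched, &job->base);
		amdgpu_fence_driver_force_completion(ring);
	}

	/* 3: reset and VRAM shadow recovery run here; with every scheduler
	 * parked, e.g. the SDMA ring cannot be fed new jobs in parallel */

	/* 4: recover only the given job's ring (or all rings when @job is
	 * NULL) if the reset succeeded, then let all schedulers run again */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		if ((!job || job->ring->idx == i) && !r)
			drm_sched_job_recovery(&ring->sched);

		kthread_unpark(ring->sched.thread);
	}
}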