Commit 1fb37a3d authored by Monk Liu's avatar Monk Liu Committed by Alex Deucher

drm/amdgpu: fix kiq_resume routine (V2)

v2:
use in_reset to fix the compute ring test failure
which occurred after FLR/gpu_reset.

We need to back up the clean state of the MQD that was created in the
driver load stage and restore it in the resume stage; otherwise both
KCQ and KIQ may fail the ring/IB tests.
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f6bd7942
...@@ -2349,6 +2349,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) ...@@ -2349,6 +2349,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
mutex_lock(&adev->virt.lock_reset); mutex_lock(&adev->virt.lock_reset);
atomic_inc(&adev->gpu_reset_counter); atomic_inc(&adev->gpu_reset_counter);
adev->gfx.in_reset = true;
/* block TTM */ /* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
...@@ -2433,6 +2434,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) ...@@ -2433,6 +2434,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
dev_info(adev->dev, "GPU reset failed\n"); dev_info(adev->dev, "GPU reset failed\n");
} }
adev->gfx.in_reset = false;
mutex_unlock(&adev->virt.lock_reset); mutex_unlock(&adev->virt.lock_reset);
return r; return r;
} }
......
...@@ -4883,24 +4883,46 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, ...@@ -4883,24 +4883,46 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
uint64_t eop_gpu_addr; uint64_t eop_gpu_addr;
bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
if (is_kiq) { if (is_kiq) {
eop_gpu_addr = kiq->eop_gpu_addr; eop_gpu_addr = kiq->eop_gpu_addr;
gfx_v8_0_kiq_setting(&kiq->ring); gfx_v8_0_kiq_setting(&kiq->ring);
} else } else {
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
ring->queue * MEC_HPD_SIZE; ring->queue * MEC_HPD_SIZE;
mqd_idx = ring - &adev->gfx.compute_ring[0];
}
mutex_lock(&adev->srbm_mutex); if (!adev->gfx.in_reset) {
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
if (is_kiq)
gfx_v8_0_kiq_init_register(adev, mqd, ring);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
if (is_kiq) /* reset ring buffer */
gfx_v8_0_kiq_init_register(adev, mqd, ring); ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); if (is_kiq) {
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_kiq_init_register(adev, mqd, ring);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
}
if (is_kiq) if (is_kiq)
gfx_v8_0_kiq_enable(ring); gfx_v8_0_kiq_enable(ring);
...@@ -4919,9 +4941,9 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) ...@@ -4919,9 +4941,9 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
ring = &adev->gfx.kiq.ring; ring = &adev->gfx.kiq.ring;
if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) { if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) {
memset((void *)ring->mqd_ptr, 0, sizeof(struct vi_mqd));
r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr);
amdgpu_bo_kunmap(ring->mqd_obj); amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
if (r) if (r)
return r; return r;
} else { } else {
...@@ -4931,9 +4953,9 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) ...@@ -4931,9 +4953,9 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; ring = &adev->gfx.compute_ring[i];
if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) { if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) {
memset((void *)ring->mqd_ptr, 0, sizeof(struct vi_mqd));
r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr);
amdgpu_bo_kunmap(ring->mqd_obj); amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
if (r) if (r)
return r; return r;
} else { } else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment