Commit 373008bf authored by Dusica Milinkovic, committed by Alex Deucher

drm/amdgpu: Increase tlb flush timeout for sriov

[Why]
While running a benchmark (Luxmark) on multiple VFs, a KIQ timeout error
was observed. It happens because all of the VFs do the TLB invalidation at
the same time. Although each VF has its own invalidate register set, on the
hardware side the invalidate requests are queued for execution.

[How]
For the 12-VF case, increase the timeout to 12 * 100ms.
Signed-off-by: Dusica Milinkovic <Dusica.Milinkovic@amd.com>
Acked-by: Shaoyun Liu <shaoyun.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c7dafdfa
...@@ -317,7 +317,7 @@ enum amdgpu_kiq_irq { ...@@ -317,7 +317,7 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST AMDGPU_CP_KIQ_IRQ_LAST
}; };
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000 #define MAX_KIQ_REG_TRY 1000
......
...@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, ...@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq; uint32_t seq;
uint16_t queried_pasid; uint16_t queried_pasid;
bool ret; bool ret;
u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
...@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, ...@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock); spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) { if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
return -ETIME; return -ETIME;
......
...@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, ...@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq; uint32_t seq;
uint16_t queried_pasid; uint16_t queried_pasid;
bool ret; bool ret;
u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
...@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, ...@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock); spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) { if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
up_read(&adev->reset_domain->sem); up_read(&adev->reset_domain->sem);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment