Commit 742c085f authored by Christian König, committed by Alex Deucher

drm/amdgpu: switch back to 32bit hw fences v2

We don't need to extend them to 64 bits any more, so avoid the extra overhead.

v2: update commit message.
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
parent 480d0bf0
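
The hardware only ever writes a 32bit sequence number; the code removed below stitched that value up to 64 bits in software. After this patch the driver keeps the raw 32bit number and relies on modular arithmetic instead. As a standalone illustration (not driver code; assumes a two's-complement target), a wraparound-safe "already signaled" test looks like this:

#include <stdint.h>
#include <stdbool.h>

/* Wraparound-safe: wanted is signaled iff it is not ahead of
 * last_signaled, valid while the two differ by less than 2^31. */
static bool seq_signaled(uint32_t wanted, uint32_t last_signaled)
{
	return (int32_t)(last_signaled - wanted) >= 0;
}

For example, seq_signaled(0x00000001, 0xffffffff) is false while seq_signaled(0xffffffff, 0x00000001) is true, even though a plain unsigned comparison would say the opposite.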
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -386,8 +386,8 @@ struct amdgpu_fence_driver {
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
-	uint64_t			sync_seq;
-	atomic64_t			last_seq;
+	uint32_t			sync_seq;
+	atomic_t			last_seq;
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
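
For orientation, a minimal model of the two counters after this change (illustrative, not the kernel definitions): sync_seq is the last emitted sequence number and is only touched under the ring emission lock, while last_seq tracks the last value the hardware signaled and is updated lock-free, hence the atomic type.

#include <stdatomic.h>
#include <stdint.h>

struct fence_counters {
	uint32_t	 sync_seq;	/* last emitted; ring emission lock */
	_Atomic uint32_t last_seq;	/* last signaled; irq/fallback path */
};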
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -52,7 +52,6 @@ struct amdgpu_fence {
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
-	uint64_t			seq;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -104,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 	if (drv->cpu_addr)
 		seq = le32_to_cpu(*drv->cpu_addr);
 	else
-		seq = lower_32_bits(atomic64_read(&drv->last_seq));
+		seq = atomic_read(&drv->last_seq);
 
 	return seq;
 }
@@ -123,23 +122,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
 	struct fence **ptr;
-	unsigned idx;
+	uint32_t seq;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
 	if (fence == NULL)
 		return -ENOMEM;
 
-	fence->seq = ++ring->fence_drv.sync_seq;
+	seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
 	fence_init(&fence->base, &amdgpu_fence_ops,
 		   &ring->fence_drv.lock,
 		   adev->fence_context + ring->idx,
-		   fence->seq);
+		   seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       fence->seq, AMDGPU_FENCE_FLAG_INT);
+			       seq, AMDGPU_FENCE_FLAG_INT);
 
-	idx = fence->seq & ring->fence_drv.num_fences_mask;
-	ptr = &ring->fence_drv.fences[idx];
+	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
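
The masking above works because num_fences_mask is the size of the fences array minus one, with the size a power of two, so each 32bit sequence number maps to a fixed reuse slot without a division. A standalone sketch of the idea:

#include <stdint.h>

struct fence;

/* Map a sequence number to its slot; mask == size - 1 with size a
 * power of two, so this is seq % size computed with a single AND. */
static struct fence **fence_slot(struct fence **fences, uint32_t mask,
				 uint32_t seq)
{
	return &fences[seq & mask];
}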
@@ -177,22 +175,16 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
 void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
-	uint64_t seq, last_seq, last_emitted;
+	uint32_t seq, last_seq;
 	int r;
 
 	do {
-		last_seq = atomic64_read(&ring->fence_drv.last_seq);
-		last_emitted = ring->fence_drv.sync_seq;
+		last_seq = atomic_read(&ring->fence_drv.last_seq);
 		seq = amdgpu_fence_read(ring);
-		seq |= last_seq & 0xffffffff00000000LL;
-		if (seq < last_seq) {
-			seq &= 0xffffffff;
-			seq |= last_emitted & 0xffffffff00000000LL;
-		}
 
-	} while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
+	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
-	if (seq < last_emitted)
+	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
 	while (last_seq != seq) {
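
The simplified loop keeps the old lockless pattern: whichever caller manages to move last_seq from the value it observed to the freshly read hardware value owns the range in between and signals those fences. A standalone C11 sketch of the same pattern (illustrative; read_hw_seq is a hypothetical callback standing in for amdgpu_fence_read):

#include <stdatomic.h>
#include <stdint.h>

static uint32_t advance_last_seq(_Atomic uint32_t *last_seq,
				 uint32_t (*read_hw_seq)(void))
{
	uint32_t last, seq;

	do {
		last = atomic_load(last_seq);
		seq = read_hw_seq();
		/* Retry if someone else advanced last_seq meanwhile,
		 * so each (last, seq] range has exactly one owner. */
	} while (!atomic_compare_exchange_strong(last_seq, &last, seq));

	return seq;
}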
@@ -279,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 	 * but it's ok to report slightly wrong fence count here.
 	 */
 	amdgpu_fence_process(ring);
-	emitted = ring->fence_drv.sync_seq
-		- atomic64_read(&ring->fence_drv.last_seq);
-	/* to avoid 32bits warp around */
-	if (emitted > 0x10000000)
-		emitted = 0x10000000;
-
-	return (unsigned)emitted;
+	emitted = 0x100000000ull;
+	emitted -= atomic_read(&ring->fence_drv.last_seq);
+	emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+	return lower_32_bits(emitted);
 }
 
 /**
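
The new arithmetic computes sync_seq - last_seq modulo 2^32 without ever underflowing: starting from 2^32 in a 64bit variable keeps every intermediate value positive, and truncating to the lower 32 bits yields the modular difference, which stays correct across a 32bit wraparound. A standalone check of the math:

#include <assert.h>
#include <stdint.h>

static unsigned count_emitted(uint32_t sync_seq, uint32_t last_seq)
{
	uint64_t emitted = 0x100000000ull;	/* 2^32 */

	emitted -= last_seq;	/* 2^32 - last_seq, always positive */
	emitted += sync_seq;
	return (unsigned)(emitted & 0xffffffff);
}

int main(void)
{
	assert(count_emitted(104, 100) == 4);		/* no wrap */
	assert(count_emitted(2, 0xfffffffeu) == 4);	/* across wrap */
	return 0;
}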
@@ -317,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
 		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
 	}
-	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 	amdgpu_irq_get(adev, irq_src, irq_type);
 
 	ring->fence_drv.irq_src = irq_src;
@@ -353,7 +342,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	ring->fence_drv.cpu_addr = NULL;
 	ring->fence_drv.gpu_addr = 0;
 	ring->fence_drv.sync_seq = 0;
-	atomic64_set(&ring->fence_drv.last_seq, 0);
+	atomic_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;
 
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
@@ -621,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 		amdgpu_fence_process(ring);
 
 		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
-		seq_printf(m, "Last signaled fence 0x%016llx\n",
-			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
-		seq_printf(m, "Last emitted        0x%016llx\n",
+		seq_printf(m, "Last signaled fence 0x%08x\n",
+			   atomic_read(&ring->fence_drv.last_seq));
+		seq_printf(m, "Last emitted        0x%08x\n",
 			   ring->fence_drv.sync_seq);
 	}
 	return 0;