Commit ec23802d authored by Ben Skeggs

drm/nv50: drop explicit yields in favour of smaller PFIFO timeslice

This gives a small but noticeable performance gain at lower performance
levels, and leaves performance unchanged at the higher ones.

With this commit, we're now using the same timeslice size as the NVIDIA
binary driver currently does, and dropping an unknown bit that NVIDIA
no longer appear to set.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent e3b7ed5e
...@@ -330,18 +330,9 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) ...@@ -330,18 +330,9 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
int ret; int ret;
if (dev_priv->chipset < 0x84) { if (dev_priv->chipset < 0x84) {
if (dev_priv->chipset < 0x50) { ret = RING_SPACE(chan, 3);
ret = RING_SPACE(chan, 3); if (ret)
if (ret) return ret;
return ret;
} else {
ret = RING_SPACE(chan, 5);
if (ret)
return ret;
BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
OUT_RING (chan, 0);
}
BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2); BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2);
OUT_RING (chan, sema->mem->start); OUT_RING (chan, sema->mem->start);
...@@ -351,29 +342,10 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) ...@@ -351,29 +342,10 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
struct nouveau_vma *vma = &dev_priv->fence.bo->vma; struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
u64 offset = vma->offset + sema->mem->start; u64 offset = vma->offset + sema->mem->start;
/* ret = RING_SPACE(chan, 5);
* NV50 tries to be too smart and context-switch
* between semaphores instead of doing a "first come,
* first served" strategy like previous cards
* do.
*
* That's bad because the ACQUIRE latency can get as
* large as the PFIFO context time slice in the
* typical DRI2 case where you have several
* outstanding semaphores at the same moment.
*
* If we're going to ACQUIRE, force the card to
* context switch before, just in case the matching
* RELEASE is already scheduled to be executed in
* another channel.
*/
ret = RING_SPACE(chan, 7);
if (ret) if (ret)
return ret; return ret;
BEGIN_RING(chan, NvSubSw, 0x0080, 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, NvSubSw, 0x0010, 4); BEGIN_RING(chan, NvSubSw, 0x0010, 4);
OUT_RING (chan, upper_32_bits(offset)); OUT_RING (chan, upper_32_bits(offset));
OUT_RING (chan, lower_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset));
...@@ -413,7 +385,7 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) ...@@ -413,7 +385,7 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
int ret; int ret;
if (dev_priv->chipset < 0x84) { if (dev_priv->chipset < 0x84) {
ret = RING_SPACE(chan, (dev_priv->chipset != 0x50) ? 4 : 6); ret = RING_SPACE(chan, 4);
if (ret) if (ret)
return ret; return ret;
...@@ -421,22 +393,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) ...@@ -421,22 +393,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
OUT_RING (chan, sema->mem->start); OUT_RING (chan, sema->mem->start);
BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1); BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1);
OUT_RING (chan, 1); OUT_RING (chan, 1);
if (dev_priv->chipset == 0x50) {
BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
OUT_RING (chan, 0);
}
} else } else
if (dev_priv->chipset < 0xc0) { if (dev_priv->chipset < 0xc0) {
struct nouveau_vma *vma = &dev_priv->fence.bo->vma; struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
u64 offset = vma->offset + sema->mem->start; u64 offset = vma->offset + sema->mem->start;
/* ret = RING_SPACE(chan, 5);
* Emits release and forces the card to context switch right
* afterwards, there may be another channel waiting for the
* semaphore
*/
ret = RING_SPACE(chan, 7);
if (ret) if (ret)
return ret; return ret;
...@@ -445,8 +407,6 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) ...@@ -445,8 +407,6 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
OUT_RING (chan, lower_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset));
OUT_RING (chan, 1); OUT_RING (chan, 1);
OUT_RING (chan, 2); /* RELEASE */ OUT_RING (chan, 2); /* RELEASE */
BEGIN_RING(chan, NvSubSw, 0x0080, 1);
OUT_RING (chan, 0);
} else { } else {
struct nouveau_vma *vma = &dev_priv->fence.bo->vma; struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
u64 offset = vma->offset + sema->mem->start; u64 offset = vma->offset + sema->mem->start;
......
...@@ -149,6 +149,7 @@ nv50_fifo_init_regs(struct drm_device *dev) ...@@ -149,6 +149,7 @@ nv50_fifo_init_regs(struct drm_device *dev)
nv_wr32(dev, 0x3204, 0); nv_wr32(dev, 0x3204, 0);
nv_wr32(dev, 0x3210, 0); nv_wr32(dev, 0x3210, 0);
nv_wr32(dev, 0x3270, 0); nv_wr32(dev, 0x3270, 0);
nv_wr32(dev, 0x2044, 0x01003fff);
/* Enable dummy channels setup by nv50_instmem.c */ /* Enable dummy channels setup by nv50_instmem.c */
nv50_fifo_channel_enable(dev, 0); nv50_fifo_channel_enable(dev, 0);
...@@ -273,7 +274,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan) ...@@ -273,7 +274,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
(4 << 24) /* SEARCH_FULL */ | (4 << 24) /* SEARCH_FULL */ |
(chan->ramht->gpuobj->cinst >> 4)); (chan->ramht->gpuobj->cinst >> 4));
nv_wo32(ramfc, 0x44, 0x2101ffff); nv_wo32(ramfc, 0x44, 0x01003fff);
nv_wo32(ramfc, 0x60, 0x7fffffff); nv_wo32(ramfc, 0x60, 0x7fffffff);
nv_wo32(ramfc, 0x40, 0x00000000); nv_wo32(ramfc, 0x40, 0x00000000);
nv_wo32(ramfc, 0x7c, 0x30000001); nv_wo32(ramfc, 0x7c, 0x30000001);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment