Commit c59a333f authored by Chris Wilson

drm/i915: Only wait on a pending flip if we intend to write to the buffer

... as if we are only reading from it, we can do so concurrently with the
queued flip.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
parent 3d3dc149
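
The policy the patch adopts (wait on a pending flip only for buffers the batch will write) can be sketched in a few lines of stand-alone C. The names below, fake_obj and accumulate_flips, are invented for illustration and are not kernel API; the real logic lives in the i915_gem_object_set_to_gpu_domain() and i915_gem_execbuffer_wait_for_flips() hunks in the diff that follows.

    /* Stand-alone model of the flip-wait decision; illustration only, not kernel code. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct fake_obj {                       /* invented name, stands in for drm_i915_gem_object */
            uint32_t pending_write_domain;  /* non-zero if the batch will write to the buffer */
            uint32_t pending_flip;          /* bitmask of display planes with a queued flip */
    };

    /* Mirrors the new hunk in i915_gem_object_set_to_gpu_domain():
     * only objects the batch writes to contribute their pending flips. */
    static uint32_t accumulate_flips(const struct fake_obj *objs, size_t count)
    {
            uint32_t flips = 0;
            for (size_t i = 0; i < count; i++)
                    if (objs[i].pending_write_domain)
                            flips |= objs[i].pending_flip;
            return flips;
    }

    int main(void)
    {
            struct fake_obj objs[] = {
                    { .pending_write_domain = 0, .pending_flip = 1u << 0 }, /* only read: no wait */
                    { .pending_write_domain = 1, .pending_flip = 1u << 1 }, /* written: must wait */
            };
            uint32_t flips = accumulate_flips(objs, 2);

            /* One MI_WAIT_FOR_EVENT would be emitted per set plane bit. */
            for (uint32_t plane = 0; flips >> plane; plane++)
                    if ((flips >> plane) & 1)
                            printf("would wait for plane %c flip\n", plane ? 'B' : 'A');
            return 0;
    }

In this example only plane B produces a wait; the buffer that is merely read from no longer stalls the batch behind its queued flip.
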
@@ -37,6 +37,7 @@ struct change_domains {
 	uint32_t invalidate_domains;
 	uint32_t flush_domains;
 	uint32_t flush_rings;
+	uint32_t flips;
 };
 
 /*
@@ -190,6 +191,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
 		i915_gem_release_mmap(obj);
 
+	if (obj->base.pending_write_domain)
+		cd->flips |= atomic_read(&obj->pending_flip);
+
 	/* The actual obj->write_domain will be updated with
 	 * pending_write_domain after we emit the accumulated flush for all
 	 * of our domain changes in execbuffers (which clears objects'
@@ -773,6 +777,39 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 	return intel_ring_sync(to, from, seqno - 1);
 }
 
+static int
+i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
+{
+	u32 plane, flip_mask;
+	int ret;
+
+	/* Check for any pending flips. As we only maintain a flip queue depth
+	 * of 1, we can simply insert a WAIT for the next display flip prior
+	 * to executing the batch and avoid stalling the CPU.
+	 */
+	for (plane = 0; flips >> plane; plane++) {
+		if (((flips >> plane) & 1) == 0)
+			continue;
+
+		if (plane)
+			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+		else
+			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
+	return 0;
+}
+
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
@@ -781,9 +818,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 	struct change_domains cd;
 	int ret;
 
-	cd.invalidate_domains = 0;
-	cd.flush_domains = 0;
-	cd.flush_rings = 0;
+	memset(&cd, 0, sizeof(cd));
 	list_for_each_entry(obj, objects, exec_list)
 		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
@@ -796,6 +831,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 			return ret;
 	}
 
+	if (cd.flips) {
+		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+		if (ret)
+			return ret;
+	}
+
 	list_for_each_entry(obj, objects, exec_list) {
 		ret = i915_gem_execbuffer_sync_rings(obj, ring);
 		if (ret)
@@ -842,47 +883,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 	return 0;
 }
 
-static int
-i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
-				   struct list_head *objects)
-{
-	struct drm_i915_gem_object *obj;
-	int flips;
-
-	/* Check for any pending flips. As we only maintain a flip queue depth
-	 * of 1, we can simply insert a WAIT for the next display flip prior
-	 * to executing the batch and avoid stalling the CPU.
-	 */
-	flips = 0;
-	list_for_each_entry(obj, objects, exec_list) {
-		if (obj->base.write_domain)
-			flips |= atomic_read(&obj->pending_flip);
-	}
-	if (flips) {
-		int plane, flip_mask, ret;
-
-		for (plane = 0; flips >> plane; plane++) {
-			if (((flips >> plane) & 1) == 0)
-				continue;
-
-			if (plane)
-				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-			else
-				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-
-			ret = intel_ring_begin(ring, 2);
-			if (ret)
-				return ret;
-
-			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-			intel_ring_emit(ring, MI_NOOP);
-			intel_ring_advance(ring);
-		}
-	}
-
-	return 0;
-}
-
 static void
 i915_gem_execbuffer_move_to_active(struct list_head *objects,
 				   struct intel_ring_buffer *ring,
@@ -1133,10 +1133,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
-	if (ret)
-		goto err;
-
 	seqno = i915_gem_next_request_seqno(ring);
 	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
 		if (seqno < ring->sync_seqno[i]) {
...