Commit 0201f1ec, authored by Chris Wilson, committed by Daniel Vetter

drm/i915: Replace the pending_gpu_write flag with an explicit seqno

As we always flush the GPU cache prior to emitting the breadcrumb, we no
longer have to worry about the deferred flush causing the
pending_gpu_write to be delayed. So we can instead utilize the known
last_write_seqno to hopefully minimise the wait times.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent e5f1d962
...@@ -121,14 +121,15 @@ static const char *cache_level_str(int type) ...@@ -121,14 +121,15 @@ static const char *cache_level_str(int type)
static void static void
describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
{ {
seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s", seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s",
&obj->base, &obj->base,
get_pin_flag(obj), get_pin_flag(obj),
get_tiling_flag(obj), get_tiling_flag(obj),
obj->base.size / 1024, obj->base.size / 1024,
obj->base.read_domains, obj->base.read_domains,
obj->base.write_domain, obj->base.write_domain,
obj->last_rendering_seqno, obj->last_read_seqno,
obj->last_write_seqno,
obj->last_fenced_seqno, obj->last_fenced_seqno,
cache_level_str(obj->cache_level), cache_level_str(obj->cache_level),
obj->dirty ? " dirty" : "", obj->dirty ? " dirty" : "",
...@@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m, ...@@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m,
seq_printf(m, "%s [%d]:\n", name, count); seq_printf(m, "%s [%d]:\n", name, count);
while (count--) { while (count--) {
seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s%s", seq_printf(m, " %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s",
err->gtt_offset, err->gtt_offset,
err->size, err->size,
err->read_domains, err->read_domains,
err->write_domain, err->write_domain,
err->seqno, err->rseqno, err->wseqno,
pin_flag(err->pinned), pin_flag(err->pinned),
tiling_flag(err->tiling), tiling_flag(err->tiling),
dirty_flag(err->dirty), dirty_flag(err->dirty),
......
...@@ -221,7 +221,7 @@ struct drm_i915_error_state { ...@@ -221,7 +221,7 @@ struct drm_i915_error_state {
struct drm_i915_error_buffer { struct drm_i915_error_buffer {
u32 size; u32 size;
u32 name; u32 name;
u32 seqno; u32 rseqno, wseqno;
u32 gtt_offset; u32 gtt_offset;
u32 read_domains; u32 read_domains;
u32 write_domain; u32 write_domain;
...@@ -894,12 +894,6 @@ struct drm_i915_gem_object { ...@@ -894,12 +894,6 @@ struct drm_i915_gem_object {
*/ */
unsigned int dirty:1; unsigned int dirty:1;
/**
* This is set if the object has been written to since the last
* GPU flush.
*/
unsigned int pending_gpu_write:1;
/** /**
* Fence register bits (if any) for this object. Will be set * Fence register bits (if any) for this object. Will be set
* as needed when mapped into the GTT. * as needed when mapped into the GTT.
...@@ -992,7 +986,8 @@ struct drm_i915_gem_object { ...@@ -992,7 +986,8 @@ struct drm_i915_gem_object {
struct intel_ring_buffer *ring; struct intel_ring_buffer *ring;
/** Breadcrumb of last rendering to the buffer. */ /** Breadcrumb of last rendering to the buffer. */
uint32_t last_rendering_seqno; uint32_t last_read_seqno;
uint32_t last_write_seqno;
/** Breadcrumb of last fenced GPU access to the buffer. */ /** Breadcrumb of last fenced GPU access to the buffer. */
uint32_t last_fenced_seqno; uint32_t last_fenced_seqno;
...@@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev); ...@@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev);
int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
gfp_t gfpmask); gfp_t gfpmask);
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj);
int i915_gem_object_sync(struct drm_i915_gem_object *obj, int i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *to); struct intel_ring_buffer *to);
void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
......
...@@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, ...@@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
list_move_tail(&obj->ring_list, &ring->active_list); list_move_tail(&obj->ring_list, &ring->active_list);
obj->last_rendering_seqno = seqno; obj->last_read_seqno = seqno;
if (obj->fenced_gpu_access) { if (obj->fenced_gpu_access) {
obj->last_fenced_seqno = seqno; obj->last_fenced_seqno = seqno;
...@@ -1461,7 +1461,8 @@ static void ...@@ -1461,7 +1461,8 @@ static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{ {
list_del_init(&obj->ring_list); list_del_init(&obj->ring_list);
obj->last_rendering_seqno = 0; obj->last_read_seqno = 0;
obj->last_write_seqno = 0;
obj->last_fenced_seqno = 0; obj->last_fenced_seqno = 0;
} }
...@@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) ...@@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
obj->fenced_gpu_access = false; obj->fenced_gpu_access = false;
obj->active = 0; obj->active = 0;
obj->pending_gpu_write = false;
drm_gem_object_unreference(&obj->base); drm_gem_object_unreference(&obj->base);
WARN_ON(i915_verify_lists(dev)); WARN_ON(i915_verify_lists(dev));
...@@ -1812,7 +1812,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) ...@@ -1812,7 +1812,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
struct drm_i915_gem_object, struct drm_i915_gem_object,
ring_list); ring_list);
if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) if (!i915_seqno_passed(seqno, obj->last_read_seqno))
break; break;
if (obj->base.write_domain != 0) if (obj->base.write_domain != 0)
...@@ -2036,9 +2036,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) ...@@ -2036,9 +2036,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
* Ensures that all rendering to the object has completed and the object is * Ensures that all rendering to the object has completed and the object is
* safe to unbind from the GTT or access from the CPU. * safe to unbind from the GTT or access from the CPU.
*/ */
int static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
bool readonly)
{ {
u32 seqno;
int ret; int ret;
/* This function only exists to support waiting for existing rendering, /* This function only exists to support waiting for existing rendering,
...@@ -2049,13 +2051,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) ...@@ -2049,13 +2051,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
/* If there is rendering queued on the buffer being evicted, wait for /* If there is rendering queued on the buffer being evicted, wait for
* it. * it.
*/ */
if (obj->active) { if (readonly)
ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno); seqno = obj->last_write_seqno;
else
seqno = obj->last_read_seqno;
if (seqno == 0)
return 0;
ret = i915_wait_seqno(obj->ring, seqno);
if (ret) if (ret)
return ret; return ret;
i915_gem_retire_requests_ring(obj->ring);
/* Manually manage the write flush as we may have not yet retired
* the buffer.
*/
if (obj->last_write_seqno &&
i915_seqno_passed(seqno, obj->last_write_seqno)) {
obj->last_write_seqno = 0;
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
} }
i915_gem_retire_requests_ring(obj->ring);
return 0; return 0;
} }
...@@ -2074,10 +2090,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) ...@@ -2074,10 +2090,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
if (ret) if (ret)
return ret; return ret;
ret = i915_gem_check_olr(obj->ring, ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
obj->last_rendering_seqno);
if (ret) if (ret)
return ret; return ret;
i915_gem_retire_requests_ring(obj->ring); i915_gem_retire_requests_ring(obj->ring);
} }
...@@ -2137,7 +2153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ...@@ -2137,7 +2153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto out; goto out;
if (obj->active) { if (obj->active) {
seqno = obj->last_rendering_seqno; seqno = obj->last_read_seqno;
ring = obj->ring; ring = obj->ring;
} }
...@@ -2192,11 +2208,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, ...@@ -2192,11 +2208,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
return 0; return 0;
if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
return i915_gem_object_wait_rendering(obj); return i915_gem_object_wait_rendering(obj, false);
idx = intel_ring_sync_index(from, to); idx = intel_ring_sync_index(from, to);
seqno = obj->last_rendering_seqno; seqno = obj->last_read_seqno;
if (seqno <= from->sync_seqno[idx]) if (seqno <= from->sync_seqno[idx])
return 0; return 0;
...@@ -2940,11 +2956,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) ...@@ -2940,11 +2956,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret) if (ret)
return ret; return ret;
if (obj->pending_gpu_write || write) { ret = i915_gem_object_wait_rendering(obj, !write);
ret = i915_gem_object_wait_rendering(obj);
if (ret) if (ret)
return ret; return ret;
}
i915_gem_object_flush_cpu_write_domain(obj); i915_gem_object_flush_cpu_write_domain(obj);
...@@ -3115,7 +3129,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) ...@@ -3115,7 +3129,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
return ret; return ret;
} }
ret = i915_gem_object_wait_rendering(obj); ret = i915_gem_object_wait_rendering(obj, false);
if (ret) if (ret)
return ret; return ret;
...@@ -3143,11 +3157,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) ...@@ -3143,11 +3157,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (ret) if (ret)
return ret; return ret;
if (write || obj->pending_gpu_write) { ret = i915_gem_object_wait_rendering(obj, !write);
ret = i915_gem_object_wait_rendering(obj);
if (ret) if (ret)
return ret; return ret;
}
i915_gem_object_flush_gtt_write_domain(obj); i915_gem_object_flush_gtt_write_domain(obj);
......
...@@ -954,7 +954,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, ...@@ -954,7 +954,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
i915_gem_object_move_to_active(obj, ring, seqno); i915_gem_object_move_to_active(obj, ring, seqno);
if (obj->base.write_domain) { if (obj->base.write_domain) {
obj->dirty = 1; obj->dirty = 1;
obj->pending_gpu_write = true; obj->last_write_seqno = seqno;
list_move_tail(&obj->gpu_write_list, list_move_tail(&obj->gpu_write_list,
&ring->gpu_write_list); &ring->gpu_write_list);
if (obj->pin_count) /* check for potential scanout */ if (obj->pin_count) /* check for potential scanout */
......
...@@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, ...@@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
{ {
err->size = obj->base.size; err->size = obj->base.size;
err->name = obj->base.name; err->name = obj->base.name;
err->seqno = obj->last_rendering_seqno; err->rseqno = obj->last_read_seqno;
err->wseqno = obj->last_write_seqno;
err->gtt_offset = obj->gtt_offset; err->gtt_offset = obj->gtt_offset;
err->read_domains = obj->base.read_domains; err->read_domains = obj->base.read_domains;
err->write_domain = obj->base.write_domain; err->write_domain = obj->base.write_domain;
...@@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, ...@@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
if (obj->ring != ring) if (obj->ring != ring)
continue; continue;
if (i915_seqno_passed(seqno, obj->last_rendering_seqno)) if (i915_seqno_passed(seqno, obj->last_read_seqno))
continue; continue;
if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment