Commit a2bc4695 authored by Chris Wilson's avatar Chris Wilson

drm/i915: Prepare object synchronisation for asynchronicity

We are about to specialize object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation for execbuffer. The general i915_gem_object_sync() will
be removed following the removal of CS flips in the near future.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarJohn Harrison <john.c.harrison@intel.com>
Reviewed-by: default avatarJoonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160909131201.16673-16-chris@chris-wilson.co.uk
parent 0f25dff6
......@@ -3221,8 +3221,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct drm_i915_gem_request *to);
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req,
unsigned int flags);
......
......@@ -2818,97 +2818,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return ret;
}
static int
__i915_gem_object_sync(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from)
{
int ret;
if (to->engine == from->engine)
return 0;
if (!i915.semaphores) {
ret = i915_wait_request(from,
from->i915->mm.interruptible |
I915_WAIT_LOCKED,
NULL,
NO_WAITBOOST);
if (ret)
return ret;
} else {
int idx = intel_engine_sync_index(from->engine, to->engine);
if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
trace_i915_gem_ring_sync_to(to, from);
ret = to->engine->semaphore.sync_to(to, from);
if (ret)
return ret;
from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
}
return 0;
}
/**
* i915_gem_object_sync - sync an object to a ring.
*
* @obj: object which may be in use on another ring.
* @to: request we are wishing to use
*
* This code is meant to abstract object synchronization with the GPU.
* Conceptually we serialise writes between engines inside the GPU.
* We only allow one engine to write into a buffer at any time, but
* multiple readers. To ensure each has a coherent view of memory, we must:
*
* - If there is an outstanding write request to the object, the new
* request must wait for it to complete (either CPU or in hw, requests
* on the same ring will be naturally ordered).
*
* - If we are a write request (pending_write_domain is set), the new
* request must wait for outstanding read requests to complete.
*
* Returns 0 if successful, else propagates up the lower layer error.
*/
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct drm_i915_gem_request *to)
{
struct i915_gem_active *active;
unsigned long active_mask;
int idx;
lockdep_assert_held(&obj->base.dev->struct_mutex);
active_mask = i915_gem_object_get_active(obj);
if (!active_mask)
return 0;
if (obj->base.pending_write_domain) {
active = obj->last_read;
} else {
active_mask = 1;
active = &obj->last_write;
}
for_each_active(active_mask, idx) {
struct drm_i915_gem_request *request;
int ret;
request = i915_gem_active_peek(&active[idx],
&obj->base.dev->struct_mutex);
if (!request)
continue;
ret = __i915_gem_object_sync(to, request);
if (ret)
return ret;
}
return 0;
}
static void __i915_vma_iounmap(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_pinned(vma));
......
......@@ -1133,7 +1133,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
struct drm_i915_gem_object *obj = vma->obj;
if (obj->flags & other_rings) {
ret = i915_gem_object_sync(obj, req);
ret = i915_gem_request_await_object
(req, obj, obj->base.pending_write_domain);
if (ret)
return ret;
}
......
......@@ -460,6 +460,93 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
return ERR_PTR(ret);
}
static int
i915_gem_request_await_request(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from)
{
int idx, ret;
GEM_BUG_ON(to == from);
if (to->engine == from->engine)
return 0;
idx = intel_engine_sync_index(from->engine, to->engine);
if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
trace_i915_gem_ring_sync_to(to, from);
if (!i915.semaphores) {
ret = i915_wait_request(from,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
NULL, NO_WAITBOOST);
if (ret)
return ret;
} else {
ret = to->engine->semaphore.sync_to(to, from);
if (ret)
return ret;
}
from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
return 0;
}
/**
* i915_gem_request_await_object - set this request to (async) wait upon a bo
*
* @to: request we are wishing to use
* @obj: object which may be in use on another ring.
*
* This code is meant to abstract object synchronization with the GPU.
* Conceptually we serialise writes between engines inside the GPU.
* We only allow one engine to write into a buffer at any time, but
* multiple readers. To ensure each has a coherent view of memory, we must:
*
* - If there is an outstanding write request to the object, the new
* request must wait for it to complete (either CPU or in hw, requests
* on the same ring will be naturally ordered).
*
* - If we are a write request (pending_write_domain is set), the new
* request must wait for outstanding read requests to complete.
*
* Returns 0 if successful, else propagates up the lower layer error.
*/
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
bool write)
{
struct i915_gem_active *active;
unsigned long active_mask;
int idx;
if (write) {
active_mask = i915_gem_object_get_active(obj);
active = obj->last_read;
} else {
active_mask = 1;
active = &obj->last_write;
}
for_each_active(active_mask, idx) {
struct drm_i915_gem_request *request;
int ret;
request = i915_gem_active_peek(&active[idx],
&obj->base.dev->struct_mutex);
if (!request)
continue;
ret = i915_gem_request_await_request(to, request);
if (ret)
return ret;
}
return 0;
}
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
......
......@@ -209,6 +209,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
bool write);
void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
__i915_add_request(req, true)
......
......@@ -12273,7 +12273,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
goto cleanup_unpin;
}
ret = i915_gem_object_sync(obj, request);
ret = i915_gem_request_await_object(request, obj, false);
if (ret)
goto cleanup_request;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment