Commit 7b98da66 authored by Chris Wilson, committed by Daniel Vetter

drm/i915: Force CPU synchronisation even if userspace requests ASYNC

The goal here was to minimise any work or checks done inside the kernel
that were not strictly required. For a userspace that assumes complete
control over the cache domains, the kernel is usually using outdated
information and may trigger clflushes where none were required.

However, swapping is a situation where userspace has no knowledge of the
domain transfer, and will leave the object in the CPU cache. The kernel
must flush this out to the backing storage prior to use with the GPU. As
we use an asynchronous task tracked by an implicit fence for this, we
also need to cancel the ASYNC flag on the object so that the object will
wait for the clflush to complete before being executed. This also absolves
userspace of the responsibility imposed by commit 77ae9957 ("drm/i915:
Enable userspace to opt-out of implicit fencing") that it needed to ensure
that the object was out of the CPU cache prior to use on the GPU.
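
For illustration, a minimal userspace-side sketch of the opt-out in
question (hypothetical helper; struct drm_i915_gem_exec_object2 and
EXEC_OBJECT_ASYNC are the real uapi names from include/uapi/drm/i915_drm.h):

	/*
	 * Hypothetical sketch: userspace opting out of implicit fencing.
	 * After this patch the kernel clears EXEC_OBJECT_ASYNC itself
	 * whenever it queues a clflush, so the batch still waits for the
	 * flush fence even though ASYNC was requested here.
	 */
	#include <stdint.h>
	#include <string.h>
	#include <drm/i915_drm.h>

	static void exec_object_init_async(struct drm_i915_gem_exec_object2 *entry,
					   uint32_t handle)
	{
		memset(entry, 0, sizeof(*entry));
		entry->handle = handle;
		/* Userspace asserts full control over ordering for this object. */
		entry->flags = EXEC_OBJECT_ASYNC;
	}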

Fixes: 77ae9957 ("drm/i915: Enable userspace to opt-out of implicit fencing")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101571
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170721145037.25105-5-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit 0f46daa1)
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent adf27835
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -114,7 +114,7 @@ i915_clflush_notify(struct i915_sw_fence *fence,
 	return NOTIFY_DONE;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags)
 {
 	struct clflush *clflush;
@@ -128,7 +128,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 */
 	if (!i915_gem_object_has_struct_page(obj)) {
 		obj->cache_dirty = false;
-		return;
+		return false;
 	}
 
 	/* If the GPU is snooping the contents of the CPU cache,
@@ -140,7 +140,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * tracking.
 	 */
 	if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
-		return;
+		return false;
 
 	trace_i915_gem_object_clflush(obj);
@@ -179,4 +179,5 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	}
 
 	obj->cache_dirty = false;
+	return true;
 }
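
As a reading aid, the contract this return value establishes (my summary,
not part of the patch; identifiers as in the hunks above and below):

	/*
	 * Sketch of the new calling convention: i915_gem_clflush_object()
	 * returns true only if it actually queued a clflush, i.e. only
	 * then is there an implicit fence the caller must honour, e.g.
	 * by revoking EXEC_OBJECT_ASYNC on the execobj.
	 */
	if (i915_gem_clflush_object(obj, 0))
		entry->flags &= ~EXEC_OBJECT_ASYNC; /* wait for the flush */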
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -28,7 +28,7 @@
 struct drm_i915_private;
 struct drm_i915_gem_object;
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags);
 #define I915_CLFLUSH_FORCE BIT(0)
 #define I915_CLFLUSH_SYNC BIT(1)
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1825,7 +1825,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 		struct i915_vma *vma = exec_to_vma(entry);
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -1841,12 +1841,14 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 			eb->request->capture_list = capture;
 		}
 
+		if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+			if (i915_gem_clflush_object(obj, 0))
+				entry->flags &= ~EXEC_OBJECT_ASYNC;
+		}
+
 		if (entry->flags & EXEC_OBJECT_ASYNC)
 			goto skip_flushes;
 
-		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
-			i915_gem_clflush_object(obj, 0);
-
 		err = i915_gem_request_await_object
 			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
 		if (err)