Commit 8489731c authored by Daniel Vetter

drm/i915: move clflushing into shmem_pread

This is obviously gonna slow down pread. But for a half-way realistic
micro-benchmark, it doesn't matter: Non-broken userspace reads back
data from the gpu once before the gpu again dirties it.

So all this ranged clflush tracking is just a waste of time.

No pread performance change (neglecting the dumb benchmark of
constantly reading the same data) measured.

As an added bonus, this avoids clflush on read on coherent objects.
Which means that partial preads on snb are now roughly 4x as fast.
This will be useful for e.g. the libva encoder - when I finally get
around to fixing that up.

v2: Properly sync with the gpu on LLC machines.
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent 6d5cd9cb
...@@ -304,12 +304,25 @@ i915_gem_shmem_pread(struct drm_device *dev, ...@@ -304,12 +304,25 @@ i915_gem_shmem_pread(struct drm_device *dev,
int shmem_page_offset, page_length, ret = 0; int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling; int obj_do_bit17_swizzling, page_do_bit17_swizzling;
int hit_slowpath = 0; int hit_slowpath = 0;
int needs_clflush = 0;
user_data = (char __user *) (uintptr_t) args->data_ptr; user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size; remain = args->size;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
/* If we're not in the cpu read domain, set ourself into the gtt
* read domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens. */
if (obj->cache_level == I915_CACHE_NONE)
needs_clflush = 1;
ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (ret)
return ret;
}
offset = args->offset; offset = args->offset;
while (remain > 0) { while (remain > 0) {
...@@ -337,6 +350,9 @@ i915_gem_shmem_pread(struct drm_device *dev, ...@@ -337,6 +350,9 @@ i915_gem_shmem_pread(struct drm_device *dev,
if (!page_do_bit17_swizzling) { if (!page_do_bit17_swizzling) {
vaddr = kmap_atomic(page); vaddr = kmap_atomic(page);
if (needs_clflush)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
ret = __copy_to_user_inatomic(user_data, ret = __copy_to_user_inatomic(user_data,
vaddr + shmem_page_offset, vaddr + shmem_page_offset,
page_length); page_length);
...@@ -350,6 +366,10 @@ i915_gem_shmem_pread(struct drm_device *dev, ...@@ -350,6 +366,10 @@ i915_gem_shmem_pread(struct drm_device *dev,
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
vaddr = kmap(page); vaddr = kmap(page);
if (needs_clflush)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
if (page_do_bit17_swizzling) if (page_do_bit17_swizzling)
ret = __copy_to_user_swizzled(user_data, ret = __copy_to_user_swizzled(user_data,
vaddr, shmem_page_offset, vaddr, shmem_page_offset,
...@@ -430,12 +450,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ...@@ -430,12 +450,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pread(obj, args->offset, args->size); trace_i915_gem_object_pread(obj, args->offset, args->size);
ret = i915_gem_object_set_cpu_read_domain_range(obj,
args->offset,
args->size);
if (ret)
goto out;
ret = i915_gem_shmem_pread(dev, obj, args, file); ret = i915_gem_shmem_pread(dev, obj, args, file);
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment