Commit ed13033f authored by Chris Wilson

drm/i915/cmdparser: Only cache the dst vmap

For simplicity, we want to continue using a contiguous mapping of the
command buffer, but we can reduce the number of vmappings we hold by
switching over to a page-by-page copy from the user batch buffer to the
shadow. The cost of saving one linear mapping is about 5% in trivial
workloads, which is more or less the overhead of calling kmap_atomic().
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-34-chris@chris-wilson.co.uk
parent 0b537272
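
For readers who want the arithmetic of the new copy loop in isolation, below
is a minimal user-space model of it, assuming 4 KiB pages. fake_get_page()
is a hypothetical stand-in for kmap_atomic(i915_gem_object_get_page(...));
the clflush handling and error paths of the real function are omitted. The
point it demonstrates: the intra-page offset applies only to the first page,
each iteration copies at most one page, and the offset resets to zero after
the first pass.

#include <stdio.h>
#include <string.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)
#define NPAGES     4

/* Stands in for the source GEM object's backing pages. */
static unsigned char pages[NPAGES][PAGE_SIZE];

/* Hypothetical stand-in for kmap_atomic(i915_gem_object_get_page(src, n)). */
static void *fake_get_page(unsigned int n)
{
	return pages[n];
}

/* Same shape as the loop added by this commit, minus clflushing. */
static void copy_batch(unsigned char *dst, unsigned int start, unsigned int len)
{
	unsigned int offset = start & (PAGE_SIZE - 1); /* offset_in_page() */
	unsigned char *ptr = dst;
	unsigned int n;

	for (n = start >> PAGE_SHIFT; len; n++) {
		unsigned int chunk = len < PAGE_SIZE - offset ?
				     len : PAGE_SIZE - offset;

		memcpy(ptr, (unsigned char *)fake_get_page(n) + offset, chunk);
		ptr += chunk;
		len -= chunk;
		offset = 0; /* only the first page is read from mid-page */
	}
}

int main(void)
{
	unsigned char dst[128];
	unsigned int i;

	for (i = 0; i < NPAGES * PAGE_SIZE; i++)
		pages[i >> PAGE_SHIFT][i & (PAGE_SIZE - 1)] = (unsigned char)i;

	/* A range that starts mid-page and crosses a page boundary. */
	copy_batch(dst, PAGE_SIZE - 16, 64);

	for (i = 0; i < 64; i++) {
		if (dst[i] != (unsigned char)(PAGE_SIZE - 16 + i)) {
			printf("mismatch at byte %u\n", i);
			return 1;
		}
	}
	printf("copied 64 bytes across a page boundary OK\n");
	return 0;
}
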
@@ -946,7 +946,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
 	unsigned int src_needs_clflush;
 	unsigned int dst_needs_clflush;
-	void *src, *dst;
+	void *dst, *ptr;
+	int offset, n;
 	int ret;
 
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -959,19 +960,12 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		goto unpin_src;
 	}
 
-	src = i915_gem_object_pin_map(src_obj, I915_MAP_WB);
-	if (IS_ERR(src)) {
-		dst = src;
-		goto unpin_dst;
-	}
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
 	if (IS_ERR(dst))
-		goto unmap_src;
+		goto unpin_dst;
 
-	src += batch_start_offset;
-	if (src_needs_clflush)
-		drm_clflush_virt_range(src, batch_len);
+	ptr = dst;
+	offset = offset_in_page(batch_start_offset);
 
 	/* We can avoid clflushing partial cachelines before the write if we
 	 * only every write full cache-lines. Since we know that both the
@@ -982,13 +976,24 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	if (dst_needs_clflush & CLFLUSH_BEFORE)
 		batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
 
-	memcpy(dst, src, batch_len);
+	for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+		int len = min_t(int, batch_len, PAGE_SIZE - offset);
+		void *vaddr;
+
+		vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+		if (src_needs_clflush)
+			drm_clflush_virt_range(vaddr + offset, len);
+		memcpy(ptr, vaddr + offset, len);
+		kunmap_atomic(vaddr);
+
+		ptr += len;
+		batch_len -= len;
+		offset = 0;
+	}
 
 	/* dst_obj is returned with vmap pinned */
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unmap_src:
-	i915_gem_object_unpin_map(src_obj);
 unpin_dst:
 	i915_gem_obj_finish_shmem_access(dst_obj);
 unpin_src:
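
One subtlety in the last hunk: when the destination needs CLFLUSH_BEFORE,
batch_len is rounded up to the CPU's clflush granularity so that only whole
cachelines are ever written, letting the partial-line flush be skipped; the
surrounding comment (truncated in the hunk above) notes both objects are
sized in multiples of PAGE_SIZE, so writing the rounded length cannot
overrun. A small sketch of that arithmetic, assuming a 64-byte clflush size
and mirroring what the kernel's roundup() macro computes for this case:

#include <assert.h>

/* Models roundup(batch_len, boot_cpu_data.x86_clflush_size), assuming
 * x86_clflush_size == 64. */
static unsigned int round_up_to_clflush(unsigned int len)
{
	const unsigned int clflush = 64;

	return ((len + clflush - 1) / clflush) * clflush;
}

int main(void)
{
	assert(round_up_to_clflush(1) == 64);   /* partial line -> whole line */
	assert(round_up_to_clflush(64) == 64);  /* already aligned */
	assert(round_up_to_clflush(65) == 128); /* spills into a second line */
	return 0;
}
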