Commit 0839ccb8, authored by Keith Packard, committed by Ingo Molnar

i915: use io-mapping interfaces instead of a variety of mapping kludges

Impact: optimize/clean-up the IO mapping implementation of the i915 DRM driver

Switch the i915 device aperture mapping to the io-mapping interface, taking
advantage of the cleaner API to extend it across all of the mapping uses,
including both pwrite and relocation updates.

This dramatically improves performance on 64-bit kernels which were using
the same slow path as 32-bit non-HIGHMEM kernels prior to this patch.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 9663f2e6
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#define _I915_DRV_H_ #define _I915_DRV_H_
#include "i915_reg.h" #include "i915_reg.h"
#include <linux/io-mapping.h>
/* General customization: /* General customization:
*/ */
...@@ -246,6 +247,8 @@ typedef struct drm_i915_private { ...@@ -246,6 +247,8 @@ typedef struct drm_i915_private {
struct { struct {
struct drm_mm gtt_space; struct drm_mm gtt_space;
struct io_mapping *gtt_mapping;
/** /**
* List of objects currently involved in rendering from the * List of objects currently involved in rendering from the
* ringbuffer. * ringbuffer.
......
...@@ -171,35 +171,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ...@@ -171,35 +171,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
return 0; return 0;
} }
/* /* This is the fast write path which cannot handle
* Try to write quickly with an atomic kmap. Return true on success. * page faults in the source data
*
* If this fails (which includes a partial write), we'll redo the whole
* thing with the slow version.
*
* This is a workaround for the low performance of iounmap (approximate
* 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels
* happens to let us map card memory without taking IPIs. When the vmap
* rework lands we should be able to dump this hack.
*/ */
static inline int fast_user_write(unsigned long pfn, char __user *user_data,
int l, int o) static inline int
fast_user_write(struct io_mapping *mapping,
loff_t page_base, int page_offset,
char __user *user_data,
int length)
{ {
#ifdef CONFIG_HIGHMEM
unsigned long unwritten;
char *vaddr_atomic; char *vaddr_atomic;
unsigned long unwritten;
vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0); vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
#if WATCH_PWRITE unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n", user_data, length);
i, o, l, pfn, vaddr_atomic); io_mapping_unmap_atomic(vaddr_atomic);
#endif if (unwritten)
unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l); return -EFAULT;
kunmap_atomic(vaddr_atomic, KM_USER0); return 0;
return !unwritten; }
#else
/* Here's the write path which can sleep for
* page faults
*/
static inline int
slow_user_write(struct io_mapping *mapping,
loff_t page_base, int page_offset,
char __user *user_data,
int length)
{
char __iomem *vaddr;
unsigned long unwritten;
vaddr = io_mapping_map_wc(mapping, page_base);
if (vaddr == NULL)
return -EFAULT;
unwritten = __copy_from_user(vaddr + page_offset,
user_data, length);
io_mapping_unmap(vaddr);
if (unwritten)
return -EFAULT;
return 0; return 0;
#endif
} }
static int static int
...@@ -208,10 +223,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, ...@@ -208,10 +223,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
struct drm_file *file_priv) struct drm_file *file_priv)
{ {
struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_i915_gem_object *obj_priv = obj->driver_private;
drm_i915_private_t *dev_priv = dev->dev_private;
ssize_t remain; ssize_t remain;
loff_t offset; loff_t offset, page_base;
char __user *user_data; char __user *user_data;
int ret = 0; int page_offset, page_length;
int ret;
user_data = (char __user *) (uintptr_t) args->data_ptr; user_data = (char __user *) (uintptr_t) args->data_ptr;
remain = args->size; remain = args->size;
...@@ -235,57 +252,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, ...@@ -235,57 +252,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
obj_priv->dirty = 1; obj_priv->dirty = 1;
while (remain > 0) { while (remain > 0) {
unsigned long pfn;
int i, o, l;
/* Operation in this page /* Operation in this page
* *
* i = page number * page_base = page offset within aperture
* o = offset within page * page_offset = offset within page
* l = bytes to copy * page_length = bytes to copy for this page
*/ */
i = offset >> PAGE_SHIFT; page_base = (offset & ~(PAGE_SIZE-1));
o = offset & (PAGE_SIZE-1); page_offset = offset & (PAGE_SIZE-1);
l = remain; page_length = remain;
if ((o + l) > PAGE_SIZE) if ((page_offset + remain) > PAGE_SIZE)
l = PAGE_SIZE - o; page_length = PAGE_SIZE - page_offset;
pfn = (dev->agp->base >> PAGE_SHIFT) + i; ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
page_offset, user_data, page_length);
if (!fast_user_write(pfn, user_data, l, o)) {
unsigned long unwritten; /* If we get a fault while copying data, then (presumably) our
char __iomem *vaddr; * source page isn't available. In this case, use the
* non-atomic function
vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); */
#if WATCH_PWRITE if (ret) {
DRM_INFO("pwrite slow i %d o %d l %d " ret = slow_user_write (dev_priv->mm.gtt_mapping,
"pfn %ld vaddr %p\n", page_base, page_offset,
i, o, l, pfn, vaddr); user_data, page_length);
#endif if (ret)
if (vaddr == NULL) {
ret = -EFAULT;
goto fail;
}
unwritten = __copy_from_user(vaddr + o, user_data, l);
#if WATCH_PWRITE
DRM_INFO("unwritten %ld\n", unwritten);
#endif
iounmap(vaddr);
if (unwritten) {
ret = -EFAULT;
goto fail; goto fail;
}
} }
remain -= l; remain -= page_length;
user_data += l; user_data += page_length;
offset += l; offset += page_length;
} }
#if WATCH_PWRITE && 1
i915_gem_clflush_object(obj);
i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
i915_gem_clflush_object(obj);
#endif
fail: fail:
i915_gem_object_unpin(obj); i915_gem_object_unpin(obj);
...@@ -1503,12 +1500,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, ...@@ -1503,12 +1500,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
struct drm_i915_gem_exec_object *entry) struct drm_i915_gem_exec_object *entry)
{ {
struct drm_device *dev = obj->dev; struct drm_device *dev = obj->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_relocation_entry reloc; struct drm_i915_gem_relocation_entry reloc;
struct drm_i915_gem_relocation_entry __user *relocs; struct drm_i915_gem_relocation_entry __user *relocs;
struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_i915_gem_object *obj_priv = obj->driver_private;
int i, ret; int i, ret;
uint32_t last_reloc_offset = -1; void __iomem *reloc_page;
void __iomem *reloc_page = NULL;
/* Choose the GTT offset for our buffer and put it there. */ /* Choose the GTT offset for our buffer and put it there. */
ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
...@@ -1631,26 +1628,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, ...@@ -1631,26 +1628,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
* perform. * perform.
*/ */
reloc_offset = obj_priv->gtt_offset + reloc.offset; reloc_offset = obj_priv->gtt_offset + reloc.offset;
if (reloc_page == NULL || reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
(last_reloc_offset & ~(PAGE_SIZE - 1)) != (reloc_offset &
(reloc_offset & ~(PAGE_SIZE - 1))) { ~(PAGE_SIZE - 1)));
if (reloc_page != NULL)
iounmap(reloc_page);
reloc_page = ioremap_wc(dev->agp->base +
(reloc_offset &
~(PAGE_SIZE - 1)),
PAGE_SIZE);
last_reloc_offset = reloc_offset;
if (reloc_page == NULL) {
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -ENOMEM;
}
}
reloc_entry = (uint32_t __iomem *)(reloc_page + reloc_entry = (uint32_t __iomem *)(reloc_page +
(reloc_offset & (PAGE_SIZE - 1))); (reloc_offset & (PAGE_SIZE - 1)));
reloc_val = target_obj_priv->gtt_offset + reloc.delta; reloc_val = target_obj_priv->gtt_offset + reloc.delta;
#if WATCH_BUF #if WATCH_BUF
...@@ -1659,6 +1641,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, ...@@ -1659,6 +1641,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
readl(reloc_entry), reloc_val); readl(reloc_entry), reloc_val);
#endif #endif
writel(reloc_val, reloc_entry); writel(reloc_val, reloc_entry);
io_mapping_unmap_atomic(reloc_page);
/* Write the updated presumed offset for this entry back out /* Write the updated presumed offset for this entry back out
* to the user. * to the user.
...@@ -1674,9 +1657,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, ...@@ -1674,9 +1657,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
drm_gem_object_unreference(target_obj); drm_gem_object_unreference(target_obj);
} }
if (reloc_page != NULL)
iounmap(reloc_page);
#if WATCH_BUF #if WATCH_BUF
if (0) if (0)
i915_gem_dump_object(obj, 128, __func__, ~0); i915_gem_dump_object(obj, 128, __func__, ~0);
...@@ -2518,6 +2498,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, ...@@ -2518,6 +2498,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
if (ret != 0) if (ret != 0)
return ret; return ret;
dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
dev->agp->agp_info.aper_size
* 1024 * 1024);
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
BUG_ON(!list_empty(&dev_priv->mm.active_list)); BUG_ON(!list_empty(&dev_priv->mm.active_list));
BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
...@@ -2535,11 +2519,13 @@ int ...@@ -2535,11 +2519,13 @@ int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv) struct drm_file *file_priv)
{ {
drm_i915_private_t *dev_priv = dev->dev_private;
int ret; int ret;
ret = i915_gem_idle(dev); ret = i915_gem_idle(dev);
drm_irq_uninstall(dev); drm_irq_uninstall(dev);
io_mapping_free(dev_priv->mm.gtt_mapping);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment