Commit bcf50e27 authored by Chris Wilson

drm/i915: Handle pagefaults in execbuffer user relocations

Currently, if we hit a pagefault when applying a user relocation for the
execbuffer, we bail and return EFAULT to the application. Instead, we
need to unwind, drop the dev->struct_mutex, copy all the relocation
entries to a vmalloc array (to avoid any potential circular deadlocks
when resolving the pagefault), retake the mutex, and then apply the
relocations. Afterwards, we need to again drop the lock and copy the
vmalloc array back to userspace.

v2: Incorporate feedback from Daniel Vetter.
Reported-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent da79de97
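
The locking rule the patch enforces is worth spelling out: code holding dev->struct_mutex must never fault on user memory, because the fault handler may need that same mutex, so any copy that can fault has to happen with the lock dropped, into kernel-owned memory, after which the work is redone under the lock. Below is a minimal, self-contained userspace sketch of that dance, using a pthread mutex in place of dev->struct_mutex and memcpy in place of copy_from_user; all names and the simulated fault are illustrative, not the driver's actual code.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct reloc { uint32_t offset, delta; };

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for __copy_from_user_inatomic(): it must not sleep, so it is
 * allowed to fail instead. Here we simulate a fault on the first attempt. */
static int copy_inatomic(struct reloc *dst, const struct reloc *src, size_t n)
{
	static int faulted;
	if (!faulted++)
		return -1;	/* pretend the user page was not resident */
	memcpy(dst, src, n * sizeof(*dst));
	return 0;
}

static void apply_relocs(const struct reloc *r, size_t n)
{
	for (size_t i = 0; i < n; i++)
		printf("reloc %zu: offset=%u delta=%u\n",
		       i, (unsigned)r[i].offset, (unsigned)r[i].delta);
}

static int relocate(const struct reloc *user, size_t n)
{
	struct reloc stack[8];

	pthread_mutex_lock(&big_lock);
	if (n <= 8 && copy_inatomic(stack, user, n) == 0) {
		apply_relocs(stack, n);		/* fast path: no fault */
		pthread_mutex_unlock(&big_lock);
		return 0;
	}

	/* Slow path: drop the lock before any copy that may fault, so the
	 * fault handler can never deadlock against us. */
	pthread_mutex_unlock(&big_lock);

	struct reloc *copy = malloc(n * sizeof(*copy));	/* vmalloc stand-in */
	if (!copy)
		return -1;
	memcpy(copy, user, n * sizeof(*copy));	/* may "fault" safely here */

	pthread_mutex_lock(&big_lock);		/* retake and redo the work */
	apply_relocs(copy, n);
	pthread_mutex_unlock(&big_lock);

	free(copy);
	return 0;
}

int main(void)
{
	struct reloc user[2] = { {0, 16}, {4, 32} };
	return relocate(user, 2) || relocate(user, 2);
}

The first relocate() call takes the slow path because copy_inatomic() simulates a fault; the second completes on the fast path without ever dropping the lock.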
@@ -3254,192 +3254,230 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
 	return 0;
 }
 
-/**
- * Pin an object to the GTT and evaluate the relocations landing in it.
- */
 static int
-i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
-			     struct drm_file *file_priv,
-			     struct drm_i915_gem_exec_object2 *entry)
+i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+				   struct drm_file *file_priv,
+				   struct drm_i915_gem_exec_object2 *entry,
+				   struct drm_i915_gem_relocation_entry *reloc)
 {
 	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_gem_relocation_entry __user *user_relocs;
-	struct drm_gem_object *target_obj = NULL;
-	uint32_t target_handle = 0;
-	int i, ret = 0;
-
-	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
-	for (i = 0; i < entry->relocation_count; i++) {
-		struct drm_i915_gem_relocation_entry reloc;
-		uint32_t target_offset;
-
-		if (__copy_from_user_inatomic(&reloc,
-					      user_relocs+i,
-					      sizeof(reloc))) {
-			ret = -EFAULT;
-			break;
-		}
-
-		if (reloc.target_handle != target_handle) {
-			drm_gem_object_unreference(target_obj);
-
-			target_obj = drm_gem_object_lookup(dev, file_priv,
-							   reloc.target_handle);
-			if (target_obj == NULL) {
-				ret = -ENOENT;
-				break;
-			}
-
-			target_handle = reloc.target_handle;
-		}
-		target_offset = to_intel_bo(target_obj)->gtt_offset;
+	struct drm_gem_object *target_obj;
+	uint32_t target_offset;
+	int ret = -EINVAL;
+
+	target_obj = drm_gem_object_lookup(dev, file_priv,
+					   reloc->target_handle);
+	if (target_obj == NULL)
+		return -ENOENT;
+
+	target_offset = to_intel_bo(target_obj)->gtt_offset;
 
 #if WATCH_RELOC
-		DRM_INFO("%s: obj %p offset %08x target %d "
-			 "read %08x write %08x gtt %08x "
-			 "presumed %08x delta %08x\n",
-			 __func__,
-			 obj,
-			 (int) reloc.offset,
-			 (int) reloc.target_handle,
-			 (int) reloc.read_domains,
-			 (int) reloc.write_domain,
-			 (int) target_offset,
-			 (int) reloc.presumed_offset,
-			 reloc.delta);
+	DRM_INFO("%s: obj %p offset %08x target %d "
+		 "read %08x write %08x gtt %08x "
+		 "presumed %08x delta %08x\n",
+		 __func__,
+		 obj,
+		 (int) reloc->offset,
+		 (int) reloc->target_handle,
+		 (int) reloc->read_domains,
+		 (int) reloc->write_domain,
+		 (int) target_offset,
+		 (int) reloc->presumed_offset,
+		 reloc->delta);
 #endif
 
-		/* The target buffer should have appeared before us in the
-		 * exec_object list, so it should have a GTT space bound by now.
-		 */
-		if (target_offset == 0) {
-			DRM_ERROR("No GTT space found for object %d\n",
-				  reloc.target_handle);
-			ret = -EINVAL;
-			break;
-		}
-
-		/* Validate that the target is in a valid r/w GPU domain */
-		if (reloc.write_domain & (reloc.write_domain - 1)) {
-			DRM_ERROR("reloc with multiple write domains: "
-				  "obj %p target %d offset %d "
-				  "read %08x write %08x",
-				  obj, reloc.target_handle,
-				  (int) reloc.offset,
-				  reloc.read_domains,
-				  reloc.write_domain);
-			ret = -EINVAL;
-			break;
-		}
-		if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
-		    reloc.read_domains & I915_GEM_DOMAIN_CPU) {
-			DRM_ERROR("reloc with read/write CPU domains: "
-				  "obj %p target %d offset %d "
-				  "read %08x write %08x",
-				  obj, reloc.target_handle,
-				  (int) reloc.offset,
-				  reloc.read_domains,
-				  reloc.write_domain);
-			ret = -EINVAL;
-			break;
-		}
-		if (reloc.write_domain && target_obj->pending_write_domain &&
-		    reloc.write_domain != target_obj->pending_write_domain) {
-			DRM_ERROR("Write domain conflict: "
-				  "obj %p target %d offset %d "
-				  "new %08x old %08x\n",
-				  obj, reloc.target_handle,
-				  (int) reloc.offset,
-				  reloc.write_domain,
-				  target_obj->pending_write_domain);
-			ret = -EINVAL;
-			break;
-		}
-
-		target_obj->pending_read_domains |= reloc.read_domains;
-		target_obj->pending_write_domain |= reloc.write_domain;
-
-		/* If the relocation already has the right value in it, no
-		 * more work needs to be done.
-		 */
-		if (target_offset == reloc.presumed_offset)
-			continue;
-
-		/* Check that the relocation address is valid... */
-		if (reloc.offset > obj->base.size - 4) {
-			DRM_ERROR("Relocation beyond object bounds: "
-				  "obj %p target %d offset %d size %d.\n",
-				  obj, reloc.target_handle,
-				  (int) reloc.offset, (int) obj->base.size);
-			ret = -EINVAL;
-			break;
-		}
-		if (reloc.offset & 3) {
-			DRM_ERROR("Relocation not 4-byte aligned: "
-				  "obj %p target %d offset %d.\n",
-				  obj, reloc.target_handle,
-				  (int) reloc.offset);
-			ret = -EINVAL;
-			break;
-		}
-
-		/* and points to somewhere within the target object. */
-		if (reloc.delta >= target_obj->size) {
-			DRM_ERROR("Relocation beyond target object bounds: "
-				  "obj %p target %d delta %d size %d.\n",
-				  obj, reloc.target_handle,
-				  (int) reloc.delta, (int) target_obj->size);
-			ret = -EINVAL;
-			break;
-		}
-
-		reloc.delta += target_offset;
-		if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-			uint32_t page_offset = reloc.offset & ~PAGE_MASK;
-			char *vaddr;
-
-			vaddr = kmap_atomic(obj->pages[reloc.offset >> PAGE_SHIFT]);
-			*(uint32_t *)(vaddr + page_offset) = reloc.delta;
-			kunmap_atomic(vaddr);
-		} else {
-			uint32_t __iomem *reloc_entry;
-			void __iomem *reloc_page;
-
-			ret = i915_gem_object_set_to_gtt_domain(&obj->base, 1);
-			if (ret)
-				break;
-
-			/* Map the page containing the relocation we're going to perform. */
-			reloc.offset += obj->gtt_offset;
-			reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-							      reloc.offset & PAGE_MASK);
-			reloc_entry = (uint32_t __iomem *)
-				(reloc_page + (reloc.offset & ~PAGE_MASK));
-			iowrite32(reloc.delta, reloc_entry);
-			io_mapping_unmap_atomic(reloc_page);
-		}
-
-		/* and update the user's relocation entry */
-		reloc.presumed_offset = target_offset;
-		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
-					    &reloc.presumed_offset,
-					    sizeof(reloc.presumed_offset))) {
-			ret = -EFAULT;
-			break;
-		}
-	}
-
-	drm_gem_object_unreference(target_obj);
-	return ret;
-}
+	/* The target buffer should have appeared before us in the
+	 * exec_object list, so it should have a GTT space bound by now.
+	 */
+	if (target_offset == 0) {
+		DRM_ERROR("No GTT space found for object %d\n",
+			  reloc->target_handle);
+		goto err;
+	}
+
+	/* Validate that the target is in a valid r/w GPU domain */
+	if (reloc->write_domain & (reloc->write_domain - 1)) {
+		DRM_ERROR("reloc with multiple write domains: "
+			  "obj %p target %d offset %d "
+			  "read %08x write %08x",
+			  obj, reloc->target_handle,
+			  (int) reloc->offset,
+			  reloc->read_domains,
+			  reloc->write_domain);
+		goto err;
+	}
+	if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
+	    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
+		DRM_ERROR("reloc with read/write CPU domains: "
+			  "obj %p target %d offset %d "
+			  "read %08x write %08x",
+			  obj, reloc->target_handle,
+			  (int) reloc->offset,
+			  reloc->read_domains,
+			  reloc->write_domain);
+		goto err;
+	}
+	if (reloc->write_domain && target_obj->pending_write_domain &&
+	    reloc->write_domain != target_obj->pending_write_domain) {
+		DRM_ERROR("Write domain conflict: "
+			  "obj %p target %d offset %d "
+			  "new %08x old %08x\n",
+			  obj, reloc->target_handle,
+			  (int) reloc->offset,
+			  reloc->write_domain,
+			  target_obj->pending_write_domain);
+		goto err;
+	}
+
+	target_obj->pending_read_domains |= reloc->read_domains;
+	target_obj->pending_write_domain |= reloc->write_domain;
+
+	/* If the relocation already has the right value in it, no
+	 * more work needs to be done.
+	 */
+	if (target_offset == reloc->presumed_offset)
+		goto out;
+
+	/* Check that the relocation address is valid... */
+	if (reloc->offset > obj->base.size - 4) {
+		DRM_ERROR("Relocation beyond object bounds: "
+			  "obj %p target %d offset %d size %d.\n",
+			  obj, reloc->target_handle,
+			  (int) reloc->offset,
+			  (int) obj->base.size);
+		goto err;
+	}
+	if (reloc->offset & 3) {
+		DRM_ERROR("Relocation not 4-byte aligned: "
+			  "obj %p target %d offset %d.\n",
+			  obj, reloc->target_handle,
+			  (int) reloc->offset);
+		goto err;
+	}
+
+	/* and points to somewhere within the target object. */
+	if (reloc->delta >= target_obj->size) {
+		DRM_ERROR("Relocation beyond target object bounds: "
+			  "obj %p target %d delta %d size %d.\n",
+			  obj, reloc->target_handle,
+			  (int) reloc->delta,
+			  (int) target_obj->size);
+		goto err;
+	}
+
+	reloc->delta += target_offset;
+	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
+		char *vaddr;
+
+		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
+		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+		kunmap_atomic(vaddr);
+	} else {
+		struct drm_i915_private *dev_priv = dev->dev_private;
+		uint32_t __iomem *reloc_entry;
+		void __iomem *reloc_page;
+
+		ret = i915_gem_object_set_to_gtt_domain(&obj->base, 1);
+		if (ret)
+			goto err;
+
+		/* Map the page containing the relocation we're going to perform. */
+		reloc->offset += obj->gtt_offset;
+		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+						      reloc->offset & PAGE_MASK);
+		reloc_entry = (uint32_t __iomem *)
+			(reloc_page + (reloc->offset & ~PAGE_MASK));
+		iowrite32(reloc->delta, reloc_entry);
+		io_mapping_unmap_atomic(reloc_page);
+	}
+
+	/* and update the user's relocation entry */
+	reloc->presumed_offset = target_offset;
+
+out:
+	ret = 0;
+err:
+	drm_gem_object_unreference(target_obj);
+	return ret;
+}
+
+static int
+i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
+				    struct drm_file *file_priv,
+				    struct drm_i915_gem_exec_object2 *entry)
+{
+	struct drm_i915_gem_relocation_entry __user *user_relocs;
+	int i, ret;
+
+	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
+	for (i = 0; i < entry->relocation_count; i++) {
+		struct drm_i915_gem_relocation_entry reloc;
+
+		if (__copy_from_user_inatomic(&reloc,
+					      user_relocs+i,
+					      sizeof(reloc)))
+			return -EFAULT;
+
+		ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
+		if (ret)
+			return ret;
+
+		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
+					    &reloc.presumed_offset,
+					    sizeof(reloc.presumed_offset)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int
+i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
+					 struct drm_file *file_priv,
+					 struct drm_i915_gem_exec_object2 *entry,
+					 struct drm_i915_gem_relocation_entry *relocs)
+{
+	int i, ret;
+
+	for (i = 0; i < entry->relocation_count; i++) {
+		ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
+i915_gem_execbuffer_relocate(struct drm_device *dev,
+			     struct drm_file *file,
+			     struct drm_gem_object **object_list,
+			     struct drm_i915_gem_exec_object2 *exec_list,
+			     int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
+		obj->base.pending_read_domains = 0;
+		obj->base.pending_write_domain = 0;
+		ret = i915_gem_execbuffer_relocate_object(obj, file,
+							  &exec_list[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
 
 static int
-i915_gem_execbuffer_pin(struct drm_device *dev,
-			struct drm_file *file,
-			struct drm_gem_object **object_list,
-			struct drm_i915_gem_exec_object2 *exec_list,
-			int count)
+i915_gem_execbuffer_reserve(struct drm_device *dev,
+			    struct drm_file *file,
+			    struct drm_gem_object **object_list,
+			    struct drm_i915_gem_exec_object2 *exec_list,
+			    int count)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret, i, retry;
@@ -3501,6 +3539,87 @@ i915_gem_execbuffer_pin(struct drm_device *dev,
 	return 0;
 }
 
+static int
+i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
+				  struct drm_file *file,
+				  struct drm_gem_object **object_list,
+				  struct drm_i915_gem_exec_object2 *exec_list,
+				  int count)
+{
+	struct drm_i915_gem_relocation_entry *reloc;
+	int i, total, ret;
+
+	for (i = 0; i < count; i++) {
+		struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
+		obj->in_execbuffer = false;
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+
+	total = 0;
+	for (i = 0; i < count; i++)
+		total += exec_list[i].relocation_count;
+
+	reloc = drm_malloc_ab(total, sizeof(*reloc));
+	if (reloc == NULL) {
+		mutex_lock(&dev->struct_mutex);
+		return -ENOMEM;
+	}
+
+	total = 0;
+	for (i = 0; i < count; i++) {
+		struct drm_i915_gem_relocation_entry __user *user_relocs;
+
+		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
+
+		if (copy_from_user(reloc+total, user_relocs,
+				   exec_list[i].relocation_count *
+				   sizeof(*reloc))) {
+			ret = -EFAULT;
+			mutex_lock(&dev->struct_mutex);
+			goto err;
+		}
+
+		total += exec_list[i].relocation_count;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret) {
+		mutex_lock(&dev->struct_mutex);
+		goto err;
+	}
+
+	ret = i915_gem_execbuffer_reserve(dev, file,
+					  object_list, exec_list,
+					  count);
+	if (ret)
+		goto err;
+
+	total = 0;
+	for (i = 0; i < count; i++) {
+		struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
+		obj->base.pending_read_domains = 0;
+		obj->base.pending_write_domain = 0;
+		ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
+							       &exec_list[i],
+							       reloc + total);
+		if (ret)
+			goto err;
+
+		total += exec_list[i].relocation_count;
+	}
+
+	/* Leave the user relocations as are, this is the painfully slow path,
+	 * and we want to avoid the complication of dropping the lock whilst
+	 * having buffers reserved in the aperture and so causing spurious
+	 * ENOSPC for random operations.
+	 */
+
+err:
+	drm_free_large(reloc);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
 				struct drm_file *file,
@@ -3781,18 +3900,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
-	ret = i915_gem_execbuffer_pin(dev, file,
-				      object_list, exec_list,
-				      args->buffer_count);
+	ret = i915_gem_execbuffer_reserve(dev, file,
+					  object_list, exec_list,
+					  args->buffer_count);
 	if (ret)
 		goto err;
 
 	/* The objects are in their final locations, apply the relocations. */
-	for (i = 0; i < args->buffer_count; i++) {
-		struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
-		obj->base.pending_read_domains = 0;
-		obj->base.pending_write_domain = 0;
-		ret = i915_gem_execbuffer_relocate(obj, file, &exec_list[i]);
+	ret = i915_gem_execbuffer_relocate(dev, file,
					   object_list, exec_list,
					   args->buffer_count);
+	if (ret) {
+		if (ret == -EFAULT) {
+			ret = i915_gem_execbuffer_relocate_slow(dev, file,
+								object_list,
+								exec_list,
+								args->buffer_count);
+			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+		}
 		if (ret)
 			goto err;
 	}
...