Commit b8f9096d authored by Chris Wilson

drm/i915: Convert non-blocking waits for requests over to using RCU

We can completely avoid taking the struct_mutex around the non-blocking
waits by switching over to the RCU request management (trading the mutex
for an RCU read lock and some complex atomic operations). The improvement
is that we gain further contention reduction, and overall the code
becomes simpler due to the reduced mutex dancing.

v2: Move i915_gem_fault tracepoint back to the start of the function,
before the unlocked wait.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-2-git-send-email-chris@chris-wilson.co.uk
parent 2467658e
...@@ -349,24 +349,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, ...@@ -349,24 +349,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
return 0; return 0;
} }
/* A nonblocking variant of the above wait. This is a highly dangerous routine /* A nonblocking variant of the above wait. Must be called prior to
* as the object state may change during this call. * acquiring the mutex for the object, as the object state may change
* during this call. A reference must be held by the caller for the object.
*/ */
static __must_check int static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
struct intel_rps_client *rps, struct intel_rps_client *rps,
bool readonly) bool readonly)
{ {
struct drm_device *dev = obj->base.dev;
struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
struct i915_gem_active *active; struct i915_gem_active *active;
unsigned long active_mask; unsigned long active_mask;
int ret, i, n = 0; int idx;
lockdep_assert_held(&dev->struct_mutex);
GEM_BUG_ON(!to_i915(dev)->mm.interruptible);
active_mask = i915_gem_object_get_active(obj); active_mask = __I915_BO_ACTIVE(obj);
if (!active_mask) if (!active_mask)
return 0; return 0;
...@@ -377,25 +373,16 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, ...@@ -377,25 +373,16 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
active = &obj->last_write; active = &obj->last_write;
} }
for_each_active(active_mask, i) { for_each_active(active_mask, idx) {
struct drm_i915_gem_request *req; int ret;
req = i915_gem_active_get(&active[i], ret = i915_gem_active_wait_unlocked(&active[idx],
&obj->base.dev->struct_mutex); true, NULL, rps);
if (req) if (ret)
requests[n++] = req; return ret;
} }
mutex_unlock(&dev->struct_mutex); return 0;
ret = 0;
for (i = 0; ret == 0 && i < n; i++)
ret = i915_wait_request(requests[i], true, NULL, rps);
mutex_lock(&dev->struct_mutex);
for (i = 0; i < n; i++)
i915_gem_request_put(requests[i]);
return ret;
} }
static struct intel_rps_client *to_rps_client(struct drm_file *file) static struct intel_rps_client *to_rps_client(struct drm_file *file)
...@@ -1467,10 +1454,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, ...@@ -1467,10 +1454,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
int ret; int ret;
/* Only handle setting domains to types used by the CPU. */ /* Only handle setting domains to types used by the CPU. */
if (write_domain & I915_GEM_GPU_DOMAINS) if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
if (read_domains & I915_GEM_GPU_DOMAINS)
return -EINVAL; return -EINVAL;
/* Having something in the write domain implies it's in the read /* Having something in the write domain implies it's in the read
...@@ -1479,25 +1463,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, ...@@ -1479,25 +1463,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (write_domain != 0 && read_domains != write_domain) if (write_domain != 0 && read_domains != write_domain)
return -EINVAL; return -EINVAL;
ret = i915_mutex_lock_interruptible(dev);
if (ret)
return ret;
obj = i915_gem_object_lookup(file, args->handle); obj = i915_gem_object_lookup(file, args->handle);
if (!obj) { if (!obj)
ret = -ENOENT; return -ENOENT;
goto unlock;
}
/* Try to flush the object off the GPU without holding the lock. /* Try to flush the object off the GPU without holding the lock.
* We will repeat the flush holding the lock in the normal manner * We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped. * to catch cases where we are gazumped.
*/ */
ret = i915_gem_object_wait_rendering__nonblocking(obj, ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
to_rps_client(file),
!write_domain);
if (ret) if (ret)
goto unref; goto err;
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto err;
if (read_domains & I915_GEM_DOMAIN_GTT) if (read_domains & I915_GEM_DOMAIN_GTT)
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
...@@ -1507,11 +1487,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, ...@@ -1507,11 +1487,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (write_domain != 0) if (write_domain != 0)
intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
unref:
i915_gem_object_put(obj); i915_gem_object_put(obj);
unlock:
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
return ret; return ret;
err:
i915_gem_object_put_unlocked(obj);
return ret;
} }
/** /**
...@@ -1648,36 +1630,36 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -1648,36 +1630,36 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_ggtt_view view = i915_ggtt_view_normal; struct i915_ggtt_view view = i915_ggtt_view_normal;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
pgoff_t page_offset; pgoff_t page_offset;
unsigned long pfn; unsigned long pfn;
int ret = 0; int ret;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
intel_runtime_pm_get(dev_priv);
/* We don't use vmf->pgoff since that has the fake offset */ /* We don't use vmf->pgoff since that has the fake offset */
page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
PAGE_SHIFT; PAGE_SHIFT;
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto out;
trace_i915_gem_object_fault(obj, page_offset, true, write); trace_i915_gem_object_fault(obj, page_offset, true, write);
/* Try to flush the object off the GPU first without holding the lock. /* Try to flush the object off the GPU first without holding the lock.
* Upon reacquiring the lock, we will perform our sanity checks and then * Upon acquiring the lock, we will perform our sanity checks and then
* repeat the flush holding the lock in the normal manner to catch cases * repeat the flush holding the lock in the normal manner to catch cases
* where we are gazumped. * where we are gazumped.
*/ */
ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); ret = __unsafe_wait_rendering(obj, NULL, !write);
if (ret) if (ret)
goto unlock; goto err;
intel_runtime_pm_get(dev_priv);
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto err_rpm;
/* Access to snoopable pages through the GTT is incoherent. */ /* Access to snoopable pages through the GTT is incoherent. */
if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
ret = -EFAULT; ret = -EFAULT;
goto unlock; goto err_unlock;
} }
/* Use a partial view if the object is bigger than the aperture. */ /* Use a partial view if the object is bigger than the aperture. */
...@@ -1698,15 +1680,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -1698,15 +1680,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/* Now pin it into the GTT if needed */ /* Now pin it into the GTT if needed */
ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
if (ret) if (ret)
goto unlock; goto err_unlock;
ret = i915_gem_object_set_to_gtt_domain(obj, write); ret = i915_gem_object_set_to_gtt_domain(obj, write);
if (ret) if (ret)
goto unpin; goto err_unpin;
ret = i915_gem_object_get_fence(obj); ret = i915_gem_object_get_fence(obj);
if (ret) if (ret)
goto unpin; goto err_unpin;
/* Finally, remap it using the new GTT offset */ /* Finally, remap it using the new GTT offset */
pfn = ggtt->mappable_base + pfn = ggtt->mappable_base +
...@@ -1751,11 +1733,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -1751,11 +1733,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
(unsigned long)vmf->virtual_address, (unsigned long)vmf->virtual_address,
pfn + page_offset); pfn + page_offset);
} }
unpin: err_unpin:
i915_gem_object_ggtt_unpin_view(obj, &view); i915_gem_object_ggtt_unpin_view(obj, &view);
unlock: err_unlock:
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
out: err_rpm:
intel_runtime_pm_put(dev_priv);
err:
switch (ret) { switch (ret) {
case -EIO: case -EIO:
/* /*
...@@ -1796,8 +1780,6 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -1796,8 +1780,6 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
break; break;
} }
intel_runtime_pm_put(dev_priv);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment