Commit 62eb3c24 authored by Chris Wilson

drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()

As time goes by, usage of generic ioctls such as drm_syncobj and
sync_file is on the increase, bypassing i915-specific ioctls like
GEM_WAIT. Currently, we only apply waitboosting to our driver ioctls,
as we track the file/client and account the waitboosting to them.
However, since commit 7b92c1bd ("drm/i915: Avoid keeping waitboost
active for signaling threads"), we have no longer been applying client
ratelimiting to waitboosts, so that information has only been used for
debug tracking.

Push the application of waitboosting down to the common
i915_request_wait, and apply it to all foreign fence waits as well.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190213092504.25709-1-chris@chris-wilson.co.uk
parent e6ed078d
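
In outline, the effect of the patch: any dma_fence_wait_timeout() that
lands on an i915 fence now funnels through i915_fence_wait(), which tags
the wait with I915_WAIT_PRIORITY, and i915_request_wait() applies the
boost itself, at most once and only if the request has not yet begun
executing. A condensed sketch of the resulting path, taken from the
hunks below (locking and error handling elided):

	/* Every foreign fence wait on an i915 fence enters through the
	 * dma_fence ops, which now request priority treatment:
	 */
	static signed long i915_fence_wait(struct dma_fence *fence,
					   bool interruptible,
					   signed long timeout)
	{
		return i915_request_wait(to_request(fence),
					 interruptible | I915_WAIT_PRIORITY,
					 timeout);
	}

	/* ...and i915_request_wait() boosts before sleeping, but only
	 * if the request has not yet started executing on the GPU:
	 */
	if (flags & I915_WAIT_PRIORITY) {
		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq);
		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
	}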
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2019,11 +2019,9 @@ static const char *rps_power_to_str(unsigned int power)
 static int i915_rps_boost_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	u32 act_freq = rps->cur_freq;
 	intel_wakeref_t wakeref;
-	struct drm_file *file;
 
 	with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
@@ -2057,22 +2055,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   intel_gpu_freq(dev_priv, rps->efficient_freq),
 		   intel_gpu_freq(dev_priv, rps->boost_freq));
 
-	mutex_lock(&dev->filelist_mutex);
-	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-		struct task_struct *task;
-
-		rcu_read_lock();
-		task = pid_task(file->pid, PIDTYPE_PID);
-		seq_printf(m, "%s [%d]: %d boosts\n",
-			   task ? task->comm : "<unknown>",
-			   task ? task->pid : -1,
-			   atomic_read(&file_priv->rps_client.boosts));
-		rcu_read_unlock();
-	}
-	seq_printf(m, "Kernel (anonymous) boosts: %d\n",
-		   atomic_read(&rps->boosts));
-	mutex_unlock(&dev->filelist_mutex);
+	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
 
 	if (INTEL_GEN(dev_priv) >= 6 &&
 	    rps->enabled &&
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -217,10 +217,6 @@ struct drm_i915_file_private {
 	} mm;
 	struct idr context_idr;
 
-	struct intel_rps_client {
-		atomic_t boosts;
-	} rps_client;
-
 	unsigned int bsd_engine;
 
 	/*
@@ -3056,8 +3052,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
			  unsigned int flags,
-			 long timeout,
-			 struct intel_rps_client *rps);
+			 long timeout);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
				   unsigned int flags,
				   const struct i915_sched_attr *attr);
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -416,8 +416,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 static long
 i915_gem_object_wait_fence(struct dma_fence *fence,
			    unsigned int flags,
-			   long timeout,
-			   struct intel_rps_client *rps_client)
+			   long timeout)
 {
 	struct i915_request *rq;
 
@@ -435,27 +434,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	if (i915_request_completed(rq))
 		goto out;
 
-	/*
-	 * This client is about to stall waiting for the GPU. In many cases
-	 * this is undesirable and limits the throughput of the system, as
-	 * many clients cannot continue processing user input/output whilst
-	 * blocked. RPS autotuning may take tens of milliseconds to respond
-	 * to the GPU load and thus incurs additional latency for the client.
-	 * We can circumvent that by promoting the GPU frequency to maximum
-	 * before we wait. This makes the GPU throttle up much more quickly
-	 * (good for benchmarks and user experience, e.g. window animations),
-	 * but at a cost of spending more power processing the workload
-	 * (bad for battery). Not all clients even want their results
-	 * immediately and for them we should just let the GPU select its own
-	 * frequency to maximise efficiency. To prevent a single client from
-	 * forcing the clocks too high for the whole system, we only allow
-	 * each client to waitboost once in a busy period.
-	 */
-	if (rps_client && !i915_request_started(rq)) {
-		if (INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq, rps_client);
-	}
-
 	timeout = i915_request_wait(rq, flags, timeout);
 
 out:
@@ -468,8 +446,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 static long
 i915_gem_object_wait_reservation(struct reservation_object *resv,
				  unsigned int flags,
-				 long timeout,
-				 struct intel_rps_client *rps_client)
+				 long timeout)
 {
 	unsigned int seq = __read_seqcount_begin(&resv->seq);
 	struct dma_fence *excl;
@@ -487,8 +464,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 		for (i = 0; i < count; i++) {
 			timeout = i915_gem_object_wait_fence(shared[i],
-							     flags, timeout,
-							     rps_client);
+							     flags, timeout);
 			if (timeout < 0)
 				break;
 
@@ -514,8 +490,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 	}
 
 	if (excl && timeout >= 0)
-		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
-						     rps_client);
+		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
 
 	dma_fence_put(excl);
 
@@ -609,30 +584,19 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
  * @obj: i915 gem object
  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
  * @timeout: how long to wait
- * @rps_client: client (user process) to charge for any waitboosting
  */
 int
 i915_gem_object_wait(struct drm_i915_gem_object *obj,
		      unsigned int flags,
-		     long timeout,
-		     struct intel_rps_client *rps_client)
+		     long timeout)
 {
 	might_sleep();
 	GEM_BUG_ON(timeout < 0);
 
-	timeout = i915_gem_object_wait_reservation(obj->resv,
-						   flags, timeout,
-						   rps_client);
+	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
 	return timeout < 0 ? timeout : 0;
 }
 
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-
-	return &fpriv->rps_client;
-}
-
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
@@ -838,8 +802,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED,
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		return ret;
 
@@ -891,8 +854,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		return ret;
@@ -1154,8 +1116,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT,
-				   to_rps_client(file));
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		goto out;
@@ -1454,8 +1415,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT,
-				   to_rps_client(file));
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		goto err;
@@ -1553,8 +1513,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	err = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_PRIORITY |
				    (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT,
-				   to_rps_client(file));
+				   MAX_SCHEDULE_TIMEOUT);
 	if (err)
 		goto out;
@@ -1863,8 +1822,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	 */
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		goto err;
@@ -3195,8 +3153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_PRIORITY |
				    I915_WAIT_ALL,
-				   to_wait_timeout(args->timeout_ns),
-				   to_rps_client(file));
+				   to_wait_timeout(args->timeout_ns));
 
 	if (args->timeout_ns > 0) {
 		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
@@ -3265,7 +3222,7 @@ wait_for_timelines(struct drm_i915_private *i915,
		 * stalls, so allow the gpu to boost to maximum clocks.
		 */
 		if (flags & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq, NULL);
+			gen6_rps_boost(rq);
 
 		timeout = i915_request_wait(rq, flags, timeout);
 		i915_request_put(rq);
@@ -3360,8 +3317,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		return ret;
@@ -3423,8 +3379,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		return ret;
@@ -3539,8 +3494,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		return ret;
@@ -3678,8 +3632,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT,
-				   to_rps_client(file));
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		goto out;
@@ -3805,8 +3758,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT,
-				   NULL);
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -68,7 +68,9 @@ static signed long i915_fence_wait(struct dma_fence *fence,
			    bool interruptible,
			    signed long timeout)
 {
-	return i915_request_wait(to_request(fence), interruptible, timeout);
+	return i915_request_wait(to_request(fence),
+				 interruptible | I915_WAIT_PRIORITY,
+				 timeout);
 }
 
 static void i915_fence_release(struct dma_fence *fence)
@@ -1136,8 +1138,23 @@ long i915_request_wait(struct i915_request *rq,
 	if (__i915_spin_request(rq, state, 5))
 		goto out;
 
-	if (flags & I915_WAIT_PRIORITY)
+	/*
+	 * This client is about to stall waiting for the GPU. In many cases
+	 * this is undesirable and limits the throughput of the system, as
+	 * many clients cannot continue processing user input/output whilst
+	 * blocked. RPS autotuning may take tens of milliseconds to respond
+	 * to the GPU load and thus incurs additional latency for the client.
+	 * We can circumvent that by promoting the GPU frequency to maximum
+	 * before we sleep. This makes the GPU throttle up much more quickly
+	 * (good for benchmarks and user experience, e.g. window animations),
+	 * but at a cost of spending more power processing the workload
+	 * (bad for battery).
+	 */
+	if (flags & I915_WAIT_PRIORITY) {
+		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
+			gen6_rps_boost(rq);
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+	}
 
 	wait.tsk = current;
 	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13559,7 +13559,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
	 * vblank without our intervention, so leave RPS alone.
	 */
 	if (!i915_request_started(rq))
-		gen6_rps_boost(rq, NULL);
+		gen6_rps_boost(rq);
 	i915_request_put(rq);
 
 	drm_crtc_vblank_put(wait->crtc);
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -2266,7 +2266,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+void gen6_rps_boost(struct i915_request *rq);
 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv);
 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv);
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6768,8 +6768,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
-void gen6_rps_boost(struct i915_request *rq,
-		    struct intel_rps_client *rps_client)
+void gen6_rps_boost(struct i915_request *rq)
 {
 	struct intel_rps *rps = &rq->i915->gt_pm.rps;
 	unsigned long flags;
@@ -6798,7 +6797,7 @@ void gen6_rps_boost(struct i915_request *rq,
 	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
 		schedule_work(&rps->work);
 
-	atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
+	atomic_inc(&rps->boosts);
 }
 
 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)