Commit e95433c7 authored by Chris Wilson's avatar Chris Wilson

drm/i915: Rearrange i915_wait_request() accounting with callers

Our low-level wait routine has evolved from our generic wait interface
that handled unlocked, RPS boosting, waits with time tracking. If we
push our GEM fence tracking to use reservation_objects (required for
handling multiple timelines), we lose the ability to pass the required
information down to i915_wait_request(). However, if we push the extra
functionality from i915_wait_request() to the individual callsites
(i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use
of those extras, we can both simplify our low level wait and prepare for
extending the GEM interface for use of reservation_objects.

v2: Rewrite i915_wait_request() kerneldocs
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: default avatarJoonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk
parent c92ac094
......@@ -400,6 +400,7 @@ static int workload_thread(void *priv)
int ring_id = p->ring_id;
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct intel_vgpu_workload *workload = NULL;
long lret;
int ret;
bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
......@@ -449,10 +450,12 @@ static int workload_thread(void *priv)
gvt_dbg_sched("ring id %d wait workload %p\n",
workload->ring_id, workload);
workload->status = i915_wait_request(workload->req,
0, NULL, NULL);
if (workload->status != 0)
lret = i915_wait_request(workload->req,
0, MAX_SCHEDULE_TIMEOUT);
if (lret < 0) {
workload->status = lret;
gvt_err("fail to wait workload, skip\n");
}
complete:
gvt_dbg_sched("will complete workload %p\n, status: %d\n",
......
......@@ -3319,9 +3319,10 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
int __must_check i915_gem_suspend(struct drm_device *dev);
void i915_gem_resume(struct drm_device *dev);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int __must_check
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
bool readonly);
int i915_gem_object_wait(struct drm_i915_gem_object *obj,
unsigned int flags,
long timeout,
struct intel_rps_client *rps);
int __must_check
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
......
This diff is collapsed.
......@@ -59,31 +59,9 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
static signed long i915_fence_wait(struct dma_fence *fence,
bool interruptible,
signed long timeout_jiffies)
signed long timeout)
{
s64 timeout_ns, *timeout;
int ret;
if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
timeout_ns = jiffies_to_nsecs(timeout_jiffies);
timeout = &timeout_ns;
} else {
timeout = NULL;
}
ret = i915_wait_request(to_request(fence),
interruptible, timeout,
NO_WAITBOOST);
if (ret == -ETIME)
return 0;
if (ret < 0)
return ret;
if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
timeout_jiffies = nsecs_to_jiffies(timeout_ns);
return timeout_jiffies;
return i915_wait_request(to_request(fence), interruptible, timeout);
}
static void i915_fence_value_str(struct dma_fence *fence, char *str, int size)
......@@ -166,7 +144,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
struct i915_gem_active *active, *next;
trace_i915_gem_request_retire(request);
list_del(&request->link);
list_del_init(&request->link);
/* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
......@@ -224,7 +202,8 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
struct drm_i915_gem_request *tmp;
lockdep_assert_held(&req->i915->drm.struct_mutex);
GEM_BUG_ON(list_empty(&req->link));
if (list_empty(&req->link))
return;
do {
tmp = list_first_entry(&engine->request_list,
......@@ -780,75 +759,48 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
/**
* i915_wait_request - wait until execution of request has finished
* @req: duh!
* @req: the request to wait upon
* @flags: how to wait
* @timeout: in - how long to wait (NULL forever); out - how much time remaining
* @rps: client to charge for RPS boosting
* @timeout: how long to wait in jiffies
*
* i915_wait_request() waits for the request to be completed, for a
* maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
* unbounded wait).
*
* Note: It is of utmost importance that the passed in seqno and reset_counter
* values have been read by the caller in an smp safe manner. Where read-side
* locks are involved, it is sufficient to read the reset_counter before
* unlocking the lock that protects the seqno. For lockless tricks, the
* reset_counter _must_ be read before, and an appropriate smp_rmb must be
* inserted.
* If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
* in via the flags, and vice versa if the struct_mutex is not held, the caller
* must not specify that the wait is locked.
*
* Returns 0 if the request was found within the alloted time. Else returns the
* errno with remaining time filled in timeout argument.
* Returns the remaining time (in jiffies) if the request completed, which may
* be zero or -ETIME if the request is unfinished after the timeout expires.
* May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
* pending before the request completes.
*/
int i915_wait_request(struct drm_i915_gem_request *req,
unsigned int flags,
s64 *timeout,
struct intel_rps_client *rps)
long i915_wait_request(struct drm_i915_gem_request *req,
unsigned int flags,
long timeout)
{
const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
DEFINE_WAIT(reset);
struct intel_wait wait;
unsigned long timeout_remain;
int ret = 0;
might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
GEM_BUG_ON(debug_locks &&
!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
!!(flags & I915_WAIT_LOCKED));
#endif
GEM_BUG_ON(timeout < 0);
if (i915_gem_request_completed(req))
return 0;
return timeout;
timeout_remain = MAX_SCHEDULE_TIMEOUT;
if (timeout) {
if (WARN_ON(*timeout < 0))
return -EINVAL;
if (*timeout == 0)
return -ETIME;
/* Record current time in case interrupted, or wedged */
timeout_remain = nsecs_to_jiffies_timeout(*timeout);
*timeout += ktime_get_raw_ns();
}
if (!timeout)
return -ETIME;
trace_i915_gem_request_wait_begin(req);
/* This client is about to stall waiting for the GPU. In many cases
* this is undesirable and limits the throughput of the system, as
* many clients cannot continue processing user input/output whilst
* blocked. RPS autotuning may take tens of milliseconds to respond
* to the GPU load and thus incurs additional latency for the client.
* We can circumvent that by promoting the GPU frequency to maximum
* before we wait. This makes the GPU throttle up much more quickly
* (good for benchmarks and user experience, e.g. window animations),
* but at a cost of spending more power processing the workload
* (bad for battery). Not all clients even want their results
* immediately and for them we should just let the GPU select its own
* frequency to maximise efficiency. To prevent a single client from
* forcing the clocks too high for the whole system, we only allow
* each client to waitboost once in a busy period.
*/
if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
/* Optimistic short spin before touching IRQs */
if (i915_spin_request(req, state, 5))
goto complete;
......@@ -867,16 +819,17 @@ int i915_wait_request(struct drm_i915_gem_request *req,
for (;;) {
if (signal_pending_state(state, current)) {
ret = -ERESTARTSYS;
timeout = -ERESTARTSYS;
break;
}
timeout_remain = io_schedule_timeout(timeout_remain);
if (timeout_remain == 0) {
ret = -ETIME;
if (!timeout) {
timeout = -ETIME;
break;
}
timeout = io_schedule_timeout(timeout);
if (intel_wait_complete(&wait))
break;
......@@ -923,40 +876,7 @@ int i915_wait_request(struct drm_i915_gem_request *req,
complete:
trace_i915_gem_request_wait_end(req);
if (timeout) {
*timeout -= ktime_get_raw_ns();
if (*timeout < 0)
*timeout = 0;
/*
* Apparently ktime isn't accurate enough and occasionally has a
* bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
* things up to make the test happy. We allow up to 1 jiffy.
*
* This is a regrssion from the timespec->ktime conversion.
*/
if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
*timeout = 0;
}
if (IS_RPS_USER(rps) &&
req->fence.seqno == req->engine->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
* Since no other client has submitted a request in the
* meantime, assume that this client is the only one
* supplying work to the GPU but is unable to keep that
* work supplied because it is waiting. Since the GPU is
* then never kept fully busy, RPS autoclocking will
* keep the clocks relatively low, causing further delays.
* Compensate by giving the synchronous client credit for
* a waitboost next time.
*/
spin_lock(&req->i915->rps.client_lock);
list_del_init(&rps->link);
spin_unlock(&req->i915->rps.client_lock);
}
return ret;
return timeout;
}
static bool engine_retire_requests(struct intel_engine_cs *engine)
......
......@@ -228,13 +228,13 @@ struct intel_rps_client;
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
int i915_wait_request(struct drm_i915_gem_request *req,
unsigned int flags,
s64 *timeout,
struct intel_rps_client *rps)
long i915_wait_request(struct drm_i915_gem_request *req,
unsigned int flags,
long timeout)
__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE BIT(0)
#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
......@@ -583,14 +583,16 @@ static inline int __must_check
i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
{
struct drm_i915_gem_request *request;
long ret;
request = i915_gem_active_peek(active, mutex);
if (!request)
return 0;
return i915_wait_request(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
NULL, NULL);
ret = i915_wait_request(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
return ret < 0 ? ret : 0;
}
/**
......@@ -617,20 +619,18 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
*/
static inline int
i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
unsigned int flags,
s64 *timeout,
struct intel_rps_client *rps)
unsigned int flags)
{
struct drm_i915_gem_request *request;
int ret = 0;
long ret = 0;
request = i915_gem_active_get_unlocked(active);
if (request) {
ret = i915_wait_request(request, flags, timeout, rps);
ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT);
i915_gem_request_put(request);
}
return ret;
return ret < 0 ? ret : 0;
}
/**
......@@ -647,7 +647,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
struct mutex *mutex)
{
struct drm_i915_gem_request *request;
int ret;
long ret;
request = i915_gem_active_raw(active, mutex);
if (!request)
......@@ -655,8 +655,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
ret = i915_wait_request(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
NULL, NULL);
if (ret)
MAX_SCHEDULE_TIMEOUT);
if (ret < 0)
return ret;
list_del_init(&active->link);
......
......@@ -61,23 +61,13 @@ struct i915_mmu_object {
bool attached;
};
static void wait_rendering(struct drm_i915_gem_object *obj)
{
unsigned long active = __I915_BO_ACTIVE(obj);
int idx;
for_each_active(active, idx)
i915_gem_active_wait_unlocked(&obj->last_read[idx],
0, NULL, NULL);
}
static void cancel_userptr(struct work_struct *work)
{
struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
struct drm_i915_gem_object *obj = mo->obj;
struct drm_device *dev = obj->base.dev;
wait_rendering(obj);
i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
mutex_lock(&dev->struct_mutex);
/* Cancel any active worker and force us to re-evaluate gup */
......
......@@ -12072,7 +12072,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
if (work->flip_queued_req)
WARN_ON(i915_wait_request(work->flip_queued_req,
0, NULL, NO_WAITBOOST));
0, MAX_SCHEDULE_TIMEOUT) < 0);
/* For framebuffer backed by dmabuf, wait for fence */
resv = i915_gem_object_get_dmabuf_resv(obj);
......@@ -14187,19 +14187,21 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
for_each_plane_in_state(state, plane, plane_state, i) {
struct intel_plane_state *intel_plane_state =
to_intel_plane_state(plane_state);
long timeout;
if (!intel_plane_state->wait_req)
continue;
ret = i915_wait_request(intel_plane_state->wait_req,
I915_WAIT_INTERRUPTIBLE,
NULL, NULL);
if (ret) {
timeout = i915_wait_request(intel_plane_state->wait_req,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) {
/* Any hang should be swallowed by the wait */
WARN_ON(ret == -EIO);
WARN_ON(timeout == -EIO);
mutex_lock(&dev->struct_mutex);
drm_atomic_helper_cleanup_planes(dev, state);
mutex_unlock(&dev->struct_mutex);
ret = timeout;
break;
}
}
......@@ -14403,7 +14405,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
bool hw_check = intel_state->modeset;
unsigned long put_domains[I915_MAX_PIPES] = {};
unsigned crtc_vblank_mask = 0;
int i, ret;
int i;
for_each_plane_in_state(state, plane, plane_state, i) {
struct intel_plane_state *intel_plane_state =
......@@ -14412,11 +14414,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
if (!intel_plane_state->wait_req)
continue;
ret = i915_wait_request(intel_plane_state->wait_req,
0, NULL, NULL);
/* EIO should be eaten, and we can't get interrupted in the
* worker, and blocking commits have waited already. */
WARN_ON(ret);
WARN_ON(i915_wait_request(intel_plane_state->wait_req,
0, MAX_SCHEDULE_TIMEOUT) < 0);
}
drm_atomic_helper_wait_for_dependencies(state);
......@@ -14780,7 +14781,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
* can safely continue.
*/
if (needs_modeset(crtc_state))
ret = i915_gem_object_wait_rendering(old_obj, true);
ret = i915_gem_object_wait(old_obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT,
NULL);
if (ret) {
/* GPU hangs should have been swallowed by the wait */
WARN_ON(ret == -EIO);
......
......@@ -2155,7 +2155,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
{
struct intel_ring *ring = req->ring;
struct drm_i915_gem_request *target;
int ret;
long timeout;
lockdep_assert_held(&req->i915->drm.struct_mutex);
intel_ring_update_space(ring);
if (ring->space >= bytes)
......@@ -2185,11 +2187,11 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
if (WARN_ON(&target->ring_link == &ring->request_list))
return -ENOSPC;
ret = i915_wait_request(target,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
NULL, NO_WAITBOOST);
if (ret)
return ret;
timeout = i915_wait_request(target,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0)
return timeout;
i915_gem_request_retire_upto(target);
......
......@@ -524,8 +524,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine,
unsigned int flags)
{
/* Wait upon the last request to be completed */
return i915_gem_active_wait_unlocked(&engine->last_request,
flags, NULL, NULL);
return i915_gem_active_wait_unlocked(&engine->last_request, flags);
}
int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment