drm/i915: Rearrange i915_wait_request() accounting with callers

Our low-level wait routine has evolved from our generic wait interface that handled unlocked, RPS boosting, waits with time tracking. If we push our GEM fence tracking to use reservation_objects (required for handling multiple timelines), we lose the ability to pass the required information down to i915_wait_request(). However, if we push the extra functionality from i915_wait_request() to the individual callsites (i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use of those extras, we can both simplify our low level wait and prepare for extending the GEM interface for use of reservation_objects. v2: Rewrite i915_wait_request() kerneldocs Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.william.auld@gmail.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk

drm/i915: Rearrange i915_wait_request() accounting with callers
Our low-level wait routine has evolved from our generic wait interface that handled unlocked, RPS boosting, waits with time tracking. If we push our GEM fence tracking to use reservation_objects (required for handling multiple timelines), we lose the ability to pass the required information down to i915_wait_request(). However, if we push the extra functionality from i915_wait_request() to the individual callsites (i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use of those extras, we can both simplify our low level wait and prepare for extending the GEM interface for use of reservation_objects. v2: Rewrite i915_wait_request() kerneldocs Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.william.auld@gmail.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk
e95433c7 · Chris Wilson · c92ac094 · e95433c7 · e95433c7 · e95433c7
Commit e95433c7 authored Oct 28, 2016 by Chris Wilson
9 changed files
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -400,6 +400,7 @@ static int workload_thread(void *priv)
 	int ring_id = p->ring_id;
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	struct intel_vgpu_workload *workload = NULL;
+	long lret;
 	int ret;
 	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -449,10 +450,12 @@ static int workload_thread(void *priv)
 		gvt_dbg_sched("ring id %d wait workload %p\n",
 				workload->ring_id, workload);

-		workload->status = i915_wait_request(workload->req,
-						     0, NULL, NULL);
-		if (workload->status != 0)
+		lret = i915_wait_request(workload->req,
+					 0, MAX_SCHEDULE_TIMEOUT);
+		if (lret < 0) {
+			workload->status = lret;
 			gvt_err("fail to wait workload, skip\n");
+		}

 complete:
 		gvt_dbg_sched("will complete workload %p\n, status: %d\n",

--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3319,9 +3319,10 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void i915_gem_resume(struct drm_device *dev);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
-int __must_check
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly);
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+			 unsigned int flags,
+			 long timeout,
+			 struct intel_rps_client *rps);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
 				  bool write);

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -59,31 +59,9 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)

 static signed long i915_fence_wait(struct dma_fence *fence,
 				   bool interruptible,
-				   signed long timeout_jiffies)
+				   signed long timeout)
 {
-	s64 timeout_ns, *timeout;
-	int ret;
-
-	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
-		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
-		timeout = &timeout_ns;
-	} else {
-		timeout = NULL;
-	}
-
-	ret = i915_wait_request(to_request(fence),
-				interruptible, timeout,
-				NO_WAITBOOST);
-	if (ret == -ETIME)
-		return 0;
-
-	if (ret < 0)
-		return ret;
-
-	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
-		timeout_jiffies = nsecs_to_jiffies(timeout_ns);
-
-	return timeout_jiffies;
+	return i915_wait_request(to_request(fence), interruptible, timeout);
 }

 static void i915_fence_value_str(struct dma_fence *fence, char *str, int size)
@@ -166,7 +144,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	struct i915_gem_active *active, *next;

 	trace_i915_gem_request_retire(request);
-	list_del(&request->link);
+	list_del_init(&request->link);

 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -224,7 +202,8 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	struct drm_i915_gem_request *tmp;

 	lockdep_assert_held(&req->i915->drm.struct_mutex);
-	GEM_BUG_ON(list_empty(&req->link));
+	if (list_empty(&req->link))
+		return;

 	do {
 		tmp = list_first_entry(&engine->request_list,
@@ -780,75 +759,48 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,

 /**
 * i915_wait_request - wait until execution of request has finished
- * @req: duh!
+ * @req: the request to wait upon
 * @flags: how to wait
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- * @rps: client to charge for RPS boosting
+ * @timeout: how long to wait in jiffies
+ *
+ * i915_wait_request() waits for the request to be completed, for a
+ * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
+ * unbounded wait).
 *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
+ * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
+ * in via the flags, and vice versa if the struct_mutex is not held, the caller
+ * must not specify that the wait is locked.
 *
- * Returns 0 if the request was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
+ * Returns the remaining time (in jiffies) if the request completed, which may
+ * be zero or -ETIME if the request is unfinished after the timeout expires.
+ * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
+ * pending before the request completes.
 */
-int i915_wait_request(struct drm_i915_gem_request *req,
-		      unsigned int flags,
-		      s64 *timeout,
-		      struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+		       unsigned int flags,
+		       long timeout)
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
-	unsigned long timeout_remain;
-	int ret = 0;

 	might_sleep();
 #if IS_ENABLED(CONFIG_LOCKDEP)
-	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
+	GEM_BUG_ON(debug_locks &&
+		   !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
 		   !!(flags & I915_WAIT_LOCKED));
 #endif
+	GEM_BUG_ON(timeout < 0);

 	if (i915_gem_request_completed(req))
-		return 0;
+		return timeout;

-	timeout_remain = MAX_SCHEDULE_TIMEOUT;
-	if (timeout) {
-		if (WARN_ON(*timeout < 0))
-			return -EINVAL;
-
-		if (*timeout == 0)
-			return -ETIME;
-
-		/* Record current time in case interrupted, or wedged */
-		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
-		*timeout += ktime_get_raw_ns();
-	}
+	if (!timeout)
+		return -ETIME;

 	trace_i915_gem_request_wait_begin(req);

-	/* This client is about to stall waiting for the GPU. In many cases
-	 * this is undesirable and limits the throughput of the system, as
-	 * many clients cannot continue processing user input/output whilst
-	 * blocked. RPS autotuning may take tens of milliseconds to respond
-	 * to the GPU load and thus incurs additional latency for the client.
-	 * We can circumvent that by promoting the GPU frequency to maximum
-	 * before we wait. This makes the GPU throttle up much more quickly
-	 * (good for benchmarks and user experience, e.g. window animations),
-	 * but at a cost of spending more power processing the workload
-	 * (bad for battery). Not all clients even want their results
-	 * immediately and for them we should just let the GPU select its own
-	 * frequency to maximise efficiency. To prevent a single client from
-	 * forcing the clocks too high for the whole system, we only allow
-	 * each client to waitboost once in a busy period.
-	 */
-	if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
-		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
-
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
 		goto complete;
@@ -867,16 +819,17 @@ int i915_wait_request(struct drm_i915_gem_request *req,

 	for (;;) {
 		if (signal_pending_state(state, current)) {
-			ret = -ERESTARTSYS;
+			timeout = -ERESTARTSYS;
 			break;
 		}

-		timeout_remain = io_schedule_timeout(timeout_remain);
-		if (timeout_remain == 0) {
-			ret = -ETIME;
+		if (!timeout) {
+			timeout = -ETIME;
 			break;
 		}

+		timeout = io_schedule_timeout(timeout);
+
 		if (intel_wait_complete(&wait))
 			break;

@@ -923,40 +876,7 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 complete:
 	trace_i915_gem_request_wait_end(req);

-	if (timeout) {
-		*timeout -= ktime_get_raw_ns();
-		if (*timeout < 0)
-			*timeout = 0;
-
-		/*
-		 * Apparently ktime isn't accurate enough and occasionally has a
-		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
-		 * things up to make the test happy. We allow up to 1 jiffy.
-		 *
-		 * This is a regrssion from the timespec->ktime conversion.
-		 */
-		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
-			*timeout = 0;
-	}
-
-	if (IS_RPS_USER(rps) &&
-	    req->fence.seqno == req->engine->last_submitted_seqno) {
-		/* The GPU is now idle and this client has stalled.
-		 * Since no other client has submitted a request in the
-		 * meantime, assume that this client is the only one
-		 * supplying work to the GPU but is unable to keep that
-		 * work supplied because it is waiting. Since the GPU is
-		 * then never kept fully busy, RPS autoclocking will
-		 * keep the clocks relatively low, causing further delays.
-		 * Compensate by giving the synchronous client credit for
-		 * a waitboost next time.
-		 */
-		spin_lock(&req->i915->rps.client_lock);
-		list_del_init(&rps->link);
-		spin_unlock(&req->i915->rps.client_lock);
-	}
-
-	return ret;
+	return timeout;
 }

 static bool engine_retire_requests(struct intel_engine_cs *engine)

--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -228,13 +228,13 @@ struct intel_rps_client;
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))

-int i915_wait_request(struct drm_i915_gem_request *req,
-		      unsigned int flags,
-		      s64 *timeout,
-		      struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+		       unsigned int flags,
+		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */

 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);

@@ -583,14 +583,16 @@ static inline int __must_check
 i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
+	long ret;

 	request = i915_gem_active_peek(active, mutex);
 	if (!request)
 		return 0;

-	return i915_wait_request(request,
-				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				 NULL, NULL);
+	ret = i915_wait_request(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				MAX_SCHEDULE_TIMEOUT);
+	return ret < 0 ? ret : 0;
 }

 /**
@@ -617,20 +619,18 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 */
 static inline int
 i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      unsigned int flags,
-			      s64 *timeout,
-			      struct intel_rps_client *rps)
+			      unsigned int flags)
 {
 	struct drm_i915_gem_request *request;
-	int ret = 0;
+	long ret = 0;

 	request = i915_gem_active_get_unlocked(active);
 	if (request) {
-		ret = i915_wait_request(request, flags, timeout, rps);
+		ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT);
 		i915_gem_request_put(request);
 	}

-	return ret;
+	return ret < 0 ? ret : 0;
 }

 /**
@@ -647,7 +647,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 		       struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
-	int ret;
+	long ret;

 	request = i915_gem_active_raw(active, mutex);
 	if (!request)
@@ -655,8 +655,8 @@ i915_gem_active_retire(struct i915_gem_active *active,

 	ret = i915_wait_request(request,
 				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				NULL, NULL);
-	if (ret)
+				MAX_SCHEDULE_TIMEOUT);
+	if (ret < 0)
 		return ret;

 	list_del_init(&active->link);

--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -61,23 +61,13 @@ struct i915_mmu_object {
 	bool attached;
 };

-static void wait_rendering(struct drm_i915_gem_object *obj)
-{
-	unsigned long active = __I915_BO_ACTIVE(obj);
-	int idx;
-
-	for_each_active(active, idx)
-		i915_gem_active_wait_unlocked(&obj->last_read[idx],
-					      0, NULL, NULL);
-}
-
 static void cancel_userptr(struct work_struct *work)
 {
 	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
 	struct drm_i915_gem_object *obj = mo->obj;
 	struct drm_device *dev = obj->base.dev;

-	wait_rendering(obj);
+	i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);

 	mutex_lock(&dev->struct_mutex);
 	/* Cancel any active worker and force us to re-evaluate gup */

--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12072,7 +12072,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)

 	if (work->flip_queued_req)
 		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  0, NULL, NO_WAITBOOST));
+					  0, MAX_SCHEDULE_TIMEOUT) < 0);

 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14187,19 +14187,21 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 		for_each_plane_in_state(state, plane, plane_state, i) {
 			struct intel_plane_state *intel_plane_state =
 				to_intel_plane_state(plane_state);
+			long timeout;

 			if (!intel_plane_state->wait_req)
 				continue;

-			ret = i915_wait_request(intel_plane_state->wait_req,
-						I915_WAIT_INTERRUPTIBLE,
-						NULL, NULL);
-			if (ret) {
+			timeout = i915_wait_request(intel_plane_state->wait_req,
+						    I915_WAIT_INTERRUPTIBLE,
+						    MAX_SCHEDULE_TIMEOUT);
+			if (timeout < 0) {
 				/* Any hang should be swallowed by the wait */
-				WARN_ON(ret == -EIO);
+				WARN_ON(timeout == -EIO);
 				mutex_lock(&dev->struct_mutex);
 				drm_atomic_helper_cleanup_planes(dev, state);
 				mutex_unlock(&dev->struct_mutex);
+				ret = timeout;
 				break;
 			}
 		}
@@ -14403,7 +14405,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	bool hw_check = intel_state->modeset;
 	unsigned long put_domains[I915_MAX_PIPES] = {};
 	unsigned crtc_vblank_mask = 0;
-	int i, ret;
+	int i;

 	for_each_plane_in_state(state, plane, plane_state, i) {
 		struct intel_plane_state *intel_plane_state =
@@ -14412,11 +14414,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!intel_plane_state->wait_req)
 			continue;

-		ret = i915_wait_request(intel_plane_state->wait_req,
-					0, NULL, NULL);
 		/* EIO should be eaten, and we can't get interrupted in the
 		 * worker, and blocking commits have waited already. */
-		WARN_ON(ret);
+		WARN_ON(i915_wait_request(intel_plane_state->wait_req,
+					  0, MAX_SCHEDULE_TIMEOUT) < 0);
 	}

 	drm_atomic_helper_wait_for_dependencies(state);
@@ -14780,7 +14781,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		 * can safely continue.
 		 */
 		if (needs_modeset(crtc_state))
-			ret = i915_gem_object_wait_rendering(old_obj, true);
+			ret = i915_gem_object_wait(old_obj,
+						   I915_WAIT_INTERRUPTIBLE |
+						   I915_WAIT_LOCKED,
+						   MAX_SCHEDULE_TIMEOUT,
+						   NULL);
 		if (ret) {
 			/* GPU hangs should have been swallowed by the wait */
 			WARN_ON(ret == -EIO);

--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2155,7 +2155,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 {
 	struct intel_ring *ring = req->ring;
 	struct drm_i915_gem_request *target;
-	int ret;
+	long timeout;
+
+	lockdep_assert_held(&req->i915->drm.struct_mutex);

 	intel_ring_update_space(ring);
 	if (ring->space >= bytes)
@@ -2185,11 +2187,11 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;

-	ret = i915_wait_request(target,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				NULL, NO_WAITBOOST);
-	if (ret)
-		return ret;
+	timeout = i915_wait_request(target,
+				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    MAX_SCHEDULE_TIMEOUT);
+	if (timeout < 0)
+		return timeout;

 	i915_gem_request_retire_upto(target);


--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -524,8 +524,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine,
 				    unsigned int flags)
 {
 	/* Wait upon the last request to be completed */
-	return i915_gem_active_wait_unlocked(&engine->last_request,
-					     flags, NULL, NULL);
+	return i915_gem_active_wait_unlocked(&engine->last_request, flags);
 }

 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);