drm/i915: Reduce presumption of request ordering for barriers

Currently we assume that we know the order in which requests run and so can determine if we need to reissue a switch-to-kernel-context prior to idling. That assumption does not hold for the future, so instead of tracking which barriers have been used, simply determine if we have ever switched away from the kernel context by using the engine and before idling ensure that all engines that have been used since the last idle are synchronously switched back to the kernel context for safety (and else of shrinking memory while idle). v2: Use intel_engine_mask_t and ALL_ENGINES Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190308093657.8640-3-chris@chris-wilson.co.uk

drm/i915: Reduce presumption of request ordering for barriers
Currently we assume that we know the order in which requests run and so can determine if we need to reissue a switch-to-kernel-context prior to idling. That assumption does not hold for the future, so instead of tracking which barriers have been used, simply determine if we have ever switched away from the kernel context by using the engine and before idling ensure that all engines that have been used since the last idle are synchronously switched back to the kernel context for safety (and else of shrinking memory while idle). v2: Use intel_engine_mask_t and ALL_ENGINES Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190308093657.8640-3-chris@chris-wilson.co.uk
c6eeb479 · Chris Wilson · 604c37d7 · c6eeb479 · c6eeb479 · c6eeb479
Commit c6eeb479 authored Mar 08, 2019 by Chris Wilson
10 changed files
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1995,6 +1995,7 @@ struct drm_i915_private {
 			struct list_head hwsp_free_list;
 		} timelines;

+		intel_engine_mask_t active_engines;
 		struct list_head active_rings;
 		struct list_head closed_vma;
 		u32 active_requests;

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2845,7 +2845,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	}
 }

-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
+					  unsigned long mask)
 {
 	bool result = true;

@@ -2854,7 +2855,7 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 	 * to save itself before we report the failure. Yes, this may be a
 	 * false positive due to e.g. ENOMEM, caveat emptor!
 	 */
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (i915_gem_switch_to_kernel_context(i915, mask))
 		result = false;

 	if (i915_gem_wait_for_idle(i915,
@@ -2879,7 +2880,8 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)

 static bool load_power_context(struct drm_i915_private *i915)
 {
-	if (!switch_to_kernel_context_sync(i915))
+	/* Force loading the kernel context on all engines */
+	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
 		return false;

 	/*
@@ -2927,7 +2929,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	    !i915->gt.active_requests) {
 		++i915->gt.active_requests; /* don't requeue idle */

-		switch_to_kernel_context_sync(i915);
+		switch_to_kernel_context_sync(i915, i915->gt.active_engines);

 		if (!--i915->gt.active_requests) {
 			__i915_gem_park(i915);
@@ -4380,7 +4382,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(i915);
+	switch_to_kernel_context_sync(i915, i915->gt.active_engines);

 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_reset_flush(i915);

--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -704,63 +704,10 @@ last_request_on_engine(struct i915_timeline *timeline,
 	return NULL;
 }

-static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	const struct intel_context * const ce =
-		to_intel_context(i915->kernel_context, engine);
-	struct i915_timeline *barrier = ce->ring->timeline;
-	struct intel_ring *ring;
-	bool any_active = false;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
-		struct i915_request *rq;
-
-		rq = last_request_on_engine(ring->timeline, engine);
-		if (!rq)
-			continue;
-
-		any_active = true;
-
-		if (rq->hw_context == ce)
-			continue;
-
-		/*
-		 * Was this request submitted after the previous
-		 * switch-to-kernel-context?
-		 */
-		if (!i915_timeline_sync_is_later(barrier, &rq->fence)) {
-			GEM_TRACE("%s needs barrier for %llx:%lld\n",
-				  ring->timeline->name,
-				  rq->fence.context,
-				  rq->fence.seqno);
-			return false;
-		}
-
-		GEM_TRACE("%s has barrier after %llx:%lld\n",
-			  ring->timeline->name,
-			  rq->fence.context,
-			  rq->fence.seqno);
-	}
-
-	/*
-	 * If any other timeline was still active and behind the last barrier,
-	 * then our last switch-to-kernel-context must still be queued and
-	 * will run last (leaving the engine in the kernel context when it
-	 * eventually idles).
-	 */
-	if (any_active)
-		return true;
-
-	/* The engine is idle; check that it is idling in the kernel context. */
-	return engine->last_retired_context == ce;
-}
-
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
+int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
+				      unsigned long mask)
 {
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;

 	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));

@@ -771,17 +718,11 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(i915))
 		return 0;

-	i915_retire_requests(i915);
-
-	for_each_engine(engine, i915, id) {
+	for_each_engine_masked(engine, i915, mask, mask) {
 		struct intel_ring *ring;
 		struct i915_request *rq;

 		GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
-		if (engine_has_kernel_context_barrier(engine))
-			continue;
-
-		GEM_TRACE("emit barrier on %s\n", engine->name);

 		rq = i915_request_alloc(engine, i915->kernel_context);
 		if (IS_ERR(rq))
@@ -805,7 +746,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 							 &prev->submit,
 							 I915_FENCE_GFP);
-			i915_timeline_sync_set(rq->timeline, &prev->fence);
 		}

 		i915_request_add(rq);

--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -372,7 +372,8 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 void i915_gem_context_close(struct drm_file *file);

 int i915_switch_context(struct i915_request *rq);
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
+int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
+				      unsigned long engine_mask);

 void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *

--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -62,7 +62,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	err = i915_gem_switch_to_kernel_context(i915);
+	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
 	if (err)
 		return err;


--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1068,6 +1068,7 @@ void i915_request_add(struct i915_request *request)
 		GEM_TRACE("marking %s as active\n", ring->timeline->name);
 		list_add(&ring->active_link, &request->i915->gt.active_rings);
 	}
+	request->i915->gt.active_engines |= request->engine->mask;
 	request->emitted_jiffies = jiffies;

 	/*

--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1106,6 +1106,9 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)

 	lockdep_assert_held(&engine->i915->drm.struct_mutex);

+	if (!engine->context_size)
+		return true;
+
 	/*
 	 * Check the last context seen by the engine. If active, it will be
 	 * the last request that remains in the timeline. When idle, it is
@@ -1205,6 +1208,8 @@ void intel_engines_park(struct drm_i915_private *i915)
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
+
+	i915->gt.active_engines = 0;
 }

 /**

--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1512,7 +1512,8 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
 			}
 		}

-		err = i915_gem_switch_to_kernel_context(i915);
+		err = i915_gem_switch_to_kernel_context(i915,
+							i915->gt.active_engines);
 		if (err)
 			return err;


--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -14,7 +14,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 	cond_resched();

 	if (flags & I915_WAIT_LOCKED &&
-	    i915_gem_switch_to_kernel_context(i915)) {
+	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
 		pr_err("Failed to switch back to kernel context; declaring wedged\n");
 		i915_gem_set_wedged(i915);
 	}

--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -109,6 +109,10 @@ static void mock_retire_work_handler(struct work_struct *work)

 static void mock_idle_work_handler(struct work_struct *work)
 {
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
+
+	i915->gt.active_engines = 0;
 }

 static int pm_domain_resume(struct device *dev)