drm/i915: Use VMA for ringbuffer tracking

Use the GGTT VMA as the primary cookie for handling ring objects, as the most common actions upon the ring are mapping and unmapping, which act upon the VMA itself. By restructuring the code to work with the ring VMA, we can shrink the code and remove a few cycles from context pinning. v2: Move the flush of the object back to before the first pin. We use the am-I-bound? query to only have to check the flush on the first bind and so avoid stalling on active rings. Lots of little renames and small hoops. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-18-git-send-email-chris@chris-wilson.co.uk

drm/i915: Use VMA for ringbuffer tracking
Use the GGTT VMA as the primary cookie for handling ring objects, as the most common actions upon the ring are mapping and unmapping, which act upon the VMA itself. By restructuring the code to work with the ring VMA, we can shrink the code and remove a few cycles from context pinning. v2: Move the flush of the object back to before the first pin. We use the am-I-bound? query to only have to check the flush on the first bind and so avoid stalling on active rings. Lots of little renames and small hoops. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-18-git-send-email-chris@chris-wilson.co.uk
57e88531 · Chris Wilson · e5cdb22b · 57e88531 · 57e88531 · 57e88531
Commit 57e88531 authored Aug 15, 2016 by Chris Wilson
6 changed files
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -356,7 +356,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
 		if (ctx->engine[n].state)
 			per_file_stats(0, ctx->engine[n].state->obj, data);
 		if (ctx->engine[n].ring)
-			per_file_stats(0, ctx->engine[n].ring->obj, data);
+			per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
 	}

 	return 0;

--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1128,12 +1128,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =
 				i915_error_ggtt_object_create(dev_priv,
-							      ring->obj);
+							      ring->vma->obj);
 		}

 		ee->hws_page =
 			i915_error_ggtt_object_create(dev_priv,
-						      engine->status_page.obj);
+						      engine->status_page.vma->obj);

 		ee->wa_ctx = i915_error_ggtt_object_create(dev_priv,
 							   engine->wa_ctx.obj);

--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -343,7 +343,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		struct intel_context *ce = &ctx->engine[engine->id];
 		uint32_t guc_engine_id = engine->guc_id;
 		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
-		struct drm_i915_gem_object *obj;

 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -358,17 +357,14 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);

 		/* The state page is after PPHWSP */
-		gfx_addr = ce->state->node.start;
-		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
+		lrc->ring_lcra =
+			ce->state->node.start + LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
 				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);

-		obj = ce->ring->obj;
-		gfx_addr = i915_gem_obj_ggtt_offset(obj);
-
-		lrc->ring_begin = gfx_addr;
-		lrc->ring_end = gfx_addr + obj->base.size - 1;
-		lrc->ring_next_free_location = gfx_addr;
+		lrc->ring_begin = ce->ring->vma->node.start;
+		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
+		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;

 		desc.engines_used |= (1 << guc_engine_id);
@@ -943,7 +939,7 @@ static void guc_create_ads(struct intel_guc *guc)
 	 * to find it.
 	 */
 	engine = &dev_priv->engine[RCS];
-	ads->golden_context_lrca = engine->status_page.gfx_addr;
+	ads->golden_context_lrca = engine->status_page.ggtt_offset;

 	for_each_engine(engine, dev_priv)
 		ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine);

--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1273,7 +1273,7 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;

 	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-		   (u32)engine->status_page.gfx_addr);
+		   engine->status_page.ggtt_offset);
 	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
 }

@@ -1695,9 +1695,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)

 	intel_engine_cleanup_common(engine);

-	if (engine->status_page.obj) {
-		i915_gem_object_unpin_map(engine->status_page.obj);
-		engine->status_page.obj = NULL;
+	if (engine->status_page.vma) {
+		i915_gem_object_unpin_map(engine->status_page.vma->obj);
+		engine->status_page.vma = NULL;
 	}
 	intel_lr_context_unpin(dev_priv->kernel_context, engine);

@@ -1744,16 +1744,17 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static int
 lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
 {
+	const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
 	void *hws;

 	/* The HWSP is part of the default context object in LRC mode. */
-	engine->status_page.gfx_addr =
-		vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
 	hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
 	if (IS_ERR(hws))
 		return PTR_ERR(hws);
-	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
-	engine->status_page.obj = vma->obj;
+
+	engine->status_page.page_addr = hws + hws_offset;
+	engine->status_page.ggtt_offset = vma->node.start + hws_offset;
+	engine->status_page.vma = vma;

 	return 0;
 }

--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -466,7 +466,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
 		mmio = RING_HWS_PGA(engine->mmio_base);
 	}

-	I915_WRITE(mmio, (u32)engine->status_page.gfx_addr);
+	I915_WRITE(mmio, engine->status_page.ggtt_offset);
 	POSTING_READ(mmio);

 	/*
@@ -531,7 +531,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_ring *ring = engine->buffer;
-	struct drm_i915_gem_object *obj = ring->obj;
 	int ret = 0;

 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -571,7 +570,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	 * registers with the above sequence (the readback of the HEAD registers
 	 * also enforces ordering), otherwise the hw might lose the new ring
 	 * register values. */
-	I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
+	I915_WRITE_START(engine, ring->vma->node.start);

 	/* WaClearRingBufHeadRegAtInit:ctg,elk */
 	if (I915_READ_HEAD(engine))
@@ -586,16 +585,16 @@ static int init_ring_common(struct intel_engine_cs *engine)

 	/* If the head is still not zero, the ring is dead */
 	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
-		     I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
+		     I915_READ_START(engine) == ring->vma->node.start &&
 		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
+			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08llx]\n",
 			  engine->name,
 			  I915_READ_CTL(engine),
 			  I915_READ_CTL(engine) & RING_VALID,
 			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
 			  I915_READ_START(engine),
-			  (unsigned long)i915_gem_obj_ggtt_offset(obj));
+			  ring->vma->node.start);
 		ret = -EIO;
 		goto out;
 	}
@@ -1853,79 +1852,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)

 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;

-	obj = engine->status_page.obj;
-	if (obj == NULL)
+	vma = fetch_and_zero(&engine->status_page.vma);
+	if (!vma)
 		return;

-	kunmap(sg_page(obj->pages->sgl));
-	i915_gem_object_ggtt_unpin(obj);
-	i915_gem_object_put(obj);
-	engine->status_page.obj = NULL;
+	i915_vma_unpin(vma);
+	i915_gem_object_unpin_map(vma->obj);
+	i915_vma_put(vma);
 }

 static int init_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *obj = engine->status_page.obj;
-
-	if (obj == NULL) {
-		unsigned flags;
-		int ret;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned int flags;
+	int ret;

-		obj = i915_gem_object_create(&engine->i915->drm, 4096);
-		if (IS_ERR(obj)) {
-			DRM_ERROR("Failed to allocate status page\n");
-			return PTR_ERR(obj);
-		}
+	obj = i915_gem_object_create(&engine->i915->drm, 4096);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Failed to allocate status page\n");
+		return PTR_ERR(obj);
+	}

-		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-		if (ret)
-			goto err_unref;
-
-		flags = 0;
-		if (!HAS_LLC(engine->i915))
-			/* On g33, we cannot place HWS above 256MiB, so
-			 * restrict its pinning to the low mappable arena.
-			 * Though this restriction is not documented for
-			 * gen4, gen5, or byt, they also behave similarly
-			 * and hang if the HWS is placed at the top of the
-			 * GTT. To generalise, it appears that all !llc
-			 * platforms have issues with us placing the HWS
-			 * above the mappable region (even though we never
-			 * actualy map it).
-			 */
-			flags |= PIN_MAPPABLE;
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
-		if (ret) {
-err_unref:
-			i915_gem_object_put(obj);
-			return ret;
-		}
+	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	if (ret)
+		goto err;

-		engine->status_page.obj = obj;
+	vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err;
 	}

-	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
-	engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
-	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
+	flags = PIN_GLOBAL;
+	if (!HAS_LLC(engine->i915))
+		/* On g33, we cannot place HWS above 256MiB, so
+		 * restrict its pinning to the low mappable arena.
+		 * Though this restriction is not documented for
+		 * gen4, gen5, or byt, they also behave similarly
+		 * and hang if the HWS is placed at the top of the
+		 * GTT. To generalise, it appears that all !llc
+		 * platforms have issues with us placing the HWS
+		 * above the mappable region (even though we never
+		 * actualy map it).
+		 */
+		flags |= PIN_MAPPABLE;
+	ret = i915_vma_pin(vma, 0, 4096, flags);
+	if (ret)
+		goto err;

-	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
-			engine->name, engine->status_page.gfx_addr);
+	engine->status_page.vma = vma;
+	engine->status_page.ggtt_offset = vma->node.start;
+	engine->status_page.page_addr =
+		i915_gem_object_pin_map(obj, I915_MAP_WB);

+	DRM_DEBUG_DRIVER("%s hws offset: 0x%08llx\n",
+			 engine->name, vma->node.start);
 	return 0;
+
+err:
+	i915_gem_object_put(obj);
+	return ret;
 }

 static int init_phys_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;

-	if (!dev_priv->status_page_dmah) {
-		dev_priv->status_page_dmah =
-			drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
-		if (!dev_priv->status_page_dmah)
-			return -ENOMEM;
-	}
+	dev_priv->status_page_dmah =
+		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
+	if (!dev_priv->status_page_dmah)
+		return -ENOMEM;

 	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
 	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
@@ -1935,52 +1934,43 @@ static int init_phys_status_page(struct intel_engine_cs *engine)

 int intel_ring_pin(struct intel_ring *ring)
 {
-	struct drm_i915_private *dev_priv = ring->engine->i915;
-	struct drm_i915_gem_object *obj = ring->obj;
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	unsigned flags = PIN_OFFSET_BIAS | 4096;
+	unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
+	struct i915_vma *vma = ring->vma;
 	void *addr;
 	int ret;

-	if (HAS_LLC(dev_priv) && !obj->stolen) {
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
-		if (ret)
-			return ret;
+	GEM_BUG_ON(ring->vaddr);

-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
+	if (ring->needs_iomap)
+		flags |= PIN_MAPPABLE;

-		addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-		if (IS_ERR(addr)) {
-			ret = PTR_ERR(addr);
-			goto err_unpin;
-		}
-	} else {
-		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
-					       flags | PIN_MAPPABLE);
-		if (ret)
+	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		if (flags & PIN_MAPPABLE)
+			ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+		else
+			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
+		if (unlikely(ret))
 			return ret;
+	}

-		ret = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (ret)
-			goto err_unpin;
+	ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
+	if (unlikely(ret))
+		return ret;

-		addr = (void __force *)
-			i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
-		if (IS_ERR(addr)) {
-			ret = PTR_ERR(addr);
-			goto err_unpin;
-		}
-	}
+	if (flags & PIN_MAPPABLE)
+		addr = (void __force *)i915_vma_pin_iomap(vma);
+	else
+		addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(addr))
+		goto err;

 	ring->vaddr = addr;
-	ring->vma = i915_gem_obj_to_ggtt(obj);
 	return 0;

-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-	return ret;
+err:
+	i915_vma_unpin(vma);
+	return PTR_ERR(addr);
 }

 void intel_ring_unpin(struct intel_ring *ring)
@@ -1988,60 +1978,56 @@ void intel_ring_unpin(struct intel_ring *ring)
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);

-	if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
-		i915_gem_object_unpin_map(ring->obj);
-	else
+	if (ring->needs_iomap)
 		i915_vma_unpin_iomap(ring->vma);
+	else
+		i915_gem_object_unpin_map(ring->vma->obj);
 	ring->vaddr = NULL;

-	i915_gem_object_ggtt_unpin(ring->obj);
-	ring->vma = NULL;
-}
-
-static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
-{
-	i915_gem_object_put(ring->obj);
-	ring->obj = NULL;
+	i915_vma_unpin(ring->vma);
 }

-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
-				      struct intel_ring *ring)
+static struct i915_vma *
+intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 {
 	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;

-	obj = NULL;
-	if (!HAS_LLC(dev))
-		obj = i915_gem_object_create_stolen(dev, ring->size);
-	if (obj == NULL)
-		obj = i915_gem_object_create(dev, ring->size);
+	obj = ERR_PTR(-ENODEV);
+	if (!HAS_LLC(dev_priv))
+		obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
 	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+		obj = i915_gem_object_create(&dev_priv->drm, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);

 	/* mark ring buffers as read-only from GPU side by default */
 	obj->gt_ro = 1;

-	ring->obj = obj;
+	vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
+	if (IS_ERR(vma))
+		goto err;

-	return 0;
+	return vma;
+
+err:
+	i915_gem_object_put(obj);
+	return vma;
 }

 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 {
 	struct intel_ring *ring;
-	int ret;
+	struct i915_vma *vma;

 	GEM_BUG_ON(!is_power_of_2(size));

 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-	if (ring == NULL) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
-				 engine->name);
+	if (!ring)
 		return ERR_PTR(-ENOMEM);
-	}

 	ring->engine = engine;
-	list_add(&ring->link, &engine->buffers);

 	INIT_LIST_HEAD(&ring->request_list);

@@ -2057,22 +2043,23 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	ring->last_retired_head = -1;
 	intel_ring_update_space(ring);

-	ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
-				 engine->name, ret);
-		list_del(&ring->link);
+	vma = intel_ring_create_vma(engine->i915, size);
+	if (IS_ERR(vma)) {
 		kfree(ring);
-		return ERR_PTR(ret);
+		return ERR_CAST(vma);
 	}
+	ring->vma = vma;
+	if (!HAS_LLC(engine->i915) || vma->obj->stolen)
+		ring->needs_iomap = true;

+	list_add(&ring->link, &engine->buffers);
 	return ring;
 }

 void
 intel_ring_free(struct intel_ring *ring)
 {
-	intel_destroy_ringbuffer_obj(ring);
+	i915_vma_put(ring->vma);
 	list_del(&ring->link);
 	kfree(ring);
 }
@@ -2166,7 +2153,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 		ret = PTR_ERR(ring);
 		goto error;
 	}
-	engine->buffer = ring;

 	if (I915_NEED_GFX_HWS(dev_priv)) {
 		ret = init_status_page(engine);
@@ -2181,11 +2167,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)

 	ret = intel_ring_pin(ring);
 	if (ret) {
-		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
-				engine->name, ret);
-		intel_destroy_ringbuffer_obj(ring);
+		intel_ring_free(ring);
 		goto error;
 	}
+	engine->buffer = ring;

 	return 0;


--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -26,10 +26,10 @@
 */
 #define I915_RING_FREE_SPACE 64

-struct  intel_hw_status_page {
-	u32		*page_addr;
-	unsigned int	gfx_addr;
-	struct		drm_i915_gem_object *obj;
+struct intel_hw_status_page {
+	struct i915_vma *vma;
+	u32 *page_addr;
+	u32 ggtt_offset;
 };

 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -83,9 +83,8 @@ struct intel_engine_hangcheck {
 };

 struct intel_ring {
-	struct drm_i915_gem_object *obj;
-	void *vaddr;
 	struct i915_vma *vma;
+	void *vaddr;

 	struct intel_engine_cs *engine;
 	struct list_head link;
@@ -97,6 +96,7 @@ struct intel_ring {
 	int space;
 	int size;
 	int effective_size;
+	bool needs_iomap;

 	/** We track the position of the requests in the ring buffer, and
 	 * when each is retired we increment last_retired_head as the GPU
@@ -516,7 +516,7 @@ int init_workarounds_ring(struct intel_engine_cs *engine);

 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
-	return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR;
+	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
 }

 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */