Commit 50ae6c61 authored by Maarten Lankhorst, committed by Joonas Lahtinen

drm/i915: Revert relocation chaining commits.

This reverts commit 964a9b0f ("drm/i915/gem: Use chained reloc batches")
and commit 0e97fbb0 ("drm/i915/gem: Use a single chained reloc batches
for a single execbuf").

When adding ww locking to execbuf, it's hard enough to deal with a
single BO that is part of relocation execution. Chaining is hard to
get right, and with GPU relocation deprecated, it's best to drop this
altogether, instead of trying to fix something we will remove.

This is not a complete 1:1 revert: we still reset rq_size to 0 in
reloc_cache_init, as introduced by e3d29130 ("drm/i915/gem: Implement legacy
MI_STORE_DATA_IMM"), because we don't want to break the selftests. (Daniel)
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20200819140904.1708856-3-maarten.lankhorst@linux.intel.com
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent 102a0a90
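For context, the behaviour this revert restores is one relocation command page per request: reloc_gpu() flushes and resubmits when the page is about to run out of space, instead of chaining a fresh batch onto the current one. Below is a minimal userspace sketch of that overflow rule only, not kernel code: cmd_page, emit(), flush(), PAGE_DWORDS and the loop in main() are invented for illustration, while the overflow check and the single dword reserved for MI_BATCH_BUFFER_END mirror the restored reloc_gpu() and reloc_gpu_flush() in the diff below.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_DWORDS		(4096u / sizeof(uint32_t))	/* one 4 KiB command page */
#define MI_BATCH_BUFFER_END	(0x0a << 23)			/* MI_INSTR(0x0a, 0), as in i915 */

/* Stand-in for the few reloc_cache fields the sketch needs. */
struct cmd_page {
	uint32_t cmd[PAGE_DWORDS];	/* like cache->rq_cmd */
	unsigned int size;		/* dwords used, like cache->rq_size */
};

/* Like reloc_gpu_flush(): terminate the batch, "submit" it, start over. */
static void flush(struct cmd_page *p)
{
	p->cmd[p->size] = MI_BATCH_BUFFER_END;
	printf("flush: %u dwords + terminator\n", p->size);
	memset(p->cmd, 0, sizeof(p->cmd));
	p->size = 0;
}

/*
 * Like the restored reloc_gpu(): when the next emission would not leave
 * room for the closing MI_BATCH_BUFFER_END, flush the current page and
 * start a new one instead of chaining batches.
 */
static uint32_t *emit(struct cmd_page *p, unsigned int len)
{
	uint32_t *cs;

	if (p->size > PAGE_DWORDS - (len + 1))
		flush(p);

	cs = p->cmd + p->size;
	p->size += len;
	return cs;
}

int main(void)
{
	struct cmd_page p = { .size = 0 };
	int i;

	for (i = 0; i < 300; i++) {
		uint32_t *cs = emit(&p, 8);	/* space for an 8-dword packet */

		memset(cs, 0, 8 * sizeof(*cs));	/* MI_NOOP filler (MI_NOOP == 0) */
	}
	flush(&p);	/* final flush; the kernel does this from reloc_cache_reset() */
	return 0;
}

The trade-off is that overflowing the page now costs an extra request rather than a chained batch, which is acceptable with GPU relocation on its way out.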
@@ -276,9 +276,7 @@ struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
-		struct i915_vma *target;
 		struct i915_request *rq;
-		struct i915_vma *rq_vma;
 		u32 *rq_cmd;
 		unsigned int rq_size;
 	} reloc_cache;
@@ -973,7 +971,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.flags = 0;
 	cache->rq = NULL;
-	cache->target = NULL;
+	cache->rq_size = 0;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -995,122 +993,29 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
-#define RELOC_TAIL 4
-
-static int reloc_gpu_chain(struct reloc_cache *cache)
-{
-	struct intel_gt_buffer_pool_node *pool;
-	struct i915_request *rq = cache->rq;
-	struct i915_vma *batch;
-	u32 *cmd;
-	int err;
-
-	pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
-	if (IS_ERR(pool))
-		return PTR_ERR(pool);
-
-	batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_pool;
-	}
-
-	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
-	if (err)
-		goto out_pool;
-
-	GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
-	cmd = cache->rq_cmd + cache->rq_size;
-	*cmd++ = MI_ARB_CHECK;
-	if (cache->gen >= 8)
-		*cmd++ = MI_BATCH_BUFFER_START_GEN8;
-	else if (cache->gen >= 6)
-		*cmd++ = MI_BATCH_BUFFER_START;
-	else
-		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	*cmd++ = lower_32_bits(batch->node.start);
-	*cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
-	i915_gem_object_flush_map(cache->rq_vma->obj);
-	i915_gem_object_unpin_map(cache->rq_vma->obj);
-	cache->rq_vma = NULL;
-
-	err = intel_gt_buffer_pool_mark_active(pool, rq);
-	if (err == 0) {
-		i915_vma_lock(batch);
-		err = i915_request_await_object(rq, batch->obj, false);
-		if (err == 0)
-			err = i915_vma_move_to_active(batch, rq, 0);
-		i915_vma_unlock(batch);
-	}
-	i915_vma_unpin(batch);
-	if (err)
-		goto out_pool;
-
-	cmd = i915_gem_object_pin_map(batch->obj,
-				      cache->has_llc ?
-				      I915_MAP_FORCE_WB :
-				      I915_MAP_FORCE_WC);
-	if (IS_ERR(cmd)) {
-		err = PTR_ERR(cmd);
-		goto out_pool;
-	}
-
-	/* Return with batch mapping (cmd) still pinned */
-	cache->rq_cmd = cmd;
-	cache->rq_size = 0;
-	cache->rq_vma = batch;
-
-out_pool:
-	intel_gt_buffer_pool_put(pool);
-	return err;
-}
-
-static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
-{
-	return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
-}
-
-static int reloc_gpu_flush(struct reloc_cache *cache)
+static void reloc_gpu_flush(struct reloc_cache *cache)
 {
-	struct i915_request *rq;
-	int err;
-
-	rq = fetch_and_zero(&cache->rq);
-	if (!rq)
-		return 0;
-
-	if (cache->rq_vma) {
-		struct drm_i915_gem_object *obj = cache->rq_vma->obj;
-
-		GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
-		cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
-
-		__i915_gem_object_flush_map(obj,
-					    0, sizeof(u32) * cache->rq_size);
-		i915_gem_object_unpin_map(obj);
-	}
-
-	err = 0;
-	if (rq->engine->emit_init_breadcrumb)
-		err = rq->engine->emit_init_breadcrumb(rq);
-	if (!err)
-		err = rq->engine->emit_bb_start(rq,
-						rq->batch->node.start,
-						PAGE_SIZE,
-						reloc_bb_flags(cache));
-	if (err)
-		i915_request_set_error_once(rq, err);
-
-	intel_gt_chipset_flush(rq->engine->gt);
-	i915_request_add(rq);
-
-	return err;
+	struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+
+	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+
+	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+	i915_gem_object_unpin_map(obj);
+
+	intel_gt_chipset_flush(cache->rq->engine->gt);
+
+	i915_request_add(cache->rq);
+	cache->rq = NULL;
 }
 
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
+	if (cache->rq)
+		reloc_gpu_flush(cache);
+
 	if (!cache->vaddr)
 		return;
@@ -1309,6 +1214,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct intel_engine_cs *engine,
+			     struct i915_vma *vma,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
@@ -1331,7 +1237,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto out_pool;
 	}
 
-	batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
+	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto err_unmap;
@@ -1367,6 +1273,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
+	err = reloc_move_to_gpu(rq, vma);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_bb_start(rq,
+					batch->node.start, PAGE_SIZE,
+					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+	if (err)
+		goto skip_request;
+
 	i915_vma_lock(batch);
 	err = i915_request_await_object(rq, batch->obj, false);
 	if (err == 0)
@@ -1381,7 +1297,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
 	cache->rq_size = 0;
-	cache->rq_vma = batch;
 
 	/* Return with batch mapping (cmd) still pinned */
 	goto out_pool;
@@ -1410,9 +1325,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
 	u32 *cmd;
-	int err;
+
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+		reloc_gpu_flush(cache);
 
 	if (unlikely(!cache->rq)) {
+		int err;
 		struct intel_engine_cs *engine = eb->engine;
 
 		if (!reloc_can_use_engine(engine)) {
@@ -1421,31 +1339,11 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 			return ERR_PTR(-ENODEV);
 		}
 
-		err = __reloc_gpu_alloc(eb, engine, len);
+		err = __reloc_gpu_alloc(eb, engine, vma, len);
 		if (unlikely(err))
 			return ERR_PTR(err);
 	}
 
-	if (vma != cache->target) {
-		err = reloc_move_to_gpu(cache->rq, vma);
-		if (unlikely(err)) {
-			i915_request_set_error_once(cache->rq, err);
-			return ERR_PTR(err);
-		}
-
-		cache->target = vma;
-	}
-
-	if (unlikely(cache->rq_size + len >
-		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
-		err = reloc_gpu_chain(cache);
-		if (unlikely(err)) {
-			i915_request_set_error_once(cache->rq, err);
-			return ERR_PTR(err);
-		}
-	}
-
-	GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
 	cmd = cache->rq_cmd + cache->rq_size;
 	cache->rq_size += len;
@@ -1793,20 +1691,15 @@ static int eb_relocate(struct i915_execbuffer *eb)
 	/* The objects are in their final locations, apply the relocations. */
 	if (eb->args->flags & __EXEC_HAS_RELOC) {
 		struct eb_vma *ev;
-		int flush;
 
 		list_for_each_entry(ev, &eb->relocs, reloc_link) {
 			err = eb_relocate_vma(eb, ev);
 			if (err)
-				break;
+				return err;
 		}
-
-		flush = reloc_gpu_flush(&eb->reloc_cache);
-		if (!err)
-			err = flush;
 	}
 
-	return err;
+	return 0;
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
...
@@ -53,13 +53,13 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	}
 
 	/* Skip to the end of the cmd page */
-	i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+	i = PAGE_SIZE / sizeof(u32) - 1;
 	i -= eb->reloc_cache.rq_size;
 	memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
 		 MI_NOOP, i);
 	eb->reloc_cache.rq_size += i;
 
-	/* Force batch chaining */
+	/* Force next batch */
 	if (!__reloc_entry_gpu(eb, vma,
 			       offsets[2] * sizeof(u32),
 			       2)) {
@@ -69,9 +69,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 
 	GEM_BUG_ON(!eb->reloc_cache.rq);
 	rq = i915_request_get(eb->reloc_cache.rq);
-	err = reloc_gpu_flush(&eb->reloc_cache);
-	if (err)
-		goto put_rq;
+	reloc_gpu_flush(&eb->reloc_cache);
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
 	err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
...