Commit cf586021 authored by Chris Wilson, committed by Matthew Auld

drm/i915/gt: Pipelined page migration

If we pipeline the PTE updates and then do the copy of those pages
within a single unpreemptible command packet, we can submit the copies
and leave them to be scheduled without having to synchronously wait
under a global lock. In order to manage migration, we need to
preallocate the page tables (and keep them pinned and available for use
at any time), causing a bottleneck for migrations as all clients must
contend on the limited resources. By inlining the ppGTT updates and
performing the blit atomically, each client only owns the PTE while in
use, and so we can reschedule individual operations however we see fit.
And most importantly, we do not need to take a global lock on the shared
vm, and wait until the operation is complete before releasing the lock
for others to claim the PTE for themselves.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-8-thomas.hellstrom@linux.intel.com
parent b4ef9530
......@@ -108,6 +108,7 @@ gt-y += \
gt/intel_gtt.o \
gt/intel_llc.o \
gt/intel_lrc.o \
gt/intel_migrate.o \
gt/intel_mocs.o \
gt/intel_ppgtt.o \
gt/intel_rc6.o \
......
......@@ -188,6 +188,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_HWS_CSB_BUF0_INDEX 0x10
......
......@@ -123,8 +123,10 @@
#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
#define MI_SEMAPHORE_TOKEN_SHIFT 5
#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0)
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_STORE_QWORD_IMM_GEN8 (MI_INSTR(0x20, 3) | REG_BIT(21))
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
#define MI_USE_GGTT (1 << 22) /* g4x+ */
#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
......
This diff is collapsed.
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2020 Intel Corporation
*/
/*
 * Public interface of the GT migration helper.
 *
 * NOTE(review): the implementations are not visible from this header; the
 * notes below describe only what the prototypes and the selftest callers
 * in this commit show. Confirm details against intel_migrate.c.
 */
#ifndef __INTEL_MIGRATE__
#define __INTEL_MIGRATE__
#include "intel_migrate_types.h"
/* Forward declarations: only pointers to these types are used below. */
struct dma_fence;
struct i915_request;
struct i915_gem_ww_ctx;
struct intel_gt;
struct scatterlist;
enum i915_cache_level;
/*
 * Set up @m for use with @gt. Returns an int status; the selftest treats a
 * non-zero return as "migration unavailable" and skips.
 */
int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt);
/* Returns an intel_context associated with @m (ownership: TODO confirm). */
struct intel_context *intel_migrate_create_context(struct intel_migrate *m);
/*
 * Copy the pages described by @src to the pages described by @dst.
 *
 * @ww:   ww acquire context of the caller; the selftest calls this while
 *        holding the object locks taken under the same context.
 * @await: fence to wait upon before copying; the selftest passes NULL.
 * @src/@dst: scatterlists of the backing pages, each with its cache level
 *        and a flag stating whether it lives in local memory.
 * @out:  receives the request tracking the copy; callers in this commit
 *        check it for NULL, wait on it, then i915_request_put() it.
 *
 * Returns 0 on success; callers in this commit handle -EDEADLK, -EINTR and
 * -ERESTARTSYS as non-fatal/retryable results.
 */
int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
bool dst_is_lmem,
struct i915_request **out);
/*
 * Same copy operation as intel_migrate_copy(), but issued on the caller
 * supplied context @ce and without a ww acquire context parameter.
 */
int intel_context_migrate_copy(struct intel_context *ce,
struct dma_fence *await,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
bool dst_is_lmem,
struct i915_request **out);
/* Tear down what intel_migrate_init() set up in @m. */
void intel_migrate_fini(struct intel_migrate *m);
#endif /* __INTEL_MIGRATE__ */
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2020 Intel Corporation
*/
/*
 * Type-only header so that users can embed struct intel_migrate without
 * pulling in the full migration API.
 */
#ifndef __INTEL_MIGRATE_TYPES__
#define __INTEL_MIGRATE_TYPES__
struct intel_context;
/* State of one migration helper instance. */
struct intel_migrate {
/* Context held by the instance; the selftest reaches the engine via it. */
struct intel_context *context;
};
#endif /* __INTEL_MIGRATE_TYPES__ */
......@@ -49,6 +49,7 @@ static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
* intel_ring_begin()).
*/
GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
GEM_BUG_ON(!IS_ALIGNED(rq->ring->emit, 8)); /* RING_TAIL qword align */
}
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
......
// SPDX-License-Identifier: MIT
/*
* Copyright © 2020 Intel Corporation
*/
#include "selftests/i915_random.h"
/*
 * Object sizes (in bytes) exercised by live_migrate_copy(). The middle
 * entries bracket CHUNK_SZ by one 4K page on either side.
 * NOTE(review): CHUNK_SZ is defined outside this file — presumably the
 * per-request copy chunk used by intel_migrate.c; confirm.
 */
static const unsigned int sizes[] = {
SZ_4K,
SZ_64K,
SZ_2M,
CHUNK_SZ - SZ_4K,
CHUNK_SZ,
CHUNK_SZ + SZ_4K,
SZ_64M,
};
/*
 * Allocate a test object of @size bytes, preferring local memory.
 *
 * When the device has LMEM an lmem object is tried first; if the device
 * has no LMEM, or that allocation fails, an internal object is created
 * instead. The result of the final attempt (object or ERR_PTR) is returned.
 */
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	if (!HAS_LMEM(i915))
		return i915_gem_object_create_internal(i915, size);

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);

	return obj;
}
/*
 * copy - run one migration copy of @sz bytes through @fn and verify it
 * @migrate: migration instance under test
 * @fn: backend that submits the copy (__migrate_copy or __global_copy)
 * @sz: size in bytes of the source and destination objects
 * @prng: random state used to pick the offsets that are verified
 *
 * The source object (lmem when available, otherwise internal) is filled
 * with the pattern vaddr[i] = i and the internal destination object with
 * vaddr[i] = ~i. After @fn has submitted the copy and the request has
 * completed (waiting up to HZ), one randomly chosen u32 in every page of
 * the destination is checked against the source pattern.
 *
 * Returns 0 on success, and also when either object cannot be created
 * (the test is then silently skipped); otherwise a negative error code.
 */
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	/*
	 * Start from NULL: rq is inspected after fn() fails, and nothing
	 * visible here guarantees that fn() stores to it on every error path.
	 */
	struct i915_request *rq = NULL;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0; /* no memory for the test object: skip */

	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src; /* err is still 0: skip */

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		/* Source pattern: every dword holds its own index. */
		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		/* Destination starts with the inverse so stale data is caught. */
		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue; /* success: leave the loop with both maps pinned */

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
			/* Do not leave a stale pointer behind for a ww retry. */
			rq = NULL;
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	/*
	 * vaddr still maps dst here. Sample one dword per page; the stride
	 * of 1024 dwords matches the 4096 bytes dumped on mismatch.
	 */
	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}
static int __migrate_copy(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct i915_request **out)
{
return intel_migrate_copy(migrate, ww, NULL,
src->mm.pages->sgl, src->cache_level,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl, dst->cache_level,
i915_gem_object_is_lmem(dst),
out);
}
static int __global_copy(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct i915_request **out)
{
return intel_context_migrate_copy(migrate->context, NULL,
src->mm.pages->sgl, src->cache_level,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl, dst->cache_level,
i915_gem_object_is_lmem(dst),
out);
}
/* Run copy() for @sz bytes using the __migrate_copy backend. */
static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	int err;

	err = copy(migrate, __migrate_copy, sz, prng);

	return err;
}
/* Run copy() for @sz bytes using the __global_copy backend. */
static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	int err;

	err = copy(migrate, __global_copy, sz, prng);

	return err;
}
/*
 * Subtest: for each entry of sizes[], run the migrate backend and then, if
 * that passed, the global backend. Freed objects are drained after every
 * size, and the first failure stops the walk and is returned.
 */
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int err = 0;
	int n;

	for (n = 0; !err && n < ARRAY_SIZE(sizes); n++) {
		const unsigned int sz = sizes[n];

		err = migrate_copy(migrate, sz, &prng);
		if (!err)
			err = global_copy(migrate, sz, &prng);

		/* Drain even on failure, before the error is propagated. */
		i915_gem_drain_freed_objects(i915);
	}

	return err;
}
/* Per-thread argument block handed to each kthread by threaded_migrate(). */
struct threaded_migrate {
/* Migration instance shared by all threads. */
struct intel_migrate *migrate;
/* The kthread itself; stays NULL if the thread was never started. */
struct task_struct *tsk;
/* Private random state so threads do not share a generator. */
struct rnd_state prng;
};
/*
 * Run @fn concurrently in num_online_cpus() + 1 kthreads against @migrate.
 *
 * Each thread gets its own struct threaded_migrate with a private prng
 * seeded from a local generator. After a short sleep every started thread
 * is stopped; the first thread-creation error, or failing that the first
 * non-zero thread exit status, is returned. @flags is currently unused.
 * Returns 0 if the per-thread array cannot be allocated.
 */
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *workers;
	I915_RND_STATE(prng);
	unsigned int n;
	int ret = 0;

	workers = kcalloc(n_cpus, sizeof(*workers), GFP_KERNEL);
	if (!workers)
		return 0;

	for (n = 0; n < n_cpus; n++) {
		struct threaded_migrate *w = &workers[n];
		struct task_struct *tsk;

		w->migrate = migrate;
		w->prng = I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, w, "igt-%d", n);
		if (IS_ERR(tsk)) {
			/* Stop spawning; threads already started are reaped below. */
			ret = PTR_ERR(tsk);
			break;
		}

		/* Hold a reference so the task outlives its own exit. */
		get_task_struct(tsk);
		w->tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (n = 0; n < n_cpus; n++) {
		struct task_struct *tsk = workers[n].tsk;

		if (!IS_ERR_OR_NULL(tsk)) {
			int status = kthread_stop(tsk);

			/* Keep only the first error seen. */
			if (!ret && status)
				ret = status;

			put_task_struct(tsk);
		}
	}

	kfree(workers);
	return ret;
}
/* kthread body: one migrate_copy() of 2 * CHUNK_SZ bytes with this thread's prng. */
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;
	struct intel_migrate *migrate = tm->migrate;
	struct rnd_state *prng = &tm->prng;

	return migrate_copy(migrate, 2 * CHUNK_SZ, prng);
}
/* Subtest: run __thread_migrate_copy in parallel via threaded_migrate(). */
static int thread_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}
/* kthread body: one global_copy() of 2 * CHUNK_SZ bytes with this thread's prng. */
static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;
	struct intel_migrate *migrate = tm->migrate;
	struct rnd_state *prng = &tm->prng;

	return global_copy(migrate, 2 * CHUNK_SZ, prng);
}
/* Subtest: run __thread_global_copy in parallel via threaded_migrate(). */
static int thread_global_copy(void *arg)
{
	struct intel_migrate *migrate = arg;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}
/*
 * Entry point of the "migrate" live selftests.
 *
 * A private intel_migrate instance is initialised on the device's GT and
 * passed to every subtest. If initialisation fails the whole group is
 * skipped and 0 is returned; otherwise the result of i915_subtests() is
 * returned after the instance has been torn down.
 */
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_global_copy),
	};
	struct intel_migrate m;
	int ret;

	ret = intel_migrate_init(&m, &i915->gt);
	if (ret)
		return 0;

	ret = i915_subtests(tests, &m);
	intel_migrate_fini(&m);

	return ret;
}
......@@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests)
selftest(gt_pm, intel_gt_pm_live_selftests)
selftest(gt_heartbeat, intel_heartbeat_live_selftests)
selftest(requests, i915_request_live_selftests)
selftest(migrate, intel_migrate_live_selftests)
selftest(active, i915_active_live_selftests)
selftest(objects, i915_gem_object_live_selftests)
selftest(mman, i915_gem_mman_live_selftests)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment