Commit d1b48c1e authored by Chris Wilson

drm/i915: Replace execbuf vma ht with an idr

This was the competing idea long ago, but it was only with the rewrite
of the idr as a radixtree, using the radixtree directly ourselves, and
the realisation that we can store the vma directly in the radixtree and
only need a list for the reverse mapping, that the patch became
performant enough to displace the hashtable. Though the vma ht is fast
and doesn't require any extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and
serialization, and will have the occasional slow lookup. That is hairy
enough to make it worth investigating alternatives, and favouring them
if they are equivalent in peak performance. One advantage of allocating
an indirection entry is that we can support a single shared bo between
many clients, something that was previously handled on a first-come,
first-served basis for shared GGTT vma. To offset the extra
allocations, we create yet another kmem_cache for them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-5-chris@chris-wilson.co.uk
parent 170fa29b
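
The pattern the diff below adopts is small enough to sketch up front: each
context keeps a radixtree mapping an execbuf handle to its vma for the fast
path, and every insertion also allocates a tiny translation entry that is
linked into two lists, one per object and one per context, so the entry can
be torn down from either side when the object or the context is closed. A
condensed sketch, assembled from the hunks below (not a compilable excerpt):

	struct i915_lut_handle {
		struct list_head obj_link;	/* chained into obj->lut_list */
		struct list_head ctx_link;	/* chained into ctx->handles_list */
		struct i915_gem_context *ctx;
		u32 handle;
	};

	/* execbuf fast path: one radixtree lookup per object */
	vma = radix_tree_lookup(&ctx->handles_vma, handle);

	/* slow path: first use of this handle in this context */
	lut = kmem_cache_alloc(i915->luts, GFP_KERNEL);
	err = radix_tree_insert(&ctx->handles_vma, handle, vma);
	list_add(&lut->obj_link, &obj->lut_list);
	list_add(&lut->ctx_link, &ctx->handles_list);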
@@ -1954,12 +1954,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, '\n');
 	}
 
-	seq_printf(m,
-		   "\tvma hashtable size=%u (actual %lu), count=%u\n",
-		   ctx->vma_lut.ht_size,
-		   BIT(ctx->vma_lut.ht_bits),
-		   ctx->vma_lut.ht_count);
-
 	seq_putc(m, '\n');
 	}
@@ -2156,6 +2156,7 @@ struct drm_i915_private {
 	struct kmem_cache *objects;
 	struct kmem_cache *vmas;
+	struct kmem_cache *luts;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
 	struct kmem_cache *priorities;
@@ -3242,25 +3242,33 @@ i915_gem_idle_work_handler(struct work_struct *work)
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
+	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
-	struct i915_vma *vma, *vn;
+	struct i915_lut_handle *lut, *ln;
 
-	mutex_lock(&obj->base.dev->struct_mutex);
-	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
-		if (vma->vm->file == fpriv)
+	mutex_lock(&i915->drm.struct_mutex);
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		if (!i915_vma_is_ggtt(vma))
 			i915_vma_close(vma);
 
-	vma = obj->vma_hashed;
-	if (vma && vma->ctx->file_priv == fpriv)
-		i915_vma_unlink_ctx(vma);
-
-	if (i915_gem_object_is_active(obj) &&
-	    !i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_set_active_reference(obj);
-		i915_gem_object_get(obj);
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
+
+		kmem_cache_free(i915->luts, lut);
+		__i915_gem_object_release_unless_active(obj);
 	}
-	mutex_unlock(&obj->base.dev->struct_mutex);
+
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static unsigned long to_wait_timeout(s64 timeout_ns)
@@ -4252,6 +4260,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
 	INIT_LIST_HEAD(&obj->vma_list);
+	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
@@ -4495,8 +4504,8 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 {
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
 
-	if (i915_gem_object_is_active(obj))
+	if (!i915_gem_object_has_active_reference(obj) &&
+	    i915_gem_object_is_active(obj))
 		i915_gem_object_set_active_reference(obj);
 	else
 		i915_gem_object_put(obj);
@@ -4888,12 +4897,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->vmas)
 		goto err_objects;
 
+	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
+	if (!dev_priv->luts)
+		goto err_vmas;
+
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
 					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
-		goto err_vmas;
+		goto err_luts;
 
 	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
 					    SLAB_HWCACHE_ALIGN |
@@ -4937,6 +4950,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
 	kmem_cache_destroy(dev_priv->requests);
+err_luts:
+	kmem_cache_destroy(dev_priv->luts);
 err_vmas:
 	kmem_cache_destroy(dev_priv->vmas);
 err_objects:
@@ -4959,6 +4974,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
+	kmem_cache_destroy(dev_priv->luts);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
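The new luts cache follows the driver's usual slab pattern: KMEM_CACHE()
creates a cache named and sized for the struct, an allocation failure
unwinds through the err_* labels in reverse order of creation, and cleanup
destroys the caches in that same reverse order. A minimal sketch of the
lifecycle, with the surrounding driver state elided:

	/* creation, in i915_gem_load_init() */
	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;	/* unwind only the caches made before us */

	/* per-entry usage on the execbuf and close paths */
	lut = kmem_cache_alloc(dev_priv->luts, GFP_KERNEL);
	kmem_cache_free(dev_priv->luts, lut);

	/* destruction, in i915_gem_load_cleanup() */
	kmem_cache_destroy(dev_priv->luts);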
@@ -93,69 +93,28 @@
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-/* Initial size (as log2) to preallocate the handle->object hashtable */
-#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
-
-static void resize_vma_ht(struct work_struct *work)
+static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_gem_context_vma_lut *lut =
-		container_of(work, typeof(*lut), resize);
-	unsigned int bits, new_bits, size, i;
-	struct hlist_head *new_ht;
-
-	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
-
-	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
-	new_bits = min_t(unsigned int,
-			 max(bits, VMA_HT_BITS),
-			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
-	if (new_bits == lut->ht_bits)
-		goto out;
-
-	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
-	if (!new_ht)
-		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
-	if (!new_ht)
-		/* Pretend resize succeeded and stop calling us for a bit! */
-		goto out;
+	struct i915_lut_handle *lut, *ln;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
 
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
-		struct hlist_node *tmp;
-
-		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
-			hlist_add_head(&vma->ctx_node,
-				       &new_ht[hash_32(vma->ctx_handle,
-						       new_bits)]);
+	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
+		list_del(&lut->obj_link);
+		kmem_cache_free(ctx->i915->luts, lut);
 	}
-	kvfree(lut->ht);
-	lut->ht = new_ht;
-	lut->ht_bits = new_bits;
-out:
-	smp_store_release(&lut->ht_size, BIT(bits));
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-}
 
-static void vma_lut_free(struct i915_gem_context *ctx)
-{
-	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
-	unsigned int i, size;
+	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+		struct i915_vma *vma = rcu_dereference_raw(*slot);
+		struct drm_i915_gem_object *obj = vma->obj;
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
-		cancel_work_sync(&lut->resize);
+		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
+		if (!i915_vma_is_ggtt(vma))
+			i915_vma_close(vma);
 
-		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
-			vma->obj->vma_hashed = NULL;
-			vma->ctx = NULL;
-			i915_vma_put(vma);
-		}
+		__i915_gem_object_release_unless_active(obj);
 	}
-	kvfree(lut->ht);
 }
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
@@ -165,7 +124,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	vma_lut_free(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -239,8 +197,11 @@ void i915_gem_context_release(struct kref *ref)
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
+
+	lut_close(ctx);
 	if (ctx->ppgtt)
 		i915_ppgtt_close(&ctx->ppgtt->base);
+
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
 }
@@ -313,16 +274,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_out;
-
-	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -372,8 +325,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	put_pid(ctx->pid);
 	idr_remove(&file_priv->context_idr, ctx->user_handle);
-err_lut:
-	kvfree(ctx->vma_lut.ht);
 err_out:
 	context_close(ctx);
 	return ERR_PTR(ret);
 }
@@ -27,6 +27,7 @@
 
 #include <linux/bitops.h>
 #include <linux/list.h>
+#include <linux/radix-tree.h>
 
 struct pid;
@@ -149,32 +150,6 @@ struct i915_gem_context {
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
-	struct i915_gem_context_vma_lut {
-		/** ht_size: last request size to allocate the hashtable for. */
-		unsigned int ht_size;
-#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
-
-		/** ht_bits: real log2(size) of hashtable. */
-		unsigned int ht_bits;
-		/** ht_count: current number of entries inside the hashtable */
-		unsigned int ht_count;
-
-		/** ht: the array of buckets comprising the simple hashtable */
-		struct hlist_head *ht;
-
-		/**
-		 * resize: After an execbuf completes, we check the load factor
-		 * of the hashtable. If the hashtable is too full, or too empty,
-		 * we schedule a task to resize the hashtable. During the
-		 * resize, the entries are moved between different buckets and
-		 * so we cannot simultaneously read the hashtable as it is
-		 * being resized (unlike rhashtable). Therefore we treat the
-		 * active work as a strong barrier, pausing a subsequent
-		 * execbuf to wait for the resize worker to complete, if
-		 * required.
-		 */
-		struct work_struct resize;
-	} vma_lut;
-
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_vma *state;
@@ -205,6 +180,18 @@ struct i915_gem_context {
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
+
+	/** handles_vma: radix tree to look up our context-specific obj/vma
+	 * for the user handle. (user handles are per fd, but the binding is
+	 * per vm, which may be one per context or shared with the global GTT)
+	 */
+	struct radix_tree_root handles_vma;
+
+	/** handles_list: reverse list of all the radix tree entries in use
+	 * for this context, which allows us to free all the allocations on
+	 * context close.
+	 */
+	struct list_head handles_list;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
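Because the handle-to-vma translation now lives in a per-context
i915_lut_handle rather than in the vma itself, the same vma can sit in
several contexts' radix trees under different handles; under the old scheme
a shared GGTT vma could be hashed into only one context at a time (first
come, first served). A hypothetical sketch of two clients sharing one bo
(the ctx_a/ctx_b/handle_a/handle_b names are illustrative, not from the
patch):

	/* each context indexes the shared vma under its own handle */
	err = radix_tree_insert(&ctx_a->handles_vma, handle_a, vma);
	err = radix_tree_insert(&ctx_b->handles_vma, handle_b, vma);
	/* each insert carries its own i915_lut_handle, so either
	 * context, or the object itself, can be closed independently */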
@@ -450,9 +450,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   unsigned int i, struct i915_vma *vma,
-	   unsigned int flags)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
@@ -482,7 +480,7 @@ eb_add_vma(struct i915_execbuffer *eb,
 	 * to find the right target VMA when doing relocations.
 	 */
 	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags | flags;
+	eb->flags[i] = entry->flags;
 	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
@@ -647,19 +645,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	} while (1);
 }
 
-static inline struct hlist_head *
-ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle)
-{
-	return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
-	return (4*lut->ht_count > 3*lut->ht_size ||
-		4*lut->ht_count + 1 < lut->ht_size);
-}
-
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 {
 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -688,7 +673,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
+	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
 	int err;
@@ -702,24 +687,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
-	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
-		flush_work(&lut->resize);
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
-		struct hlist_head *hl = ht_head(lut, handle);
-		unsigned int flags = 0;
+		struct i915_lut_handle *lut;
 		struct i915_vma *vma;
 
-		hlist_for_each_entry(vma, hl, ctx_node) {
-			GEM_BUG_ON(vma->ctx != eb->ctx);
-			if (vma->ctx_handle != handle)
-				continue;
-
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
 			goto add_vma;
-		}
 
 		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
@@ -733,26 +708,28 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			goto err_obj;
 		}
 
-		/* First come, first served */
-		if (!vma->ctx) {
-			vma->ctx = eb->ctx;
-			vma->ctx_handle = handle;
-			hlist_add_head(&vma->ctx_node, hl);
-			lut->ht_count++;
-			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
-			if (i915_vma_is_ggtt(vma)) {
-				GEM_BUG_ON(obj->vma_hashed);
-				obj->vma_hashed = vma;
-			}
+		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
 
-			/* transfer ref to ctx */
-			obj = NULL;
-		} else {
-			flags = __EXEC_OBJECT_HAS_REF;
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			kfree(lut);
+			goto err_obj;
 		}
 
+		list_add(&lut->obj_link, &obj->lut_list);
+		list_add(&lut->ctx_link, &eb->ctx->handles_list);
+		lut->ctx = eb->ctx;
+		lut->handle = handle;
+
+		/* transfer ref to ctx */
+		obj = NULL;
+
 add_vma:
-		err = eb_add_vma(eb, i, vma, flags);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err_obj;
@@ -760,13 +737,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 	}
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		if (ht_needs_resize(lut))
-			queue_work(system_highpri_wq, &lut->resize);
-		else
-			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -794,7 +764,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
-	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
 }
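
Distilling the rewritten eb_lookup_vmas() loop above: the hot path is now a
single radix_tree_lookup() per object, and the lut allocation plus
radix_tree_insert() run only on a context's first execbuf to reference the
handle. A condensed sketch of the control flow (error unwinding elided):

	for (i = 0; i < eb->buffer_count; i++) {
		u32 handle = eb->exec[i].handle;
		struct i915_vma *vma;

		vma = radix_tree_lookup(handles_vma, handle);	/* fast path */
		if (likely(vma))
			goto add_vma;

		/* slow path: resolve the handle and cache the result */
		obj = i915_gem_object_lookup(eb->file, handle);
		vma = i915_vma_instance(obj, eb->vm, NULL);
		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
		err = radix_tree_insert(handles_vma, handle, vma);
		list_add(&lut->obj_link, &obj->lut_list);
		list_add(&lut->ctx_link, &eb->ctx->handles_list);
add_vma:
		err = eb_add_vma(eb, i, vma);
	}

One nit a careful reader may spot in the hunk above: the radix_tree_insert()
failure path releases the entry with kfree() although it came from
kmem_cache_alloc(); kmem_cache_free(eb->i915->luts, lut) would be the
matching call.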
@@ -38,6 +38,19 @@
 
 struct drm_i915_gem_object;
 
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove the entries as the object or context is closed, we need a
+ * secondary list and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -89,7 +102,15 @@ struct drm_i915_gem_object {
 	 * They are also added to @vma_list for easy iteration.
 	 */
 	struct rb_root vma_tree;
-	struct i915_vma *vma_hashed;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
@@ -597,33 +597,11 @@ static void i915_vma_destroy(struct i915_vma *vma)
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
-void i915_vma_unlink_ctx(struct i915_vma *vma)
-{
-	struct i915_gem_context *ctx = vma->ctx;
-
-	if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		cancel_work_sync(&ctx->vma_lut.resize);
-		ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
-	__hlist_del(&vma->ctx_node);
-	ctx->vma_lut.ht_count--;
-
-	if (i915_vma_is_ggtt(vma))
-		vma->obj->vma_hashed = NULL;
-	vma->ctx = NULL;
-
-	i915_vma_put(vma);
-}
-
 void i915_vma_close(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	vma->flags |= I915_VMA_CLOSED;
 
-	if (vma->ctx)
-		i915_vma_unlink_ctx(vma);
-
 	list_del(&vma->obj_link);
 	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
@@ -115,10 +115,6 @@ struct i915_vma {
 	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
-
-	struct i915_gem_context *ctx;
-	struct hlist_node ctx_node;
-	u32 ctx_handle;
 };
 
 struct i915_vma *
@@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_free;
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	ret = ida_simple_get(&i915->contexts.hw_ida,
 			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 	if (ret < 0)
-		goto err_vma_ht;
+		goto err_handles;
 	ctx->hw_id = ret;
 
 	if (name) {
@@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_vma_ht:
-	kvfree(ctx->vma_lut.ht);
-err_free:
+err_handles:
 	kfree(ctx);
 	return NULL;
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree
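The one-line change to lib/radix-tree.c is what makes the rest usable from a
module: radix_tree_iter_delete() previously had no EXPORT_SYMBOL, so only
built-in code could call it, while i915 may be built as i915.ko and
lut_close() deletes slots in the middle of a walk. A sketch of the idiom the
export enables, deleting the current slot without restarting the iteration
(the i915_vma_put() release here is illustrative, not the driver's exact
teardown):

	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
		struct i915_vma *vma = rcu_dereference_raw(*slot);

		/* safe to drop the slot we are standing on mid-walk */
		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
		i915_vma_put(vma);
	}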