Commit c7c6e46f authored by Chris Wilson

drm/i915: Convert execbuf to use struct-of-array packing for critical fields

When userspace is doing most of the work, avoiding relocs (using
NO_RELOC) and opting out of implicit synchronisation (using ASYNC), we
still spend a lot of time processing the arrays in execbuf, even though
we should now have nothing to do most of the time. One issue that
becomes readily apparent when profiling anv is that iterating over the
large execobj[] is unfriendly to the CPU's loop prefetchers, which much
prefer iterating over a pair of small arrays rather than one big array.
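
For illustration, a minimal sketch of that difference (not code from this
patch; the struct layout and function names below are invented): a hot loop
that only needs the per-object flags strides across wide exec objects in the
array-of-structs form, but streams through a dense array in the
struct-of-arrays form.

```c
/* Hypothetical stand-in for the wide drm_i915_gem_exec_object2 entry. */
struct fake_exec_object {
	unsigned long long handle, relocation_count, relocs_ptr;
	unsigned long long alignment, offset, flags, rsvd;
};

/* Array-of-structs: each step strides sizeof(struct fake_exec_object) bytes. */
static unsigned int count_writes_aos(const struct fake_exec_object *exec,
				     unsigned int count)
{
	unsigned int i, n = 0;

	for (i = 0; i < count; i++)
		n += !!(exec[i].flags & 1);
	return n;
}

/* Struct-of-arrays: the same loop streams through a dense 4-byte-per-entry
 * array, which is what a separate flags[] gives the NO_RELOC/ASYNC fast path. */
static unsigned int count_writes_soa(const unsigned int *flags,
				     unsigned int count)
{
	unsigned int i, n = 0;

	for (i = 0; i < count; i++)
		n += !!(flags[i] & 1);
	return n;
}
```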

v2: Clear vma[] on construction to handle errors during vma lookup
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-3-chris@chris-wilson.co.uk
parent 8bcbfb12
@@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		/* Overlap of objects in the same batch? */
 		if (i915_vma_is_pinned(vma)) {
 			ret = -ENOSPC;
-			if (vma->exec_entry &&
-			    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+			if (vma->exec_flags &&
+			    *vma->exec_flags & EXEC_OBJECT_PINNED)
 				ret = -EINVAL;
 			break;
 		}
...
@@ -192,6 +192,8 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;

 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct i915_gem_context *ctx; /** context for building the request */
@@ -245,13 +247,7 @@ struct i915_execbuffer {
 	struct hlist_head *buckets; /** ht for relocation handles */
 };

-/*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
+#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])

 /*
  * Used to convert any address to canonical form.
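
As a minimal sketch of how the new exec_entry() macro works (the fake_* names
below are invented for illustration and are not part of the patch): each vma
stores only a pointer into the flags[] array, the execobj index falls out of
pointer subtraction, and that index can then address the parallel exec[] and
vma[] arrays without any back-pointer in the entry itself.

```c
#include <stdio.h>

/* Hypothetical stand-ins for the real i915 types. */
struct fake_exec_object { unsigned int handle; };

struct fake_eb {
	struct fake_exec_object exec[4]; /* parallel to flags[] */
	unsigned int flags[4];
};

/* Same shape as exec_entry(): the index is the pointer difference. */
#define fake_exec_entry(EB, FLAGS) (&(EB)->exec[(FLAGS) - (EB)->flags])

int main(void)
{
	struct fake_eb eb = { .exec = { {10}, {11}, {12}, {13} } };
	unsigned int *exec_flags = &eb.flags[2]; /* what a vma would stash */

	/* Recovers eb.exec[2] purely from the pointer difference. */
	printf("handle = %u\n", fake_exec_entry(&eb, exec_flags)->handle);
	return 0;
}
```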
@@ -320,85 +316,82 @@ static int eb_create(struct i915_execbuffer *eb)
 static bool
 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
-		 const struct i915_vma *vma)
+		 const struct i915_vma *vma,
+		 unsigned int flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
-		return true;
-
 	if (vma->node.size < entry->pad_to_size)
 		return true;

 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 		return true;

-	if (entry->flags & EXEC_OBJECT_PINNED &&
+	if (flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;

-	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;

-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 	    (vma->node.start + vma->node.size - 1) >> 32)
 		return true;

 	return false;
 }

-static inline void
+static inline bool
 eb_pin_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
+	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct i915_vma *vma)
 {
-	u64 flags;
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;

 	if (vma->node.size)
-		flags = vma->node.start;
+		pin_flags = vma->node.start;
 	else
-		flags = entry->offset & PIN_OFFSET_MASK;
+		pin_flags = entry->offset & PIN_OFFSET_MASK;

-	flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
-		flags |= PIN_GLOBAL;
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+		pin_flags |= PIN_GLOBAL;

-	if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
-		return;
+	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+		return false;

-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		if (unlikely(i915_vma_get_fence(vma))) {
 			i915_vma_unpin(vma);
-			return;
+			return false;
 		}

 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}

-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	return !eb_vma_misplaced(entry, vma, exec_flags);
 }

-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
-	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));

-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);

 	__i915_vma_unpin(vma);
 }

 static inline void
-eb_unreserve_vma(struct i915_vma *vma,
-		 struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 		return;

-	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~__EXEC_OBJECT_RESERVED;
+	__eb_unreserve_vma(vma, *flags);
+	*flags &= ~__EXEC_OBJECT_RESERVED;
 }

 static int
@@ -428,7 +421,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		entry->pad_to_size = 0;
 	}

-	if (unlikely(vma->exec_entry)) {
+	if (unlikely(vma->exec_flags)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 			  entry->handle, (int)(entry - eb->exec));
 		return -EINVAL;
@@ -441,14 +434,25 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	 */
 	entry->offset = gen8_noncanonical_addr(entry->offset);

+	if (!eb->reloc_cache.has_fence) {
+		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+	} else {
+		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+		     eb->reloc_cache.needs_unfenced) &&
+		    i915_gem_object_is_tiled(vma->obj))
+			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+	}
+
+	if (!(entry->flags & EXEC_OBJECT_PINNED))
+		entry->flags |= eb->context_flags;
+
 	return 0;
 }

 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;

 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -469,40 +473,28 @@ eb_add_vma(struct i915_execbuffer *eb,
 	if (entry->relocation_count)
 		list_add_tail(&vma->reloc_link, &eb->relocs);

-	if (!eb->reloc_cache.has_fence) {
-		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
-	} else {
-		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
-		     eb->reloc_cache.needs_unfenced) &&
-		    i915_gem_object_is_tiled(vma->obj))
-			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
-	}
-
-	if (!(entry->flags & EXEC_OBJECT_PINNED))
-		entry->flags |= eb->context_flags;
-
 	/*
 	 * Stash a pointer from the vma to execobj, so we can query its flags,
 	 * size, alignment etc as provided by the user. Also we stash a pointer
 	 * to the vma inside the execobj so that we can use a direct lookup
 	 * to find the right target VMA when doing relocations.
 	 */
-	vma->exec_entry = entry;
-	__exec_to_vma(entry) = (uintptr_t)vma;
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];

 	err = 0;
-	eb_pin_vma(eb, entry, vma);
-	if (eb_vma_misplaced(entry, vma)) {
-		eb_unreserve_vma(vma, entry);
-
-		list_add_tail(&vma->exec_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
-			err = i915_vma_unbind(vma);
-	} else {
+	if (eb_pin_vma(eb, entry, vma)) {
 		if (entry->offset != vma->node.start) {
 			entry->offset = vma->node.start | UPDATE;
 			eb->args->flags |= __EXEC_HAS_RELOC;
 		}
+	} else {
+		eb_unreserve_vma(vma, vma->exec_flags);
+
+		list_add_tail(&vma->exec_link, &eb->unbound);
+		if (drm_mm_node_allocated(&vma->node))
+			err = i915_vma_unbind(vma);
 	}

 	return err;
 }
@@ -527,32 +519,35 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			  struct i915_vma *vma)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	u64 flags;
+	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 	int err;

-	flags = PIN_USER | PIN_NONBLOCK;
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-		flags |= PIN_GLOBAL;
+	pin_flags = PIN_USER | PIN_NONBLOCK;
+	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+		pin_flags |= PIN_GLOBAL;

 	/*
 	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 	 * limit address to the first 4GBs for unflagged objects.
 	 */
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-		flags |= PIN_ZONE_4G;
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		pin_flags |= PIN_ZONE_4G;

-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
-		flags |= PIN_MAPPABLE;
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+		pin_flags |= PIN_MAPPABLE;

-	if (entry->flags & EXEC_OBJECT_PINNED) {
-		flags |= entry->offset | PIN_OFFSET_FIXED;
-		flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
-	} else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
-		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	if (exec_flags & EXEC_OBJECT_PINNED) {
+		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}

-	err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+	err = i915_vma_pin(vma,
+			   entry->pad_to_size, entry->alignment,
+			   pin_flags);
 	if (err)
 		return err;
@@ -561,7 +556,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}

-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_get_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
@@ -569,11 +564,11 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		}

 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}

-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
-	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));

 	return 0;
 }
@@ -615,18 +610,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->unbound);
 	INIT_LIST_HEAD(&last);
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];

-		if (entry->flags & EXEC_OBJECT_PINNED &&
-		    entry->flags & __EXEC_OBJECT_HAS_PIN)
+		if (flags & EXEC_OBJECT_PINNED &&
+		    flags & __EXEC_OBJECT_HAS_PIN)
 			continue;

-		vma = exec_to_vma(entry);
-		eb_unreserve_vma(vma, entry);
+		eb_unreserve_vma(vma, &eb->flags[i]);

-		if (entry->flags & EXEC_OBJECT_PINNED)
+		if (flags & EXEC_OBJECT_PINNED)
 			list_add(&vma->exec_link, &eb->unbound);
-		else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+		else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 			list_add_tail(&vma->exec_link, &eb->unbound);
 		else
 			list_add_tail(&vma->exec_link, &last);
@@ -714,18 +709,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);

 	for (i = 0; i < count; i++) {
-		__exec_to_vma(&eb->exec[i]) = 0;
-
 		hlist_for_each_entry(vma,
 				     ht_head(lut, eb->exec[i].handle),
 				     ctx_node) {
 			if (vma->ctx_handle != eb->exec[i].handle)
 				continue;

-			err = eb_add_vma(eb, &eb->exec[i], vma);
+			err = eb_add_vma(eb, i, vma);
 			if (unlikely(err))
 				return err;

 			goto next_vma;
 		}
@@ -746,7 +738,7 @@ next_vma: ;
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;

-		if (__exec_to_vma(&eb->exec[i]))
+		if (eb->vma[i])
 			continue;

 		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
@@ -758,14 +750,17 @@ next_vma: ;
 			goto err;
 		}

-		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
+		eb->vma[i] = (struct i915_vma *)
+			ptr_pack_bits(obj, INTERMEDIATE, 1);
 	}
 	spin_unlock(&eb->file->table_lock);

 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
+		unsigned int is_obj;

-		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
+		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
+		if (!is_obj)
 			continue;

 		/*
@@ -776,8 +771,6 @@ next_vma: ;
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		obj = u64_to_ptr(typeof(*obj),
-				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
@@ -801,14 +794,17 @@ next_vma: ;
 			i915_vma_get(vma);
 		}

-		err = eb_add_vma(eb, &eb->exec[i], vma);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err;

+		GEM_BUG_ON(vma != eb->vma[i]);
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+
 		/* Only after we validated the user didn't use our bits */
 		if (vma->ctx != eb->ctx) {
 			i915_vma_get(vma);
-			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
+			*vma->exec_flags |= __EXEC_OBJECT_HAS_REF;
 		}
 	}
@@ -822,7 +818,8 @@ next_vma: ;
 out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
-	eb->batch = exec_to_vma(&eb->exec[i]);
+	eb->batch = eb->vma[i];
+	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);

 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -833,18 +830,18 @@ next_vma: ;
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
-		eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 	if (eb->reloc_cache.has_fence)
-		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
+		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;

 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);

 err:
 	for (i = slow_pass; i < count; i++) {
-		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
-			__exec_to_vma(&eb->exec[i]) = 0;
+		if (ptr_unmask_bits(eb->vma[i], 1))
+			eb->vma[i] = NULL;
 	}
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
@@ -857,7 +854,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return exec_to_vma(&eb->exec[handle]);
+		return eb->vma[handle];
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
@@ -877,24 +874,21 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;

 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];

 		if (!vma)
 			continue;

-		GEM_BUG_ON(vma->exec_entry != entry);
-		vma->exec_entry = NULL;
-		__exec_to_vma(entry) = 0;
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		vma->exec_flags = NULL;
+		eb->vma[i] = NULL;

-		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, entry);
+		if (flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, flags);

-		if (entry->flags & __EXEC_OBJECT_HAS_REF)
+		if (flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
-
-		entry->flags &=
-			~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
 	}
 }
@@ -1383,7 +1377,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}

 	if (reloc->write_domain) {
-		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+		*target->exec_flags |= EXEC_OBJECT_WRITE;

 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1435,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * do relocations we are already stalling, disable the user's opt
 	 * out of our synchronisation.
 	 */
-	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;

 	/* and update the user's relocation entry */
 	return relocate_entry(vma, reloc, eb, target);
@@ -1446,7 +1440,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	unsigned int remain;

 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1529,7 +1523,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 static int
 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
@@ -1733,6 +1727,8 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	if (err)
 		goto err;

+	GEM_BUG_ON(!eb->batch);
+
 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
@@ -1826,11 +1822,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;

 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;

-		if (entry->flags & EXEC_OBJECT_CAPTURE) {
+		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_gem_capture_list *capture;

 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1838,7 +1834,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				return -ENOMEM;

 			capture->next = eb->request->capture_list;
-			capture->vma = vma;
+			capture->vma = eb->vma[i];
 			eb->request->capture_list = capture;
 		}
@@ -1856,29 +1852,29 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		 */
 		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
 			if (i915_gem_clflush_object(obj, 0))
-				entry->flags &= ~EXEC_OBJECT_ASYNC;
+				flags &= ~EXEC_OBJECT_ASYNC;
 		}

-		if (entry->flags & EXEC_OBJECT_ASYNC)
-			goto skip_flushes;
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;

 		err = i915_gem_request_await_object
-			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
 			return err;
-
-skip_flushes:
-		i915_vma_move_to_active(vma, eb->request, entry->flags);
-		__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
 	}

 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
+
+		i915_vma_move_to_active(vma, eb->request, flags);
+		eb_export_fence(vma, eb->request, flags);

-		eb_export_fence(vma, eb->request, entry->flags);
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}

 	eb->exec = NULL;
@@ -2007,11 +2003,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	if (IS_ERR(vma))
 		goto out;

-	vma->exec_entry =
-		memset(&eb->exec[eb->buffer_count++],
-		       0, sizeof(*vma->exec_entry));
-	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] =
+		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;

 out:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2270,7 +2266,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.args = args;
 	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
+
 	eb.exec = exec;
+	eb.vma = memset(exec + args->buffer_count + 1, 0,
+			(args->buffer_count + 1) * sizeof(*eb.vma));
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	if (USES_FULL_PPGTT(eb.i915))
 		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2358,7 +2359,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}

-	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
@@ -2511,7 +2512,9 @@ int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -2602,7 +2605,9 @@ int
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
 	struct drm_syncobj **fences = NULL;
...
@@ -112,7 +112,7 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct drm_i915_gem_exec_object2 *exec_entry;
+	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
...
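
For reference, a hedged sketch of the combined allocation that the ioctl entry
points now size with the enlarged sz above (the fake_* names are invented; the
real code uses drm_i915_gem_exec_object2, struct i915_vma and the new
eb.vma/eb.flags members): one buffer holds the execobj array, the vma pointer
array and the flags array back to back, with one spare slot kept for a shadow
batch from the command parser.

```c
#include <stdlib.h>

/* Hypothetical stand-ins for the real i915 types. */
struct fake_exec_object { unsigned long long rsvd[7]; };
struct fake_vma;

struct fake_eb {
	struct fake_exec_object *exec;
	struct fake_vma **vma;
	unsigned int *flags;
};

/* One allocation, three parallel arrays, plus one spare slot for a
 * shadow batch appended by the command parser. */
static int fake_eb_alloc(struct fake_eb *eb, unsigned int buffer_count)
{
	const size_t sz = sizeof(*eb->exec) + sizeof(*eb->vma) + sizeof(*eb->flags);
	void *mem = calloc(buffer_count + 1, sz);

	if (!mem)
		return -1;

	eb->exec = mem;
	eb->vma = (struct fake_vma **)(eb->exec + buffer_count + 1);
	eb->flags = (unsigned int *)(eb->vma + buffer_count + 1);
	return 0;
}
```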