Commit 8d286e2f authored by Dave Airlie

Merge tag 'drm-intel-next-fixes-2020-06-04' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

- Includes gvt-next-fixes-2020-05-28
- Use after free fix for display global state.
- Whitelisting context-local timestamp on Gen9
  and two scheduler fixes with deps (Cc: stable)
- Removal of write flag from sysfs files where
  ineffective
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200604150454.GA59322@jlahtine-desk.ger.corp.intel.com
parents fa3fa222 f8665d79
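
For orientation before the diff: the display global-state fix below makes intel_global_state reference counted with a struct kref, so a committed old state stays alive until every holder has dropped its reference. The sketch that follows is a minimal user-space illustration of that get/put lifetime pattern, not i915 code; every name in it (demo_state, demo_state_get, and so on) is invented for the example.

```c
/*
 * Minimal user-space sketch of the refcounting pattern the patch applies to
 * intel_global_state: every holder takes a reference, and the object is only
 * freed when the last reference is dropped.  Names are illustrative only.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_state {
	atomic_int ref;		/* stands in for struct kref */
	int value;
};

static struct demo_state *demo_state_create(int value)
{
	struct demo_state *st = malloc(sizeof(*st));

	if (!st)
		return NULL;
	atomic_init(&st->ref, 1);	/* like kref_init() */
	st->value = value;
	return st;
}

static struct demo_state *demo_state_get(struct demo_state *st)
{
	atomic_fetch_add(&st->ref, 1);	/* like kref_get() */
	return st;
}

static void demo_state_put(struct demo_state *st)
{
	/* like kref_put(): free only when the last reference goes away */
	if (atomic_fetch_sub(&st->ref, 1) == 1)
		free(st);
}

int main(void)
{
	struct demo_state *cur = demo_state_create(42);
	struct demo_state *old = demo_state_get(cur);	/* old-state holder */

	demo_state_put(cur);		/* swapping drops the owner's ref */
	printf("old state still valid: %d\n", old->value);
	demo_state_put(old);		/* last put frees the state */
	return 0;
}
```

The actual patch does the equivalent with kref_init()/kref_get()/kref_put() and frees the state through obj->funcs->atomic_destroy_state(), as the first file in the diff shows.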
@@ -10,6 +10,28 @@
 #include "intel_display_types.h"
 #include "intel_global_state.h"
 
+static void __intel_atomic_global_state_free(struct kref *kref)
+{
+	struct intel_global_state *obj_state =
+		container_of(kref, struct intel_global_state, ref);
+	struct intel_global_obj *obj = obj_state->obj;
+
+	obj->funcs->atomic_destroy_state(obj, obj_state);
+}
+
+static void intel_atomic_global_state_put(struct intel_global_state *obj_state)
+{
+	kref_put(&obj_state->ref, __intel_atomic_global_state_free);
+}
+
+static struct intel_global_state *
+intel_atomic_global_state_get(struct intel_global_state *obj_state)
+{
+	kref_get(&obj_state->ref);
+
+	return obj_state;
+}
+
 void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv,
 				  struct intel_global_obj *obj,
 				  struct intel_global_state *state,
@@ -17,6 +39,10 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv,
 {
 	memset(obj, 0, sizeof(*obj));
 
+	state->obj = obj;
+
+	kref_init(&state->ref);
+
 	obj->state = state;
 	obj->funcs = funcs;
 	list_add_tail(&obj->head, &dev_priv->global_obj_list);
@@ -28,7 +54,9 @@ void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv)
 	list_for_each_entry_safe(obj, next, &dev_priv->global_obj_list, head) {
 		list_del(&obj->head);
 
-		obj->funcs->atomic_destroy_state(obj, obj->state);
+		drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1);
+		intel_atomic_global_state_put(obj->state);
 	}
 }
@@ -97,10 +125,14 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state,
 	if (!obj_state)
 		return ERR_PTR(-ENOMEM);
 
+	obj_state->obj = obj;
 	obj_state->changed = false;
 
+	kref_init(&obj_state->ref);
+
 	state->global_objs[index].state = obj_state;
-	state->global_objs[index].old_state = obj->state;
+	state->global_objs[index].old_state =
+		intel_atomic_global_state_get(obj->state);
 	state->global_objs[index].new_state = obj_state;
 	state->global_objs[index].ptr = obj;
 	obj_state->state = state;
@@ -163,7 +195,9 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state)
 		new_obj_state->state = NULL;
 
 		state->global_objs[i].state = old_obj_state;
-		obj->state = new_obj_state;
+
+		intel_atomic_global_state_put(obj->state);
+		obj->state = intel_atomic_global_state_get(new_obj_state);
 	}
 }
@@ -172,10 +206,9 @@ void intel_atomic_clear_global_state(struct intel_atomic_state *state)
 	int i;
 
 	for (i = 0; i < state->num_global_objs; i++) {
-		struct intel_global_obj *obj = state->global_objs[i].ptr;
-
-		obj->funcs->atomic_destroy_state(obj,
-						 state->global_objs[i].state);
+		intel_atomic_global_state_put(state->global_objs[i].old_state);
+		intel_atomic_global_state_put(state->global_objs[i].new_state);
+
 		state->global_objs[i].ptr = NULL;
 		state->global_objs[i].state = NULL;
 		state->global_objs[i].old_state = NULL;
...
@@ -6,6 +6,7 @@
 #ifndef __INTEL_GLOBAL_STATE_H__
 #define __INTEL_GLOBAL_STATE_H__
 
+#include <linux/kref.h>
 #include <linux/list.h>
 
 struct drm_i915_private;
@@ -54,7 +55,9 @@ struct intel_global_obj {
 		for_each_if(obj)
 
 struct intel_global_state {
+	struct intel_global_obj *obj;
 	struct intel_atomic_state *state;
+	struct kref ref;
 	bool changed;
 };
...
@@ -230,7 +230,7 @@ static void intel_context_set_gem(struct intel_context *ce,
 		ce->timeline = intel_timeline_get(ctx->timeline);
 
 	if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
-	    intel_engine_has_semaphores(ce->engine))
+	    intel_engine_has_timeslices(ce->engine))
 		__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
 }
@@ -1969,7 +1969,7 @@ static int __apply_priority(struct intel_context *ce, void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 
-	if (!intel_engine_has_semaphores(ce->engine))
+	if (!intel_engine_has_timeslices(ce->engine))
 		return 0;
 
 	if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
...
@@ -39,7 +39,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	unsigned long last_pfn = 0; /* suppress gcc warning */
 	unsigned int max_segment = i915_sg_segment_size();
 	unsigned int sg_page_sizes;
-	struct pagevec pvec;
 	gfp_t noreclaim;
 	int ret;
@@ -192,6 +191,9 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	sg_mark_end(sg);
 err_pages:
 	mapping_clear_unevictable(mapping);
+	if (sg != st->sgl) {
+		struct pagevec pvec;
+
 		pagevec_init(&pvec);
 		for_each_sgt_page(page, sgt_iter, st) {
 			if (!pagevec_add(&pvec, page))
@@ -199,6 +201,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 		}
 		if (pagevec_count(&pvec))
 			check_release_pagevec(&pvec);
+	}
 	sg_free_table(st);
 	kfree(st);
...
@@ -97,8 +97,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
 
-	GEM_BUG_ON(intel_context_is_closed(ce));
-
 	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
 		err = intel_context_alloc_state(ce);
 		if (err)
...
@@ -124,7 +124,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 	 */
 	low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
 	high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
-	num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]);
+	num_types = ARRAY_SIZE(vgpu_types);
 
 	gvt->types = kcalloc(num_types, sizeof(struct intel_vgpu_type),
 			     GFP_KERNEL);
...
@@ -572,6 +572,9 @@ struct drm_i915_reg_descriptor {
 #define REG32(_reg, ...) \
 	{ .addr = (_reg), __VA_ARGS__ }
 
+#define REG32_IDX(_reg, idx) \
+	{ .addr = _reg(idx) }
+
 /*
  * Convenience macro for adding 64-bit registers.
  *
@@ -669,6 +672,7 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
 	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
 	REG32(BCS_SWCTRL),
 	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+	REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE),
 	REG64_IDX(BCS_GPR, 0),
 	REG64_IDX(BCS_GPR, 1),
 	REG64_IDX(BCS_GPR, 2),
...
@@ -173,7 +173,7 @@ i915_param_named(enable_gvt, bool, 0400,
 #endif
 
 #if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)
-i915_param_named_unsafe(fake_lmem_start, ulong, 0600,
+i915_param_named_unsafe(fake_lmem_start, ulong, 0400,
 	"Fake LMEM start offset (default: 0)");
 #endif
...
@@ -64,7 +64,7 @@ struct drm_printer;
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \
 	param(int, edp_vswing, 0, 0400) \
 	param(unsigned int, reset, 3, 0600) \
-	param(unsigned int, inject_probe_failure, 0, 0600) \
+	param(unsigned int, inject_probe_failure, 0, 0) \
 	param(int, fastboot, -1, 0600) \
 	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
...
@@ -121,8 +121,39 @@ static void i915_fence_release(struct dma_fence *fence)
 	i915_sw_fence_fini(&rq->submit);
 	i915_sw_fence_fini(&rq->semaphore);
 
-	/* Keep one request on each engine for reserved use under mempressure */
-	if (!cmpxchg(&rq->engine->request_pool, NULL, rq))
+	/*
+	 * Keep one request on each engine for reserved use under mempressure
+	 *
+	 * We do not hold a reference to the engine here and so have to be
+	 * very careful in what rq->engine we poke. The virtual engine is
+	 * referenced via the rq->context and we released that ref during
+	 * i915_request_retire(), ergo we must not dereference a virtual
+	 * engine here. Not that we would want to, as the only consumer of
+	 * the reserved engine->request_pool is the power management parking,
+	 * which must-not-fail, and that is only run on the physical engines.
+	 *
+	 * Since the request must have been executed to be have completed,
+	 * we know that it will have been processed by the HW and will
+	 * not be unsubmitted again, so rq->engine and rq->execution_mask
+	 * at this point is stable. rq->execution_mask will be a single
+	 * bit if the last and _only_ engine it could execution on was a
+	 * physical engine, if it's multiple bits then it started on and
+	 * could still be on a virtual engine. Thus if the mask is not a
+	 * power-of-two we assume that rq->engine may still be a virtual
+	 * engine and so a dangling invalid pointer that we cannot dereference
+	 *
+	 * For example, consider the flow of a bonded request through a virtual
+	 * engine. The request is created with a wide engine mask (all engines
+	 * that we might execute on). On processing the bond, the request mask
+	 * is reduced to one or more engines. If the request is subsequently
+	 * bound to a single engine, it will then be constrained to only
+	 * execute on that engine and never returned to the virtual engine
+	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
+	 * know that if the rq->execution_mask is a single bit, rq->engine
+	 * can be a physical engine with the exact corresponding mask.
+	 */
+	if (is_power_of_2(rq->execution_mask) &&
+	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
 		return;
 
 	kmem_cache_free(global.slab_requests, rq);
@@ -326,6 +357,53 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
+static struct i915_request * const *
+__engine_active(struct intel_engine_cs *engine)
+{
+	return READ_ONCE(engine->execlists.active);
+}
+
+static bool __request_in_flight(const struct i915_request *signal)
+{
+	struct i915_request * const *port, *rq;
+	bool inflight = false;
+
+	if (!i915_request_is_ready(signal))
+		return false;
+
+	/*
+	 * Even if we have unwound the request, it may still be on
+	 * the GPU (preempt-to-busy). If that request is inside an
+	 * unpreemptible critical section, it will not be removed. Some
+	 * GPU functions may even be stuck waiting for the paired request
+	 * (__await_execution) to be submitted and cannot be preempted
+	 * until the bond is executing.
+	 *
+	 * As we know that there are always preemption points between
+	 * requests, we know that only the currently executing request
+	 * may be still active even though we have cleared the flag.
+	 * However, we can't rely on our tracking of ELSP[0] to known
+	 * which request is currently active and so maybe stuck, as
+	 * the tracking maybe an event behind. Instead assume that
+	 * if the context is still inflight, then it is still active
+	 * even if the active flag has been cleared.
+	 */
+	if (!intel_context_inflight(signal->context))
+		return false;
+
+	rcu_read_lock();
+	for (port = __engine_active(signal->engine); (rq = *port); port++) {
+		if (rq->context == signal->context) {
+			inflight = i915_seqno_passed(rq->fence.seqno,
+						     signal->fence.seqno);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return inflight;
+}
+
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -356,7 +434,7 @@ __await_execution(struct i915_request *rq,
 	}
 
 	spin_lock_irq(&signal->lock);
-	if (i915_request_is_active(signal)) {
+	if (i915_request_is_active(signal) || __request_in_flight(signal)) {
 		if (hook) {
 			hook(rq, &signal->fence);
 			i915_request_put(signal);
@@ -1022,37 +1100,91 @@ emit_semaphore_wait(struct i915_request *to,
					       I915_FENCE_GFP);
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
 static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
 {
-	int ret;
-
-	GEM_BUG_ON(to == from);
-	GEM_BUG_ON(to->timeline == from->timeline);
-
-	if (i915_request_completed(from)) {
-		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+	int err;
+
+	GEM_BUG_ON(intel_context_is_barrier(from->context));
+
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	/* Squash repeated depenendices to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
 		return 0;
+
+	/*
+	 * Wait until the start of this request.
+	 *
+	 * The execution cb fires when we submit the request to HW. But in
+	 * many cases this may be long before the request itself is ready to
+	 * run (consider that we submit 2 requests for the same context, where
+	 * the request of interest is behind an indefinite spinner). So we hook
+	 * up to both to reduce our queues and keep the execution lag minimised
+	 * in the worst case, though we hope that the await_start is elided.
+	 */
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/*
+	 * Ensure both start together [after all semaphores in signal]
+	 *
+	 * Now that we are queued to the HW at roughly the same time (thanks
+	 * to the execute cb) and are ready to run at roughly the same time
+	 * (thanks to the await start), our signaler may still be indefinitely
+	 * delayed by waiting on a semaphore from a remote engine. If our
+	 * signaler depends on a semaphore, so indirectly do we, and we do not
+	 * want to start our payload until our signaler also starts theirs.
+	 * So we wait.
+	 *
+	 * However, there is also a second condition for which we need to wait
+	 * for the precise start of the signaler. Consider that the signaler
+	 * was submitted in a chain of requests following another context
+	 * (with just an ordinary intra-engine fence dependency between the
+	 * two). In this case the signaler is queued to HW, but not for
+	 * immediate execution, and so we must wait until it reaches the
+	 * active slot.
+	 */
+	if (intel_engine_has_semaphores(to->engine) &&
+	    !i915_request_has_initial_breadcrumb(to)) {
+		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+		if (err < 0)
+			return err;
 	}
 
+	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(&to->sched,
+		err = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
-						     I915_DEPENDENCY_EXTERNAL);
-		if (ret < 0)
-			return ret;
+						     I915_DEPENDENCY_WEAK);
+		if (err < 0)
+			return err;
 	}
 
-	if (to->engine == from->engine)
-		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-						       &from->submit,
-						       I915_FENCE_GFP);
-	else
-		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
 }
 
 static void mark_external(struct i915_request *rq)
@@ -1105,23 +1237,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 }
 
 int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
 	int ret;
 
-	/*
-	 * Note that if the fence-array was created in signal-on-any mode,
-	 * we should *not* decompose it into its individual fences. However,
-	 * we don't currently store which mode the fence-array is operating
-	 * in. Fortunately, the only user of signal-on-any is private to
-	 * amdgpu and we should not see any incoming fence-array from
-	 * sync-file being in signal-on-any mode.
-	 */
 	if (dma_fence_is_array(fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(fence);
 
+		/* XXX Error for signal-on-any fence arrays */
+
 		child = array->fences;
 		nchild = array->num_fences;
 		GEM_BUG_ON(!nchild);
@@ -1134,138 +1263,95 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 			continue;
 		}
 
-		/*
-		 * Requests on the same timeline are explicitly ordered, along
-		 * with their dependencies, by i915_request_add() which ensures
-		 * that requests are submitted in-order through each ring.
-		 */
 		if (fence->context == rq->fence.context)
 			continue;
 
-		/* Squash repeated waits to the same timelines */
-		if (fence->context &&
-		    intel_timeline_sync_is_later(i915_request_timeline(rq),
-						 fence))
-			continue;
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
 
 		if (dma_fence_is_i915(fence))
-			ret = i915_request_await_request(rq, to_request(fence));
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook);
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
 			return ret;
-
-		/* Record the latest fence used against each timeline */
-		if (fence->context)
-			intel_timeline_sync_set(i915_request_timeline(rq),
-						fence);
 	} while (--nchild);
 
 	return 0;
 }
 
-static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
-					  struct dma_fence *fence)
-{
-	return __intel_timeline_sync_is_later(tl,
-					      fence->context,
-					      fence->seqno - 1);
-}
-
-static int intel_timeline_sync_set_start(struct intel_timeline *tl,
-					 const struct dma_fence *fence)
+static int
+await_request_submit(struct i915_request *to, struct i915_request *from)
 {
-	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+	/*
+	 * If we are waiting on a virtual engine, then it may be
+	 * constrained to execute on a single engine *prior* to submission.
+	 * When it is submitted, it will be first submitted to the virtual
+	 * engine and then passed to the physical engine. We cannot allow
+	 * the waiter to be submitted immediately to the physical engine
+	 * as it may then bypass the virtual request.
+	 */
+	if (to->engine == READ_ONCE(from->engine))
+		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
+							&from->submit,
+							I915_FENCE_GFP);
+	else
+		return __i915_request_await_execution(to, from, NULL);
 }
 
 static int
-__i915_request_await_execution(struct i915_request *to,
-			       struct i915_request *from,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal))
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
-	int err;
-
-	GEM_BUG_ON(intel_context_is_barrier(from->context));
-
-	/* Submit both requests at the same time */
-	err = __await_execution(to, from, hook, I915_FENCE_GFP);
-	if (err)
-		return err;
-
-	/* Squash repeated depenendices to the same timelines */
-	if (intel_timeline_sync_has_start(i915_request_timeline(to),
-					  &from->fence))
+	int ret;
+
+	GEM_BUG_ON(to == from);
+	GEM_BUG_ON(to->timeline == from->timeline);
+
+	if (i915_request_completed(from)) {
+		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
 		return 0;
-
-	/*
-	 * Wait until the start of this request.
-	 *
-	 * The execution cb fires when we submit the request to HW. But in
-	 * many cases this may be long before the request itself is ready to
-	 * run (consider that we submit 2 requests for the same context, where
-	 * the request of interest is behind an indefinite spinner). So we hook
-	 * up to both to reduce our queues and keep the execution lag minimised
-	 * in the worst case, though we hope that the await_start is elided.
-	 */
-	err = i915_request_await_start(to, from);
-	if (err < 0)
-		return err;
-
-	/*
-	 * Ensure both start together [after all semaphores in signal]
-	 *
-	 * Now that we are queued to the HW at roughly the same time (thanks
-	 * to the execute cb) and are ready to run at roughly the same time
-	 * (thanks to the await start), our signaler may still be indefinitely
-	 * delayed by waiting on a semaphore from a remote engine. If our
-	 * signaler depends on a semaphore, so indirectly do we, and we do not
-	 * want to start our payload until our signaler also starts theirs.
-	 * So we wait.
-	 *
-	 * However, there is also a second condition for which we need to wait
-	 * for the precise start of the signaler. Consider that the signaler
-	 * was submitted in a chain of requests following another context
-	 * (with just an ordinary intra-engine fence dependency between the
-	 * two). In this case the signaler is queued to HW, but not for
-	 * immediate execution, and so we must wait until it reaches the
-	 * active slot.
-	 */
-	if (intel_engine_has_semaphores(to->engine) &&
-	    !i915_request_has_initial_breadcrumb(to)) {
-		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-		if (err < 0)
-			return err;
 	}
 
-	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		err = i915_sched_node_add_dependency(&to->sched,
+		ret = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
-						     I915_DEPENDENCY_WEAK);
-		if (err < 0)
-			return err;
+						     I915_DEPENDENCY_EXTERNAL);
+		if (ret < 0)
+			return ret;
 	}
 
-	return intel_timeline_sync_set_start(i915_request_timeline(to),
-					     &from->fence);
+	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+		ret = await_request_submit(to, from);
+	else
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 int
-i915_request_await_execution(struct i915_request *rq,
-			     struct dma_fence *fence,
-			     void (*hook)(struct i915_request *rq,
-					  struct dma_fence *signal))
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
 	int ret;
 
+	/*
+	 * Note that if the fence-array was created in signal-on-any mode,
+	 * we should *not* decompose it into its individual fences. However,
+	 * we don't currently store which mode the fence-array is operating
+	 * in. Fortunately, the only user of signal-on-any is private to
+	 * amdgpu and we should not see any incoming fence-array from
+	 * sync-file being in signal-on-any mode.
+	 */
 	if (dma_fence_is_array(fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(fence);
 
-		/* XXX Error for signal-on-any fence arrays */
-
 		child = array->fences;
 		nchild = array->num_fences;
 		GEM_BUG_ON(!nchild);
@@ -1278,22 +1364,31 @@ i915_request_await_execution(struct i915_request *rq,
 			continue;
 		}
 
+		/*
+		 * Requests on the same timeline are explicitly ordered, along
+		 * with their dependencies, by i915_request_add() which ensures
+		 * that requests are submitted in-order through each ring.
+		 */
 		if (fence->context == rq->fence.context)
 			continue;
 
-		/*
-		 * We don't squash repeated fence dependencies here as we
-		 * want to run our callback in all cases.
-		 */
+		/* Squash repeated waits to the same timelines */
+		if (fence->context &&
+		    intel_timeline_sync_is_later(i915_request_timeline(rq),
+						 fence))
+			continue;
 
 		if (dma_fence_is_i915(fence))
-			ret = __i915_request_await_execution(rq,
-							     to_request(fence),
-							     hook);
+			ret = i915_request_await_request(rq, to_request(fence));
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
 			return ret;
+
+		/* Record the latest fence used against each timeline */
+		if (fence->context)
+			intel_timeline_sync_set(i915_request_timeline(rq),
+						fence);
 	} while (--nchild);
 
 	return 0;
...
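
A side note on the request changes above: they treat rq->execution_mask as a bitmask of engines the request may still run on, and use is_power_of_2() as a cheap test for "exactly one engine", i.e. the request can no longer be sitting on a virtual engine. A small stand-alone illustration of that bit trick follows; it is plain C written for this page, not i915 code.

```c
/*
 * Illustration of the power-of-two test used above: a request bound to a
 * single physical engine has exactly one bit set in its execution mask.
 * Plain C for demonstration only, not part of the i915 patch.
 */
#include <assert.h>
#include <stdbool.h>

static bool is_power_of_2(unsigned long n)
{
	/* same idiom as the kernel helper: non-zero and only one bit set */
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	assert(is_power_of_2(0x4));	/* one engine: rq->engine is stable */
	assert(!is_power_of_2(0x6));	/* several candidates: may be virtual */
	return 0;
}
```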
@@ -209,14 +209,6 @@ static void kick_submission(struct intel_engine_cs *engine,
 	if (!inflight)
 		goto unlock;
 
-	ENGINE_TRACE(engine,
-		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
-		     prio,
-		     rq->fence.context, rq->fence.seqno,
-		     inflight->fence.context, inflight->fence.seqno,
-		     inflight->sched.attr.priority);
-	engine->execlists.queue_priority_hint = prio;
-
 	/*
 	 * If we are already the currently executing context, don't
 	 * bother evaluating if we should preempt ourselves.
@@ -224,6 +216,14 @@ static void kick_submission(struct intel_engine_cs *engine,
 	if (inflight->context == rq->context)
 		goto unlock;
 
+	ENGINE_TRACE(engine,
+		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
+		     prio,
+		     rq->fence.context, rq->fence.seqno,
+		     inflight->fence.context, inflight->fence.seqno,
+		     inflight->sched.attr.priority);
+	engine->execlists.queue_priority_hint = prio;
+
 	if (need_preempt(prio, rq_prio(inflight)))
 		tasklet_hi_schedule(&engine->execlists.tasklet);
...