Commit e1e52981 authored by Dave Airlie

Merge tag 'drm-intel-fixes-2019-05-23' of...

Merge tag 'drm-intel-fixes-2019-05-23' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- Fix boosting of new client to be non-preemptive
- Fix to actually bump ready tasks ahead of busywaits
- Includes gvt-fixes-2019-05-21
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190523094221.GA26026@jlahtine-desk.ger.corp.intel.com
parents 6b0538da 57cb853d
......@@ -896,12 +896,16 @@ static int cmd_reg_handler(struct parser_exec_state *s,
}
/* TODO
* Right now only scan LRI command on KBL and in inhibit context.
* It's good enough to support initializing mmio by lri command in
* vgpu inhibit context on KBL.
* In order to let workload with inhibit context to generate
* correct image data into memory, vregs values will be loaded to
* hw via LRIs in the workload with inhibit context. But as
* indirect context is loaded prior to LRIs in workload, we don't
* want reg values specified in indirect context overwritten by
* LRIs in workloads. So, when scanning an indirect context, we
* update reg values in it into vregs, so LRIs in workload with
* inhibit context will restore with correct values
*/
if ((IS_KABYLAKE(s->vgpu->gvt->dev_priv)
|| IS_COFFEELAKE(s->vgpu->gvt->dev_priv)) &&
if (IS_GEN(gvt->dev_priv, 9) &&
intel_gvt_mmio_is_in_ctx(gvt, offset) &&
!strncmp(cmd, "lri", 3)) {
intel_gvt_hypervisor_read_gpa(s->vgpu,
......
......@@ -1076,8 +1076,10 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
} else {
int type = get_next_pt_type(we->type);
if (!gtt_type_is_pt(type))
if (!gtt_type_is_pt(type)) {
ret = -EINVAL;
goto err;
}
spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
if (IS_ERR(spt)) {
......
......@@ -1364,7 +1364,6 @@ static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset,
static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
u32 trtte = *(u32 *)p_data;
if ((trtte & 1) && (trtte & (1 << 1)) == 0) {
......@@ -1373,11 +1372,6 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
return -EINVAL;
}
write_vreg(vgpu, offset, p_data, bytes);
/* TRTTE is not per-context */
mmio_hw_access_pre(dev_priv);
I915_WRITE(_MMIO(offset), vgpu_vreg(vgpu, offset));
mmio_hw_access_post(dev_priv);
return 0;
}
......@@ -1385,15 +1379,6 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
static int gen9_trtt_chicken_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
u32 val = *(u32 *)p_data;
if (val & 1) {
/* unblock hw logic */
mmio_hw_access_pre(dev_priv);
I915_WRITE(_MMIO(offset), val);
mmio_hw_access_post(dev_priv);
}
write_vreg(vgpu, offset, p_data, bytes);
return 0;
}
......
......@@ -108,12 +108,13 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
{RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
{RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
{RCS0, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
{RCS0, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
{RCS0, TRNULLDETCT, 0, false}, /* 0x4de8 */
{RCS0, TRINVTILEDETCT, 0, false}, /* 0x4dec */
{RCS0, TRVADR, 0, false}, /* 0x4df0 */
{RCS0, TRTTE, 0, false}, /* 0x4df4 */
{RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
{RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
{RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
{RCS0, TRINVTILEDETCT, 0, true}, /* 0x4dec */
{RCS0, TRVADR, 0, true}, /* 0x4df0 */
{RCS0, TRTTE, 0, true}, /* 0x4df4 */
{RCS0, _MMIO(0x4dfc), 0, true},
{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
......@@ -392,10 +393,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
if (ring_id == RCS0 &&
(IS_KABYLAKE(dev_priv) ||
IS_BROXTON(dev_priv) ||
IS_COFFEELAKE(dev_priv)))
if (ring_id == RCS0 && IS_GEN(dev_priv, 9))
return;
if (!pre && !gen9_render_mocs.initialized)
......@@ -470,11 +468,10 @@ static void switch_mmio(struct intel_vgpu *pre,
continue;
/*
* No need to do save or restore of the mmio which is in context
* state image on kabylake, it's initialized by lri command and
* state image on gen9, it's initialized by lri command and
* save or restore with context together.
*/
if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)
|| IS_COFFEELAKE(dev_priv)) && mmio->in_context)
if (IS_GEN(dev_priv, 9) && mmio->in_context)
continue;
// save
......
......@@ -298,12 +298,29 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
struct i915_request *req = workload->req;
void *shadow_ring_buffer_va;
u32 *cs;
int err;
if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)
|| IS_COFFEELAKE(req->i915))
&& is_inhibit_context(req->hw_context))
if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
intel_vgpu_restore_inhibit_context(vgpu, req);
/*
* To track whether a request has started on HW, we can emit a
* breadcrumb at the beginning of the request and check its
* timeline's HWSP to see if the breadcrumb has advanced past the
* start of this request. In fact, a request must carry the
* init_breadcrumb if its timeline has has_init_breadcrumb set;
* otherwise the scheduler might see a wrong state for it during
* reset. Since requests from gvt always set the has_init_breadcrumb
* flag, we need to call emit_init_breadcrumb for all of them here.
*/
if (req->engine->emit_init_breadcrumb) {
err = req->engine->emit_init_breadcrumb(req);
if (err) {
gvt_vgpu_err("fail to emit init breadcrumb\n");
return err;
}
}
/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs)) {
......
......@@ -20,15 +20,14 @@ enum {
I915_PRIORITY_INVALID = INT_MIN
};
#define I915_USER_PRIORITY_SHIFT 3
#define I915_USER_PRIORITY_SHIFT 2
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
#define I915_PRIORITY_WAIT ((u8)BIT(0))
#define I915_PRIORITY_NEWCLIENT ((u8)BIT(1))
#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(2))
#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1))
#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
......
......@@ -502,15 +502,6 @@ void __i915_request_unsubmit(struct i915_request *request)
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
/*
* As we do not allow WAIT to preempt inflight requests,
* once we have executed a request, along with triggering
* any execution callbacks, we must preserve its ordering
* within the non-preemptible FIFO.
*/
BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
request->sched.attr.priority |= __NO_PREEMPTION;
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
i915_request_cancel_breadcrumb(request);
......@@ -582,18 +573,7 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
switch (state) {
case FENCE_COMPLETE:
/*
* We only check a small portion of our dependencies
* and so cannot guarantee that there remains no
* semaphore chain across all. Instead of opting
* for the full NOSEMAPHORE boost, we go for the
* smaller (but still preempting) boost of
* NEWCLIENT. This will be enough to boost over
* a busywaiting request (as that cannot be
* NEWCLIENT) without accidentally boosting
* a busywait over real work elsewhere.
*/
i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
break;
case FENCE_FREE:
......@@ -874,12 +854,6 @@ emit_semaphore_wait(struct i915_request *to,
if (err < 0)
return err;
err = i915_sw_fence_await_dma_fence(&to->semaphore,
&from->fence, 0,
I915_FENCE_GFP);
if (err < 0)
return err;
/* We need to pin the signaler's HWSP until we are finished reading. */
err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
if (err)
......@@ -945,8 +919,18 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
&from->fence, 0,
I915_FENCE_GFP);
}
if (ret < 0)
return ret;
return ret < 0 ? ret : 0;
if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
ret = i915_sw_fence_await_dma_fence(&to->semaphore,
&from->fence, 0,
I915_FENCE_GFP);
if (ret < 0)
return ret;
}
return 0;
}
int
......@@ -1237,7 +1221,7 @@ void i915_request_add(struct i915_request *request)
* the bulk clients. (FQ_CODEL)
*/
if (list_empty(&request->sched.signalers_list))
attr.priority |= I915_PRIORITY_NEWCLIENT;
attr.priority |= I915_PRIORITY_WAIT;
engine->schedule(request, &attr);
}
......
......@@ -35,109 +35,6 @@ static inline bool node_signaled(const struct i915_sched_node *node)
return i915_request_completed(node_to_request(node));
}
/*
 * i915_sched_node_init - put a scheduler node into its initial state.
 * @node: node to initialise
 *
 * Initialises the three list heads embedded in @node (signalers_list,
 * waiters_list, link), sets the priority to I915_PRIORITY_INVALID and
 * zeroes the semaphores and flags fields.
 */
void i915_sched_node_init(struct i915_sched_node *node)
{
INIT_LIST_HEAD(&node->signalers_list);
INIT_LIST_HEAD(&node->waiters_list);
INIT_LIST_HEAD(&node->link);
/* __i915_schedule() tests for INVALID to detect a not-yet-scheduled node */
node->attr.priority = I915_PRIORITY_INVALID;
node->semaphores = 0;
node->flags = 0;
}
/*
 * Allocate one i915_dependency from the global.slab_dependencies cache
 * using GFP_KERNEL. The result of kmem_cache_alloc() is returned as-is;
 * the caller is expected to check it for NULL.
 */
static struct i915_dependency *
i915_dependency_alloc(void)
{
return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}
/* Return @dep to the global.slab_dependencies cache it was allocated from. */
static void
i915_dependency_free(struct i915_dependency *dep)
{
kmem_cache_free(global.slab_dependencies, dep);
}
/*
 * __i915_sched_node_add_dependency - record that @node waits on @signal.
 * @node: the waiting node
 * @signal: the node being waited upon
 * @dep: caller-provided dependency object used to link the two nodes
 * @flags: value stored in dep->flags
 *
 * Takes schedule_lock (spin_lock_irq) for the duration of the update.
 * If @signal has already completed (node_signaled()), nothing is linked
 * and @dep is left untouched.
 *
 * Returns true if @dep was linked into both nodes' lists, false otherwise.
 */
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal,
struct i915_dependency *dep,
unsigned long flags)
{
bool ret = false;
spin_lock_irq(&schedule_lock);
if (!node_signaled(signal)) {
INIT_LIST_HEAD(&dep->dfs_link);
/* dep sits on the signaler's waiters list and on the waiter's signalers list */
list_add(&dep->wait_link, &signal->waiters_list);
list_add(&dep->signal_link, &node->signalers_list);
dep->signaler = signal;
dep->flags = flags;
/* Keep track of whether anyone on this chain has a semaphore */
if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
!node_started(signal))
node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
ret = true;
}
spin_unlock_irq(&schedule_lock);
return ret;
}
/*
 * i915_sched_node_add_dependency - allocate and link a dependency.
 * @node: the waiting node
 * @signal: the node being waited upon
 *
 * Allocates an i915_dependency and hands it to
 * __i915_sched_node_add_dependency() flagged I915_DEPENDENCY_ALLOC.
 *
 * Returns -ENOMEM if the allocation fails, 0 otherwise (including the
 * case where the dependency was not linked and was freed again).
 */
int i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal)
{
struct i915_dependency *dep;
dep = i915_dependency_alloc();
if (!dep)
return -ENOMEM;
/* Not linked (helper returned false): we still own dep, so release it */
if (!__i915_sched_node_add_dependency(node, signal, dep,
I915_DEPENDENCY_ALLOC))
i915_dependency_free(dep);
return 0;
}
/*
 * i915_sched_node_fini - detach @node from every dependency it is part of.
 * @node: node being torn down; must no longer be on a link list
 *
 * Under schedule_lock, walks both dependency lists of @node, unlinks each
 * i915_dependency from the node at the other end and frees the ones
 * flagged I915_DEPENDENCY_ALLOC.
 */
void i915_sched_node_fini(struct i915_sched_node *node)
{
struct i915_dependency *dep, *tmp;
GEM_BUG_ON(!list_empty(&node->link));
spin_lock_irq(&schedule_lock);
/*
* Everyone we depended upon (the fences we wait to be signaled)
* should retire before us and remove themselves from our list.
* However, retirement is run independently on each timeline and
* so we may be called out-of-order.
*/
/* _safe iteration: entries may be freed while walking */
list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
GEM_BUG_ON(!node_signaled(dep->signaler));
GEM_BUG_ON(!list_empty(&dep->dfs_link));
/* drop dep from the signaler's waiters list */
list_del(&dep->wait_link);
if (dep->flags & I915_DEPENDENCY_ALLOC)
i915_dependency_free(dep);
}
/* Remove ourselves from everyone who depends upon us */
list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
GEM_BUG_ON(dep->signaler != node);
GEM_BUG_ON(!list_empty(&dep->dfs_link));
/* drop dep from the waiter's signalers list */
list_del(&dep->signal_link);
if (dep->flags & I915_DEPENDENCY_ALLOC)
i915_dependency_free(dep);
}
spin_unlock_irq(&schedule_lock);
}
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
......@@ -239,6 +136,11 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
return &p->requests[idx];
}
/* Return the priolist @p to the global.slab_priorities cache. */
void __i915_priolist_free(struct i915_priolist *p)
{
kmem_cache_free(global.slab_priorities, p);
}
/*
 * Scratch state used by __i915_schedule(): it is zeroed with memset() and
 * then passed by address to sched_lock_engine() for each visited node.
 */
struct sched_cache {
/* NOTE(review): presumably the most recently looked-up priority list - confirm against sched_lock_engine() */
struct list_head *priolist;
};
......@@ -273,7 +175,7 @@ static bool inflight(const struct i915_request *rq,
return active->hw_context == rq->hw_context;
}
static void __i915_schedule(struct i915_request *rq,
static void __i915_schedule(struct i915_sched_node *node,
const struct i915_sched_attr *attr)
{
struct intel_engine_cs *engine;
......@@ -287,13 +189,13 @@ static void __i915_schedule(struct i915_request *rq,
lockdep_assert_held(&schedule_lock);
GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
if (i915_request_completed(rq))
if (node_signaled(node))
return;
if (prio <= READ_ONCE(rq->sched.attr.priority))
if (prio <= READ_ONCE(node->attr.priority))
return;
stack.signaler = &rq->sched;
stack.signaler = node;
list_add(&stack.dfs_link, &dfs);
/*
......@@ -344,9 +246,9 @@ static void __i915_schedule(struct i915_request *rq,
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
*/
if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
GEM_BUG_ON(!list_empty(&rq->sched.link));
rq->sched.attr = *attr;
if (node->attr.priority == I915_PRIORITY_INVALID) {
GEM_BUG_ON(!list_empty(&node->link));
node->attr = *attr;
if (stack.dfs_link.next == stack.dfs_link.prev)
return;
......@@ -355,15 +257,14 @@ static void __i915_schedule(struct i915_request *rq,
}
memset(&cache, 0, sizeof(cache));
engine = rq->engine;
engine = node_to_request(node)->engine;
spin_lock(&engine->timeline.lock);
/* Fifo and depth-first replacement ensure our deps execute before us */
list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
struct i915_sched_node *node = dep->signaler;
INIT_LIST_HEAD(&dep->dfs_link);
node = dep->signaler;
engine = sched_lock_engine(node, engine, &cache);
lockdep_assert_held(&engine->timeline.lock);
......@@ -413,13 +314,20 @@ static void __i915_schedule(struct i915_request *rq,
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
spin_lock_irq(&schedule_lock);
__i915_schedule(rq, attr);
__i915_schedule(&rq->sched, attr);
spin_unlock_irq(&schedule_lock);
}
/*
 * OR the @bump bits into a copy of @node's current scheduling attributes
 * and reschedule the node with the result via __i915_schedule().
 *
 * __i915_schedule() asserts that schedule_lock is held, so callers must
 * already hold it.
 */
static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
struct i915_sched_attr attr = node->attr;
attr.priority |= bump;
__i915_schedule(node, &attr);
}
void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
struct i915_sched_attr attr;
unsigned long flags;
GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
......@@ -428,17 +336,122 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
return;
spin_lock_irqsave(&schedule_lock, flags);
__bump_priority(&rq->sched, bump);
spin_unlock_irqrestore(&schedule_lock, flags);
}
attr = rq->sched.attr;
attr.priority |= bump;
__i915_schedule(rq, &attr);
/*
 * i915_sched_node_init - put a scheduler node into its initial state.
 * @node: node to initialise
 *
 * Initialises the three list heads embedded in @node (signalers_list,
 * waiters_list, link), sets the priority to I915_PRIORITY_INVALID and
 * zeroes the semaphores and flags fields.
 */
void i915_sched_node_init(struct i915_sched_node *node)
{
INIT_LIST_HEAD(&node->signalers_list);
INIT_LIST_HEAD(&node->waiters_list);
INIT_LIST_HEAD(&node->link);
/* __i915_schedule() tests for INVALID to detect a not-yet-scheduled node */
node->attr.priority = I915_PRIORITY_INVALID;
node->semaphores = 0;
node->flags = 0;
}
spin_unlock_irqrestore(&schedule_lock, flags);
/*
 * Allocate one i915_dependency from the global.slab_dependencies cache
 * using GFP_KERNEL. The result of kmem_cache_alloc() is returned as-is;
 * the caller is expected to check it for NULL.
 */
static struct i915_dependency *
i915_dependency_alloc(void)
{
return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}
void __i915_priolist_free(struct i915_priolist *p)
static void
i915_dependency_free(struct i915_dependency *dep)
{
kmem_cache_free(global.slab_priorities, p);
kmem_cache_free(global.slab_dependencies, dep);
}
/*
 * __i915_sched_node_add_dependency - record that @node waits on @signal.
 * @node: the waiting node
 * @signal: the node being waited upon
 * @dep: caller-provided dependency object used to link the two nodes
 * @flags: value stored in dep->flags; I915_DEPENDENCY_EXTERNAL additionally
 *         bumps @signal's priority by __NO_PREEMPTION
 *
 * Takes schedule_lock (spin_lock_irq) for the duration of the update.
 * If @signal has already completed (node_signaled()), nothing is linked
 * and @dep is left untouched.
 *
 * Returns true if @dep was linked into both nodes' lists, false otherwise.
 */
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal,
struct i915_dependency *dep,
unsigned long flags)
{
bool ret = false;
spin_lock_irq(&schedule_lock);
if (!node_signaled(signal)) {
INIT_LIST_HEAD(&dep->dfs_link);
/* dep sits on the signaler's waiters list and on the waiter's signalers list */
list_add(&dep->wait_link, &signal->waiters_list);
list_add(&dep->signal_link, &node->signalers_list);
dep->signaler = signal;
dep->flags = flags;
/* Keep track of whether anyone on this chain has a semaphore */
if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
!node_started(signal))
node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
/*
* As we do not allow WAIT to preempt inflight requests,
* once we have executed a request, along with triggering
* any execution callbacks, we must preserve its ordering
* within the non-preemptible FIFO.
*/
/* Compile-time check: __NO_PREEMPTION must fit inside I915_PRIORITY_MASK */
BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
/* schedule_lock is already held here, as __bump_priority() requires */
if (flags & I915_DEPENDENCY_EXTERNAL)
__bump_priority(signal, __NO_PREEMPTION);
ret = true;
}
spin_unlock_irq(&schedule_lock);
return ret;
}
/*
 * i915_sched_node_add_dependency - allocate and link a dependency.
 * @node: the waiting node
 * @signal: the node being waited upon
 *
 * Allocates an i915_dependency and hands it to
 * __i915_sched_node_add_dependency() flagged with both
 * I915_DEPENDENCY_EXTERNAL and I915_DEPENDENCY_ALLOC.
 *
 * Returns -ENOMEM if the allocation fails, 0 otherwise (including the
 * case where the dependency was not linked and was freed again).
 */
int i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal)
{
struct i915_dependency *dep;
dep = i915_dependency_alloc();
if (!dep)
return -ENOMEM;
/* Not linked (helper returned false): we still own dep, so release it */
if (!__i915_sched_node_add_dependency(node, signal, dep,
I915_DEPENDENCY_EXTERNAL |
I915_DEPENDENCY_ALLOC))
i915_dependency_free(dep);
return 0;
}
/*
 * i915_sched_node_fini - detach @node from every dependency it is part of.
 * @node: node being torn down; must no longer be on a link list
 *
 * Under schedule_lock, walks both dependency lists of @node, unlinks each
 * i915_dependency from the node at the other end and frees the ones
 * flagged I915_DEPENDENCY_ALLOC.
 */
void i915_sched_node_fini(struct i915_sched_node *node)
{
struct i915_dependency *dep, *tmp;
GEM_BUG_ON(!list_empty(&node->link));
spin_lock_irq(&schedule_lock);
/*
* Everyone we depended upon (the fences we wait to be signaled)
* should retire before us and remove themselves from our list.
* However, retirement is run independently on each timeline and
* so we may be called out-of-order.
*/
/* _safe iteration: entries may be freed while walking */
list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
GEM_BUG_ON(!node_signaled(dep->signaler));
GEM_BUG_ON(!list_empty(&dep->dfs_link));
/* drop dep from the signaler's waiters list */
list_del(&dep->wait_link);
if (dep->flags & I915_DEPENDENCY_ALLOC)
i915_dependency_free(dep);
}
/* Remove ourselves from everyone who depends upon us */
list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
GEM_BUG_ON(dep->signaler != node);
GEM_BUG_ON(!list_empty(&dep->dfs_link));
/* drop dep from the waiter's signalers list */
list_del(&dep->signal_link);
if (dep->flags & I915_DEPENDENCY_ALLOC)
i915_dependency_free(dep);
}
spin_unlock_irq(&schedule_lock);
}
static void i915_global_scheduler_shrink(void)
......
......@@ -66,7 +66,8 @@ struct i915_dependency {
struct list_head wait_link;
struct list_head dfs_link;
unsigned long flags;
#define I915_DEPENDENCY_ALLOC BIT(0)
#define I915_DEPENDENCY_ALLOC BIT(0)
#define I915_DEPENDENCY_EXTERNAL BIT(1)
};
#endif /* _I915_SCHEDULER_TYPES_H_ */
......@@ -164,7 +164,7 @@
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
#define ACTIVE_PRIORITY (I915_PRIORITY_NOSEMAPHORE)
static int execlists_context_deferred_alloc(struct intel_context *ce,
struct intel_engine_cs *engine);
......
......@@ -99,12 +99,14 @@ static int live_busywait_preempt(void *arg)
ctx_hi = kernel_context(i915);
if (!ctx_hi)
goto err_unlock;
ctx_hi->sched.priority = INT_MAX;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
ctx_lo = kernel_context(i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority = INT_MIN;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
......@@ -954,12 +956,14 @@ static int live_preempt_hang(void *arg)
ctx_hi = kernel_context(i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
ctx_lo = kernel_context(i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
for_each_engine(engine, i915, id) {
struct i915_request *rq;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment