Commit 9c117313 authored by Dave Airlie

Merge tag 'drm-intel-next-fixes-2017-11-10' of...

Merge tag 'drm-intel-next-fixes-2017-11-10' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

drm/i915 fixes for v4.15

* tag 'drm-intel-next-fixes-2017-11-10' of git://anongit.freedesktop.org/drm/drm-intel:
  drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
  drm/i915: Move init_clock_gating() back to where it was
  drm/i915: Prune the reservation shared fence array
  drm/i915: Idle the GPU before shrinking everything
  drm/i915: Lock llist_del_first() vs llist_del_all()
  drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
  drm/i915: Disable lazy PPGTT page table optimization for vGPU
  drm/i915/execlists: Remove the priority "optimisation"
  drm/i915: Filter out spurious execlists context-switch interrupts
parents fee25cb9 e8c49fa9
......@@ -4603,11 +4603,17 @@ static void __i915_gem_free_work(struct work_struct *work)
* unbound now.
*/
spin_lock(&i915->mm.free_lock);
while ((freed = llist_del_all(&i915->mm.free_list))) {
spin_unlock(&i915->mm.free_lock);
__i915_gem_free_objects(i915, freed);
if (need_resched())
break;
return;
spin_lock(&i915->mm.free_lock);
}
spin_unlock(&i915->mm.free_lock);
}
static void __i915_gem_free_object_rcu(struct rcu_head *head)
......
......@@ -106,14 +106,9 @@ static void lut_close(struct i915_gem_context *ctx)
radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
struct i915_vma *vma = rcu_dereference_raw(*slot);
struct drm_i915_gem_object *obj = vma->obj;
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
if (!i915_vma_is_ggtt(vma))
i915_vma_close(vma);
__i915_gem_object_release_unless_active(obj);
__i915_gem_object_release_unless_active(vma->obj);
}
}
......@@ -198,6 +193,11 @@ static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);
/*
* The LUT uses the VMA as a backpointer to unref the object,
* so we need to clear the LUT before we close all the VMA (inside
* the ppgtt).
*/
lut_close(ctx);
if (ctx->ppgtt)
i915_ppgtt_close(&ctx->ppgtt->base);
......
......@@ -1341,7 +1341,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
if (IS_ERR(pt))
goto unwind;
if (count < GEN8_PTES)
if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
gen8_initialize_pt(vm, pt);
gen8_ppgtt_set_pde(vm, pd, pt, pde);
......
......@@ -162,6 +162,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
if (!shrinker_lock(dev_priv, &unlock))
return 0;
/*
* When shrinking the active list, also consider active contexts.
* Active contexts are pinned until they are retired, and so can
* not be simply unbound to retire and unpin their pages. To shrink
* the contexts, we must wait until the gpu is idle.
*
* We don't care about errors here; if we cannot wait upon the GPU,
* we will free as much as we can and hope to get a second chance.
*/
if (flags & I915_SHRINK_ACTIVE)
i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
trace_i915_gem_shrink(dev_priv, target, flags);
i915_gem_retire_requests(dev_priv);
......
......@@ -610,6 +610,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
execlists->first = rb;
if (submit) {
port_assign(port, last);
execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
i915_guc_submit(engine);
}
spin_unlock_irq(&engine->timeline->lock);
......@@ -633,6 +634,8 @@ static void i915_guc_irq_handler(unsigned long data)
rq = port_request(&port[0]);
}
if (!rq)
execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
if (!port_isset(last_port))
i915_guc_dequeue(engine);
......
......@@ -1388,8 +1388,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
bool tasklet = false;
if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
tasklet = true;
if (READ_ONCE(engine->execlists.active)) {
__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
tasklet = true;
}
}
if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
......
......@@ -54,6 +54,13 @@ i915_vma_retire(struct i915_gem_active *active,
if (--obj->active_count)
return;
/* Prune the shared fence arrays iff completely idle (inc. external) */
if (reservation_object_trylock(obj->resv)) {
if (reservation_object_test_signaled_rcu(obj->resv, true))
reservation_object_add_excl_fence(obj->resv, NULL);
reservation_object_unlock(obj->resv);
}
/* Bump our place on the bound list to keep it roughly in LRU order
* so that we don't steal from recently used but inactive objects
* (unless we are forced to ofc!)
......
......@@ -3676,6 +3676,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
intel_pps_unlock_regs_wa(dev_priv);
intel_modeset_init_hw(dev);
intel_init_clock_gating(dev_priv);
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display.hpd_irq_setup)
......@@ -14350,8 +14351,6 @@ void intel_modeset_init_hw(struct drm_device *dev)
intel_update_cdclk(dev_priv);
dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw;
intel_init_clock_gating(dev_priv);
}
/*
......@@ -15063,6 +15062,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
struct intel_encoder *encoder;
int i;
if (IS_HASWELL(dev_priv)) {
/*
* WaRsPkgCStateDisplayPMReq:hsw
* System hang if this isn't done before disabling all planes!
*/
I915_WRITE(CHICKEN_PAR1_1,
I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
}
intel_modeset_readout_hw_state(dev);
/* HW state is read out, now we need to sanitize this mess. */
......@@ -15160,6 +15168,8 @@ void intel_modeset_gem_init(struct drm_device *dev)
intel_init_gt_powersave(dev_priv);
intel_init_clock_gating(dev_priv);
intel_setup_overlay(dev_priv);
}
......
......@@ -1548,8 +1548,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
return false;
/* Both ports drained, no more ELSP submission? */
if (port_request(&engine->execlists.port[0]))
/* Waiting to drain ELSP? */
if (READ_ONCE(engine->execlists.active))
return false;
/* ELSP is empty, but there are ready requests? */
......@@ -1749,6 +1749,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
idx);
}
}
drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
rcu_read_unlock();
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
......
......@@ -575,7 +575,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the state of the GPU is known (idle).
*/
inject_preempt_context(engine);
execlists->preempt = true;
execlists_set_active(execlists,
EXECLISTS_ACTIVE_PREEMPT);
goto unlock;
} else {
/*
......@@ -683,8 +684,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
unlock:
spin_unlock_irq(&engine->timeline->lock);
if (submit)
if (submit) {
execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_submit_ports(engine);
}
}
static void
......@@ -696,6 +699,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists)
while (num_ports-- && port_isset(port)) {
struct drm_i915_gem_request *rq = port_request(port);
GEM_BUG_ON(!execlists->active);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_gem_request_put(rq);
......@@ -730,7 +734,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link);
rq->priotree.priority = INT_MAX;
dma_fence_set_error(&rq->fence, -EIO);
__i915_gem_request_submit(rq);
......@@ -861,15 +864,21 @@ static void intel_lrc_irq_handler(unsigned long data)
unwind_incomplete_requests(engine);
spin_unlock_irq(&engine->timeline->lock);
GEM_BUG_ON(!execlists->preempt);
execlists->preempt = false;
GEM_BUG_ON(!execlists_is_active(execlists,
EXECLISTS_ACTIVE_PREEMPT));
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_PREEMPT);
continue;
}
if (status & GEN8_CTX_STATUS_PREEMPTED &&
execlists->preempt)
execlists_is_active(execlists,
EXECLISTS_ACTIVE_PREEMPT))
continue;
GEM_BUG_ON(!execlists_is_active(execlists,
EXECLISTS_ACTIVE_USER));
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
......@@ -881,7 +890,6 @@ static void intel_lrc_irq_handler(unsigned long data)
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
trace_i915_gem_request_out(rq);
rq->priotree.priority = INT_MAX;
i915_gem_request_put(rq);
execlists_port_complete(execlists, port);
......@@ -892,6 +900,9 @@ static void intel_lrc_irq_handler(unsigned long data)
/* After the final element, the hw should be idle */
GEM_BUG_ON(port_count(port) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
if (port_count(port) == 0)
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_USER);
}
if (head != execlists->csb_head) {
......@@ -901,7 +912,7 @@ static void intel_lrc_irq_handler(unsigned long data)
}
}
if (!execlists->preempt)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
......@@ -1460,7 +1471,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
execlists->csb_head = -1;
execlists->preempt = false;
execlists->active = 0;
/* After a GPU reset, we may have requests to replay */
if (!i915_modparams.enable_guc_submission && execlists->first)
......
......@@ -3133,7 +3133,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev,
struct intel_crtc_state *newstate)
{
struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk;
struct intel_atomic_state *intel_state =
to_intel_atomic_state(newstate->base.state);
const struct intel_crtc_state *oldstate =
intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
int level, max_level = ilk_wm_max_level(to_i915(dev));
/*
......@@ -3142,6 +3146,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev,
* and after the vblank.
*/
*a = newstate->wm.ilk.optimal;
if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
return 0;
a->pipe_enabled |= b->pipe_enabled;
a->sprites_enabled |= b->sprites_enabled;
a->sprites_scaled |= b->sprites_scaled;
......@@ -5755,12 +5762,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
mutex_unlock(&dev_priv->wm.wm_mutex);
}
/*
* FIXME should probably kill this and improve
* the real watermark readout/sanitation instead
*/
/*
 * Clear the WM1_LP_SR_EN bit in WM3_LP_ILK, WM2_LP_ILK and WM1_LP_ILK,
 * in that order, with one read-modify-write per register.
 */
static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
{
	uint32_t wm;

	wm = I915_READ(WM3_LP_ILK);
	wm &= ~WM1_LP_SR_EN;
	I915_WRITE(WM3_LP_ILK, wm);

	wm = I915_READ(WM2_LP_ILK);
	wm &= ~WM1_LP_SR_EN;
	I915_WRITE(WM2_LP_ILK, wm);

	wm = I915_READ(WM1_LP_ILK);
	wm &= ~WM1_LP_SR_EN;
	I915_WRITE(WM1_LP_ILK, wm);

	/*
	 * Don't touch WM1S_LP_EN here.
	 * Doing so could cause underruns.
	 */
}
void ilk_wm_get_hw_state(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct ilk_wm_values *hw = &dev_priv->wm.hw;
struct drm_crtc *crtc;
ilk_init_lp_watermarks(dev_priv);
for_each_crtc(dev, crtc)
ilk_pipe_wm_get_hw_state(crtc);
......@@ -8207,18 +8232,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
}
}
/*
 * Clear the WM1_LP_SR_EN bit in WM3_LP_ILK, WM2_LP_ILK and WM1_LP_ILK,
 * in that order, with one read-modify-write per register.
 */
static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
{
	uint32_t wm;

	wm = I915_READ(WM3_LP_ILK);
	wm &= ~WM1_LP_SR_EN;
	I915_WRITE(WM3_LP_ILK, wm);

	wm = I915_READ(WM2_LP_ILK);
	wm &= ~WM1_LP_SR_EN;
	I915_WRITE(WM2_LP_ILK, wm);

	wm = I915_READ(WM1_LP_ILK);
	wm &= ~WM1_LP_SR_EN;
	I915_WRITE(WM1_LP_ILK, wm);

	/*
	 * Don't touch WM1S_LP_EN here.
	 * Doing so could cause underruns.
	 */
}
static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
{
uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
......@@ -8252,8 +8265,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
(I915_READ(DISP_ARB_CTL) |
DISP_FBC_WM_DIS));
ilk_init_lp_watermarks(dev_priv);
/*
* Based on the document from hardware guys the following bits
* should be set unconditionally in order to enable FBC.
......@@ -8366,8 +8377,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_GT_MODE,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
ilk_init_lp_watermarks(dev_priv);
I915_WRITE(CACHE_MODE_0,
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
......@@ -8594,8 +8603,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
I915_GTT_PAGE_SIZE_2M);
enum pipe pipe;
ilk_init_lp_watermarks(dev_priv);
/* WaSwitchSolVfFArbitrationPriority:bdw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
......@@ -8646,8 +8653,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
{
ilk_init_lp_watermarks(dev_priv);
/* L3 caching of data atomics doesn't work -- disable it. */
I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
I915_WRITE(HSW_ROW_CHICKEN3,
......@@ -8691,10 +8696,6 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
/* WaSwitchSolVfFArbitrationPriority:hsw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
/* WaRsPkgCStateDisplayPMReq:hsw */
I915_WRITE(CHICKEN_PAR1_1,
I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
lpt_init_clock_gating(dev_priv);
}
......@@ -8702,8 +8703,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
{
uint32_t snpcr;
ilk_init_lp_watermarks(dev_priv);
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
/* WaDisableEarlyCull:ivb */
......
......@@ -241,9 +241,17 @@ struct intel_engine_execlists {
} port[EXECLIST_MAX_PORTS];
/**
* @preempt: are we currently handling a preempting context switch?
* @active: is the HW active? We consider the HW as active after
* submitting any context for execution and until we have seen the
* last context completion event. After that, we do not expect any
* more events until we submit, and so can park the HW.
*
* As we have a small number of different sources from which we feed
* the HW, we track the state of each inside a single bitfield.
*/
bool preempt;
unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
/**
* @port_mask: number of execlist ports - 1
......@@ -525,6 +533,27 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
};
/*
 * Accessors for the execlists->active state bits.
 *
 * @bit is a bit index (e.g. EXECLISTS_ACTIVE_USER, EXECLISTS_ACTIVE_PREEMPT),
 * not a mask. The field is an unsigned int, so it is manipulated directly
 * with mask arithmetic; going through an (unsigned long *) cast would make
 * the access wider than the field on 64-bit builds and endian-dependent.
 *
 * These are plain, non-atomic read-modify-write operations; callers are
 * responsible for any serialisation they need.
 */

/* Mark state @bit as active. */
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	execlists->active |= 1u << bit;
}

/* Mark state @bit as no longer active. */
static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	execlists->active &= ~(1u << bit);
}

/* Return true if state @bit is currently marked active. */
static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return (execlists->active & (1u << bit)) != 0;
}
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
......@@ -538,6 +567,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists,
const unsigned int m = execlists->port_mask;
GEM_BUG_ON(port_index(port, execlists) != 0);
GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
memmove(port, port + 1, m * sizeof(struct execlist_port));
memset(port + m, 0, sizeof(struct execlist_port));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment