Commit a9877da2 authored by Chris Wilson

drm/i915/oa: Reconfigure contexts on the fly

Avoid a global idle barrier by reconfiguring each context by rewriting
them with MI_STORE_DWORD from the kernel context.

v2: We only need to determine the desired register values once, they are
the same for all contexts.
v3: Don't remove the kernel context from the list of known GEM contexts;
the world is not ready for that yet.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190716213443.9874-1-chris@chris-wilson.co.uk
parent bea5faf7
...@@ -1173,26 +1173,11 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) ...@@ -1173,26 +1173,11 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
if (IS_ERR(rq)) if (IS_ERR(rq))
return PTR_ERR(rq); return PTR_ERR(rq);
/* Queue this switch after all other activity by this context. */ /* Serialise with the remote context */
ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); ret = intel_context_prepare_remote_request(ce, rq);
if (ret) if (ret == 0)
goto out_add;
/*
* Guarantee context image and the timeline remains pinned until the
* modifying request is retired by setting the ce activity tracker.
*
* But we only need to take one pin on the account of it. Or in other
* words transfer the pinned ce object to tracked active request.
*/
GEM_BUG_ON(i915_active_is_idle(&ce->active));
ret = i915_active_ref(&ce->active, rq->fence.context, rq);
if (ret)
goto out_add;
ret = gen8_emit_rpcs_config(rq, ce, sseu); ret = gen8_emit_rpcs_config(rq, ce, sseu);
out_add:
i915_request_add(rq); i915_request_add(rq);
return ret; return ret;
} }
......
...@@ -239,6 +239,31 @@ void intel_context_exit_engine(struct intel_context *ce) ...@@ -239,6 +239,31 @@ void intel_context_exit_engine(struct intel_context *ce)
intel_engine_pm_put(ce->engine); intel_engine_pm_put(ce->engine);
} }
/*
 * Prepare @rq, a request that was created on a different context, to
 * modify @ce remotely.
 *
 * @rq is first set as the last request on @ce's ring timeline, and then
 * recorded in @ce's activity tracker.
 *
 * Returns 0 on success, otherwise the error reported by
 * i915_active_request_set() or i915_active_ref().
 */
int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
	int ret;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->hw_context == ce);

	/* Queue this switch after all other activity by this context. */
	ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
	if (ret == 0) {
		/*
		 * Guarantee context image and the timeline remains pinned
		 * until the modifying request is retired by setting the ce
		 * activity tracker.
		 *
		 * But we only need to take one pin on the account of it. Or
		 * in other words transfer the pinned ce object to tracked
		 * active request.
		 */
		GEM_BUG_ON(i915_active_is_idle(&ce->active));
		ret = i915_active_ref(&ce->active, rq->fence.context, rq);
	}

	return ret;
}
struct i915_request *intel_context_create_request(struct intel_context *ce) struct i915_request *intel_context_create_request(struct intel_context *ce)
{ {
struct i915_request *rq; struct i915_request *rq;
......
...@@ -139,6 +139,9 @@ static inline void intel_context_timeline_unlock(struct intel_context *ce) ...@@ -139,6 +139,9 @@ static inline void intel_context_timeline_unlock(struct intel_context *ce)
mutex_unlock(&ce->ring->timeline->mutex); mutex_unlock(&ce->ring->timeline->mutex);
} }
/*
 * Serialise @rq against @ce so that @rq may modify @ce from another
 * context; @rq must not have been created on @ce itself.
 * Returns 0 on success or an error code.
 */
int intel_context_prepare_remote_request(struct intel_context *ce,
struct i915_request *rq);
struct i915_request *intel_context_create_request(struct intel_context *ce); struct i915_request *intel_context_create_request(struct intel_context *ce);
#endif /* __INTEL_CONTEXT_H__ */ #endif /* __INTEL_CONTEXT_H__ */
...@@ -1576,9 +1576,12 @@ __execlists_update_reg_state(struct intel_context *ce, ...@@ -1576,9 +1576,12 @@ __execlists_update_reg_state(struct intel_context *ce,
regs[CTX_RING_TAIL + 1] = ring->tail; regs[CTX_RING_TAIL + 1] = ring->tail;
/* RPCS */ /* RPCS */
if (engine->class == RENDER_CLASS) if (engine->class == RENDER_CLASS) {
regs[CTX_R_PWR_CLK_STATE + 1] = regs[CTX_R_PWR_CLK_STATE + 1] =
intel_sseu_make_rpcs(engine->i915, &ce->sseu); intel_sseu_make_rpcs(engine->i915, &ce->sseu);
i915_oa_init_reg_state(engine, ce, regs);
}
} }
static int static int
...@@ -3001,8 +3004,6 @@ static void execlists_init_reg_state(u32 *regs, ...@@ -3001,8 +3004,6 @@ static void execlists_init_reg_state(u32 *regs,
if (rcs) { if (rcs) {
regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
} }
regs[CTX_END] = MI_BATCH_BUFFER_END; regs[CTX_END] = MI_BATCH_BUFFER_END;
......
...@@ -1636,6 +1636,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) ...@@ -1636,6 +1636,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
~GT_NOA_ENABLE)); ~GT_NOA_ENABLE));
} }
/*
 * Find the value that @oa_config supplies for the flex EU register @reg.
 *
 * The config's flex_regs[] table is searched for an entry whose MMIO
 * offset matches @reg; the first match wins. Returns 0 when @oa_config is
 * NULL or when it has no entry for @reg.
 */
static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
			      i915_reg_t reg)
{
	const u32 wanted = i915_mmio_reg_offset(reg);
	/*
	 * This arbitrary default will select the 'EU FPU0 Pipeline
	 * Active' event. In the future it's anticipated that there
	 * will be an explicit 'No Event' we can select, but not yet...
	 */
	u32 value = 0;
	int n;

	if (oa_config) {
		for (n = 0; n < oa_config->flex_regs_len; n++) {
			if (i915_mmio_reg_offset(oa_config->flex_regs[n].addr) != wanted)
				continue;

			value = oa_config->flex_regs[n].value;
			break;
		}
	}

	return value;
}
/* /*
* NB: It must always remain pointer safe to run this even if the OA unit * NB: It must always remain pointer safe to run this even if the OA unit
* has been disabled. * has been disabled.
...@@ -1669,28 +1690,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, ...@@ -1669,28 +1690,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
GEN8_OA_COUNTER_RESUME); GEN8_OA_COUNTER_RESUME);
for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
u32 state_offset = ctx_flexeu0 + i * 2; CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
u32 mmio = i915_mmio_reg_offset(flex_regs[i]); oa_config_flex_reg(oa_config, flex_regs[i]));
/*
* This arbitrary default will select the 'EU FPU0 Pipeline
* Active' event. In the future it's anticipated that there
* will be an explicit 'No Event' we can select, but not yet...
*/
u32 value = 0;
if (oa_config) {
u32 j;
for (j = 0; j < oa_config->flex_regs_len; j++) {
if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
value = oa_config->flex_regs[j].value;
break;
}
}
}
CTX_REG(reg_state, state_offset, flex_regs[i], value);
} }
CTX_REG(reg_state, CTX_REG(reg_state,
...@@ -1698,6 +1699,99 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, ...@@ -1698,6 +1699,99 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
intel_sseu_make_rpcs(i915, &ce->sseu)); intel_sseu_make_rpcs(i915, &ce->sseu));
} }
/*
 * One register update to apply to a context: the register itself, the
 * position of its slot in the context image, and the value to program.
 */
struct flex {
/* MMIO register; gen8_load_flex() emits its offset in a LOAD_REGISTER_IMM */
i915_reg_t reg;
/* dword index within the context state page, used by gen8_store_flex() */
u32 offset;
/* value to write, used by both gen8_store_flex() and gen8_load_flex() */
u32 value;
};
/*
 * Emit commands into @rq that overwrite register values stored in the
 * context image of @ce.
 *
 * For each of the @count entries in @flex, one MI_STORE_DWORD_IMM (GGTT
 * addressed, 4 dwords) is emitted that writes flex->value into the state
 * page of @ce->state at dword (flex->offset + 1).
 *
 * @count must be non-zero: the do/while below always emits at least one
 * entry and would underflow the counter otherwise.
 *
 * Returns 0 on success, or the error from intel_ring_begin().
 */
static int
gen8_store_flex(struct i915_request *rq,
		struct intel_context *ce,
		const struct flex *flex, unsigned int count)
{
	u32 offset;
	u32 *cs;

	/* Same precondition as gen8_load_flex(): an empty list is a bug */
	GEM_BUG_ON(!count);

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* GGTT address of the register state page inside the context image */
	offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
	do {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		/*
		 * Target the dword following flex->offset (presumably the
		 * value slot of the register/value pair -- confirm against
		 * the context image layout).
		 */
		*cs++ = offset + (flex->offset + 1) * sizeof(u32);
		*cs++ = 0;
		*cs++ = flex->value;
	} while (flex++, --count);

	intel_ring_advance(rq, cs);

	return 0;
}
static int
gen8_load_flex(struct i915_request *rq,
struct intel_context *ce,
const struct flex *flex, unsigned int count)
{
u32 *cs;
GEM_BUG_ON(!count || count > 63);
cs = intel_ring_begin(rq, 2 * count + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(count);
do {
*cs++ = i915_mmio_reg_offset(flex->reg);
*cs++ = flex->value;
} while (flex++, --count);
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
return 0;
}
/*
 * Rewrite the saved register values of @ce from the kernel context.
 *
 * A request is created on the kernel context of @ce's engine, serialised
 * against @ce, and filled with the stores emitted by gen8_store_flex().
 * The caller must hold @ce->pin_mutex.
 *
 * The request is always added once created, even when a later step fails.
 * Returns 0 on success or the first error encountered.
 */
static int gen8_modify_context(struct intel_context *ce,
			       const struct flex *flex, unsigned int count)
{
	struct i915_request *rq;
	int ret;

	lockdep_assert_held(&ce->pin_mutex);

	rq = i915_request_create(ce->engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Serialise with the remote context */
	ret = intel_context_prepare_remote_request(ce, rq);
	if (ret)
		goto out_add;

	ret = gen8_store_flex(rq, ce, flex, count);

out_add:
	i915_request_add(rq);
	return ret;
}
/*
 * Update the registers of @ce from a request running on @ce itself.
 *
 * A request is created on @ce and filled with the register loads emitted
 * by gen8_load_flex(). The request is added whether or not emission
 * succeeded. Returns 0 on success, or the error from request creation or
 * gen8_load_flex().
 */
static int gen8_modify_self(struct intel_context *ce,
			    const struct flex *flex, unsigned int count)
{
	struct i915_request *rq = i915_request_create(ce);
	int ret;

	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = gen8_load_flex(rq, ce, flex, count);

	i915_request_add(rq);
	return ret;
}
/* /*
* Manages updating the per-context aspects of the OA stream * Manages updating the per-context aspects of the OA stream
* configuration across all contexts. * configuration across all contexts.
...@@ -1722,15 +1816,43 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, ...@@ -1722,15 +1816,43 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
* *
* Note: it's only the RCS/Render context that has any OA state. * Note: it's only the RCS/Render context that has any OA state.
*/ */
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, static int gen8_configure_all_contexts(struct drm_i915_private *i915,
const struct i915_oa_config *oa_config) const struct i915_oa_config *oa_config)
{ {
unsigned int map_type = i915_coherent_map_type(dev_priv); /* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
struct flex regs[] = {
{
GEN8_R_PWR_CLK_STATE,
CTX_R_PWR_CLK_STATE,
},
{
GEN8_OACTXCONTROL,
i915->perf.oa.ctx_oactxctrl_offset,
((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME)
},
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
{ EU_PERF_CNTL2, ctx_flexeuN(2) },
{ EU_PERF_CNTL3, ctx_flexeuN(3) },
{ EU_PERF_CNTL4, ctx_flexeuN(4) },
{ EU_PERF_CNTL5, ctx_flexeuN(5) },
{ EU_PERF_CNTL6, ctx_flexeuN(6) },
};
#undef ctx_flexeuN
struct intel_engine_cs *engine;
struct i915_gem_context *ctx; struct i915_gem_context *ctx;
struct i915_request *rq; enum intel_engine_id id;
int ret; int err;
int i;
lockdep_assert_held(&dev_priv->drm.struct_mutex); for (i = 2; i < ARRAY_SIZE(regs); i++)
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
lockdep_assert_held(&i915->drm.struct_mutex);
/* /*
* The OA register config is setup through the context image. This image * The OA register config is setup through the context image. This image
...@@ -1742,58 +1864,63 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, ...@@ -1742,58 +1864,63 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
* this might leave small interval of time where the OA unit is * this might leave small interval of time where the OA unit is
* configured at an invalid sampling period. * configured at an invalid sampling period.
* *
* So far the best way to work around this issue seems to be draining * Note that since we emit all requests from a single ring, there
* the GPU from any submitted work. * is still an implicit global barrier here that may cause a high
* priority context to wait for an otherwise independent low priority
* context. Contexts idle at the time of reconfiguration are not
* trapped behind the barrier.
*/ */
ret = i915_gem_wait_for_idle(dev_priv, list_for_each_entry(ctx, &i915->contexts.list, link) {
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
/* Update all contexts now that we've stalled the submission. */
list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
struct i915_gem_engines_iter it; struct i915_gem_engines_iter it;
struct intel_context *ce; struct intel_context *ce;
if (ctx == i915->kernel_context)
continue;
for_each_gem_engine(ce, for_each_gem_engine(ce,
i915_gem_context_lock_engines(ctx), i915_gem_context_lock_engines(ctx),
it) { it) {
u32 *regs; GEM_BUG_ON(ce == ce->engine->kernel_context);
if (ce->engine->class != RENDER_CLASS) if (ce->engine->class != RENDER_CLASS)
continue; continue;
/* OA settings will be set upon first use */ err = intel_context_lock_pinned(ce);
if (!ce->state) if (err)
continue; break;
regs = i915_gem_object_pin_map(ce->state->obj,
map_type);
if (IS_ERR(regs)) {
i915_gem_context_unlock_engines(ctx);
return PTR_ERR(regs);
}
ce->state->obj->mm.dirty = true; regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
gen8_update_reg_state_unlocked(ce, regs, oa_config); /* Otherwise OA settings will be set upon first use */
if (intel_context_is_pinned(ce))
err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs));
i915_gem_object_unpin_map(ce->state->obj); intel_context_unlock_pinned(ce);
if (err)
break;
} }
i915_gem_context_unlock_engines(ctx); i915_gem_context_unlock_engines(ctx);
if (err)
return err;
} }
/* /*
* Apply the configuration by doing one context restore of the edited * After updating all other contexts, we need to modify ourselves.
* context image. * If we don't modify the kernel_context, we do not get events while
* idle.
*/ */
rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); for_each_engine(engine, i915, id) {
if (IS_ERR(rq)) struct intel_context *ce = engine->kernel_context;
return PTR_ERR(rq);
i915_request_add(rq); if (engine->class != RENDER_CLASS)
continue;
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
if (err)
return err;
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment