Commit d1813ca2 authored by Chris Wilson

drm/i915/gt: Clear LRC image inline

When creating the initial LRC image, we also want to clear the MI_NOOPs
and register values. Rather than use a blanket memset beforehand, apply
the clears inline, close the context image and force inhibition of the
uninitialised remainder.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200102131707.1463945-2-chris@chris-wilson.co.uk
parent 6a505e64
...@@ -488,9 +488,15 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) ...@@ -488,9 +488,15 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
return desc; return desc;
} }
static u32 *set_offsets(u32 *regs, static inline unsigned int dword_in_page(void *addr)
{
return offset_in_page(addr) / sizeof(u32);
}
static void set_offsets(u32 *regs,
const u8 *data, const u8 *data,
const struct intel_engine_cs *engine) const struct intel_engine_cs *engine,
bool clear)
#define NOP(x) (BIT(7) | (x)) #define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0) #define POSTED BIT(0)
...@@ -498,7 +504,7 @@ static u32 *set_offsets(u32 *regs, ...@@ -498,7 +504,7 @@ static u32 *set_offsets(u32 *regs,
#define REG16(x) \ #define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f) (((x) >> 2) & 0x7f)
#define END() 0 #define END(x) 0, (x)
{ {
const u32 base = engine->mmio_base; const u32 base = engine->mmio_base;
...@@ -506,7 +512,10 @@ static u32 *set_offsets(u32 *regs, ...@@ -506,7 +512,10 @@ static u32 *set_offsets(u32 *regs,
u8 count, flags; u8 count, flags;
if (*data & BIT(7)) { /* skip */ if (*data & BIT(7)) { /* skip */
regs += *data++ & ~BIT(7); count = *data++ & ~BIT(7);
if (clear)
memset32(regs, MI_NOOP, count);
regs += count;
continue; continue;
} }
...@@ -532,12 +541,25 @@ static u32 *set_offsets(u32 *regs, ...@@ -532,12 +541,25 @@ static u32 *set_offsets(u32 *regs,
offset |= v & ~BIT(7); offset |= v & ~BIT(7);
} while (v & BIT(7)); } while (v & BIT(7));
*regs = base + (offset << 2); regs[0] = base + (offset << 2);
if (clear)
regs[1] = 0;
regs += 2; regs += 2;
} while (--count); } while (--count);
} }
return regs; if (clear) {
u8 count = *++data;
/* Clear past the tail for HW access */
GEM_BUG_ON(dword_in_page(regs) > count);
memset32(regs, MI_NOOP, count - dword_in_page(regs));
/* Close the batch; used mainly by live_lrc_layout() */
*regs = MI_BATCH_BUFFER_END;
if (INTEL_GEN(engine->i915) >= 10)
*regs |= BIT(0);
}
} }
static const u8 gen8_xcs_offsets[] = { static const u8 gen8_xcs_offsets[] = {
...@@ -572,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = { ...@@ -572,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = {
REG16(0x200), REG16(0x200),
REG(0x028), REG(0x028),
END(), END(80)
}; };
static const u8 gen9_xcs_offsets[] = { static const u8 gen9_xcs_offsets[] = {
...@@ -656,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = { ...@@ -656,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = {
REG16(0x67c), REG16(0x67c),
REG(0x068), REG(0x068),
END(), END(176)
}; };
static const u8 gen12_xcs_offsets[] = { static const u8 gen12_xcs_offsets[] = {
...@@ -688,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = { ...@@ -688,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274), REG16(0x274),
REG16(0x270), REG16(0x270),
END(), END(80)
}; };
static const u8 gen8_rcs_offsets[] = { static const u8 gen8_rcs_offsets[] = {
...@@ -725,7 +747,7 @@ static const u8 gen8_rcs_offsets[] = { ...@@ -725,7 +747,7 @@ static const u8 gen8_rcs_offsets[] = {
LRI(1, 0), LRI(1, 0),
REG(0x0c8), REG(0x0c8),
END(), END(80)
}; };
static const u8 gen9_rcs_offsets[] = { static const u8 gen9_rcs_offsets[] = {
...@@ -809,7 +831,7 @@ static const u8 gen9_rcs_offsets[] = { ...@@ -809,7 +831,7 @@ static const u8 gen9_rcs_offsets[] = {
REG16(0x67c), REG16(0x67c),
REG(0x68), REG(0x68),
END() END(176)
}; };
static const u8 gen11_rcs_offsets[] = { static const u8 gen11_rcs_offsets[] = {
...@@ -850,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = { ...@@ -850,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = {
LRI(1, 0), LRI(1, 0),
REG(0x0c8), REG(0x0c8),
END(), END(80)
}; };
static const u8 gen12_rcs_offsets[] = { static const u8 gen12_rcs_offsets[] = {
...@@ -891,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = { ...@@ -891,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = {
LRI(1, 0), LRI(1, 0),
REG(0x0c8), REG(0x0c8),
END(), END(80)
}; };
#undef END #undef END
...@@ -1529,7 +1551,7 @@ static bool can_merge_rq(const struct i915_request *prev, ...@@ -1529,7 +1551,7 @@ static bool can_merge_rq(const struct i915_request *prev,
static void virtual_update_register_offsets(u32 *regs, static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine) struct intel_engine_cs *engine)
{ {
set_offsets(regs, reg_offsets(engine), engine); set_offsets(regs, reg_offsets(engine), engine, false);
} }
static bool virtual_matches(const struct virtual_engine *ve, static bool virtual_matches(const struct virtual_engine *ve,
...@@ -4043,15 +4065,19 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) ...@@ -4043,15 +4065,19 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
static void init_common_reg_state(u32 * const regs, static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine, const struct intel_engine_cs *engine,
const struct intel_ring *ring) const struct intel_ring *ring,
bool inhibit)
{ {
regs[CTX_CONTEXT_CONTROL] = u32 ctl;
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
if (inhibit)
ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
if (INTEL_GEN(engine->i915) < 11) if (INTEL_GEN(engine->i915) < 11)
regs[CTX_CONTEXT_CONTROL] |= ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE); CTX_CTRL_RS_CTX_ENABLE);
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
} }
...@@ -4109,7 +4135,7 @@ static void execlists_init_reg_state(u32 *regs, ...@@ -4109,7 +4135,7 @@ static void execlists_init_reg_state(u32 *regs,
const struct intel_context *ce, const struct intel_context *ce,
const struct intel_engine_cs *engine, const struct intel_engine_cs *engine,
const struct intel_ring *ring, const struct intel_ring *ring,
bool close) bool inhibit)
{ {
/* /*
* A context is actually a big batch buffer with several * A context is actually a big batch buffer with several
...@@ -4121,15 +4147,9 @@ static void execlists_init_reg_state(u32 *regs, ...@@ -4121,15 +4147,9 @@ static void execlists_init_reg_state(u32 *regs,
* *
* Must keep consistent with virtual_update_register_offsets(). * Must keep consistent with virtual_update_register_offsets().
*/ */
u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); set_offsets(regs, reg_offsets(engine), engine, inhibit);
if (close) { /* Close the batch; used mainly by live_lrc_layout() */ init_common_reg_state(regs, engine, ring, inhibit);
*bbe = MI_BATCH_BUFFER_END;
if (INTEL_GEN(engine->i915) >= 10)
*bbe |= BIT(0);
}
init_common_reg_state(regs, engine, ring);
init_ppgtt_reg_state(regs, vm_alias(ce->vm)); init_ppgtt_reg_state(regs, vm_alias(ce->vm));
init_wa_bb_reg_state(regs, engine, init_wa_bb_reg_state(regs, engine,
...@@ -4148,7 +4168,6 @@ populate_lr_context(struct intel_context *ce, ...@@ -4148,7 +4168,6 @@ populate_lr_context(struct intel_context *ce,
{ {
bool inhibit = true; bool inhibit = true;
void *vaddr; void *vaddr;
u32 *regs;
int ret; int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
...@@ -4178,11 +4197,8 @@ populate_lr_context(struct intel_context *ce, ...@@ -4178,11 +4197,8 @@ populate_lr_context(struct intel_context *ce,
/* The second page of the context object contains some fields which must /* The second page of the context object contains some fields which must
* be set up prior to the first execution. */ * be set up prior to the first execution. */
regs = vaddr + LRC_STATE_PN * PAGE_SIZE; execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
execlists_init_reg_state(regs, ce, engine, ring, inhibit); ce, engine, ring, inhibit);
if (inhibit)
regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
ret = 0; ret = 0;
err_unpin_ctx: err_unpin_ctx:
......
...@@ -3362,7 +3362,7 @@ static int live_lrc_layout(void *arg) ...@@ -3362,7 +3362,7 @@ static int live_lrc_layout(void *arg)
struct intel_gt *gt = arg; struct intel_gt *gt = arg;
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
u32 *mem; u32 *lrc;
int err; int err;
/* /*
...@@ -3370,13 +3370,13 @@ static int live_lrc_layout(void *arg) ...@@ -3370,13 +3370,13 @@ static int live_lrc_layout(void *arg)
* match the layout saved by HW. * match the layout saved by HW.
*/ */
mem = kmalloc(PAGE_SIZE, GFP_KERNEL); lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!mem) if (!lrc)
return -ENOMEM; return -ENOMEM;
err = 0; err = 0;
for_each_engine(engine, gt, id) { for_each_engine(engine, gt, id) {
u32 *hw, *lrc; u32 *hw;
int dw; int dw;
if (!engine->default_state) if (!engine->default_state)
...@@ -3390,8 +3390,7 @@ static int live_lrc_layout(void *arg) ...@@ -3390,8 +3390,7 @@ static int live_lrc_layout(void *arg)
} }
hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
lrc = memset(mem, 0, PAGE_SIZE); execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
execlists_init_reg_state(lrc,
engine->kernel_context, engine->kernel_context,
engine, engine,
engine->kernel_context->ring, engine->kernel_context->ring,
...@@ -3461,7 +3460,7 @@ static int live_lrc_layout(void *arg) ...@@ -3461,7 +3460,7 @@ static int live_lrc_layout(void *arg)
break; break;
} }
kfree(mem); kfree(lrc);
return err; return err;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment