Commit 9e000847 authored by Arun Siluvery, committed by Daniel Vetter

drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch

In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
instruction but there is a slight complication as this is applied in WA batch
where the values are only initialized once.
Dave identified an issue with the current implementation where the register value
is read once at the beginning and then reused; this patch corrects this by saving
the register value to memory, updating the register with the bit of interest set,
and then restoring the original value.

This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
by command parser and was using a default length of 0. This is now updated
with correct length and moved to appropriate place.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent 0bf73c36
...@@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { ...@@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
.mask = MI_GLOBAL_GTT, .mask = MI_GLOBAL_GTT,
.expected = 0, .expected = 0,
}}, ), }}, ),
CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC }, .reg = { .offset = 1, .mask = 0x007FFFFC },
.bits = {{ .bits = {{
.offset = 0, .offset = 0,
...@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, ...@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
* only MI_LOAD_REGISTER_IMM commands. * only MI_LOAD_REGISTER_IMM commands.
*/ */
if (reg_addr == OACONTROL) { if (reg_addr == OACONTROL) {
if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n");
return false; return false;
} }
...@@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, ...@@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
* allowed mask/value pair given in the whitelist entry. * allowed mask/value pair given in the whitelist entry.
*/ */
if (reg->mask) { if (reg->mask) {
if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
reg_addr); reg_addr);
return false; return false;
......
...@@ -354,6 +354,8 @@ ...@@ -354,6 +354,8 @@
#define MI_INVALIDATE_BSD (1<<7) #define MI_INVALIDATE_BSD (1<<7)
#define MI_FLUSH_DW_USE_GTT (1<<2) #define MI_FLUSH_DW_USE_GTT (1<<2)
#define MI_FLUSH_DW_USE_PPGTT (0<<2) #define MI_FLUSH_DW_USE_PPGTT (0<<2)
#define MI_LOAD_REGISTER_MEM(x) MI_INSTR(0x29, 2*(x)-1)
#define MI_LOAD_REGISTER_MEM_GEN8(x) MI_INSTR(0x29, 3*(x)-1)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1) #define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
...@@ -459,7 +461,6 @@ ...@@ -459,7 +461,6 @@
#define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_CLFLUSH MI_INSTR(0x27, 0)
#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
#define MI_REPORT_PERF_COUNT_GGTT (1<<0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0)
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 0)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) #define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) #define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
......
...@@ -1092,6 +1092,56 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) ...@@ -1092,6 +1092,56 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
batch[index++] = (cmd); \ batch[index++] = (cmd); \
} while (0) } while (0)
/*
* In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
* PIPE_CONTROL instruction. This is required for the flush to happen correctly
* but there is a slight complication as this is applied in WA batch where the
* values are only initialized once so we cannot take register value at the
* beginning and reuse it further; hence we save its value to memory, upload a
* constant value with bit21 set and then we restore it back with the saved value.
* To simplify the WA, a constant value is formed by using the default value
* of this register. This shouldn't be a problem because we are only modifying
* it for a short period and this batch is non-preemptible. We can of course
* use additional instructions that read the actual value of the register
* at that time and set our bit of interest but it makes the WA complicated.
*
* This WA is also required for Gen9 so extracting as a function avoids
* code duplication.
*/
static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
						uint32_t *const batch,
						uint32_t index)
{
	/*
	 * Address used to park the live GEN8_L3SQCREG4 value while the flush
	 * bit is forced on: 256 bytes past ring->scratch.gtt_offset.
	 */
	const uint32_t saved_reg_addr = ring->scratch.gtt_offset + 256;

	/*
	 * The whole workaround expressed as an ordered list of dwords. The
	 * order is significant: save, override, flush, restore.
	 */
	const uint32_t cmds[] = {
		/* 1. Store the current register value to memory. */
		(MI_STORE_REGISTER_MEM_GEN8(1) | MI_SRM_LRM_GLOBAL_GTT),
		GEN8_L3SQCREG4,
		saved_reg_addr,
		0,

		/*
		 * 2. Load a constant built from the register's default value
		 *    with the "flush coherent lines" bit set.
		 */
		MI_LOAD_REGISTER_IMM(1),
		GEN8_L3SQCREG4,
		(0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES),

		/* 3. PIPE_CONTROL (6 dwords) with CS stall and DC flush. */
		GFX_OP_PIPE_CONTROL(6),
		(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE),
		0,
		0,
		0,
		0,

		/* 4. Reload the register from the value saved in step 1. */
		(MI_LOAD_REGISTER_MEM_GEN8(1) | MI_SRM_LRM_GLOBAL_GTT),
		GEN8_L3SQCREG4,
		saved_reg_addr,
		0,
	};
	uint32_t i;

	/*
	 * wa_ctx_emit() writes through 'batch' at 'index' and advances 'index'.
	 * NOTE(review): its full definition is not visible here; the caller
	 * tests the result for < 0, so it presumably can bail out with an
	 * error code -- confirm against the macro.
	 */
	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++) {
		wa_ctx_emit(batch, cmds[i]);
	}

	/* Hand the updated write position back to the caller. */
	return index;
}
static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
uint32_t offset, uint32_t offset,
uint32_t start_alignment) uint32_t start_alignment)
...@@ -1152,25 +1202,9 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring, ...@@ -1152,25 +1202,9 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
if (IS_BROADWELL(ring->dev)) { if (IS_BROADWELL(ring->dev)) {
struct drm_i915_private *dev_priv = to_i915(ring->dev); index = gen8_emit_flush_coherentl3_wa(ring, batch, index);
uint32_t l3sqc4_flush = (I915_READ(GEN8_L3SQCREG4) | if (index < 0)
GEN8_LQSC_FLUSH_COHERENT_LINES); return index;
wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
wa_ctx_emit(batch, GEN8_L3SQCREG4);
wa_ctx_emit(batch, l3sqc4_flush);
wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_DC_FLUSH_ENABLE));
wa_ctx_emit(batch, 0);
wa_ctx_emit(batch, 0);
wa_ctx_emit(batch, 0);
wa_ctx_emit(batch, 0);
wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
wa_ctx_emit(batch, GEN8_L3SQCREG4);
wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
} }
/* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment