// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * Hardware workarounds are register programming documented to be executed in
 * the driver that fall outside of the normal programming sequences for a
 * platform. There are some basic categories of workarounds, depending on
 * how/when they are applied:
 *
 * - Context workarounds: workarounds that touch registers that are
 *   saved/restored to/from the HW context image. The list is emitted (via Load
 *   Register Immediate commands) once when initializing the device and saved in
 *   the default context. That default context is then used on every context
 *   creation to have a "primed golden context", i.e. a context image that
 *   already contains the changes needed to all the registers.
 *
 * - Engine workarounds: the list of these WAs is applied whenever the specific
 *   engine is reset. It's also possible that a set of engine classes share a
 *   common power domain and they are reset together. This happens on some
 *   platforms with render and compute engines. In this case (at least) one of
 *   them needs to keep the workaround programming: the approach taken in the
 *   driver is to tie those workarounds to the first compute/render engine that
 *   is registered. When executing with GuC submission, engine resets are
 *   outside of kernel driver control, hence the list of registers involved is
 *   written once, on engine initialization, and then passed to GuC, which
 *   saves/restores their values before/after the reset takes place. See
 *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
 *
 * - GT workarounds: the list of these WAs is applied whenever these registers
 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
 *
 * - Register whitelist: some workarounds need to be implemented in userspace,
 *   but need to touch privileged registers. The whitelist in the kernel
 *   instructs the hardware to allow the access to happen. From the kernel side,
 *   this is just a special case of a MMIO workaround (as we write the list of
 *   these to-be-whitelisted registers to some special HW registers).
 *
 * - Workaround batchbuffers: buffers that get executed automatically by the
 *   hardware on every HW context restore. These buffers are created and
 *   programmed in the default context so the hardware always goes through those
 *   programming sequences when switching contexts. The support for workaround
 *   batchbuffers is enabled by these hardware mechanisms:
 *
 *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
 *      context, pointing the hardware to jump to that location when that offset
 *      is reached in the context restore. The workaround batchbuffer in the
 *      driver currently uses this mechanism for all platforms.
 *
 *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
 *      pointing the hardware to a buffer to continue executing after the
 *      engine registers are restored in a context restore sequence. This is
 *      currently not used in the driver.
 *
 * - Other: There are WAs that, due to their nature, cannot be applied from a
 *   central place. Those are peppered around the rest of the code, as needed.
 *   Workarounds related to the display IP are the main example.
 *
 * .. [1] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things, so it's the approach taken in the driver.
 */
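
/*
 * Illustrative example (hypothetical register/bit names): a context workaround
 * is typically registered from one of the per-platform *_ctx_workarounds_init()
 * functions below with a helper such as
 *
 *	wa_masked_en(wal, SOME_CHICKEN_REG, SOME_CHICKEN_BIT);
 *
 * which records a struct i915_wa entry in the list; depending on the list it
 * was added to, the entry is later emitted into the default ("golden") context
 * image, re-applied on reset, or handed to the GuC as described above.
 */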

static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
			  const char *name, const char *engine_name)
{
	wal->gt = gt;
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
		wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	struct drm_i915_private *i915 = wal->gt->i915;
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			drm_err(&i915->drm, "No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}
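
	/*
	 * Binary search the (sorted) list for an existing entry for this
	 * register; if one is found, the clear/set/read masks are merged so
	 * each register appears at most once in the list.
	 */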

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				drm_err(&i915->drm,
					"Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					i915_mmio_reg_offset(wa_->reg),
					wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

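	/* Keep the list sorted by register offset: swap the new entry back into place. */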
	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

179
static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
180
		   u32 clear, u32 set, u32 read_mask, bool masked_reg)
181 182
{
	struct i915_wa wa = {
183
		.reg  = reg,
184 185
		.clr  = clear,
		.set  = set,
186
		.read = read_mask,
187
		.masked_reg = masked_reg,
188 189 190 191 192
	};

	_wa_add(wal, &wa);
}

193
static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
194 195 196
		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
197
		.mcr_reg = reg,
198 199 200 201 202 203 204 205 206 207
		.clr  = clear,
		.set  = set,
		.read = read_mask,
		.masked_reg = masked_reg,
		.is_mcr = 1,
	};

	_wa_add(wal, &wa);
}

208
static void
209
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
210
{
211
	wa_add(wal, reg, clear, set, clear, false);
212 213
}

214
static void
215
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
216 217 218 219
{
	wa_mcr_add(wal, reg, clear, set, clear, false);
}

220
static void
221 222
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
223
	wa_write_clr_set(wal, reg, ~0, set);
224 225 226 227
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
228
{
229
	wa_write_clr_set(wal, reg, set, set);
230 231
}

232
static void
233
wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
234 235 236 237
{
	wa_mcr_write_clr_set(wal, reg, set, set);
}

238 239 240
static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
241
	wa_write_clr_set(wal, reg, clr, 0);
242 243
}

244
static void
245
wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
246 247 248 249
{
	wa_mcr_write_clr_set(wal, reg, clr, 0);
}

/*
 * WA operations on "masked registers". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without an rmw: you simply write in the upper 16 bits
 * the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */
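
/*
 * For example (illustrative values only): wa_masked_en(wal, reg, BIT(3))
 * records a write of _MASKED_BIT_ENABLE(BIT(3)) == 0x00080008, where the upper
 * 16 bits select which bits the write affects and the lower 16 bits supply
 * their new values, so no read-modify-write of the register is needed.
 */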

261
static void
262
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
263
{
264
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
265 266
}

267
static void
268
wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
269 270 271 272
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

273
static void
274
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
275
{
276
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
277 278
}

279
static void
280
wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
281 282 283 284
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

285 286 287 288
static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
		    u32 mask, u32 val)
{
289
	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
290
}
291

292
static void
293
wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
294 295 296 297 298
			u32 mask, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}

299 300 301
static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
302
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
303 304 305 306 307
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
308
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
309 310
}

311 312
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
313
{
314
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
315 316

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
317
	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
318 319

	/* WaDisablePartialInstShootdown:bdw,chv */
320 321
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
322 323

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
324
	 * workaround for a possible hang in the unlikely event a TLB
325 326 327 328
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
329 330 331
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
		     HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
341
	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
342 343

	/* Wa4x4STCOptimizationDisable:bdw,chv */
344
	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
345 346 347 348 349 350 351 352 353

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
354
	wa_masked_field_set(wal, GEN7_GT_MODE,
355 356 357 358
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

359 360
static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
361
{
362
	struct drm_i915_private *i915 = engine->i915;
363

364
	gen8_ctx_workarounds_init(engine, wal);
365 366

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
367
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
368 369 370

	/* WaDisableDopClockGating:bdw
	 *
371
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
372 373
	 * to disable EUTC clock gating.
	 */
374 375
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
			 DOP_CLOCK_GATING_DISABLE);
376

377 378
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
			 GEN8_SAMPLER_POWER_BYPASS_DIS);
379

380 381 382 383 384
	wa_masked_en(wal, HDC_CHICKEN0,
		     /* WaForceContextSaveRestoreNonCoherent:bdw */
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
		     (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
385 386
}

387 388
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
389
{
390
	gen8_ctx_workarounds_init(engine, wal);
391 392

	/* WaDisableThreadStallDopClockGating:chv */
393
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
394 395

	/* Improve HiZ throughput on CHV. */
396
	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
397 398
}

399 400
static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
401
{
402 403 404
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
405 406 407 408 409
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
410 411
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
412 413
		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
414 415 416 417
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
418 419 420
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 FLOW_CONTROL_ENABLE |
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
421 422 423

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
424 425 426
	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
			 GEN9_ENABLE_YV12_BUGFIX |
			 GEN9_ENABLE_GPGPU_PREEMPTION);
427 428 429

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
430 431 432
	wa_masked_en(wal, CACHE_MODE_1,
		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
433 434

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
435 436
	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);
437 438

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
439 440 441
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
457 458
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_NON_COHERENT);
459 460

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
461 462 463 464
	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915))
465 466
		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
				 GEN8_SAMPLER_POWER_BYPASS_DIS);
467 468

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
469
	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
483
	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
484 485

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
486
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
487 488 489
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

490
	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
491
	if (IS_GEN9_LP(i915))
492
		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
493 494
}

495 496
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
497
{
498
	struct intel_gt *gt = engine->gt;
499 500 501 502 503 504 505 506 507 508
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
509
		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
510 511 512 513 514 515 516 517
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
518
		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
519 520 521 522
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
523
		return;
524 525

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
526
	wa_masked_field_set(wal, GEN7_GT_MODE,
527 528 529 530 531 532 533 534
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

535 536
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
537
{
538 539
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
540
}
541

542 543
static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
544
{
545
	gen9_ctx_workarounds_init(engine, wal);
546

547
	/* WaDisableThreadStallDopClockGating:bxt */
548 549
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 STALL_DOP_GATING_DISABLE);
550 551

	/* WaToEnableHwFixForPushConstHWBug:bxt */
552 553
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
554 555
}

556 557
static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
558
{
559
	struct drm_i915_private *i915 = engine->i915;
560

561
	gen9_ctx_workarounds_init(engine, wal);
562

563
	/* WaToEnableHwFixForPushConstHWBug:kbl */
564
	if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
565 566
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
567

568
	/* WaDisableSbeCacheDispatchPortSharing:kbl */
569 570
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
571 572
}

573 574
static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
575
{
576
	gen9_ctx_workarounds_init(engine, wal);
577 578

	/* WaToEnableHwFixForPushConstHWBug:glk */
579 580
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
581 582
}

583 584
static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
585
{
586
	gen9_ctx_workarounds_init(engine, wal);
587 588

	/* WaToEnableHwFixForPushConstHWBug:cfl */
589 590
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
591

592
	/* WaDisableSbeCacheDispatchPortSharing:cfl */
593 594
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
595 596
}

597 598
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
599
{
600
	/* Wa_1406697149 (WaDisableBankHangMode:icl) */
601 602 603 604 605
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

606 607 608 609 610 611 612
	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
613
	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
614

615
	/* WaEnableFloatBlendOptimization:icl */
616 617 618 619
	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
		   0 /* write-only, so skip validation */,
		   true);
620 621

	/* WaDisableGPGPUMidThreadPreemption:icl */
622
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
623 624
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
625 626

	/* allow headerless messages for preemptible GPGPU context */
627 628
	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
629 630 631

	/* Wa_1604278689:icl,ehl */
	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
632 633 634
	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
			 0, /* write-only register; skip validation */
			 0xFFFFFFFF);
635 636

	/* Wa_1406306137:icl,ehl */
637
	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
638 639
}

640 641 642 643 644 645 646
/*
 * These settings aren't actually workarounds, but general tuning settings that
 * need to be programmed on the DG2 platform.
 */
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
647
	wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
648 649 650 651 652 653 654
	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
	wa_mcr_add(wal,
		   XEHP_FF_MODE2,
		   FF_MODE2_TDS_TIMER_MASK,
		   FF_MODE2_TDS_TIMER_128,
		   0, false);
655 656
}

/*
 * These settings aren't actually workarounds, but general tuning settings that
 * need to be programmed on several platforms.
 */
static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/*
	 * Although some platforms refer to it as Wa_1604555607, we need to
	 * program it even on those that don't explicitly list that
	 * workaround.
	 *
	 * Note that the programming of this register is further modified
	 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
	 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
	 * value when read. The default value for this register is zero for all
	 * fields and there are no bit masks. So instead of doing a RMW we
	 * should just write the TDS timer value. For the same reason read
	 * verification is ignored.
	 */
	wa_add(wal,
678
	       GEN12_FF_MODE2,
679 680
	       FF_MODE2_TDS_TIMER_MASK,
	       FF_MODE2_TDS_TIMER_128,
681
	       0, false);
682 683
}

684 685
static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
686
{
687 688
	struct drm_i915_private *i915 = engine->i915;

689 690
	gen12_ctx_gt_tuning_init(engine, wal);

691
	/*
692 693 694 695 696 697 698 699 700 701
	 * Wa_1409142259:tgl,dg1,adl-p
	 * Wa_1409347922:tgl,dg1,adl-p
	 * Wa_1409252684:tgl,dg1,adl-p
	 * Wa_1409217633:tgl,dg1,adl-p
	 * Wa_1409207793:tgl,dg1,adl-p
	 * Wa_1409178076:tgl,dg1,adl-p
	 * Wa_1408979724:tgl,dg1,adl-p
	 * Wa_14010443199:tgl,rkl,dg1,adl-p
	 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
	 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
702
	 */
703 704
	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
705

706
	/* WaDisableGPGPUMidThreadPreemption:gen12 */
707
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
708 709 710
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

711
	/*
712
	 * Wa_16011163337
713
	 *
714 715
	 * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
	 * to Wa_1608008084.
716
	 */
717
	wa_add(wal,
718
	       GEN12_FF_MODE2,
719 720
	       FF_MODE2_GS_TIMER_MASK,
	       FF_MODE2_GS_TIMER_224,
721
	       0, false);
722 723 724 725

	if (!IS_DG1(i915))
		/* Wa_1806527549 */
		wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
726 727
}

728 729 730 731 732 733
static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen12_ctx_workarounds_init(engine, wal);

	/* Wa_1409044764 */
734 735
	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
736 737

	/* Wa_22010493298 */
738 739
	wa_masked_en(wal, HIZ_CHICKEN,
		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
740 741
}

742 743 744
static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
745
	dg2_ctx_gt_tuning_init(engine, wal);
746 747 748

	/* Wa_16011186671:dg2_g11 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
749 750
		wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
		wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
751 752 753 754
	}

	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_14010469329:dg2_g10 */
755 756
		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
				 XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
757 758 759 760 761 762

		/*
		 * Wa_22010465075:dg2_g10
		 * Wa_22010613112:dg2_g10
		 * Wa_14010698770:dg2_g10
		 */
763 764
		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
				 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
765 766 767
	}

	/* Wa_16013271637:dg2 */
768 769
	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
770 771 772

	/* Wa_14014947963:dg2 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
773
	    IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
774
		wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
775

776 777 778 779 780
	/* Wa_18018764978:dg2 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
	    IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
		wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);

781 782
	/* Wa_15010599737:dg2 */
	wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
783 784 785

	/* Wa_18019271663:dg2 */
	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
786 787
}

static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
					 struct i915_wa_list *wal)
{
	/*
	 * This is a "fake" workaround defined by software to ensure we
	 * maintain reliable, backward-compatible behavior for userspace with
	 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
	 *
	 * The per-context setting of MI_MODE[12] determines whether the bits
	 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
	 * in the traditional manner or whether they should instead use a new
	 * tgl+ meaning that breaks backward compatibility, but allows nesting
	 * into 3rd-level batchbuffers.  When this new capability was first
	 * added in TGL, it remained off by default unless a context
	 * intentionally opted in to the new behavior.  However Xe_HPG now
	 * flips this on by default and requires that we explicitly opt out if
	 * we don't want the new behavior.
	 *
	 * From a SW perspective, we want to maintain the backward-compatible
	 * behavior for userspace, so we'll apply a fake workaround to set it
	 * back to the legacy behavior on platforms where the hardware default
	 * is to break compatibility.  At the moment there is no Linux
	 * userspace that utilizes third-level batchbuffers, so this will avoid
	 * userspace needing to make any changes; using the legacy meaning is
	 * the correct thing to do.  If/when we have userspace
	 * consumers that want to utilize third-level batch nesting, we can
	 * provide a context parameter to allow them to opt-in.
	 */
	wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
}

static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	u8 mocs;

	/*
	 * Some blitter commands do not have a field for MOCS, those
	 * commands will use MOCS index pointed by BLIT_CCTL.
	 * BLIT_CCTL registers are needed to be programmed to un-cached.
	 */
	if (engine->class == COPY_ENGINE_CLASS) {
		mocs = engine->gt->mocs.uc_index;
		wa_write_clr_set(wal,
				 BLIT_CCTL(engine->mmio_base),
				 BLIT_CCTL_MASK,
				 BLIT_CCTL_MOCS(mocs, mocs));
	}
}

/*
 * gen12_ctx_gt_fake_wa_init() doesn't program an official workaround
 * defined by the hardware team, but rather some general context registers.
 * Adding this context register programming to the context workaround list
 * allows us to use the wa framework for proper application and validation.
 */
static void
gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
			  struct i915_wa_list *wal)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
		fakewa_disable_nestedbb_mode(engine, wal);

	gen12_ctx_gt_mocs_init(engine, wal);
}

854 855 856 857
static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
858
{
859 860
	struct drm_i915_private *i915 = engine->i915;

861
	wa_init_start(wal, engine->gt, name, engine->name);
862 863

	/* Applies to all engines */
864 865 866 867 868 869
	/*
	 * Fake workarounds are not actual workarounds but programming of
	 * context registers using the workaround framework.
	 */
	if (GRAPHICS_VER(i915) >= 12)
		gen12_ctx_gt_fake_wa_init(engine, wal);
870

871
	if (engine->class != RENDER_CLASS)
872
		goto done;
873

874 875 876
	if (IS_PONTEVECCHIO(i915))
		; /* noop; none at this time */
	else if (IS_DG2(i915))
877 878
		dg2_ctx_workarounds_init(engine, wal);
	else if (IS_XEHPSDV(i915))
879 880
		; /* noop; none at this time */
	else if (IS_DG1(i915))
881
		dg1_ctx_workarounds_init(engine, wal);
882
	else if (GRAPHICS_VER(i915) == 12)
883
		gen12_ctx_workarounds_init(engine, wal);
884
	else if (GRAPHICS_VER(i915) == 11)
885
		icl_ctx_workarounds_init(engine, wal);
886
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
887
		cfl_ctx_workarounds_init(engine, wal);
888
	else if (IS_GEMINILAKE(i915))
889
		glk_ctx_workarounds_init(engine, wal);
890
	else if (IS_KABYLAKE(i915))
891
		kbl_ctx_workarounds_init(engine, wal);
892
	else if (IS_BROXTON(i915))
893
		bxt_ctx_workarounds_init(engine, wal);
894
	else if (IS_SKYLAKE(i915))
895
		skl_ctx_workarounds_init(engine, wal);
896
	else if (IS_CHERRYVIEW(i915))
897
		chv_ctx_workarounds_init(engine, wal);
898
	else if (IS_BROADWELL(i915))
899
		bdw_ctx_workarounds_init(engine, wal);
900
	else if (GRAPHICS_VER(i915) == 7)
901
		gen7_ctx_workarounds_init(engine, wal);
902
	else if (GRAPHICS_VER(i915) == 6)
903
		gen6_ctx_workarounds_init(engine, wal);
904
	else if (GRAPHICS_VER(i915) < 8)
905
		;
906
	else
907
		MISSING_CASE(GRAPHICS_VER(i915));
908

909
done:
910
	wa_init_finish(wal);
911 912
}

913 914 915 916 917
void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

918
int intel_engine_emit_ctx_wa(struct i915_request *rq)
919
{
920 921 922
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
923
	u32 *cs;
924
	int ret;
925

926
	if (wal->count == 0)
927 928 929
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
930 931 932
	if (ret)
		return ret;

933
	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
934 935 936
	if (IS_ERR(cs))
		return PTR_ERR(cs);

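	/*
	 * The whole list is emitted as one MI_LOAD_REGISTER_IMM packet: a
	 * (register offset, value) pair per workaround, followed by an MI_NOOP
	 * so the total number of emitted dwords stays even.
	 */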
	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
940
		*cs++ = wa->set;
941 942 943 944 945 946 947 948 949 950 951 952
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

953
static void
954
gen4_gt_workarounds_init(struct intel_gt *gt,
955
			 struct i915_wa_list *wal)
956
{
957 958 959 960 961
	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
962
g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
963
{
964
	gen4_gt_workarounds_init(gt, wal);
965

966
	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
967
	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
968
}
969

970
static void
971
ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
972
{
973
	g4x_gt_workarounds_init(gt, wal);
974 975

	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
976 977
}

978
static void
979
snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
980 981 982
{
}

983
static void
984
ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	wa_masked_dis(wal,
		      GEN7_COMMON_SLICE_CHICKEN1,
		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

	/* WaForceL3Serialization:ivb */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

999
static void
1000
vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* WaForceL3Serialization:vlv */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

1012
static void
1013
hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* L3 caching of data atomics doesn't work -- disable it. */
	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

	wa_add(wal,
	       HSW_ROW_CHICKEN3, 0,
	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
1021
	       0 /* XXX does this reg exist? */, true);
1022 1023 1024 1025 1026

	/* WaVSRefCountFullforceMissDisable:hsw */
	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

1027 1028 1029
static void
gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
1030
	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
	unsigned int slice, subslice;
	u32 mcr, mcr_mask;

	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);

	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	slice = ffs(sseu->slice_mask) - 1;
1048 1049
	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
	GEM_BUG_ON(!subslice);
	subslice--;

	/*
	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
	 */
	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;

	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);

	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

1065
static void
1066
gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1067
{
1068 1069
	struct drm_i915_private *i915 = gt->i915;

1070 1071 1072
	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
	gen9_wa_init_mcr(i915, wal);

1073
	/* WaDisableKillLogic:bxt,skl,kbl */
1074
	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
1075 1076 1077
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);
1078

1079
	if (HAS_LLC(i915)) {
1080 1081 1082 1083 1084
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
1085 1086 1087
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
1088 1089 1090
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1091 1092 1093
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
1094 1095
}

1096
static void
1097
skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1098
{
1099
	gen9_gt_workarounds_init(gt, wal);
1100 1101

	/* WaDisableGafsUnitClkGating:skl */
1102 1103 1104
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1105 1106

	/* WaInPlaceDecompressionHang:skl */
1107
	if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
1108 1109 1110
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1111 1112
}

1113
static void
1114
kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1115
{
1116
	gen9_gt_workarounds_init(gt, wal);
1117

1118
	/* WaDisableDynamicCreditSharing:kbl */
1119
	if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
1120 1121 1122
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1123

1124
	/* WaDisableGafsUnitClkGating:kbl */
1125 1126 1127
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1128

1129
	/* WaInPlaceDecompressionHang:kbl */
1130 1131 1132
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1133
}
1134

1135
static void
1136
glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1137
{
1138
	gen9_gt_workarounds_init(gt, wal);
1139 1140
}

1141
static void
1142
cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1143
{
1144
	gen9_gt_workarounds_init(gt, wal);
1145 1146

	/* WaDisableGafsUnitClkGating:cfl */
1147 1148 1149
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1150

1151
	/* WaInPlaceDecompressionHang:cfl */
1152 1153 1154
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1155
}
1156

1157 1158 1159
static void __set_mcr_steering(struct i915_wa_list *wal,
			       i915_reg_t steering_reg,
			       unsigned int slice, unsigned int subslice)
1160 1161 1162 1163 1164 1165
{
	u32 mcr, mcr_mask;

	mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;

1166 1167 1168
	wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
}

1169
static void debug_dump_steering(struct intel_gt *gt)
1170
{
1171
	struct drm_printer p = drm_debug_printer("MCR Steering:");
1172

1173 1174 1175 1176 1177 1178 1179
	if (drm_debug_enabled(DRM_UT_DRIVER))
		intel_gt_mcr_report_steering(&p, gt, false);
}

static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
			 unsigned int slice, unsigned int subslice)
{
1180
	__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
1181 1182 1183 1184

	gt->default_steering.groupid = slice;
	gt->default_steering.instanceid = subslice;

1185
	debug_dump_steering(gt);
1186 1187
}

1188
static void
1189
icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1190
{
1191
	const struct sseu_dev_info *sseu = &gt->info.sseu;
1192
	unsigned int subslice;
1193

1194
	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
1195
	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
1196

1197
	/*
1198 1199 1200 1201 1202 1203 1204
	 * Although a platform may have subslices, we need to always steer
	 * reads to the lowest instance that isn't fused off.  When Render
	 * Power Gating is enabled, grabbing forcewake will only power up a
	 * single subslice (the "minconfig") if there isn't a real workload
	 * that needs to be run; this means that if we steer register reads to
	 * one of the higher subslices, we run the risk of reading back 0's or
	 * random garbage.
1205
	 */
1206
	subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
1207

1208 1209 1210 1211 1212
	/*
	 * If the subslice we picked above also steers us to a valid L3 bank,
	 * then we can just rely on the default steering and won't need to
	 * worry about explicitly re-steering L3BANK reads later.
	 */
1213 1214
	if (gt->info.l3bank_mask & BIT(subslice))
		gt->steering_table[L3BANK] = NULL;
1215

1216
	__add_mcr_wa(gt, wal, 0, subslice);
1217
}
1218

1219 1220 1221 1222 1223 1224 1225
static void
xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	unsigned long slice, subslice = 0, slice_mask = 0;
	u32 lncf_mask = 0;
	int i;
1226

1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253
	/*
	 * On Xe_HP the steering increases in complexity. There are now several
	 * more units that require steering and we're not guaranteed to be able
	 * to find a common setting for all of them. These are:
	 * - GSLICE (fusable)
	 * - DSS (sub-unit within gslice; fusable)
	 * - L3 Bank (fusable)
	 * - MSLICE (fusable)
	 * - LNCF (sub-unit within mslice; always present if mslice is present)
	 *
	 * We'll do our default/implicit steering based on GSLICE (in the
	 * sliceid field) and DSS (in the subsliceid field).  If we can
	 * find overlap between the valid MSLICE and/or LNCF values with
	 * a suitable GSLICE, then we can just re-use the default value and
	 * skip explicit steering at runtime.
	 *
	 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
	 * a valid sliceid value.  DSS steering is the only type of steering
	 * that utilizes the 'subsliceid' bits.
	 *
	 * Also note that, even though the steering domain is called "GSlice"
	 * and it is encoded in the register using the gslice format, the spec
	 * says that the combined (geometry | compute) fuse should be used to
	 * select the steering.
	 */

	/* Find the potential gslice candidates */
1254 1255
	slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
						       GEN_DSS_PER_GSLICE);

	/*
	 * Find the potential LNCF candidates.  Either LNCF within a valid
	 * mslice is fine.
	 */
	for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
		lncf_mask |= (0x3 << (i * 2));

	/*
	 * Are there any sliceid values that work for both GSLICE and LNCF
	 * steering?
	 */
	if (slice_mask & lncf_mask) {
		slice_mask &= lncf_mask;
		gt->steering_table[LNCF] = NULL;
	}

	/* How about sliceid values that also work for MSLICE steering? */
	if (slice_mask & gt->info.mslice_mask) {
		slice_mask &= gt->info.mslice_mask;
		gt->steering_table[MSLICE] = NULL;
	}

1279 1280 1281
	if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
		gt->steering_table[GAM] = NULL;

1282
	slice = __ffs(slice_mask);
1283 1284
	subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
		GEN_DSS_PER_GSLICE;
1285

1286
	__add_mcr_wa(gt, wal, slice, subslice);

	/*
	 * SQIDI ranges are special because they use different steering
	 * registers than everything else we work with.  On XeHP SDV and
	 * DG2-G10, any value in the steering registers will work fine since
	 * all instances are present, but DG2-G11 only has SQIDI instances at
	 * ID's 2 and 3, so we need to steer to one of those.  For simplicity
	 * we'll just steer to a hardcoded "2" since that value will work
	 * everywhere.
	 */
	__set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
	__set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
1299 1300 1301 1302 1303 1304 1305

	/*
	 * On DG2, GAM registers have a dedicated steering control register
	 * and must always be programmed to a hardcoded groupid of "1."
	 */
	if (IS_DG2(gt->i915))
		__set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
1306 1307
}

static void
pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	unsigned int dss;

	/*
	 * Setup implicit steering for COMPUTE and DSS ranges to the first
	 * non-fused-off DSS.  All other types of MCR registers will be
	 * explicitly steered.
	 */
	dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
	__add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
}

1322
static void
1323
icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1324
{
1325 1326 1327
	struct drm_i915_private *i915 = gt->i915;

	icl_wa_init_mcr(gt, wal);
1328

1329
	/* WaModifyGamTlbPartitioning:icl */
1330 1331 1332 1333
	wa_write_clr_set(wal,
			 GEN11_GACB_PERF_CTRL,
			 GEN11_HASH_CTRL_MASK,
			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1334

1335 1336 1337
	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
1338 1339 1340 1341
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
1346 1347 1348
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
1353 1354 1355
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
1356

1357 1358 1359 1360 1361
	/* Wa_1407352427:icl,ehl */
	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
		    PSDUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl,ehl */
1362 1363 1364
	wa_mcr_write_or(wal,
			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
			GWUNIT_CLKGATE_DIS);
1365

1366 1367
	/* Wa_1607087056:icl,ehl,jsl */
	if (IS_ICELAKE(i915) ||
1368
	    IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1369
		wa_write_or(wal,
1370
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1371
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1372 1373 1374 1375 1376

	/*
	 * This is not a documented workaround, but rather an optimization
	 * to reduce sampler power.
	 */
1377
	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1378 1379
}

1380 1381 1382 1383 1384 1385 1386
/*
 * Though there are per-engine instances of these registers,
 * they retain their value through engine resets and should
 * only be provided on the GT workaround list rather than
 * the engine-specific workaround list.
 */
static void
1387
wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
{
	struct intel_engine_cs *engine;
	int id;

	for_each_engine(engine, gt, id) {
		if (engine->class != VIDEO_DECODE_CLASS ||
		    (engine->instance % 2))
			continue;

		wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
			    IECPUNIT_CLKGATE_DIS);
	}
}

1402
static void
1403
gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1404
{
1405
	icl_wa_init_mcr(gt, wal);
1406

1407
	/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1408
	wa_14011060649(gt, wal);
1409 1410

	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1411
	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1412 1413 1414
}

static void
1415
tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1416
{
1417 1418 1419
	struct drm_i915_private *i915 = gt->i915;

	gen12_gt_workarounds_init(gt, wal);
1420

1421
	/* Wa_1409420604:tgl */
1422
	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1423 1424 1425
		wa_mcr_write_or(wal,
				SUBSLICE_UNIT_LEVEL_CLKGATE2,
				CPSSUNIT_CLKGATE_DIS);
1426

1427
	/* Wa_1607087056:tgl also know as BUG:1409180338 */
1428
	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1429
		wa_write_or(wal,
1430
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1431
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1432 1433

	/* Wa_1408615072:tgl[a0] */
1434
	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1435 1436
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
1437 1438
}

1439
static void
1440
dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1441
{
1442 1443 1444
	struct drm_i915_private *i915 = gt->i915;

	gen12_gt_workarounds_init(gt, wal);
1445 1446

	/* Wa_1607087056:dg1 */
1447
	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1448
		wa_write_or(wal,
1449
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1450 1451 1452 1453
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/* Wa_1409420604:dg1 */
	if (IS_DG1(i915))
1454 1455 1456
		wa_mcr_write_or(wal,
				SUBSLICE_UNIT_LEVEL_CLKGATE2,
				CPSSUNIT_CLKGATE_DIS);
1457 1458 1459 1460 1461 1462 1463 1464

	/* Wa_1408615072:dg1 */
	/* Empirical testing shows this register is unaffected by engine reset. */
	if (IS_DG1(i915))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
}

1465
static void
1466
xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1467
{
1468 1469
	struct drm_i915_private *i915 = gt->i915;

1470
	xehp_init_mcr(gt, wal);
1471 1472

	/* Wa_1409757795:xehpsdv */
1473
	wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516

	/* Wa_16011155590:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    TSGUNIT_CLKGATE_DIS);

	/* Wa_14011780169:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
			    GAMTLBVDBOX7_CLKGATE_DIS |
			    GAMTLBVDBOX6_CLKGATE_DIS |
			    GAMTLBVDBOX5_CLKGATE_DIS |
			    GAMTLBVDBOX4_CLKGATE_DIS |
			    GAMTLBVDBOX3_CLKGATE_DIS |
			    GAMTLBVDBOX2_CLKGATE_DIS |
			    GAMTLBVDBOX1_CLKGATE_DIS |
			    GAMTLBVDBOX0_CLKGATE_DIS |
			    GAMTLBKCR_CLKGATE_DIS |
			    GAMTLBGUC_CLKGATE_DIS |
			    GAMTLBBLT_CLKGATE_DIS);
		wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
			    GAMTLBGFXA1_CLKGATE_DIS |
			    GAMTLBCOMPA0_CLKGATE_DIS |
			    GAMTLBCOMPA1_CLKGATE_DIS |
			    GAMTLBCOMPB0_CLKGATE_DIS |
			    GAMTLBCOMPB1_CLKGATE_DIS |
			    GAMTLBCOMPC0_CLKGATE_DIS |
			    GAMTLBCOMPC1_CLKGATE_DIS |
			    GAMTLBCOMPD0_CLKGATE_DIS |
			    GAMTLBCOMPD1_CLKGATE_DIS |
			    GAMTLBMERT_CLKGATE_DIS   |
			    GAMTLBVEBOX3_CLKGATE_DIS |
			    GAMTLBVEBOX2_CLKGATE_DIS |
			    GAMTLBVEBOX1_CLKGATE_DIS |
			    GAMTLBVEBOX0_CLKGATE_DIS);
	}

	/* Wa_16012725990:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);

	/* Wa_14011060649:xehpsdv */
	wa_14011060649(gt, wal);
1517 1518
}

1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct intel_engine_cs *engine;
	int id;

	xehp_init_mcr(gt, wal);

	/* Wa_14011060649:dg2 */
	wa_14011060649(gt, wal);

	/*
	 * Although there are per-engine instances of these registers,
	 * they technically exist outside the engine itself and are not
	 * impacted by engine resets.  Furthermore, they're part of the
	 * GuC blacklist so trying to treat them as engine workarounds
	 * will result in GuC initialization failure and a wedged GPU.
	 */
	for_each_engine(engine, gt, id) {
		if (engine->class != VIDEO_DECODE_CLASS)
			continue;

		/* Wa_16010515920:dg2_g10 */
		if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
			wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
				    ALNUNIT_CLKGATE_DIS);
	}

	if (IS_DG2_G10(gt->i915)) {
		/* Wa_22010523718:dg2 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    CG3DDISCFEG_CLKGATE_DIS);

		/* Wa_14011006942:dg2 */
1553 1554
		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
				DSS_ROUTER_CLKGATE_DIS);
1555 1556 1557 1558 1559 1560 1561 1562 1563 1564
	}

	if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_14010948348:dg2_g10 */
		wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);

		/* Wa_14011037102:dg2_g10 */
		wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);

		/* Wa_14011371254:dg2_g10 */
1565
		wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);

		/* Wa_14011431319:dg2_g10 */
		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
			    GAMTLBVDBOX7_CLKGATE_DIS |
			    GAMTLBVDBOX6_CLKGATE_DIS |
			    GAMTLBVDBOX5_CLKGATE_DIS |
			    GAMTLBVDBOX4_CLKGATE_DIS |
			    GAMTLBVDBOX3_CLKGATE_DIS |
			    GAMTLBVDBOX2_CLKGATE_DIS |
			    GAMTLBVDBOX1_CLKGATE_DIS |
			    GAMTLBVDBOX0_CLKGATE_DIS |
			    GAMTLBKCR_CLKGATE_DIS |
			    GAMTLBGUC_CLKGATE_DIS |
			    GAMTLBBLT_CLKGATE_DIS);
		wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
			    GAMTLBGFXA1_CLKGATE_DIS |
			    GAMTLBCOMPA0_CLKGATE_DIS |
			    GAMTLBCOMPA1_CLKGATE_DIS |
			    GAMTLBCOMPB0_CLKGATE_DIS |
			    GAMTLBCOMPB1_CLKGATE_DIS |
			    GAMTLBCOMPC0_CLKGATE_DIS |
			    GAMTLBCOMPC1_CLKGATE_DIS |
			    GAMTLBCOMPD0_CLKGATE_DIS |
			    GAMTLBCOMPD1_CLKGATE_DIS |
			    GAMTLBMERT_CLKGATE_DIS   |
			    GAMTLBVEBOX3_CLKGATE_DIS |
			    GAMTLBVEBOX2_CLKGATE_DIS |
			    GAMTLBVEBOX1_CLKGATE_DIS |
			    GAMTLBVEBOX0_CLKGATE_DIS);

		/* Wa_14010569222:dg2_g10 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    GAMEDIA_CLKGATE_DIS);

		/* Wa_14011028019:dg2_g10 */
1601
		wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
1602 1603 1604
	}

	/* Wa_14014830051:dg2 */
1605
	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1606 1607 1608 1609 1610 1611

	/*
	 * The following are not actually "workarounds" but rather
	 * recommended tuning settings documented in the bspec's
	 * performance guide section.
	 */
1612
	wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1613 1614

	/* Wa_14015795083 */
1615
	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}

static void
pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	pvc_init_mcr(gt, wal);

	/* Wa_14015795083 */
	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}

static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* FIXME: Actual workarounds will be added in future patch(es) */

	/*
	 * Unlike older platforms, we no longer set up implicit steering here;
	 * all MCR accesses are explicitly steered.
	 */
	debug_dump_steering(gt);
}

static void
xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* FIXME: Actual workarounds will be added in future patch(es) */

	debug_dump_steering(gt);
}

static void
gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	if (gt->type == GT_MEDIA) {
		if (MEDIA_VER(i915) >= 13)
			xelpmp_gt_workarounds_init(gt, wal);
		else
			MISSING_CASE(MEDIA_VER(i915));

		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_gt_workarounds_init(gt, wal);
	else if (IS_PONTEVECCHIO(i915))
		pvc_gt_workarounds_init(gt, wal);
	else if (IS_DG2(i915))
		dg2_gt_workarounds_init(gt, wal);
	else if (IS_XEHPSDV(i915))
		xehpsdv_gt_workarounds_init(gt, wal);
	else if (IS_DG1(i915))
		dg1_gt_workarounds_init(gt, wal);
	else if (IS_TIGERLAKE(i915))
		tgl_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_gt_workarounds_init(gt, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_gt_workarounds_init(gt, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(gt, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(gt, wal);
	else if (IS_BROXTON(i915))
		gen9_gt_workarounds_init(gt, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(gt, wal);
	else if (IS_HASWELL(i915))
		hsw_gt_workarounds_init(gt, wal);
	else if (IS_VALLEYVIEW(i915))
		vlv_gt_workarounds_init(gt, wal);
	else if (IS_IVYBRIDGE(i915))
		ivb_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 6)
		snb_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 5)
		ilk_gt_workarounds_init(gt, wal);
	else if (IS_G4X(i915))
		g4x_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 4)
		gen4_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));
}

void intel_gt_init_workarounds(struct intel_gt *gt)
{
	struct i915_wa_list *wal = &gt->wa_list;

	wa_init_start(wal, gt, "GT", "global");
	gt_init_workarounds(gt, wal);
	wa_init_finish(wal);
}
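
/*
 * Illustrative sketch (assumed call order, not enforced here): the GT flow
 * elsewhere in the driver is roughly
 *
 *	intel_gt_init_workarounds(gt);			/* build gt->wa_list once */
 *	intel_gt_apply_workarounds(gt);			/* on init and after reset/resume */
 *	intel_gt_verify_workarounds(gt, "resume");	/* optional readback check */
 *
 * using only the helpers defined in this file.
 */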

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
	  const char *name, const char *from)
{
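	/*
	 * Compare only the bits covered by wa->read; entries added with a
	 * zero read mask (e.g. write-only registers) always pass this check.
	 */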
	if ((cur ^ wa->set) & wa->read) {
		drm_err(&gt->i915->drm,
			"%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
			name, from, i915_mmio_reg_offset(wa->reg),
			cur, cur & wa->read, wa->set & wa->read);

		return false;
	}

	return true;
}

static void wa_list_apply(const struct i915_wa_list *wal)
{
	struct intel_gt *gt = wal->gt;
	struct intel_uncore *uncore = gt->uncore;
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

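	/* Serialize MCR steering and uncore MMIO while the list is applied. */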
	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 val, old = 0;

		/* open-coded rmw due to steering */
		if (wa->clr)
			old = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);
		val = (old & ~wa->clr) | wa->set;
		if (val != old || !wa->clr) {
			if (wa->is_mcr)
				intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
			else
				intel_uncore_write_fw(uncore, wa->reg, val);
		}

		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
			u32 val = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);

			wa_verify(gt, wa, val, wal->name, "application");
		}
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);
}

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(&gt->wa_list);
}

static bool wa_list_verify(struct intel_gt *gt,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct intel_uncore *uncore = gt->uncore;
	struct i915_wa *wa;
	enum forcewake_domains fw;
	unsigned long flags;
	unsigned int i;
	bool ok = true;

	fw = wal_get_fw_for_rmw(uncore, wal);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg),
				wal->name, from);

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt, &gt->wa_list, from);
}

__maybe_unused
static bool is_nonpriv_flags_valid(u32 flags)
{
	/* Check only valid flag bits are set */
	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
		return false;

	/* NB: Only 3 out of 4 enum values are valid for access field */
	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
		return false;

	return true;
}
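
/*
 * For reference, a typical caller below combines an access mode with an
 * optional range flag, e.g.
 *
 *	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
 *			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
 *			  RING_FORCE_TO_NONPRIV_RANGE_4);
 *
 * which satisfies the checks above (valid access mode, no stray bits).
 */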

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.mcr_reg = reg,
		.is_mcr = 1,
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.mcr_reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void
whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
{
	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another :
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

static void cml_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);

	cfl_whitelist_build(engine);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		break;
	}
}

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 * Wa_1408556865:tgl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);

		/*
		 * Wa_1808121037:tgl
		 * Wa_14012131227:dg1
		 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
		 */
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);

		/* Wa_1806527549:tgl */
		whitelist_reg(w, HIZ_CHICKEN);
		break;
	default:
		break;
	}
}

static void dg1_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	tgl_whitelist_build(engine);

	/* GEN:BUG:1409280441:dg1 */
	if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
	    (engine->class == RENDER_CLASS ||
	     engine->class == COPY_ENGINE_CLASS))
		whitelist_reg_ext(w, RING_ID(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);
}

static void dg2_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * Wa_1507100340:dg2_g10
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
			whitelist_reg_ext(w, PS_INVOCATION_COUNT,
					  RING_FORCE_TO_NONPRIV_ACCESS_RD |
					  RING_FORCE_TO_NONPRIV_RANGE_4);

		break;
	case COMPUTE_CLASS:
		/* Wa_16011157294:dg2_g10 */
		if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
			whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
		break;
	default:
		break;
	}
}

static void blacklist_trtt(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	/*
	 * Prevent read/write access to [0x4400, 0x4600) which covers
	 * the TRTT range across all engines. Note that normally userspace
	 * cannot access the other engines' trtt control, but for simplicity
	 * we cover the entire range on each engine.
	 */
	whitelist_reg_ext(w, _MMIO(0x4400),
			  RING_FORCE_TO_NONPRIV_DENY |
			  RING_FORCE_TO_NONPRIV_RANGE_64);
	whitelist_reg_ext(w, _MMIO(0x4500),
			  RING_FORCE_TO_NONPRIV_DENY |
			  RING_FORCE_TO_NONPRIV_RANGE_64);
}

static void pvc_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);

	/* Wa_16014440446:pvc */
	blacklist_trtt(engine);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, engine->gt, "whitelist", engine->name);

	if (IS_PONTEVECCHIO(i915))
		pvc_whitelist_build(engine);
	else if (IS_DG2(i915))
		dg2_whitelist_build(engine);
	else if (IS_XEHPSDV(i915))
		xehpsdv_whitelist_build(engine);
	else if (IS_DG1(i915))
		dg1_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) == 12)
		tgl_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) == 11)
		icl_whitelist_build(engine);
	else if (IS_COMETLAKE(i915))
		cml_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));

	wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}

/*
 * engine_fake_wa_init(), a placeholder to program registers that are not
 * part of an official workaround defined by the hardware team.
 * Routing the programming of those registers through the workaround
 * framework allows proper application and verification.
 */
static void
engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	u8 mocs_w, mocs_r;

	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
	 * by the command streamer when executing commands that don't have
	 * a way to explicitly specify a MOCS setting.  The default should
	 * usually reference whichever MOCS entry corresponds to uncached
	 * behavior, although use of a WB cached entry is recommended by the
	 * spec in certain circumstances on specific platforms.
	 */
	if (GRAPHICS_VER(engine->i915) >= 12) {
		mocs_r = engine->gt->mocs.uc_index;
		mocs_w = engine->gt->mocs.uc_index;

		if (HAS_L3_CCS_READ(engine->i915) &&
		    engine->class == COMPUTE_CLASS) {
			mocs_r = engine->gt->mocs.wb_index;

			/*
			 * Even on the few platforms where MOCS 0 is a
			 * legitimate table entry, it's never the correct
			 * setting to use here; we can assume the MOCS init
			 * just forgot to initialize wb_index.
			 */
			drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
		}

		wa_masked_field_set(wal,
				    RING_CMD_CCTL(engine->mmio_base),
				    CMD_CCTL_MOCS_MASK,
				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
	}
}
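
/*
 * Net effect of engine_fake_wa_init() on graphics version 12+: both MOCS
 * fields default to the platform's uncached index, except on compute engines
 * with L3 CCS reads (HAS_L3_CCS_READ), where the read MOCS uses the WB index.
 */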

static bool needs_wa_1308578152(struct intel_engine_cs *engine)
{
	return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
		GEN_DSS_PER_GSLICE;
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_DG2(i915)) {
		/* Wa_1509235366:dg2 */
		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
			    GLOBAL_INVALIDATION_MODE);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
		/* Wa_14013392000:dg2_g11 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
		/* Wa_1509727124:dg2 */
		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
		/* Wa_14012419201:dg2 */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
				 GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_G11(i915)) {
		/*
		 * Wa_22012826095:dg2
		 * Wa_22013059131:dg2
		 */
		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
				     MAXREQS_PER_BANK,
				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));

		/* Wa_22013059131:dg2 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
	}

	/* Wa_1308578152:dg2_g10 when first gslice is fused off */
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
	    needs_wa_1308578152(engine)) {
		wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
			      GEN12_REPLAY_MODE_GRANULARITY);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
		/* Wa_22013037850:dg2 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
				DISABLE_128B_EVICTION_COMMAND_UDW);

		/* Wa_22012856258:dg2 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
				 GEN12_DISABLE_READ_SUPPRESSION);

		/*
		 * Wa_22010960976:dg2
		 * Wa_14013347512:dg2
		 */
		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
		/*
		 * Wa_1608949956:dg2_g10
		 * Wa_14010198302:dg2_g10
		 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
				 MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);

		/*
		 * Wa_14010918519:dg2_g10
		 *
		 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
		 * so ignoring verification.
		 */
		wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
			   FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
			   0, false);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_22010430635:dg2 */
		wa_mcr_masked_en(wal,
				 GEN9_ROW_CHICKEN4,
				 GEN12_DISABLE_GRF_CLEAR);

		/* Wa_14010648519:dg2 */
		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
	}

	/* Wa_14013202645:dg2 */
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
		wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);

	/* Wa_22012532006:dg2 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
				 DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);

	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_14010680813:dg2_g10 */
		wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
			    EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
	}

	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
		/* Wa_14012362059:dg2 */
		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_G10(i915)) {
		/* Wa_22014600077:dg2 */
		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
			   0 /* Wa_14012342262 write-only reg, so skip verification */,
			   true);
	}

	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
	    IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
		/*
		 * Wa_1607138336:tgl[a0],dg1[a0]
		 * Wa_1607063988:tgl[a0],dg1[a0]
		 */
		wa_write_or(wal,
			    GEN9_CTX_PREEMPT_REG,
			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
	}

	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
		/*
		 * Wa_1606679103:tgl
		 * (see also Wa_1606682166:icl)
		 */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);

		/*
		 * Wa_1407928979:tgl A*
		 * Wa_18011464164:tgl[B0+],dg1[B0+]
		 * Wa_22010931296:tgl[B0+],dg1[B0+]
		 * Wa_14010919138:rkl,dg1,adl-s,adl-p
		 */
		wa_write_or(wal, GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
	}

	if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/*
		 * Wa_1606700617:tgl,dg1,adl-p
		 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
		 * Wa_14010826681:tgl,dg1,rkl,adl-p
		 * Wa_18019627453:dg2
		 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
	    IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);

		/*
		 * Wa_1409085225:tgl
		 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
		 */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
	}

	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
		/*
		 * Wa_1607030317:tgl
		 * Wa_1607186500:tgl
		 * Wa_1607297627:tgl,rkl,dg1[a0],adlp
		 *
		 * On TGL and RKL there are multiple entries for this WA in the
		 * BSpec; some indicate this is an A0-only WA, others indicate
		 * it applies to all steppings so we trust the "all steppings."
		 * For DG1 this only applies to A0.
		 */
		wa_masked_en(wal,
			     RING_PSMI_CTL(RENDER_RING_BASE),
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
	}

	if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
	    IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
		wa_mcr_masked_en(wal,
				 GEN10_SAMPLER_MODE,
				 ENABLE_SMALLPL);
	}

	if (GRAPHICS_VER(i915) == 11) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_clr_set(wal,
				 GEN8_GARBCNTL,
				 GEN11_HASH_CTRL_EXCL_MASK,
				 GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_clr_set(wal,
				 GEN11_GLBLINVL,
				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_mcr_write_or(wal,
				GEN8_L3SQCREG4,
				GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_mcr_write_clr_set(wal,
				     GEN11_SCRATCH2,
				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				     0);

		/* WaEnable32PlaneMode:icl */
		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
			     GEN11_ENABLE_32_PLANE_MODE);

		/*
		 * Wa_1408615072:icl,ehl  (vsunit)
		 * Wa_1407596294:icl,ehl  (hsunit)
		 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);

		/*
		 * Wa_1408767742:icl[a2..forever],ehl[all]
		 * Wa_1605460711:icl[a0..c0]
		 */
		wa_write_or(wal,
			    GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_22010271021 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	/*
	 * Intel platforms that support fine-grained preemption (i.e., gen9 and
	 * beyond) allow the kernel-mode driver to choose between two different
	 * options for controlling preemption granularity and behavior.
	 *
	 * Option 1 (hardware default):
	 *   Preemption settings are controlled in a global manner via
	 *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
	 *   and settings chosen by the kernel-mode driver will apply to all
	 *   userspace clients.
	 *
	 * Option 2:
	 *   Preemption settings are controlled on a per-context basis via
	 *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
	 *   context switch and is writable by userspace (e.g., via
	 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
	 *   which allows different userspace drivers/clients to select
	 *   different settings, or to change those settings on the fly in
	 *   response to runtime needs.  This option was known by name
	 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
	 *   that name is somewhat misleading as other non-granularity
	 *   preemption settings are also impacted by this decision.
	 *
	 * On Linux, our policy has always been to let userspace drivers
	 * control preemption granularity/settings (Option 2).  This was
	 * originally mandatory on gen9 to prevent ABI breakage (old gen9
	 * userspace developed before object-level preemption was enabled would
	 * not behave well if i915 were to go with Option 1 and enable that
	 * preemption in a global manner).  On gen9 each context would have
	 * object-level preemption disabled by default (see
	 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
	 * userspace drivers could opt-in to object-level preemption as they
	 * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
	 * even though it is no longer necessary for ABI compatibility when
	 * enabling a new platform, it does ensure that userspace will be able
	 * to implement any workarounds that show up requiring temporary
	 * adjustments to preemption behavior at runtime.
	 *
	 * Notes/Workarounds:
	 *  - Wa_14015141709:  On DG2 and early steppings of MTL,
	 *      CS_CHICKEN1[0] does not disable object-level preemption as
	 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
	 *      using Option 1).  Effectively this means userspace is unable
	 *      to disable object-level preemption on these platforms/steppings
	 *      despite the setting here.
	 *
	 *  - Wa_16013994831:  May require that userspace program
	 *      CS_CHICKEN1[10] when certain runtime conditions are true.
	 *      Userspace requires Option 2 to be in effect for their update of
	 *      CS_CHICKEN1[10] to be effective.
	 *
	 * Other workarounds may appear in the future that will also require
	 * Option 2 behavior to allow proper userspace implementation.
	 */
	if (GRAPHICS_VER(i915) >= 9)
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);

	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (GRAPHICS_VER(i915) == 9) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_mcr_write_or(wal,
				BDW_SCRATCH1,
				GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_mcr_write_clr_set(wal,
					     GEN8_L3SQCREG1,
					     L3_PRIO_CREDITS_MASK,
					     L3_GENERAL_PRIO_CREDITS(62) |
					     L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_mcr_write_or(wal,
				GEN8_L3SQCREG4,
				GEN8_LQSC_FLUSH_COHERENT_LINES);

		/* Disable atomics in L3 to prevent unrecoverable hangs */
		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
		wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
				     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
		wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
				     EVICTION_PERF_FIX_ENABLE, 0);
	}

	if (IS_HASWELL(i915)) {
		/* WaSampleCChickenBitEnable:hsw */
		wa_masked_en(wal,
			     HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);

		wa_masked_dis(wal,
			      CACHE_MODE_0_GEN7,
			      /* enable HiZ Raw Stall Optimization */
			      HIZ_RAW_STALL_OPT_DISABLE);
	}

	if (IS_VALLEYVIEW(i915)) {
		/* WaDisableEarlyCull:vlv */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

		/*
		 * WaVSThreadDispatchOverride:ivb,vlv
		 *
		 * This actually overrides the dispatch
		 * mode for all thread types.
		 */
		wa_write_clr_set(wal,
				 GEN7_FF_THREAD_MODE,
				 GEN7_FF_SCHED_MASK,
				 GEN7_FF_TS_SCHED_HW |
				 GEN7_FF_VS_SCHED_HW |
				 GEN7_FF_DS_SCHED_HW);

		/* WaPsdDispatchEnable:vlv */
		/* WaDisablePSDDualDispatchEnable:vlv */
		wa_masked_en(wal,
			     GEN7_HALF_SLICE_CHICKEN1,
			     GEN7_MAX_PS_THREAD_DEP |
			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
	}

	if (IS_IVYBRIDGE(i915)) {
		/* WaDisableEarlyCull:ivb */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

		if (0) { /* causes HiZ corruption on ivb:gt1 */
			/* enable HiZ Raw Stall Optimization */
			wa_masked_dis(wal,
				      CACHE_MODE_0_GEN7,
				      HIZ_RAW_STALL_OPT_DISABLE);
		}

		/*
		 * WaVSThreadDispatchOverride:ivb,vlv
		 *
		 * This actually overrides the dispatch
		 * mode for all thread types.
		 */
		wa_write_clr_set(wal,
				 GEN7_FF_THREAD_MODE,
				 GEN7_FF_SCHED_MASK,
				 GEN7_FF_TS_SCHED_HW |
				 GEN7_FF_VS_SCHED_HW |
				 GEN7_FF_DS_SCHED_HW);

		/* WaDisablePSDDualDispatchEnable:ivb */
		if (IS_IVB_GT1(i915))
			wa_masked_en(wal,
				     GEN7_HALF_SLICE_CHICKEN1,
				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
	}

	if (GRAPHICS_VER(i915) == 7) {
		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
		wa_masked_en(wal,
			     RING_MODE_GEN7(RENDER_RING_BASE),
			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);

		/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
		wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);

		/*
		 * BSpec says this must be set, even though
		 * WaDisable4x2SubspanOptimization:ivb,hsw
		 * WaDisable4x2SubspanOptimization isn't listed for VLV.
		 */
		wa_masked_en(wal,
			     CACHE_MODE_1,
			     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_masked_field_set(wal,
				    GEN7_GT_MODE,
				    GEN6_WIZ_HASHING_MASK,
				    GEN6_WIZ_HASHING_16x4);
	}

	if (IS_GRAPHICS_VER(i915, 6, 7))
		/*
		 * We need to disable the AsyncFlip performance optimisations in
		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
		 * already be programmed to '1' on all products.
		 *
		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
		 */
		wa_masked_en(wal,
			     RING_MI_MODE(RENDER_RING_BASE),
			     ASYNC_FLIP_PERF_DISABLE);

	if (GRAPHICS_VER(i915) == 6) {
		/*
		 * Required for the hardware to program scanline values for
		 * waiting
		 * WaEnableFlushTlbInvalidationMode:snb
		 */
		wa_masked_en(wal,
			     GFX_MODE,
			     GFX_TLB_INVALIDATE_EXPLICIT);

		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
		wa_masked_en(wal,
			     _3D_CHICKEN,
			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);

		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
			     /*
			      * Bspec says:
			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
			      * to normal and 3DSTATE_SF number of SF output attributes
			      * is more than 16."
			      */
			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_masked_field_set(wal,
				    GEN6_GT_MODE,
				    GEN6_WIZ_HASHING_MASK,
				    GEN6_WIZ_HASHING_16x4);

		/* WaDisable_RenderCache_OperationalFlush:snb */
		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);

		/*
		 * From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		wa_masked_dis(wal,
			      CACHE_MODE_0,
			      CM0_STC_EVICT_DISABLE_LRA_SNB);
	}

	if (IS_GRAPHICS_VER(i915, 4, 6))
		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
		wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
		       /* XXX bit doesn't stick on Broadwater */
		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);

	if (GRAPHICS_VER(i915) == 4)
		/*
		 * Disable CONSTANT_BUFFER before it is loaded from the context
		 * image. As soon as it is loaded, it is executed and the stored
		 * address may no longer be valid, leading to a GPU hang.
		 *
		 * This imposes the requirement that userspace reload their
		 * CONSTANT_BUFFER on every batch, fortunately a requirement
		 * they are already accustomed to from before contexts were
		 * enabled.
		 */
		wa_add(wal, ECOSKPD(RENDER_RING_BASE),
		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
		       0 /* XXX bit doesn't stick on Broadwater */,
		       true);
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
		/* Wa_14014999345:pvc */
		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
	}
}

/*
 * The bspec performance guide has recommended MMIO tuning settings.  These
 * aren't truly "workarounds" but we want to program them with the same
 * workaround infrastructure to ensure that they're automatically added to
 * the GuC save/restore lists, re-applied at the right times, and checked for
 * any conflicting programming requested by real workarounds.
 *
 * Programming settings should be added here only if their registers are not
 * part of an engine's register state context.  If a register is part of a
 * context, then any tuning settings should be programmed in an appropriate
 * function invoked by __intel_engine_init_ctx_wa().
 */
static void
add_render_compute_tuning_settings(struct drm_i915_private *i915,
				   struct i915_wa_list *wal)
{
	if (IS_PONTEVECCHIO(i915)) {
		wa_write(wal, XEHPC_L3SCRUB,
			 SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
		wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
	}

	if (IS_DG2(i915)) {
		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
	}

	/*
	 * This tuning setting proves beneficial only on ATS-M designs; the
	 * default "age based" setting is optimal on regular DG2 and other
	 * platforms.
	 */
	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
					THREAD_EX_ARB_MODE_RR_AFTER_DEP);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}

/*
 * The workarounds in this function apply to shared registers in
 * the general render reset domain that aren't tied to a
 * specific engine.  Since all render+compute engines get reset
 * together, and the contents of these registers are lost during
 * the shared render domain reset, we'll define such workarounds
 * here and then add them to just a single RCS or CCS engine's
 * workaround list (whichever engine has the
 * I915_ENGINE_FIRST_RENDER_COMPUTE flag).
 */
static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	add_render_compute_tuning_settings(i915, wal);

	if (IS_PONTEVECCHIO(i915)) {
		/* Wa_16016694945 */
		wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
	}

	if (IS_XEHPSDV(i915)) {
		/* Wa_1409954639 */
		wa_mcr_masked_en(wal,
				 GEN8_ROW_CHICKEN,
				 SYSTOLIC_DOP_CLOCK_GATING_DIS);

		/* Wa_1607196519 */
		wa_mcr_masked_en(wal,
				 GEN9_ROW_CHICKEN4,
				 GEN12_DISABLE_GRF_CLEAR);

		/* Wa_14010670810:xehpsdv */
		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);

		/* Wa_14010449647:xehpsdv */
		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
				 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);

		/* Wa_18011725039:xehpsdv */
		if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
			wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
			wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
		}

		/* Wa_14012362059:xehpsdv */
		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);

		/* Wa_14014368820:xehpsdv */
		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
				GLOBAL_INVALIDATION_MODE);
	}

	if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
		/* Wa_14015227452:dg2,pvc */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);

		/* Wa_22014226127:dg2,pvc */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);

		/* Wa_16015675438:dg2,pvc */
		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);

		/* Wa_18018781329:dg2,pvc */
		wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
		wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
		wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
		wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
	}

	if (IS_DG2(i915)) {
		/*
		 * Wa_16011620976:dg2_g11
		 * Wa_22015475538:dg2
		 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);

		/* Wa_18017747507:dg2 */
		wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
		/*
		 * Wa_22012654132
		 *
		 * Note that register 0xE420 is write-only and cannot be read
		 * back for verification on DG2 (due to Wa_14012342262), so
		 * we need to explicitly skip the readback.
		 */
		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
			   0 /* write-only, so skip validation */,
			   true);
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (GRAPHICS_VER(engine->i915) < 4)
		return;

	engine_fake_wa_init(engine, wal);

	/*
	 * These are common workarounds that just need to be applied
	 * to a single RCS/CCS engine's workaround list since
	 * they're reset as part of the general render domain reset.
	 */
	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
		general_render_compute_wa_init(engine, wal);

	if (engine->class == COMPUTE_CLASS)
		ccs_engine_wa_init(engine, wal);
	else if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	wa_init_start(wal, engine->gt, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(&engine->wa_list);
}
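
/*
 * Illustrative sketch (assumed call order, not enforced here): per engine the
 * driver roughly does
 *
 *	intel_engine_init_whitelist(engine);
 *	intel_engine_init_workarounds(engine);
 *	...
 *	intel_engine_apply_whitelist(engine);		/* after an engine reset */
 *	intel_engine_apply_workarounds(engine);
 *	intel_engine_verify_workarounds(engine, "reset");
 *
 * using only the helpers defined in this file.
 */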

static const struct i915_range mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};

static const struct i915_range mcr_ranges_gen12[] = {
	{ .start =  0x8150, .end =  0x815f },
	{ .start =  0x9520, .end =  0x955f },
	{ .start =  0xb100, .end =  0xb3ff },
	{ .start =  0xde80, .end =  0xe8ff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static const struct i915_range mcr_ranges_xehp[] = {
	{ .start =  0x4000, .end =  0x4aff },
	{ .start =  0x5200, .end =  0x52ff },
	{ .start =  0x5400, .end =  0x7fff },
	{ .start =  0x8140, .end =  0x815f },
	{ .start =  0x8c80, .end =  0x8dff },
	{ .start =  0x94d0, .end =  0x955f },
	{ .start =  0x9680, .end =  0x96ff },
	{ .start =  0xb000, .end =  0xb3ff },
	{ .start =  0xc800, .end =  0xcfff },
	{ .start =  0xd800, .end =  0xd8ff },
	{ .start =  0xdc00, .end =  0xffff },
	{ .start = 0x17000, .end = 0x17fff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	const struct i915_range *mcr_ranges;
	int i;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		mcr_ranges = mcr_ranges_xehp;
	else if (GRAPHICS_VER(i915) >= 12)
		mcr_ranges = mcr_ranges_gen12;
	else if (GRAPHICS_VER(i915) >= 8)
		mcr_ranges = mcr_ranges_gen8;
	else
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges[i].start; i++)
		if (offset >= mcr_ranges[i].start &&
		    offset <= mcr_ranges[i].end)
			return true;

	return false;
}
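
/*
 * Example (based on the ranges above): on Xe_HP-based platforms an offset
 * such as 0xE420 (GEN10_CACHE_MODE_SS) lands in the 0xdc00-0xffff MCR range,
 * so wa_list_srm()/engine_wa_list_verify() below skip it rather than read a
 * potentially unsteered value through the command streamer.
 */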

static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (GRAPHICS_VER(i915) >= 8)
		srm++;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}
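
/*
 * Layout note: wa_list_srm() stores each readable workaround register at
 * dword index i of the scratch buffer, matching wal->list[i]; slots for
 * skipped MCR registers are left untouched and are ignored again when the
 * results are compared in engine_wa_list_verify().
 */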

static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	struct i915_gem_ww_ctx ww;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

3179 3180
	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
					   wal->count * sizeof(u32));
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_engine_pm_get(ce->engine);
	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err == 0)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_pm;

	err = i915_vma_pin_ww(vma, &ww, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_unpin;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err == 0)
		err = wa_list_srm(rq, wal, vma);

	i915_request_get(rq);
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);

	if (err)
		goto err_rq;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_vma:
	i915_vma_unpin(vma);
err_unpin:
	intel_context_unpin(ce);
err_pm:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif