Commit ba57b9b1 authored by Dave Airlie

Merge tag 'drm-intel-gt-next-2023-06-08' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- I915_GEM_CREATE_EXT_SET_PAT for Mesa on Meteorlake.

Driver Changes:

Fixes/improvements/new stuff:

- Use large rings for compute contexts (Chris Wilson)
- Better logging/debug of unexpected GuC communication issues (Michal Wajdeczko)
- Clear out entire reports after reading if not power of 2 size (Ashutosh Dixit)
- Limit lmem allocation size to succeed on SmallBars (Andrzej Hajda)
- perf/OA capture robustness improvements on DG2 (Umesh Nerlige Ramappa)
- Fix error code in intel_gsc_uc_heci_cmd_submit_nonpriv() (Dan Carpenter)

Future platform enablement:

- Add workaround 14016712196 (Tejas Upadhyay)
- HuC loading for MTL (Daniele Ceraolo Spurio)
- Allow user to set cache at BO creation (Fei Yang)

Miscellaneous:

- Use system include style for drm headers (Jani Nikula)
- Drop legacy CTB definitions (Michal Wajdeczko)
- Turn off the timer to sample frequencies when GT is parked (Ashutosh Dixit)
- Make PMU sample array two-dimensional (Ashutosh Dixit)
- Use the correct error value when kernel_context() fails (Andi Shyti)
- Fix second parameter type of pre-gen8 pte_encode callbacks (Nathan Chancellor)
- Fix parameter in gmch_ggtt_insert_{entries, page}() (Nathan Chancellor)
- Fix size_t format specifier in gsccs_send_message() (Nathan Chancellor)
- Use the fdinfo helper (Tvrtko Ursulin)
- Add some missing error propagation (Tvrtko Ursulin)
- Reduce I915_MAX_GT to 2 (Matt Atwood)
- Rename I915_PMU_MAX_GTS to I915_PMU_MAX_GT (Matt Atwood)
- Remove some obsolete definitions (John Harrison)

Merges:

- Merge drm/drm-next into drm-intel-gt-next (Tvrtko Ursulin)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZIH09fqe5v5yArsu@tursulin-desk
parents 959294e4 24335848
......@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
config DRM_I915_DEBUG_GUC
bool "Enable additional driver debugging for GuC"
depends on DRM_I915
select STACKDEPOT
default n
help
Choose this option to turn on extra driver debugging that may affect
......
......@@ -964,6 +964,10 @@ static int intel_context_set_gem(struct intel_context *ce,
RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce));
if (ce->engine->class == COMPUTE_CLASS)
ce->ring_size = SZ_512K;
else
ce->ring_size = SZ_16K;
i915_vm_put(ce->vm);
......
......@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
unsigned int pat_index;
};
static void repr_placements(char *buf, size_t size,
......@@ -394,11 +395,43 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
return 0;
}
static int ext_set_pat(struct i915_user_extension __user *base, void *data)
{
struct create_ext *ext_data = data;
struct drm_i915_private *i915 = ext_data->i915;
struct drm_i915_gem_create_ext_set_pat ext;
unsigned int max_pat_index;
BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
/* Limiting the extension only to Meteor Lake */
if (!IS_METEORLAKE(i915))
return -ENODEV;
if (copy_from_user(&ext, base, sizeof(ext)))
return -EFAULT;
max_pat_index = INTEL_INFO(i915)->max_pat_index;
if (ext.pat_index > max_pat_index) {
drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
ext.pat_index);
return -EINVAL;
}
ext_data->pat_index = ext.pat_index;
return 0;
}
static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
};
#define PAT_INDEX_NOT_SET 0xffff
/**
* i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
......@@ -418,6 +451,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
create_extensions,
ARRAY_SIZE(create_extensions),
......@@ -454,5 +488,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(obj))
return PTR_ERR(obj);
if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
i915_gem_object_set_pat_index(obj, ext_data.pat_index);
/* Mark pat_index is set by UMD */
obj->pat_set_by_user = true;
}
return i915_gem_publish(obj, file, &args->size, &args->handle);
}
......@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
/*
* Always flush cache for UMD objects at creation time.
*/
if (obj->pat_set_by_user)
return true;
/*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
......
......@@ -348,8 +348,10 @@ static int live_parallel_switch(void *arg)
continue;
ce = intel_context_create(data[m].ce[0]->engine);
if (IS_ERR(ce))
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
}
err = intel_context_pin(ce);
if (err) {
......@@ -369,8 +371,10 @@ static int live_parallel_switch(void *arg)
worker = kthread_create_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name);
if (IS_ERR(worker))
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
goto out;
}
data[n].worker = worker;
}
......@@ -399,8 +403,10 @@ static int live_parallel_switch(void *arg)
}
}
if (igt_live_test_end(&t))
err = -EIO;
if (igt_live_test_end(&t)) {
err = err ?: -EIO;
break;
}
}
out:
......
......@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
return cs;
}
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
u32 *cs;
/* dummy PIPE_CONTROL + depth flush */
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen12_emit_pipe_control(cs,
0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH,
LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
return 0;
}
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
if (mode & EMIT_FLUSH) {
u32 flags = 0;
int err;
u32 *cs;
err = mtl_dummy_pipe_control(rq);
if (err)
return err;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
......@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE) {
u32 flags = 0;
u32 *cs, count;
int err;
err = mtl_dummy_pipe_control(rq);
if (err)
return err;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
......@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
/* Wa_14016712196 */
if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
......
......@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
/*
* For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
* so these PTE encode functions are left with using cache_level.
* so the switch-case statements in these PTE encode functions are still valid.
* See translation table LEGACY_CACHELEVEL.
*/
static u64 snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
switch (level) {
switch (pat_index) {
case I915_CACHE_L3_LLC:
case I915_CACHE_LLC:
pte |= GEN6_PTE_CACHE_LLC;
......@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
MISSING_CASE(level);
MISSING_CASE(pat_index);
}
return pte;
}
static u64 ivb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
switch (level) {
switch (pat_index) {
case I915_CACHE_L3_LLC:
pte |= GEN7_PTE_CACHE_L3_LLC;
break;
......@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
MISSING_CASE(level);
MISSING_CASE(pat_index);
}
return pte;
}
static u64 byt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
......@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
if (!(flags & PTE_READ_ONLY))
pte |= BYT_PTE_WRITEABLE;
if (level != I915_CACHE_NONE)
if (pat_index != I915_CACHE_NONE)
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
return pte;
}
static u64 hsw_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
if (level != I915_CACHE_NONE)
if (pat_index != I915_CACHE_NONE)
pte |= HSW_WB_LLC_AGE3;
return pte;
}
static u64 iris_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
switch (level) {
switch (pat_index) {
case I915_CACHE_NONE:
break;
case I915_CACHE_WT:
......@@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
ggtt->error_capture.size);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
intel_uc_resume_mappings(&gt->uc);
ggtt->invalidate(ggtt);
if (flush)
......
......@@ -18,10 +18,10 @@
static void gmch_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 unused)
{
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
......@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 unused)
{
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
......
......@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
int err = -ENOMEM;
u32 *map;
int err;
/*
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
......@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
*/
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
return -ENOMEM;
if (IS_ERR(ctx_hi))
return PTR_ERR(ctx_hi);
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
if (!ctx_lo)
if (IS_ERR(ctx_lo)) {
err = PTR_ERR(ctx_lo);
goto err_ctx_hi;
}
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
......
......@@ -190,11 +190,18 @@ pte_tlbinv(struct intel_context *ce,
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
resource_size_t size = SZ_1G;
/*
* Allocation of largest possible page size allows to test all types
* of pages.
* of pages. To succeed with both allocations, especially in case of Small
* BAR, try to allocate no more than quarter of mappable memory.
*/
return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
if (mr && size > mr->io_size / 4)
size = mr->io_size / 4;
return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}
static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
......
......@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
* - **flags**, holds various bits to control message handling
*/
/*
* Definition of the command transport message header (DW0)
*
* bit[4..0] message len (in dwords)
* bit[7..5] reserved
* bit[8] response (G2H only)
* bit[8] write fence to desc (H2G only)
* bit[9] write status to H2G buff (H2G only)
* bit[10] send status back via G2H (H2G only)
* bit[15..11] reserved
* bit[31..16] action code
*/
#define GUC_CT_MSG_LEN_SHIFT 0
#define GUC_CT_MSG_LEN_MASK 0x1F
#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
#define GUC_CT_MSG_SEND_STATUS (1 << 10)
#define GUC_CT_MSG_ACTION_SHIFT 16
#define GUC_CT_MSG_ACTION_MASK 0xFFFF
#endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
......@@ -24,6 +24,7 @@
* | | 30:28 | **TYPE** - message type |
* | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
* | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
* | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
* | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
......@@ -46,6 +47,7 @@
#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
#define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u
#define GUC_HXG_TYPE_FAST_REQUEST 2u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
......@@ -89,6 +91,34 @@
#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0)
#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
* DOC: HXG Fast Request
*
* The `HXG Fast Request`_ message should be used to initiate asynchronous activity
* for which confirmation or return data is not expected.
*
* If confirmation is required then `HXG Request`_ shall be used instead.
*
* The recipient of this message may only use `HXG Failure`_ message if it was
* unable to accept this request (like invalid data).
*
* Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE.
*
* +---+-------+--------------------------------------------------------------+
* | | Bits | Description |
* +===+=======+==============================================================+
* | 0 | 31 | ORIGIN - see `HXG Message`_ |
* | +-------+--------------------------------------------------------------+
* | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ |
* | +-------+--------------------------------------------------------------+
* | | 27:16 | DATA0 - see `HXG Request`_ |
* | +-------+--------------------------------------------------------------+
* | | 15:0 | ACTION - see `HXG Request`_ |
* +---+-------+--------------------------------------------------------------+
* |...| | DATAn - see `HXG Request`_ |
* +---+-------+--------------------------------------------------------------+
*/
/**
* DOC: HXG Event
*
......
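A minimal sketch, not part of the merged diff, assuming the GUC_HXG_* definitions added above: header DW0 of a Fast Request is packed exactly like a regular Request apart from TYPE. The helper name below is hypothetical; ct_write() in intel_guc_ct.c (later in this diff) composes the equivalent header when INTEL_GUC_CT_SEND_NB is set.

#include <linux/bitfield.h>
#include <linux/types.h>

/* Hypothetical helper, for illustration only: build HXG DW0 for a
 * Fast Request; DATA0 and ACTION use the same fields as HXG Request. */
static u32 hxg_fast_request_msg0(u32 action, u32 data0)
{
	return FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
	       FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, data0) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, action);
}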
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _INTEL_GSC_BINARY_HEADERS_H_
#define _INTEL_GSC_BINARY_HEADERS_H_
#include <linux/types.h>
/* Code partition directory (CPD) structures */
struct intel_gsc_cpd_header_v2 {
u32 header_marker;
#define INTEL_GSC_CPD_HEADER_MARKER 0x44504324
u32 num_of_entries;
u8 header_version;
u8 entry_version;
u8 header_length; /* in bytes */
u8 flags;
u32 partition_name;
u32 crc32;
} __packed;
struct intel_gsc_cpd_entry {
u8 name[12];
/*
* Bits 0-24: offset from the beginning of the code partition
* Bit 25: huffman compressed
* Bits 26-31: reserved
*/
u32 offset;
#define INTEL_GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
#define INTEL_GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
/*
* Module/Item length, in bytes. For Huffman-compressed modules, this
* refers to the uncompressed size. For software-compressed modules,
* this refers to the compressed size.
*/
u32 length;
u8 reserved[4];
} __packed;
struct intel_gsc_version {
u16 major;
u16 minor;
u16 hotfix;
u16 build;
} __packed;
struct intel_gsc_manifest_header {
u32 header_type; /* 0x4 for manifest type */
u32 header_length; /* in dwords */
u32 header_version;
u32 flags;
u32 vendor;
u32 date;
u32 size; /* In dwords, size of entire manifest (header + extensions) */
u32 header_id;
u32 internal_data;
struct intel_gsc_version fw_version;
u32 security_version;
struct intel_gsc_version meu_kit_version;
u32 meu_manifest_version;
u8 general_data[4];
u8 reserved3[56];
u32 modulus_size; /* in dwords */
u32 exponent_size; /* in dwords */
} __packed;
#endif
......@@ -5,8 +5,8 @@
#include <linux/component.h>
#include "drm/i915_component.h"
#include "drm/i915_gsc_proxy_mei_interface.h"
#include <drm/i915_component.h>
#include <drm/i915_gsc_proxy_mei_interface.h>
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
......
......@@ -29,13 +29,32 @@ static void gsc_work(struct work_struct *work)
if (actions & GSC_ACTION_FW_LOAD) {
ret = intel_gsc_uc_fw_upload(gsc);
if (ret == -EEXIST) /* skip proxy if not a new load */
actions &= ~GSC_ACTION_FW_LOAD;
else if (ret)
if (!ret)
/* setup proxy on a new load */
actions |= GSC_ACTION_SW_PROXY;
else if (ret != -EEXIST)
goto out_put;
/*
* The HuC auth can be done both before or after the proxy init;
* if done after, a proxy request will be issued and must be
* serviced before the authentication can complete.
* Since this worker also handles proxy requests, we can't
* perform an action that requires the proxy from within it and
* then stall waiting for it, because we'd be blocking the
* service path. Therefore, it is easier for us to load HuC
* first and do proxy later. The GSC will ack the HuC auth and
* then send the HuC proxy request as part of the proxy init
* flow.
* Note that we can only do the GSC auth if the GuC auth was
* successful.
*/
if (intel_uc_uses_huc(&gt->uc) &&
intel_huc_is_authenticated(&gt->uc.huc, INTEL_HUC_AUTH_BY_GUC))
intel_huc_auth(&gt->uc.huc, INTEL_HUC_AUTH_BY_GSC);
}
if (actions & (GSC_ACTION_FW_LOAD | GSC_ACTION_SW_PROXY)) {
if (actions & GSC_ACTION_SW_PROXY) {
if (!intel_gsc_uc_fw_init_done(gsc)) {
gt_err(gt, "Proxy request received with GSC not loaded!\n");
goto out_put;
......@@ -90,7 +109,12 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC);
/*
* GSC FW needs to be copied to a dedicated memory allocations for
* loading (see gsc->local), so we don't need to GGTT map the FW image
* itself into GGTT.
*/
intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC, false);
INIT_WORK(&gsc->work, gsc_work);
/* we can arrive here from i915_driver_early_probe for primary
......
......@@ -99,7 +99,7 @@ void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header,
u64 host_session_id)
{
host_session_id &= ~HOST_SESSION_MASK;
if (heci_client_id == HECI_MEADDRESS_PXP)
if (host_session_id && heci_client_id == HECI_MEADDRESS_PXP)
host_session_id |= HOST_SESSION_PXP_SINGLE;
header->validity_marker = GSC_HECI_VALIDITY_MARKER;
......@@ -202,7 +202,7 @@ intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc,
if (++trials < 10)
goto retry;
else
err = EAGAIN;
err = -EAGAIN;
}
}
i915_gem_ww_ctx_fini(&ww);
......
......@@ -164,7 +164,7 @@ void intel_guc_init_early(struct intel_guc *guc)
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, true);
intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc);
......
......@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
}
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
{
unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
unsigned long entries[SZ_32];
unsigned int n;
n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
/* May be called under spinlock, so avoid sleeping */
ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
#endif
ct->requests.lost_and_found[lost].fence = fence;
ct->requests.lost_and_found[lost].action = action;
}
#endif
static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
......@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct,
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT :
type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
GUC_HXG_TYPE_REQUEST;
hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
......@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
}
GEM_BUG_ON(tail > size);
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
ct_track_lost_and_found(ct, fence,
FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
#endif
/*
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
......@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct,
GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len);
GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
GEM_BUG_ON(!response_buf && response_buf_size);
might_sleep();
......@@ -953,6 +976,43 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
return -EPIPE;
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
{
unsigned int n;
char *buf = NULL;
bool found = false;
lockdep_assert_held(&ct->requests.lock);
for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
if (ct->requests.lost_and_found[n].fence != fence)
continue;
found = true;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
buf = kmalloc(SZ_4K, GFP_NOWAIT);
if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
buf, SZ_4K, 0)) {
CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
fence, ct->requests.lost_and_found[n].action, buf);
break;
}
#endif
CT_ERROR(ct, "Fence %u was used by action %#04x\n",
fence, ct->requests.lost_and_found[n].action);
break;
}
kfree(buf);
return found;
}
#else
static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
{
return false;
}
#endif
static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
{
u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
......@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
break;
}
if (!found) {
CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
ct->requests.last_fence);
CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
len, hxg[0], fence, ct->requests.last_fence);
if (!ct_check_lost_and_found(ct, fence)) {
list_for_each_entry(req, &ct->requests.pending, link)
CT_ERROR(ct, "request %u awaits response\n",
req->fence);
}
err = -ENOKEY;
}
spin_unlock_irqrestore(&ct->requests.lock, flags);
......
......@@ -8,6 +8,7 @@
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/stackdepot.h>
#include <linux/workqueue.h>
#include <linux/ktime.h>
#include <linux/wait.h>
......@@ -81,6 +82,16 @@ struct intel_guc_ct {
struct list_head incoming; /* incoming requests */
struct work_struct worker; /* handler for incoming requests */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
struct {
u16 fence;
u16 action;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
depot_stack_handle_t stack;
#endif
} lost_and_found[SZ_16];
#endif
} requests;
/** @stall_time: time of first time a CTB submission is stalled */
......
......@@ -35,13 +35,6 @@
#define GUC_MAX_CONTEXT_ID 65535
#define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID
#define GUC_RENDER_ENGINE 0
#define GUC_VIDEO_ENGINE 1
#define GUC_BLITTER_ENGINE 2
#define GUC_VIDEOENHANCE_ENGINE 3
#define GUC_VIDEO_ENGINE2 4
#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
#define GUC_RENDER_CLASS 0
#define GUC_VIDEO_CLASS 1
#define GUC_VIDEOENHANCE_CLASS 2
......@@ -499,32 +492,6 @@ struct guc_log_buffer_state {
u32 version;
} __packed;
struct guc_ctx_report {
u32 report_return_status;
u32 reserved1[64];
u32 affected_count;
u32 reserved2[2];
} __packed;
/* GuC Shared Context Data Struct */
struct guc_shared_ctx_data {
u32 addr_of_last_preempted_data_low;
u32 addr_of_last_preempted_data_high;
u32 addr_of_last_preempted_data_high_tmp;
u32 padding;
u32 is_mapped_to_proxy;
u32 proxy_ctx_id;
u32 engine_reset_ctx_id;
u32 media_reset_count;
u32 reserved1[8];
u32 uk_last_ctx_switch_reason;
u32 was_reset;
u32 lrca_gpu_addr;
u64 execlist_ctx;
u32 reserved2[66];
struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM];
} __packed;
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum intel_guc_recv_message {
INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
......
......@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
struct bus_type;
struct i915_vma;
enum intel_huc_delayed_load_status {
INTEL_HUC_WAITING_ON_GSC = 0,
......@@ -22,6 +23,12 @@ enum intel_huc_delayed_load_status {
INTEL_HUC_DELAYED_LOAD_ERROR,
};
enum intel_huc_authentication_type {
INTEL_HUC_AUTH_BY_GUC = 0,
INTEL_HUC_AUTH_BY_GSC,
INTEL_HUC_AUTH_MAX_MODES
};
struct intel_huc {
/* Generic uC firmware management */
struct intel_uc_fw fw;
......@@ -31,7 +38,7 @@ struct intel_huc {
i915_reg_t reg;
u32 mask;
u32 value;
} status;
} status[INTEL_HUC_AUTH_MAX_MODES];
struct {
struct i915_sw_fence fence;
......@@ -39,6 +46,11 @@ struct intel_huc {
struct notifier_block nb;
enum intel_huc_delayed_load_status status;
} delayed_load;
/* for load via GSCCS */
struct i915_vma *heci_pkt;
bool loaded_via_gsc;
};
int intel_huc_sanitize(struct intel_huc *huc);
......@@ -46,11 +58,13 @@ void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
void intel_huc_suspend(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc);
int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type);
bool intel_huc_is_authenticated(struct intel_huc *huc,
enum intel_huc_authentication_type type);
int intel_huc_check_status(struct intel_huc *huc);
void intel_huc_update_auth_status(struct intel_huc *huc);
bool intel_huc_is_authenticated(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
......@@ -73,13 +87,13 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
{
return huc->fw.loaded_via_gsc;
return huc->loaded_via_gsc;
}
static inline bool intel_huc_wait_required(struct intel_huc *huc)
{
return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
!intel_huc_is_authenticated(huc);
!intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC);
}
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
......
......@@ -5,10 +5,241 @@
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
#include "intel_gsc_binary_headers.h"
#include "intel_gsc_uc_heci_cmd_submit.h"
#include "intel_huc.h"
#include "intel_huc_fw.h"
#include "intel_huc_print.h"
#include "i915_drv.h"
#include "pxp/intel_pxp_huc.h"
#include "pxp/intel_pxp_cmd_interface_43.h"
struct mtl_huc_auth_msg_in {
struct intel_gsc_mtl_header header;
struct pxp43_new_huc_auth_in huc_in;
} __packed;
struct mtl_huc_auth_msg_out {
struct intel_gsc_mtl_header header;
struct pxp43_huc_auth_out huc_out;
} __packed;
int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
struct drm_i915_private *i915 = gt->i915;
struct drm_i915_gem_object *obj;
struct mtl_huc_auth_msg_in *msg_in;
struct mtl_huc_auth_msg_out *msg_out;
void *pkt_vaddr;
u64 pkt_offset;
int retry = 5;
int err = 0;
if (!huc->heci_pkt)
return -ENODEV;
obj = huc->heci_pkt->obj;
pkt_offset = i915_ggtt_offset(huc->heci_pkt);
pkt_vaddr = i915_gem_object_pin_map_unlocked(obj,
i915_coherent_map_type(i915, obj, true));
if (IS_ERR(pkt_vaddr))
return PTR_ERR(pkt_vaddr);
msg_in = pkt_vaddr;
msg_out = pkt_vaddr + PXP43_HUC_AUTH_INOUT_SIZE;
intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header,
HECI_MEADDRESS_PXP,
sizeof(*msg_in), 0);
msg_in->huc_in.header.api_version = PXP_APIVER(4, 3);
msg_in->huc_in.header.command_id = PXP43_CMDID_NEW_HUC_AUTH;
msg_in->huc_in.header.status = 0;
msg_in->huc_in.header.buffer_len = sizeof(msg_in->huc_in) -
sizeof(msg_in->huc_in.header);
msg_in->huc_in.huc_base_address = huc->fw.vma_res.start;
msg_in->huc_in.huc_size = huc->fw.obj->base.size;
do {
err = intel_gsc_uc_heci_cmd_submit_packet(&gt->uc.gsc,
pkt_offset, sizeof(*msg_in),
pkt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
PXP43_HUC_AUTH_INOUT_SIZE);
if (err) {
huc_err(huc, "failed to submit GSC request to auth: %d\n", err);
goto out_unpin;
}
if (msg_out->header.flags & GSC_OUTFLAG_MSG_PENDING) {
msg_in->header.gsc_message_handle = msg_out->header.gsc_message_handle;
err = -EBUSY;
msleep(50);
}
} while (--retry && err == -EBUSY);
if (err)
goto out_unpin;
if (msg_out->header.message_size != sizeof(*msg_out)) {
huc_err(huc, "invalid GSC reply length %u [expected %zu]\n",
msg_out->header.message_size, sizeof(*msg_out));
err = -EPROTO;
goto out_unpin;
}
/*
* The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
* loaded. If the same error is ever returned with HuC not loaded we'll
* still catch it when we check the authentication bit later.
*/
if (msg_out->huc_out.header.status != PXP_STATUS_SUCCESS &&
msg_out->huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) {
huc_err(huc, "auth failed with GSC error = 0x%x\n",
msg_out->huc_out.header.status);
err = -EIO;
goto out_unpin;
}
out_unpin:
i915_gem_object_unpin_map(obj);
return err;
}
static void get_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, const void *data)
{
const struct intel_gsc_manifest_header *manifest = data;
ver->major = manifest->fw_version.major;
ver->minor = manifest->fw_version.minor;
ver->patch = manifest->fw_version.hotfix;
}
static bool css_valid(const void *data, size_t size)
{
const struct uc_css_header *css = data;
if (unlikely(size < sizeof(struct uc_css_header)))
return false;
if (css->module_type != 0x6)
return false;
if (css->module_vendor != PCI_VENDOR_ID_INTEL)
return false;
return true;
}
static inline u32 entry_offset(const struct intel_gsc_cpd_entry *entry)
{
return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK;
}
int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size)
{
struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
const struct intel_gsc_cpd_header_v2 *header = data;
const struct intel_gsc_cpd_entry *entry;
size_t min_size = sizeof(*header);
int i;
if (!huc_fw->has_gsc_headers) {
huc_err(huc, "Invalid FW type for GSC header parsing!\n");
return -EINVAL;
}
if (size < sizeof(*header)) {
huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
return -ENODATA;
}
/*
* The GSC-enabled HuC binary starts with a directory header, followed
* by a series of entries. Each entry is identified by a name and
* points to a specific section of the binary containing the relevant
* data. The entries we're interested in are:
* - "HUCP.man": points to the GSC manifest header for the HuC, which
* contains the version info.
* - "huc_fw": points to the legacy-style binary that can be used for
* load via the DMA. This entry only contains a valid CSS
* on binaries for platforms that support 2-step HuC load
* via dma and auth via GSC (like MTL).
*
* --------------------------------------------------
* [ intel_gsc_cpd_header_v2 ]
* --------------------------------------------------
* [ intel_gsc_cpd_entry[] ]
* [ entry1 ]
* [ ... ]
* [ entryX ]
* [ "HUCP.man" ]
* [ ... ]
* [ offset >----------------------------]------o
* [ ... ] |
* [ entryY ] |
* [ "huc_fw" ] |
* [ ... ] |
* [ offset >----------------------------]----------o
* -------------------------------------------------- | |
* | |
* -------------------------------------------------- | |
* [ intel_gsc_manifest_header ]<-----o |
* [ ... ] |
* [ intel_gsc_version fw_version ] |
* [ ... ] |
* -------------------------------------------------- |
* |
* -------------------------------------------------- |
* [ data[] ]<---------o
* [ ... ]
* [ ... ]
* --------------------------------------------------
*/
if (header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) {
huc_err(huc, "invalid marker for CPD header: 0x%08x!\n",
header->header_marker);
return -EINVAL;
}
/* we only have binaries with header v2 and entry v1 for now */
if (header->header_version != 2 || header->entry_version != 1) {
huc_err(huc, "invalid CPD header/entry version %u:%u!\n",
header->header_version, header->entry_version);
return -EINVAL;
}
if (header->header_length < sizeof(struct intel_gsc_cpd_header_v2)) {
huc_err(huc, "invalid CPD header length %u!\n",
header->header_length);
return -EINVAL;
}
min_size = header->header_length + sizeof(*entry) * header->num_of_entries;
if (size < min_size) {
huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
return -ENODATA;
}
entry = data + header->header_length;
for (i = 0; i < header->num_of_entries; i++, entry++) {
if (strcmp(entry->name, "HUCP.man") == 0)
get_version_from_gsc_manifest(&huc_fw->file_selected.ver,
data + entry_offset(entry));
if (strcmp(entry->name, "huc_fw") == 0) {
u32 offset = entry_offset(entry);
if (offset < size && css_valid(data + offset, size - offset))
huc_fw->dma_start_offset = offset;
}
}
return 0;
}
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
{
......@@ -25,7 +256,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
* component gets re-bound and this function called again. If so, just
* mark the HuC as loaded.
*/
if (intel_huc_is_authenticated(huc)) {
if (intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC)) {
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
return 0;
}
......@@ -38,7 +269,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
return intel_huc_wait_for_auth_complete(huc);
return intel_huc_wait_for_auth_complete(huc, INTEL_HUC_AUTH_BY_GSC);
}
/**
......
......@@ -7,8 +7,12 @@
#define _INTEL_HUC_FW_H_
struct intel_huc;
struct intel_uc_fw;
#include <linux/types.h>
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size);
#endif
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef __INTEL_HUC_PRINT__
#define __INTEL_HUC_PRINT__
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
#define huc_printk(_huc, _level, _fmt, ...) \
gt_##_level(huc_to_gt(_huc), "HuC: " _fmt, ##__VA_ARGS__)
#define huc_err(_huc, _fmt, ...) huc_printk((_huc), err, _fmt, ##__VA_ARGS__)
#define huc_warn(_huc, _fmt, ...) huc_printk((_huc), warn, _fmt, ##__VA_ARGS__)
#define huc_notice(_huc, _fmt, ...) huc_printk((_huc), notice, _fmt, ##__VA_ARGS__)
#define huc_info(_huc, _fmt, ...) huc_printk((_huc), info, _fmt, ##__VA_ARGS__)
#define huc_dbg(_huc, _fmt, ...) huc_printk((_huc), dbg, _fmt, ##__VA_ARGS__)
#define huc_probe_error(_huc, _fmt, ...) huc_printk((_huc), probe_error, _fmt, ##__VA_ARGS__)
#endif /* __INTEL_HUC_PRINT__ */
......@@ -538,7 +538,7 @@ static int __uc_init_hw(struct intel_uc *uc)
if (intel_huc_is_loaded_by_gsc(huc))
intel_huc_update_auth_status(huc);
else
intel_huc_auth(huc);
intel_huc_auth(huc, INTEL_HUC_AUTH_BY_GUC);
if (intel_uc_uses_guc_submission(uc)) {
ret = intel_guc_submission_enable(guc);
......@@ -700,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc)
}
}
static void __uc_resume_mappings(struct intel_uc *uc)
{
intel_uc_fw_resume_mapping(&uc->guc.fw);
intel_uc_fw_resume_mapping(&uc->huc.fw);
}
static int __uc_resume(struct intel_uc *uc, bool enable_communication)
{
struct intel_guc *guc = &uc->guc;
......@@ -767,4 +773,6 @@ static const struct intel_uc_ops uc_ops_on = {
.init_hw = __uc_init_hw,
.fini_hw = __uc_fini_hw,
.resume_mappings = __uc_resume_mappings,
};
......@@ -24,6 +24,7 @@ struct intel_uc_ops {
void (*fini)(struct intel_uc *uc);
int (*init_hw)(struct intel_uc *uc);
void (*fini_hw)(struct intel_uc *uc);
void (*resume_mappings)(struct intel_uc *uc);
};
struct intel_uc {
......@@ -114,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0);
intel_uc_ops_function(fini, fini, void, );
intel_uc_ops_function(init_hw, init_hw, int, 0);
intel_uc_ops_function(fini_hw, fini_hw, void, );
intel_uc_ops_function(resume_mappings, resume_mappings, void, );
#undef intel_uc_ops_function
#endif
......@@ -99,20 +99,28 @@ struct intel_uc_fw {
struct drm_i915_gem_object *obj;
/**
* @dummy: A vma used in binding the uc fw to ggtt. We can't define this
* vma on the stack as it can lead to a stack overflow, so we define it
* here. Safe to have 1 copy per uc fw because the binding is single
* threaded as it done during driver load (inherently single threaded)
* or during a GT reset (mutex guarantees single threaded).
* @needs_ggtt_mapping: indicates whether the fw object needs to be
* pinned to ggtt. If true, the fw is pinned at init time and unpinned
* during driver unload.
*/
struct i915_vma_resource dummy;
bool needs_ggtt_mapping;
/**
* @vma_res: A vma resource used in binding the uc fw to ggtt. The fw is
* pinned in a reserved area of the ggtt (above the maximum address
* usable by GuC); therefore, we can't use the normal vma functions to
* do the pinning and we instead use this resource to do so.
*/
struct i915_vma_resource vma_res;
struct i915_vma *rsa_data;
u32 rsa_size;
u32 ucode_size;
u32 private_data_size;
bool loaded_via_gsc;
u32 dma_start_offset;
bool has_gsc_headers;
};
/*
......@@ -282,12 +290,14 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
}
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_type type);
enum intel_uc_fw_type type,
bool needs_ggtt_mapping);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw);
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err);
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
......
......@@ -84,10 +84,4 @@ struct uc_css_header {
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
#define HUC_GSC_VERSION_HI_DW 44
#define HUC_GSC_MAJOR_VER_HI_MASK (0xFF << 0)
#define HUC_GSC_MINOR_VER_HI_MASK (0xFF << 16)
#define HUC_GSC_VERSION_LO_DW 45
#define HUC_GSC_PATCH_VER_LO_MASK (0xFF << 0)
#endif /* _INTEL_UC_FW_ABI_H */
......@@ -243,8 +243,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
if (ret < 0)
goto err_rootgt;
i915_drm_clients_init(&dev_priv->clients, dev_priv);
i915_gem_init_early(dev_priv);
/* This must be called before any calls to HAS_PCH_* */
......@@ -278,7 +276,6 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
intel_power_domains_cleanup(dev_priv);
i915_gem_cleanup_early(dev_priv);
intel_gt_driver_late_release_all(dev_priv);
i915_drm_clients_fini(&dev_priv->clients);
intel_region_ttm_device_fini(dev_priv);
vlv_suspend_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
......@@ -1706,7 +1703,7 @@ static const struct file_operations i915_driver_fops = {
.compat_ioctl = i915_ioc32_compat_ioctl,
.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
.show_fdinfo = i915_drm_client_fdinfo,
.show_fdinfo = drm_show_fdinfo,
#endif
};
......@@ -1806,6 +1803,7 @@ static const struct drm_driver i915_drm_driver = {
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
.postclose = i915_driver_postclose,
.show_fdinfo = i915_drm_client_fdinfo,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
......
......@@ -17,64 +17,29 @@
#include "i915_gem.h"
#include "i915_utils.h"
void i915_drm_clients_init(struct i915_drm_clients *clients,
struct drm_i915_private *i915)
{
clients->i915 = i915;
clients->next_id = 0;
xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
}
struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients)
struct i915_drm_client *i915_drm_client_alloc(void)
{
struct i915_drm_client *client;
struct xarray *xa = &clients->xarray;
int ret;
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client)
return ERR_PTR(-ENOMEM);
xa_lock_irq(xa);
ret = __xa_alloc_cyclic(xa, &client->id, client, xa_limit_32b,
&clients->next_id, GFP_KERNEL);
xa_unlock_irq(xa);
if (ret < 0)
goto err;
return NULL;
kref_init(&client->kref);
spin_lock_init(&client->ctx_lock);
INIT_LIST_HEAD(&client->ctx_list);
client->clients = clients;
return client;
err:
kfree(client);
return ERR_PTR(ret);
}
void __i915_drm_client_free(struct kref *kref)
{
struct i915_drm_client *client =
container_of(kref, typeof(*client), kref);
struct xarray *xa = &client->clients->xarray;
unsigned long flags;
xa_lock_irqsave(xa, flags);
__xa_erase(xa, client->id);
xa_unlock_irqrestore(xa, flags);
kfree(client);
}
void i915_drm_clients_fini(struct i915_drm_clients *clients)
{
GEM_BUG_ON(!xa_empty(&clients->xarray));
xa_destroy(&clients->xarray);
}
#ifdef CONFIG_PROC_FS
static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_RENDER] = "render",
......@@ -101,38 +66,34 @@ static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
}
static void
show_client_class(struct seq_file *m,
show_client_class(struct drm_printer *p,
struct drm_i915_private *i915,
struct i915_drm_client *client,
unsigned int class)
{
const struct list_head *list = &client->ctx_list;
const unsigned int capacity = i915->engine_uabi_class_count[class];
u64 total = atomic64_read(&client->past_runtime[class]);
const unsigned int capacity =
client->clients->i915->engine_uabi_class_count[class];
struct i915_gem_context *ctx;
rcu_read_lock();
list_for_each_entry_rcu(ctx, list, client_link)
list_for_each_entry_rcu(ctx, &client->ctx_list, client_link)
total += busy_add(ctx, class);
rcu_read_unlock();
if (capacity)
seq_printf(m, "drm-engine-%s:\t%llu ns\n",
drm_printf(p, "drm-engine-%s:\t%llu ns\n",
uabi_class_names[class], total);
if (capacity > 1)
seq_printf(m, "drm-engine-capacity-%s:\t%u\n",
drm_printf(p, "drm-engine-capacity-%s:\t%u\n",
uabi_class_names[class],
capacity);
}
void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
{
struct drm_file *file = f->private_data;
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_private *i915 = file_priv->i915;
struct i915_drm_client *client = file_priv->client;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
unsigned int i;
/*
......@@ -141,16 +102,10 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
* ******************************************************************
*/
seq_printf(m, "drm-driver:\t%s\n", i915->drm.driver->name);
seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n",
pci_domain_nr(pdev->bus), pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
seq_printf(m, "drm-client-id:\t%u\n", client->id);
if (GRAPHICS_VER(i915) < 8)
return;
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
show_client_class(m, client, i);
show_client_class(p, i915, file_priv->client, i);
}
#endif
......@@ -9,20 +9,13 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/xarray.h>
#include <uapi/drm/i915_drm.h>
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
struct drm_i915_private;
struct i915_drm_clients {
struct drm_i915_private *i915;
struct xarray xarray;
u32 next_id;
};
struct drm_file;
struct drm_printer;
struct i915_drm_client {
struct kref kref;
......@@ -32,17 +25,12 @@ struct i915_drm_client {
spinlock_t ctx_lock; /* For add/remove from ctx_list. */
struct list_head ctx_list; /* List of contexts belonging to client. */
struct i915_drm_clients *clients;
/**
* @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
*/
atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
};
void i915_drm_clients_init(struct i915_drm_clients *clients,
struct drm_i915_private *i915);
static inline struct i915_drm_client *
i915_drm_client_get(struct i915_drm_client *client)
{
......@@ -57,12 +45,10 @@ static inline void i915_drm_client_put(struct i915_drm_client *client)
kref_put(&client->kref, __i915_drm_client_free);
}
struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients);
struct i915_drm_client *i915_drm_client_alloc(void);
#ifdef CONFIG_PROC_FS
void i915_drm_client_fdinfo(struct seq_file *m, struct file *f);
void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
#endif
void i915_drm_clients_fini(struct i915_drm_clients *clients);
#endif /* !__I915_DRM_CLIENT_H__ */
......@@ -314,7 +314,7 @@ struct drm_i915_private {
/*
* i915->gt[0] == &i915->gt0
*/
#define I915_MAX_GT 4
#define I915_MAX_GT 2
struct intel_gt *gt[I915_MAX_GT];
struct kobject *sysfs_gt;
......@@ -348,8 +348,6 @@ struct drm_i915_private {
struct i915_pmu pmu;
struct i915_drm_clients clients;
/* The TTM device structure. */
struct ttm_device bdev;
......
......@@ -1325,11 +1325,9 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
if (!file_priv)
goto err_alloc;
client = i915_drm_client_add(&i915->clients);
if (IS_ERR(client)) {
ret = PTR_ERR(client);
client = i915_drm_client_alloc();
if (!client)
goto err_client;
}
file->driver_priv = file_priv;
file_priv->i915 = i915;
......
......@@ -100,6 +100,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = sseu->min_eu_in_pool;
break;
case I915_PARAM_HUC_STATUS:
/* On platform with a media GT, the HuC is on that GT */
if (i915->media_gt)
value = intel_huc_check_status(&i915->media_gt->uc.huc);
else
value = intel_huc_check_status(&to_gt(i915)->uc.huc);
if (value < 0)
return value;
......
......@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
*
* Besides returning true when there is data available to read() this function
* also updates the tail, aging_tail and aging_timestamp in the oa_buffer
* object.
* also updates the tail in the oa_buffer object.
*
* Note: It's safe to read OA config state here unlocked, assuming that this is
* only called while the stream is enabled, while the global OA configuration
......@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format->size;
u32 head, tail, read_tail;
unsigned long flags;
bool pollin;
u32 hw_tail;
u64 now;
u32 partial_report_size;
/* We have to consider the (unlikely) possibility that read() errors
......@@ -566,28 +565,15 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
partial_report_size %= report_size;
/* Subtract partial amount off the tail */
hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
now = ktime_get_mono_fast_ns();
if (hw_tail == stream->oa_buffer.aging_tail &&
(now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
/* If the HW tail hasn't move since the last check and the HW
* tail has been aging for long enough, declare it the new
* tail.
*/
stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
} else {
u32 head, tail, aged_tail;
hw_tail = OA_TAKEN(hw_tail, partial_report_size);
/* NB: The head we observe here might effectively be a little
* out of date. If a read() is in progress, the head could be
* anywhere between this head and stream->oa_buffer.tail.
*/
head = stream->oa_buffer.head - gtt_offset;
aged_tail = stream->oa_buffer.tail - gtt_offset;
read_tail = stream->oa_buffer.tail - gtt_offset;
hw_tail -= gtt_offset;
tail = hw_tail;
/* Walk the stream backward until we find a report with report
......@@ -601,7 +587,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
* memory in the order they were written to.
* If not : (╯°□°)╯︵ ┻━┻
*/
while (OA_TAKEN(tail, aged_tail) >= report_size) {
while (OA_TAKEN(tail, read_tail) >= report_size) {
void *report = stream->oa_buffer.vaddr + tail;
if (oa_report_id(stream, report) ||
......@@ -618,12 +604,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
head, tail, hw_tail);
stream->oa_buffer.tail = gtt_offset + tail;
stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
stream->oa_buffer.aging_timestamp = now;
}
pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
stream->oa_buffer.head - gtt_offset) >= report_size;
pollin = OA_TAKEN(stream->oa_buffer.tail,
stream->oa_buffer.head) >= report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
......@@ -877,12 +860,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
if (is_power_of_2(report_size)) {
/*
* Clear out the report id and timestamp as a means to detect unlanded
* reports.
* Clear out the report id and timestamp as a means
* to detect unlanded reports.
*/
oa_report_id_clear(stream, report32);
oa_timestamp_clear(stream, report32);
} else {
/* Zero out the entire report */
memset(report32, 0, report_size);
}
}
if (start_offset != *offset) {
......@@ -1722,7 +1710,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
......@@ -1774,7 +1761,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset;
/*
......@@ -1828,7 +1814,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset;
/*
......
......@@ -312,18 +312,6 @@ struct i915_perf_stream {
*/
spinlock_t ptr_lock;
/**
* @aging_tail: The last HW tail reported by HW. The data
* might not have made it to memory yet though.
*/
u32 aging_tail;
/**
* @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
* was read; used to determine when it is old enough to trust.
*/
u64 aging_timestamp;
/**
* @head: Although we can always read back the head pointer register,
* we prefer to avoid trusting the HW state, just to avoid any
......
......@@ -132,14 +132,14 @@ static u32 frequency_enabled_mask(void)
unsigned int i;
u32 mask = 0;
for (i = 0; i < I915_PMU_MAX_GTS; i++)
for (i = 0; i < I915_PMU_MAX_GT; i++)
mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
return mask;
}
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
static bool pmu_needs_timer(struct i915_pmu *pmu)
{
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
u32 enable;
......@@ -157,17 +157,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
*/
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
/*
* When the GPU is idle per-engine counters do not need to be
* running so clear those bits out.
*/
if (!gpu_active)
enable &= ~ENGINE_SAMPLE_MASK;
/*
* Also there is software busyness tracking available we do not
* need the timer for I915_SAMPLE_BUSY counter.
*/
else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
enable &= ~BIT(I915_SAMPLE_BUSY);
/*
......@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}
static unsigned int
__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
return idx;
}
static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
return pmu->sample[gt_id][sample].cur;
}
static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{
pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
pmu->sample[gt_id][sample].cur = val;
}
static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
}
static u64 get_rc6(struct intel_gt *gt)
......@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt)
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
pmu->timer_enabled = true;
pmu->timer_last = ktime_get();
hrtimer_start_range_ns(&pmu->timer,
......@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
*/
pmu->unparked &= ~BIT(gt->info.id);
if (pmu->unparked == 0)
pmu->timer_enabled = pmu_needs_timer(pmu, false);
pmu->timer_enabled = false;
spin_unlock_irq(&pmu->lock);
}
......@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event)
*/
if (--pmu->enable_count[bit] == 0) {
pmu->enable &= ~BIT(bit);
pmu->timer_enabled &= pmu_needs_timer(pmu, true);
pmu->timer_enabled &= pmu_needs_timer(pmu);
}
spin_unlock_irqrestore(&pmu->lock, flags);
......
......@@ -38,7 +38,7 @@ enum {
__I915_NUM_PMU_SAMPLERS
};
#define I915_PMU_MAX_GTS 2
#define I915_PMU_MAX_GT 2
/*
* How many different events we track in the global PMU mask.
......@@ -47,7 +47,7 @@ enum {
*/
#define I915_PMU_MASK_BITS \
(I915_ENGINE_SAMPLE_COUNT + \
I915_PMU_MAX_GTS * __I915_PMU_TRACKED_EVENT_COUNT)
I915_PMU_MAX_GT * __I915_PMU_TRACKED_EVENT_COUNT)
#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
......@@ -127,11 +127,11 @@ struct i915_pmu {
* Only global counters are held here, while the per-engine ones are in
* struct intel_engine_cs.
*/
struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
struct i915_pmu_sample sample[I915_PMU_MAX_GT][__I915_NUM_PMU_SAMPLERS];
/**
* @sleep_last: Last time GT parked for RC6 estimation.
*/
ktime_t sleep_last[I915_PMU_MAX_GTS];
ktime_t sleep_last[I915_PMU_MAX_GT];
/**
* @irq_count: Number of interrupts
*
......
......@@ -941,6 +941,9 @@
#define HECI_H_GS1(base) _MMIO((base) + 0xc4c)
#define HECI_H_GS1_ER_PREP REG_BIT(0)
#define HECI_FWSTS5(base) _MMIO((base) + 0xc68)
#define HECI_FWSTS5_HUC_AUTH_DONE (1 << 19)
#define HSW_GTT_CACHE_EN _MMIO(0x4024)
#define GTT_CACHE_EN_ALL 0xF0007FFF
#define GEN7_WR_WATERMARK _MMIO(0x4028)
......
......@@ -11,19 +11,30 @@
/* PXP-Cmd-Op definitions */
#define PXP43_CMDID_START_HUC_AUTH 0x0000003A
#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
#define PXP43_CMDID_INIT_SESSION 0x00000036
/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */
#define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K)
/* PXP-Input-Packet: HUC-Authentication */
/* PXP-Packet size for MTL's NEW_HUC_AUTH instruction */
#define PXP43_HUC_AUTH_INOUT_SIZE (SZ_4K)
/* PXP-Input-Packet: HUC Load and Authentication */
struct pxp43_start_huc_auth_in {
struct pxp_cmd_header header;
__le64 huc_base_address;
} __packed;
/* PXP-Output-Packet: HUC-Authentication */
struct pxp43_start_huc_auth_out {
/* PXP-Input-Packet: HUC Auth-only */
struct pxp43_new_huc_auth_in {
struct pxp_cmd_header header;
u64 huc_base_address;
u32 huc_size;
} __packed;
/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */
struct pxp43_huc_auth_out {
struct pxp_cmd_header header;
} __packed;
......
......@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp,
reply_size = header->message_size - sizeof(*header);
if (reply_size > msg_out_size_max) {
drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%zu)\n",
reply_size, msg_out_size_max);
reply_size = msg_out_size_max;
}
......@@ -196,7 +196,7 @@ bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp)
* gsc-proxy init flow (the last set of dependencies that
* are out of order) will suffice.
*/
if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc) &&
if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc, INTEL_HUC_AUTH_BY_GSC) &&
intel_gsc_uc_fw_proxy_init_done(&pxp->ctrl_gt->uc.gsc))
return true;
......
......@@ -19,7 +19,7 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp)
struct intel_gt *gt;
struct intel_huc *huc;
struct pxp43_start_huc_auth_in huc_in = {0};
struct pxp43_start_huc_auth_out huc_out = {0};
struct pxp43_huc_auth_out huc_out = {0};
dma_addr_t huc_phys_addr;
u8 client_id = 0;
u8 fence_id = 0;
......
......@@ -674,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
* * 1 if HuC firmware is authenticated and running.
* * 1 if HuC firmware is loaded and fully authenticated,
* * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
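A minimal userspace sketch, not part of the diff, of reading this parameter: fd is assumed to be an open i915 DRM file descriptor, and the helper name and header path are illustrative assumptions.

#include <errno.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: returns 0, 1 or 2 as documented above, or -errno. */
static int query_huc_status(int fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,
		.value = &value,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -errno;

	return value;
}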
......@@ -3679,9 +3680,13 @@ struct drm_i915_gem_create_ext {
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
*
* For I915_GEM_CREATE_EXT_SET_PAT usage see
* struct drm_i915_gem_create_ext_set_pat.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions;
};
......@@ -3796,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
};
/**
* struct drm_i915_gem_create_ext_set_pat - The
* I915_GEM_CREATE_EXT_SET_PAT extension.
*
* If this extension is provided, the specified caching policy (PAT index) is
* applied to the buffer object.
*
* Below is an example on how to create an object with specific caching policy:
*
* .. code-block:: C
*
* struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
* .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
* .pat_index = 0,
* };
* struct drm_i915_gem_create_ext create_ext = {
* .size = PAGE_SIZE,
* .extensions = (uintptr_t)&set_pat_ext,
* };
*
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
* if (err) ...
*/
struct drm_i915_gem_create_ext_set_pat {
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/**
* @pat_index: PAT index to be set
* PAT index is a bit field in Page Table Entry to control caching
* behaviors for GPU accesses. The definition of PAT index is
* platform dependent and can be found in hardware specifications,
*/
__u32 pat_index;
/** @rsvd: reserved for future use */
__u32 rsvd;
};
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
......
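The kernel-doc example above sets only the PAT index. As a minimal sketch (not part of the diff), create extensions can also be chained through i915_user_extension.next_extension, for instance combining I915_GEM_CREATE_EXT_MEMORY_REGIONS with the new I915_GEM_CREATE_EXT_SET_PAT; the fd and the pat_index value below are placeholders, and valid indices are platform dependent.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Place the object in device-local memory and set its caching policy
 * in a single DRM_IOCTL_I915_GEM_CREATE_EXT call. */
struct drm_i915_gem_memory_class_instance region = {
	.memory_class = I915_MEMORY_CLASS_DEVICE,
	.memory_instance = 0,
};
struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
	.base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
	.pat_index = 3, /* placeholder; must not exceed the platform's max PAT index */
};
struct drm_i915_gem_create_ext_memory_regions regions_ext = {
	.base = {
		.name = I915_GEM_CREATE_EXT_MEMORY_REGIONS,
		.next_extension = (uintptr_t)&set_pat_ext,
	},
	.num_regions = 1,
	.regions = (uintptr_t)&region,
};
struct drm_i915_gem_create_ext create_ext = {
	.size = 4096,
	.extensions = (uintptr_t)&regions_ext,
};

int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
if (err) ...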