Commit ba57b9b1 authored by Dave Airlie

Merge tag 'drm-intel-gt-next-2023-06-08' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- I915_GEM_CREATE_EXT_SET_PAT for Mesa on Meteorlake.
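
  A minimal userspace sketch of how a UMD such as Mesa might use the new extension (illustrative only, not part of this merge; the 4 KiB size and the pat_index value are placeholders, and valid indices are platform specific):

    #include <errno.h>
    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    /* Create a 4 KiB BO with an explicit PAT index on Meteor Lake. */
    static int create_bo_with_pat(int fd, uint32_t pat_index, uint32_t *handle)
    {
        struct drm_i915_gem_create_ext_set_pat set_pat = {
            .base.name = I915_GEM_CREATE_EXT_SET_PAT,
            .pat_index = pat_index,            /* must not exceed the platform's max PAT index */
        };
        struct drm_i915_gem_create_ext create = {
            .size = 4096,
            .extensions = (uintptr_t)&set_pat, /* single-entry extension chain */
        };

        /* The kernel returns -ENODEV on non-MTL and -EINVAL for an out-of-range index. */
        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
            return -errno;

        *handle = create.handle;
        return 0;
    }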

Driver Changes:

Fixes/improvements/new stuff:

- Use large rings for compute contexts (Chris Wilson)
- Better logging/debug of unexpected GuC communication issues (Michal Wajdeczko)
- Clear out entire reports after reading if not power of 2 size (Ashutosh Dixit)
- Limit lmem allocation size to succeed on SmallBars (Andrzej Hajda)
- perf/OA capture robustness improvements on DG2 (Umesh Nerlige Ramappa)
- Fix error code in intel_gsc_uc_heci_cmd_submit_nonpriv() (Dan Carpenter)

Future platform enablement:

- Add workaround 14016712196 (Tejas Upadhyay)
- HuC loading for MTL (Daniele Ceraolo Spurio)
- Allow user to set cache at BO creation (Fei Yang)

Miscellaneous:

- Use system include style for drm headers (Jani Nikula)
- Drop legacy CTB definitions (Michal Wajdeczko)
- Turn off the timer to sample frequencies when GT is parked (Ashutosh Dixit)
- Make PMU sample array two-dimensional (Ashutosh Dixit)
- Use the correct error value when kernel_context() fails (Andi Shyti)
- Fix second parameter type of pre-gen8 pte_encode callbacks (Nathan Chancellor)
- Fix parameter in gmch_ggtt_insert_{entries, page}() (Nathan Chancellor)
- Fix size_t format specifier in gsccs_send_message() (Nathan Chancellor)
- Use the fdinfo helper (Tvrtko Ursulin)
- Add some missing error propagation (Tvrtko Ursulin)
- Reduce I915_MAX_GT to 2 (Matt Atwood)
- Rename I915_PMU_MAX_GTS to I915_PMU_MAX_GT (Matt Atwood)
- Remove some obsolete definitions (John Harrison)

Merges:

- Merge drm/drm-next into drm-intel-gt-next (Tvrtko Ursulin)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZIH09fqe5v5yArsu@tursulin-desk
parents 959294e4 24335848
...@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG ...@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
config DRM_I915_DEBUG_GUC config DRM_I915_DEBUG_GUC
bool "Enable additional driver debugging for GuC" bool "Enable additional driver debugging for GuC"
depends on DRM_I915 depends on DRM_I915
select STACKDEPOT
default n default n
help help
Choose this option to turn on extra driver debugging that may affect Choose this option to turn on extra driver debugging that may affect
......
...@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce, ...@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
RCU_INIT_POINTER(ce->gem_context, ctx); RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce)); GEM_BUG_ON(intel_context_is_pinned(ce));
ce->ring_size = SZ_16K;
if (ce->engine->class == COMPUTE_CLASS)
ce->ring_size = SZ_512K;
else
ce->ring_size = SZ_16K;
i915_vm_put(ce->vm); i915_vm_put(ce->vm);
ce->vm = i915_gem_context_get_eb_vm(ctx); ce->vm = i915_gem_context_get_eb_vm(ctx);
......
...@@ -245,6 +245,7 @@ struct create_ext { ...@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements; unsigned int n_placements;
unsigned int placement_mask; unsigned int placement_mask;
unsigned long flags; unsigned long flags;
unsigned int pat_index;
}; };
static void repr_placements(char *buf, size_t size, static void repr_placements(char *buf, size_t size,
...@@ -394,11 +395,43 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data ...@@ -394,11 +395,43 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
return 0; return 0;
} }
static int ext_set_pat(struct i915_user_extension __user *base, void *data)
{
struct create_ext *ext_data = data;
struct drm_i915_private *i915 = ext_data->i915;
struct drm_i915_gem_create_ext_set_pat ext;
unsigned int max_pat_index;
BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
/* Limiting the extension only to Meteor Lake */
if (!IS_METEORLAKE(i915))
return -ENODEV;
if (copy_from_user(&ext, base, sizeof(ext)))
return -EFAULT;
max_pat_index = INTEL_INFO(i915)->max_pat_index;
if (ext.pat_index > max_pat_index) {
drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
ext.pat_index);
return -EINVAL;
}
ext_data->pat_index = ext.pat_index;
return 0;
}
static const i915_user_extension_fn create_extensions[] = { static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
}; };
#define PAT_INDEX_NOT_SET 0xffff
/** /**
* i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
* @dev: drm device pointer * @dev: drm device pointer
...@@ -418,6 +451,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, ...@@ -418,6 +451,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL; return -EINVAL;
ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions), ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
create_extensions, create_extensions,
ARRAY_SIZE(create_extensions), ARRAY_SIZE(create_extensions),
...@@ -454,5 +488,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, ...@@ -454,5 +488,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(obj)) if (IS_ERR(obj))
return PTR_ERR(obj); return PTR_ERR(obj);
if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
i915_gem_object_set_pat_index(obj, ext_data.pat_index);
/* Mark that pat_index was set by the UMD */
obj->pat_set_by_user = true;
}
return i915_gem_publish(obj, file, &args->size, &args->handle); return i915_gem_publish(obj, file, &args->size, &args->handle);
} }
...@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) ...@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER)) if (!(obj->flags & I915_BO_ALLOC_USER))
return false; return false;
/*
* Always flush cache for UMD objects at creation time.
*/
if (obj->pat_set_by_user)
return true;
/* /*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the * possible for userspace to bypass the GTT caching bits set by the
......
...@@ -348,8 +348,10 @@ static int live_parallel_switch(void *arg) ...@@ -348,8 +348,10 @@ static int live_parallel_switch(void *arg)
continue; continue;
ce = intel_context_create(data[m].ce[0]->engine); ce = intel_context_create(data[m].ce[0]->engine);
if (IS_ERR(ce)) if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out; goto out;
}
err = intel_context_pin(ce); err = intel_context_pin(ce);
if (err) { if (err) {
...@@ -369,8 +371,10 @@ static int live_parallel_switch(void *arg) ...@@ -369,8 +371,10 @@ static int live_parallel_switch(void *arg)
worker = kthread_create_worker(0, "igt/parallel:%s", worker = kthread_create_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name); data[n].ce[0]->engine->name);
if (IS_ERR(worker)) if (IS_ERR(worker)) {
err = PTR_ERR(worker);
goto out; goto out;
}
data[n].worker = worker; data[n].worker = worker;
} }
...@@ -399,8 +403,10 @@ static int live_parallel_switch(void *arg) ...@@ -399,8 +403,10 @@ static int live_parallel_switch(void *arg)
} }
} }
if (igt_live_test_end(&t)) if (igt_live_test_end(&t)) {
err = -EIO; err = err ?: -EIO;
break;
}
} }
out: out:
......
...@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv ...@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
return cs; return cs;
} }
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
u32 *cs;
/* dummy PIPE_CONTROL + depth flush */
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen12_emit_pipe_control(cs,
0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH,
LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
return 0;
}
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{ {
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
if (mode & EMIT_FLUSH) { if (mode & EMIT_FLUSH) {
u32 flags = 0; u32 flags = 0;
int err;
u32 *cs; u32 *cs;
err = mtl_dummy_pipe_control(rq);
if (err)
return err;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
...@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) ...@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE) { if (mode & EMIT_INVALIDATE) {
u32 flags = 0; u32 flags = 0;
u32 *cs, count; u32 *cs, count;
int err;
err = mtl_dummy_pipe_control(rq);
if (err)
return err;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_TLB_INVALIDATE;
...@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) ...@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE); PIPE_CONTROL_FLUSH_ENABLE);
/* Wa_14016712196 */
if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
/* Wa_1409600907 */ /* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DEPTH_STALL;
......
...@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ...@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
/* /*
* For pre-gen8 platforms pat_index is the same as enum i915_cache_level, * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
* so these PTE encode functions are left with using cache_level. * so the switch-case statements in these PTE encode functions are still valid.
* See translation table LEGACY_CACHELEVEL. * See translation table LEGACY_CACHELEVEL.
*/ */
static u64 snb_pte_encode(dma_addr_t addr, static u64 snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level, unsigned int pat_index,
u32 flags) u32 flags)
{ {
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
switch (level) { switch (pat_index) {
case I915_CACHE_L3_LLC: case I915_CACHE_L3_LLC:
case I915_CACHE_LLC: case I915_CACHE_LLC:
pte |= GEN6_PTE_CACHE_LLC; pte |= GEN6_PTE_CACHE_LLC;
...@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr, ...@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED; pte |= GEN6_PTE_UNCACHED;
break; break;
default: default:
MISSING_CASE(level); MISSING_CASE(pat_index);
} }
return pte; return pte;
} }
static u64 ivb_pte_encode(dma_addr_t addr, static u64 ivb_pte_encode(dma_addr_t addr,
enum i915_cache_level level, unsigned int pat_index,
u32 flags) u32 flags)
{ {
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
switch (level) { switch (pat_index) {
case I915_CACHE_L3_LLC: case I915_CACHE_L3_LLC:
pte |= GEN7_PTE_CACHE_L3_LLC; pte |= GEN7_PTE_CACHE_L3_LLC;
break; break;
...@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr, ...@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED; pte |= GEN6_PTE_UNCACHED;
break; break;
default: default:
MISSING_CASE(level); MISSING_CASE(pat_index);
} }
return pte; return pte;
} }
static u64 byt_pte_encode(dma_addr_t addr, static u64 byt_pte_encode(dma_addr_t addr,
enum i915_cache_level level, unsigned int pat_index,
u32 flags) u32 flags)
{ {
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
...@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr, ...@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
if (!(flags & PTE_READ_ONLY)) if (!(flags & PTE_READ_ONLY))
pte |= BYT_PTE_WRITEABLE; pte |= BYT_PTE_WRITEABLE;
if (level != I915_CACHE_NONE) if (pat_index != I915_CACHE_NONE)
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
return pte; return pte;
} }
static u64 hsw_pte_encode(dma_addr_t addr, static u64 hsw_pte_encode(dma_addr_t addr,
enum i915_cache_level level, unsigned int pat_index,
u32 flags) u32 flags)
{ {
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
if (level != I915_CACHE_NONE) if (pat_index != I915_CACHE_NONE)
pte |= HSW_WB_LLC_AGE3; pte |= HSW_WB_LLC_AGE3;
return pte; return pte;
} }
static u64 iris_pte_encode(dma_addr_t addr, static u64 iris_pte_encode(dma_addr_t addr,
enum i915_cache_level level, unsigned int pat_index,
u32 flags) u32 flags)
{ {
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
switch (level) { switch (pat_index) {
case I915_CACHE_NONE: case I915_CACHE_NONE:
break; break;
case I915_CACHE_WT: case I915_CACHE_WT:
...@@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) ...@@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start, ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
ggtt->error_capture.size); ggtt->error_capture.size);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
intel_uc_resume_mappings(&gt->uc);
ggtt->invalidate(ggtt); ggtt->invalidate(ggtt);
if (flush) if (flush)
......
...@@ -18,10 +18,10 @@ ...@@ -18,10 +18,10 @@
static void gmch_ggtt_insert_page(struct i915_address_space *vm, static void gmch_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr, dma_addr_t addr,
u64 offset, u64 offset,
enum i915_cache_level cache_level, unsigned int pat_index,
u32 unused) u32 unused)
{ {
unsigned int flags = (cache_level == I915_CACHE_NONE) ? unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
...@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm, ...@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
static void gmch_ggtt_insert_entries(struct i915_address_space *vm, static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res, struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level, unsigned int pat_index,
u32 unused) u32 unused)
{ {
unsigned int flags = (cache_level == I915_CACHE_NONE) ? unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
......
...@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg) ...@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
struct i915_vma *vma; struct i915_vma *vma;
enum intel_engine_id id; enum intel_engine_id id;
int err = -ENOMEM;
u32 *map; u32 *map;
int err;
/* /*
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
...@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg) ...@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
*/ */
ctx_hi = kernel_context(gt->i915, NULL); ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi) if (IS_ERR(ctx_hi))
return -ENOMEM; return PTR_ERR(ctx_hi);
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL); ctx_lo = kernel_context(gt->i915, NULL);
if (!ctx_lo) if (IS_ERR(ctx_lo)) {
err = PTR_ERR(ctx_lo);
goto err_ctx_hi; goto err_ctx_hi;
}
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
......
...@@ -190,11 +190,18 @@ pte_tlbinv(struct intel_context *ce, ...@@ -190,11 +190,18 @@ pte_tlbinv(struct intel_context *ce,
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt) static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{ {
struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
resource_size_t size = SZ_1G;
/* /*
* Allocation of largest possible page size allows to test all types * Allocation of largest possible page size allows to test all types
* of pages. * of pages. To succeed with both allocations, especially in case of Small
* BAR, try to allocate no more than quarter of mappable memory.
*/ */
return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS); if (mr && size > mr->io_size / 4)
size = mr->io_size / 4;
return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
} }
static struct drm_i915_gem_object *create_smem(struct intel_gt *gt) static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
......
...@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); ...@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
* - **flags**, holds various bits to control message handling * - **flags**, holds various bits to control message handling
*/ */
/*
* Definition of the command transport message header (DW0)
*
* bit[4..0] message len (in dwords)
* bit[7..5] reserved
* bit[8] response (G2H only)
* bit[8] write fence to desc (H2G only)
* bit[9] write status to H2G buff (H2G only)
* bit[10] send status back via G2H (H2G only)
* bit[15..11] reserved
* bit[31..16] action code
*/
#define GUC_CT_MSG_LEN_SHIFT 0
#define GUC_CT_MSG_LEN_MASK 0x1F
#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
#define GUC_CT_MSG_SEND_STATUS (1 << 10)
#define GUC_CT_MSG_ACTION_SHIFT 16
#define GUC_CT_MSG_ACTION_MASK 0xFFFF
#endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */ #endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
* | | 30:28 | **TYPE** - message type | * | | 30:28 | **TYPE** - message type |
* | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
* | | | - _`GUC_HXG_TYPE_EVENT` = 1 | * | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
* | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
* | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
...@@ -46,6 +47,7 @@ ...@@ -46,6 +47,7 @@
#define GUC_HXG_MSG_0_TYPE (0x7 << 28) #define GUC_HXG_MSG_0_TYPE (0x7 << 28)
#define GUC_HXG_TYPE_REQUEST 0u #define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u #define GUC_HXG_TYPE_EVENT 1u
#define GUC_HXG_TYPE_FAST_REQUEST 2u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u #define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u #define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u #define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
...@@ -89,6 +91,34 @@ ...@@ -89,6 +91,34 @@
#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) #define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0)
#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
* DOC: HXG Fast Request
*
* The `HXG Fast Request`_ message should be used to initiate asynchronous activity
* for which confirmation or return data is not expected.
*
* If confirmation is required then `HXG Request`_ shall be used instead.
*
* The recipient of this message may only use `HXG Failure`_ message if it was
* unable to accept this request (like invalid data).
*
* Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE.
*
* +---+-------+--------------------------------------------------------------+
* | | Bits | Description |
* +===+=======+==============================================================+
* | 0 | 31 | ORIGIN - see `HXG Message`_ |
* | +-------+--------------------------------------------------------------+
* | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ |
* | +-------+--------------------------------------------------------------+
* | | 27:16 | DATA0 - see `HXG Request`_ |
* | +-------+--------------------------------------------------------------+
* | | 15:0 | ACTION - see `HXG Request`_ |
* +---+-------+--------------------------------------------------------------+
* |...| | DATAn - see `HXG Request`_ |
* +---+-------+--------------------------------------------------------------+
*/
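As an aside (not part of this patch), a minimal sketch of how DW0 of a Fast Request is packed, using the masks above with FIELD_PREP() from <linux/bitfield.h>; the action code is a hypothetical placeholder, and this mirrors what ct_write() does further down for non-blocking (INTEL_GUC_CT_SEND_NB) sends:

    u32 action0 = 0x4600;	/* placeholder action code, DATA0 left at 0 */
    u32 dw0 = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
              FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
                         GUC_HXG_REQUEST_MSG_0_DATA0, action0);
    /* ORIGIN stays 0 (host); no success response follows, only a possible
     * HXG Failure if the GuC cannot accept the request. */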
/** /**
* DOC: HXG Event * DOC: HXG Event
* *
......
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _INTEL_GSC_BINARY_HEADERS_H_
#define _INTEL_GSC_BINARY_HEADERS_H_
#include <linux/types.h>
/* Code partition directory (CPD) structures */
struct intel_gsc_cpd_header_v2 {
u32 header_marker;
#define INTEL_GSC_CPD_HEADER_MARKER 0x44504324
u32 num_of_entries;
u8 header_version;
u8 entry_version;
u8 header_length; /* in bytes */
u8 flags;
u32 partition_name;
u32 crc32;
} __packed;
struct intel_gsc_cpd_entry {
u8 name[12];
/*
* Bits 0-24: offset from the beginning of the code partition
* Bit 25: huffman compressed
* Bits 26-31: reserved
*/
u32 offset;
#define INTEL_GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
#define INTEL_GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
/*
* Module/Item length, in bytes. For Huffman-compressed modules, this
* refers to the uncompressed size. For software-compressed modules,
* this refers to the compressed size.
*/
u32 length;
u8 reserved[4];
} __packed;
struct intel_gsc_version {
u16 major;
u16 minor;
u16 hotfix;
u16 build;
} __packed;
struct intel_gsc_manifest_header {
u32 header_type; /* 0x4 for manifest type */
u32 header_length; /* in dwords */
u32 header_version;
u32 flags;
u32 vendor;
u32 date;
u32 size; /* In dwords, size of entire manifest (header + extensions) */
u32 header_id;
u32 internal_data;
struct intel_gsc_version fw_version;
u32 security_version;
struct intel_gsc_version meu_kit_version;
u32 meu_manifest_version;
u8 general_data[4];
u8 reserved3[56];
u32 modulus_size; /* in dwords */
u32 exponent_size; /* in dwords */
} __packed;
#endif
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
#include <linux/component.h> #include <linux/component.h>
#include "drm/i915_component.h" #include <drm/i915_component.h>
#include "drm/i915_gsc_proxy_mei_interface.h" #include <drm/i915_gsc_proxy_mei_interface.h>
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_gt_print.h" #include "gt/intel_gt_print.h"
......
...@@ -29,13 +29,32 @@ static void gsc_work(struct work_struct *work) ...@@ -29,13 +29,32 @@ static void gsc_work(struct work_struct *work)
if (actions & GSC_ACTION_FW_LOAD) { if (actions & GSC_ACTION_FW_LOAD) {
ret = intel_gsc_uc_fw_upload(gsc); ret = intel_gsc_uc_fw_upload(gsc);
if (ret == -EEXIST) /* skip proxy if not a new load */ if (!ret)
actions &= ~GSC_ACTION_FW_LOAD; /* setup proxy on a new load */
else if (ret) actions |= GSC_ACTION_SW_PROXY;
else if (ret != -EEXIST)
goto out_put; goto out_put;
/*
* The HuC auth can be done either before or after the proxy init;
* if done after, a proxy request will be issued and must be
* serviced before the authentication can complete.
* Since this worker also handles proxy requests, we can't
* perform an action that requires the proxy from within it and
* then stall waiting for it, because we'd be blocking the
* service path. Therefore, it is easier for us to load HuC
* first and do proxy later. The GSC will ack the HuC auth and
* then send the HuC proxy request as part of the proxy init
* flow.
* Note that we can only do the GSC auth if the GuC auth was
* successful.
*/
if (intel_uc_uses_huc(&gt->uc) &&
intel_huc_is_authenticated(&gt->uc.huc, INTEL_HUC_AUTH_BY_GUC))
intel_huc_auth(&gt->uc.huc, INTEL_HUC_AUTH_BY_GSC);
} }
if (actions & (GSC_ACTION_FW_LOAD | GSC_ACTION_SW_PROXY)) { if (actions & GSC_ACTION_SW_PROXY) {
if (!intel_gsc_uc_fw_init_done(gsc)) { if (!intel_gsc_uc_fw_init_done(gsc)) {
gt_err(gt, "Proxy request received with GSC not loaded!\n"); gt_err(gt, "Proxy request received with GSC not loaded!\n");
goto out_put; goto out_put;
...@@ -90,7 +109,12 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) ...@@ -90,7 +109,12 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc)
{ {
struct intel_gt *gt = gsc_uc_to_gt(gsc); struct intel_gt *gt = gsc_uc_to_gt(gsc);
intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC); /*
* GSC FW needs to be copied to a dedicated memory allocation for
* loading (see gsc->local), so we don't need to map the FW image
* itself into the GGTT.
*/
intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC, false);
INIT_WORK(&gsc->work, gsc_work); INIT_WORK(&gsc->work, gsc_work);
/* we can arrive here from i915_driver_early_probe for primary /* we can arrive here from i915_driver_early_probe for primary
......
...@@ -99,7 +99,7 @@ void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header, ...@@ -99,7 +99,7 @@ void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header,
u64 host_session_id) u64 host_session_id)
{ {
host_session_id &= ~HOST_SESSION_MASK; host_session_id &= ~HOST_SESSION_MASK;
if (heci_client_id == HECI_MEADDRESS_PXP) if (host_session_id && heci_client_id == HECI_MEADDRESS_PXP)
host_session_id |= HOST_SESSION_PXP_SINGLE; host_session_id |= HOST_SESSION_PXP_SINGLE;
header->validity_marker = GSC_HECI_VALIDITY_MARKER; header->validity_marker = GSC_HECI_VALIDITY_MARKER;
...@@ -202,7 +202,7 @@ intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc, ...@@ -202,7 +202,7 @@ intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc,
if (++trials < 10) if (++trials < 10)
goto retry; goto retry;
else else
err = EAGAIN; err = -EAGAIN;
} }
} }
i915_gem_ww_ctx_fini(&ww); i915_gem_ww_ctx_fini(&ww);
......
...@@ -164,7 +164,7 @@ void intel_guc_init_early(struct intel_guc *guc) ...@@ -164,7 +164,7 @@ void intel_guc_init_early(struct intel_guc *guc)
struct intel_gt *gt = guc_to_gt(guc); struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915; struct drm_i915_private *i915 = gt->i915;
intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC); intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, true);
intel_guc_ct_init_early(&guc->ct); intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log); intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc); intel_guc_submission_init_early(guc);
......
...@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ...@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
} }
} }
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
{
unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
unsigned long entries[SZ_32];
unsigned int n;
n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
/* May be called under spinlock, so avoid sleeping */
ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
#endif
ct->requests.lost_and_found[lost].fence = fence;
ct->requests.lost_and_found[lost].action = action;
}
#endif
static u32 ct_get_next_fence(struct intel_guc_ct *ct) static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{ {
/* For now it's trivial */ /* For now it's trivial */
...@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct, ...@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct,
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT : type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
GUC_HXG_TYPE_REQUEST; GUC_HXG_TYPE_REQUEST;
hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
GUC_HXG_EVENT_MSG_0_DATA0, action[0]); GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]); tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
...@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct, ...@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
} }
GEM_BUG_ON(tail > size); GEM_BUG_ON(tail > size);
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
ct_track_lost_and_found(ct, fence,
FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
#endif
/* /*
* make sure H2G buffer update and LRC tail update (if this triggering a * make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail * submission) are visible before updating the descriptor tail
...@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct, ...@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct,
GEM_BUG_ON(!ct->enabled); GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len); GEM_BUG_ON(!len);
GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
GEM_BUG_ON(!response_buf && response_buf_size); GEM_BUG_ON(!response_buf && response_buf_size);
might_sleep(); might_sleep();
...@@ -953,6 +976,43 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) ...@@ -953,6 +976,43 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
return -EPIPE; return -EPIPE;
} }
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
{
unsigned int n;
char *buf = NULL;
bool found = false;
lockdep_assert_held(&ct->requests.lock);
for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
if (ct->requests.lost_and_found[n].fence != fence)
continue;
found = true;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
buf = kmalloc(SZ_4K, GFP_NOWAIT);
if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
buf, SZ_4K, 0)) {
CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
fence, ct->requests.lost_and_found[n].action, buf);
break;
}
#endif
CT_ERROR(ct, "Fence %u was used by action %#04x\n",
fence, ct->requests.lost_and_found[n].action);
break;
}
kfree(buf);
return found;
}
#else
static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
{
return false;
}
#endif
static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response) static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
{ {
u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]); u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
...@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r ...@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
break; break;
} }
if (!found) { if (!found) {
CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence); CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence, len, hxg[0], fence, ct->requests.last_fence);
ct->requests.last_fence); if (!ct_check_lost_and_found(ct, fence)) {
list_for_each_entry(req, &ct->requests.pending, link) list_for_each_entry(req, &ct->requests.pending, link)
CT_ERROR(ct, "request %u awaits response\n", CT_ERROR(ct, "request %u awaits response\n",
req->fence); req->fence);
}
err = -ENOKEY; err = -ENOKEY;
} }
spin_unlock_irqrestore(&ct->requests.lock, flags); spin_unlock_irqrestore(&ct->requests.lock, flags);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/stackdepot.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/wait.h> #include <linux/wait.h>
...@@ -81,6 +82,16 @@ struct intel_guc_ct { ...@@ -81,6 +82,16 @@ struct intel_guc_ct {
struct list_head incoming; /* incoming requests */ struct list_head incoming; /* incoming requests */
struct work_struct worker; /* handler for incoming requests */ struct work_struct worker; /* handler for incoming requests */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
struct {
u16 fence;
u16 action;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
depot_stack_handle_t stack;
#endif
} lost_and_found[SZ_16];
#endif
} requests; } requests;
/** @stall_time: time of first time a CTB submission is stalled */ /** @stall_time: time of first time a CTB submission is stalled */
......
...@@ -35,13 +35,6 @@ ...@@ -35,13 +35,6 @@
#define GUC_MAX_CONTEXT_ID 65535 #define GUC_MAX_CONTEXT_ID 65535
#define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID #define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID
#define GUC_RENDER_ENGINE 0
#define GUC_VIDEO_ENGINE 1
#define GUC_BLITTER_ENGINE 2
#define GUC_VIDEOENHANCE_ENGINE 3
#define GUC_VIDEO_ENGINE2 4
#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
#define GUC_RENDER_CLASS 0 #define GUC_RENDER_CLASS 0
#define GUC_VIDEO_CLASS 1 #define GUC_VIDEO_CLASS 1
#define GUC_VIDEOENHANCE_CLASS 2 #define GUC_VIDEOENHANCE_CLASS 2
...@@ -499,32 +492,6 @@ struct guc_log_buffer_state { ...@@ -499,32 +492,6 @@ struct guc_log_buffer_state {
u32 version; u32 version;
} __packed; } __packed;
struct guc_ctx_report {
u32 report_return_status;
u32 reserved1[64];
u32 affected_count;
u32 reserved2[2];
} __packed;
/* GuC Shared Context Data Struct */
struct guc_shared_ctx_data {
u32 addr_of_last_preempted_data_low;
u32 addr_of_last_preempted_data_high;
u32 addr_of_last_preempted_data_high_tmp;
u32 padding;
u32 is_mapped_to_proxy;
u32 proxy_ctx_id;
u32 engine_reset_ctx_id;
u32 media_reset_count;
u32 reserved1[8];
u32 uk_last_ctx_switch_reason;
u32 was_reset;
u32 lrca_gpu_addr;
u64 execlist_ctx;
u32 reserved2[66];
struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM];
} __packed;
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum intel_guc_recv_message { enum intel_guc_recv_message {
INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
struct bus_type; struct bus_type;
struct i915_vma;
enum intel_huc_delayed_load_status { enum intel_huc_delayed_load_status {
INTEL_HUC_WAITING_ON_GSC = 0, INTEL_HUC_WAITING_ON_GSC = 0,
...@@ -22,6 +23,12 @@ enum intel_huc_delayed_load_status { ...@@ -22,6 +23,12 @@ enum intel_huc_delayed_load_status {
INTEL_HUC_DELAYED_LOAD_ERROR, INTEL_HUC_DELAYED_LOAD_ERROR,
}; };
enum intel_huc_authentication_type {
INTEL_HUC_AUTH_BY_GUC = 0,
INTEL_HUC_AUTH_BY_GSC,
INTEL_HUC_AUTH_MAX_MODES
};
struct intel_huc { struct intel_huc {
/* Generic uC firmware management */ /* Generic uC firmware management */
struct intel_uc_fw fw; struct intel_uc_fw fw;
...@@ -31,7 +38,7 @@ struct intel_huc { ...@@ -31,7 +38,7 @@ struct intel_huc {
i915_reg_t reg; i915_reg_t reg;
u32 mask; u32 mask;
u32 value; u32 value;
} status; } status[INTEL_HUC_AUTH_MAX_MODES];
struct { struct {
struct i915_sw_fence fence; struct i915_sw_fence fence;
...@@ -39,6 +46,11 @@ struct intel_huc { ...@@ -39,6 +46,11 @@ struct intel_huc {
struct notifier_block nb; struct notifier_block nb;
enum intel_huc_delayed_load_status status; enum intel_huc_delayed_load_status status;
} delayed_load; } delayed_load;
/* for load via GSCCS */
struct i915_vma *heci_pkt;
bool loaded_via_gsc;
}; };
int intel_huc_sanitize(struct intel_huc *huc); int intel_huc_sanitize(struct intel_huc *huc);
...@@ -46,11 +58,13 @@ void intel_huc_init_early(struct intel_huc *huc); ...@@ -46,11 +58,13 @@ void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc);
void intel_huc_suspend(struct intel_huc *huc); void intel_huc_suspend(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
int intel_huc_wait_for_auth_complete(struct intel_huc *huc); int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type);
bool intel_huc_is_authenticated(struct intel_huc *huc,
enum intel_huc_authentication_type type);
int intel_huc_check_status(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc);
void intel_huc_update_auth_status(struct intel_huc *huc); void intel_huc_update_auth_status(struct intel_huc *huc);
bool intel_huc_is_authenticated(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus); void intel_huc_register_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus); void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
...@@ -73,13 +87,13 @@ static inline bool intel_huc_is_used(struct intel_huc *huc) ...@@ -73,13 +87,13 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc) static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
{ {
return huc->fw.loaded_via_gsc; return huc->loaded_via_gsc;
} }
static inline bool intel_huc_wait_required(struct intel_huc *huc) static inline bool intel_huc_wait_required(struct intel_huc *huc)
{ {
return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) && return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
!intel_huc_is_authenticated(huc); !intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC);
} }
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p); void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
......
...@@ -5,10 +5,241 @@ ...@@ -5,10 +5,241 @@
#include "gt/intel_gsc.h" #include "gt/intel_gsc.h"
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "intel_gsc_binary_headers.h"
#include "intel_gsc_uc_heci_cmd_submit.h"
#include "intel_huc.h" #include "intel_huc.h"
#include "intel_huc_fw.h" #include "intel_huc_fw.h"
#include "intel_huc_print.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "pxp/intel_pxp_huc.h" #include "pxp/intel_pxp_huc.h"
#include "pxp/intel_pxp_cmd_interface_43.h"
struct mtl_huc_auth_msg_in {
struct intel_gsc_mtl_header header;
struct pxp43_new_huc_auth_in huc_in;
} __packed;
struct mtl_huc_auth_msg_out {
struct intel_gsc_mtl_header header;
struct pxp43_huc_auth_out huc_out;
} __packed;
int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
struct drm_i915_private *i915 = gt->i915;
struct drm_i915_gem_object *obj;
struct mtl_huc_auth_msg_in *msg_in;
struct mtl_huc_auth_msg_out *msg_out;
void *pkt_vaddr;
u64 pkt_offset;
int retry = 5;
int err = 0;
if (!huc->heci_pkt)
return -ENODEV;
obj = huc->heci_pkt->obj;
pkt_offset = i915_ggtt_offset(huc->heci_pkt);
pkt_vaddr = i915_gem_object_pin_map_unlocked(obj,
i915_coherent_map_type(i915, obj, true));
if (IS_ERR(pkt_vaddr))
return PTR_ERR(pkt_vaddr);
msg_in = pkt_vaddr;
msg_out = pkt_vaddr + PXP43_HUC_AUTH_INOUT_SIZE;
intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header,
HECI_MEADDRESS_PXP,
sizeof(*msg_in), 0);
msg_in->huc_in.header.api_version = PXP_APIVER(4, 3);
msg_in->huc_in.header.command_id = PXP43_CMDID_NEW_HUC_AUTH;
msg_in->huc_in.header.status = 0;
msg_in->huc_in.header.buffer_len = sizeof(msg_in->huc_in) -
sizeof(msg_in->huc_in.header);
msg_in->huc_in.huc_base_address = huc->fw.vma_res.start;
msg_in->huc_in.huc_size = huc->fw.obj->base.size;
do {
err = intel_gsc_uc_heci_cmd_submit_packet(&gt->uc.gsc,
pkt_offset, sizeof(*msg_in),
pkt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
PXP43_HUC_AUTH_INOUT_SIZE);
if (err) {
huc_err(huc, "failed to submit GSC request to auth: %d\n", err);
goto out_unpin;
}
if (msg_out->header.flags & GSC_OUTFLAG_MSG_PENDING) {
msg_in->header.gsc_message_handle = msg_out->header.gsc_message_handle;
err = -EBUSY;
msleep(50);
}
} while (--retry && err == -EBUSY);
if (err)
goto out_unpin;
if (msg_out->header.message_size != sizeof(*msg_out)) {
huc_err(huc, "invalid GSC reply length %u [expected %zu]\n",
msg_out->header.message_size, sizeof(*msg_out));
err = -EPROTO;
goto out_unpin;
}
/*
* The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
* loaded. If the same error is ever returned with HuC not loaded we'll
* still catch it when we check the authentication bit later.
*/
if (msg_out->huc_out.header.status != PXP_STATUS_SUCCESS &&
msg_out->huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) {
huc_err(huc, "auth failed with GSC error = 0x%x\n",
msg_out->huc_out.header.status);
err = -EIO;
goto out_unpin;
}
out_unpin:
i915_gem_object_unpin_map(obj);
return err;
}
static void get_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, const void *data)
{
const struct intel_gsc_manifest_header *manifest = data;
ver->major = manifest->fw_version.major;
ver->minor = manifest->fw_version.minor;
ver->patch = manifest->fw_version.hotfix;
}
static bool css_valid(const void *data, size_t size)
{
const struct uc_css_header *css = data;
if (unlikely(size < sizeof(struct uc_css_header)))
return false;
if (css->module_type != 0x6)
return false;
if (css->module_vendor != PCI_VENDOR_ID_INTEL)
return false;
return true;
}
static inline u32 entry_offset(const struct intel_gsc_cpd_entry *entry)
{
return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK;
}
int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size)
{
struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
const struct intel_gsc_cpd_header_v2 *header = data;
const struct intel_gsc_cpd_entry *entry;
size_t min_size = sizeof(*header);
int i;
if (!huc_fw->has_gsc_headers) {
huc_err(huc, "Invalid FW type for GSC header parsing!\n");
return -EINVAL;
}
if (size < sizeof(*header)) {
huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
return -ENODATA;
}
/*
* The GSC-enabled HuC binary starts with a directory header, followed
* by a series of entries. Each entry is identified by a name and
* points to a specific section of the binary containing the relevant
* data. The entries we're interested in are:
* - "HUCP.man": points to the GSC manifest header for the HuC, which
* contains the version info.
* - "huc_fw": points to the legacy-style binary that can be used for
* load via the DMA. This entry only contains a valid CSS
* on binaries for platforms that support 2-step HuC load
* via dma and auth via GSC (like MTL).
*
* --------------------------------------------------
* [ intel_gsc_cpd_header_v2 ]
* --------------------------------------------------
* [ intel_gsc_cpd_entry[] ]
* [ entry1 ]
* [ ... ]
* [ entryX ]
* [ "HUCP.man" ]
* [ ... ]
* [ offset >----------------------------]------o
* [ ... ] |
* [ entryY ] |
* [ "huc_fw" ] |
* [ ... ] |
* [ offset >----------------------------]----------o
* -------------------------------------------------- | |
* | |
* -------------------------------------------------- | |
* [ intel_gsc_manifest_header ]<-----o |
* [ ... ] |
* [ intel_gsc_version fw_version ] |
* [ ... ] |
* -------------------------------------------------- |
* |
* -------------------------------------------------- |
* [ data[] ]<---------o
* [ ... ]
* [ ... ]
* --------------------------------------------------
*/
if (header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) {
huc_err(huc, "invalid marker for CPD header: 0x%08x!\n",
header->header_marker);
return -EINVAL;
}
/* we only have binaries with header v2 and entry v1 for now */
if (header->header_version != 2 || header->entry_version != 1) {
huc_err(huc, "invalid CPD header/entry version %u:%u!\n",
header->header_version, header->entry_version);
return -EINVAL;
}
if (header->header_length < sizeof(struct intel_gsc_cpd_header_v2)) {
huc_err(huc, "invalid CPD header length %u!\n",
header->header_length);
return -EINVAL;
}
min_size = header->header_length + sizeof(*entry) * header->num_of_entries;
if (size < min_size) {
huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
return -ENODATA;
}
entry = data + header->header_length;
for (i = 0; i < header->num_of_entries; i++, entry++) {
if (strcmp(entry->name, "HUCP.man") == 0)
get_version_from_gsc_manifest(&huc_fw->file_selected.ver,
data + entry_offset(entry));
if (strcmp(entry->name, "huc_fw") == 0) {
u32 offset = entry_offset(entry);
if (offset < size && css_valid(data + offset, size - offset))
huc_fw->dma_start_offset = offset;
}
}
return 0;
}
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc) int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
{ {
...@@ -25,7 +256,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc) ...@@ -25,7 +256,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
* component gets re-bound and this function called again. If so, just * component gets re-bound and this function called again. If so, just
* mark the HuC as loaded. * mark the HuC as loaded.
*/ */
if (intel_huc_is_authenticated(huc)) { if (intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC)) {
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
return 0; return 0;
} }
...@@ -38,7 +269,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc) ...@@ -38,7 +269,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED); intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
return intel_huc_wait_for_auth_complete(huc); return intel_huc_wait_for_auth_complete(huc, INTEL_HUC_AUTH_BY_GSC);
} }
/** /**
......
...@@ -7,8 +7,12 @@ ...@@ -7,8 +7,12 @@
#define _INTEL_HUC_FW_H_ #define _INTEL_HUC_FW_H_
struct intel_huc; struct intel_huc;
struct intel_uc_fw;
#include <linux/types.h>
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc); int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc); int intel_huc_fw_upload(struct intel_huc *huc);
int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size);
#endif #endif
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef __INTEL_HUC_PRINT__
#define __INTEL_HUC_PRINT__
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
#define huc_printk(_huc, _level, _fmt, ...) \
gt_##_level(huc_to_gt(_huc), "HuC: " _fmt, ##__VA_ARGS__)
#define huc_err(_huc, _fmt, ...) huc_printk((_huc), err, _fmt, ##__VA_ARGS__)
#define huc_warn(_huc, _fmt, ...) huc_printk((_huc), warn, _fmt, ##__VA_ARGS__)
#define huc_notice(_huc, _fmt, ...) huc_printk((_huc), notice, _fmt, ##__VA_ARGS__)
#define huc_info(_huc, _fmt, ...) huc_printk((_huc), info, _fmt, ##__VA_ARGS__)
#define huc_dbg(_huc, _fmt, ...) huc_printk((_huc), dbg, _fmt, ##__VA_ARGS__)
#define huc_probe_error(_huc, _fmt, ...) huc_printk((_huc), probe_error, _fmt, ##__VA_ARGS__)
#endif /* __INTEL_HUC_PRINT__ */
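A usage sketch for these wrappers (hypothetical call site and message text): they take the struct intel_huc pointer and a printf-style format, and the output is prefixed with the GT identifier and "HuC: ":

    /* e.g. logs "GT0: HuC: GSC auth failed: -5" */
    huc_err(huc, "GSC auth failed: %d\n", err);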
...@@ -538,7 +538,7 @@ static int __uc_init_hw(struct intel_uc *uc) ...@@ -538,7 +538,7 @@ static int __uc_init_hw(struct intel_uc *uc)
if (intel_huc_is_loaded_by_gsc(huc)) if (intel_huc_is_loaded_by_gsc(huc))
intel_huc_update_auth_status(huc); intel_huc_update_auth_status(huc);
else else
intel_huc_auth(huc); intel_huc_auth(huc, INTEL_HUC_AUTH_BY_GUC);
if (intel_uc_uses_guc_submission(uc)) { if (intel_uc_uses_guc_submission(uc)) {
ret = intel_guc_submission_enable(guc); ret = intel_guc_submission_enable(guc);
...@@ -700,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc) ...@@ -700,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc)
} }
} }
static void __uc_resume_mappings(struct intel_uc *uc)
{
intel_uc_fw_resume_mapping(&uc->guc.fw);
intel_uc_fw_resume_mapping(&uc->huc.fw);
}
static int __uc_resume(struct intel_uc *uc, bool enable_communication) static int __uc_resume(struct intel_uc *uc, bool enable_communication)
{ {
struct intel_guc *guc = &uc->guc; struct intel_guc *guc = &uc->guc;
...@@ -767,4 +773,6 @@ static const struct intel_uc_ops uc_ops_on = { ...@@ -767,4 +773,6 @@ static const struct intel_uc_ops uc_ops_on = {
.init_hw = __uc_init_hw, .init_hw = __uc_init_hw,
.fini_hw = __uc_fini_hw, .fini_hw = __uc_fini_hw,
.resume_mappings = __uc_resume_mappings,
}; };
...@@ -24,6 +24,7 @@ struct intel_uc_ops { ...@@ -24,6 +24,7 @@ struct intel_uc_ops {
void (*fini)(struct intel_uc *uc); void (*fini)(struct intel_uc *uc);
int (*init_hw)(struct intel_uc *uc); int (*init_hw)(struct intel_uc *uc);
void (*fini_hw)(struct intel_uc *uc); void (*fini_hw)(struct intel_uc *uc);
void (*resume_mappings)(struct intel_uc *uc);
}; };
struct intel_uc { struct intel_uc {
...@@ -114,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0); ...@@ -114,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0);
intel_uc_ops_function(fini, fini, void, ); intel_uc_ops_function(fini, fini, void, );
intel_uc_ops_function(init_hw, init_hw, int, 0); intel_uc_ops_function(init_hw, init_hw, int, 0);
intel_uc_ops_function(fini_hw, fini_hw, void, ); intel_uc_ops_function(fini_hw, fini_hw, void, );
intel_uc_ops_function(resume_mappings, resume_mappings, void, );
#undef intel_uc_ops_function #undef intel_uc_ops_function
#endif #endif
...@@ -99,20 +99,28 @@ struct intel_uc_fw { ...@@ -99,20 +99,28 @@ struct intel_uc_fw {
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
/** /**
* @dummy: A vma used in binding the uc fw to ggtt. We can't define this * @needs_ggtt_mapping: indicates whether the fw object needs to be
* vma on the stack as it can lead to a stack overflow, so we define it * pinned to ggtt. If true, the fw is pinned at init time and unpinned
* here. Safe to have 1 copy per uc fw because the binding is single * during driver unload.
* threaded as it done during driver load (inherently single threaded)
* or during a GT reset (mutex guarantees single threaded).
*/ */
struct i915_vma_resource dummy; bool needs_ggtt_mapping;
/**
* @vma_res: A vma resource used in binding the uc fw to ggtt. The fw is
* pinned in a reserved area of the ggtt (above the maximum address
* usable by GuC); therefore, we can't use the normal vma functions to
* do the pinning and we instead use this resource to do so.
*/
struct i915_vma_resource vma_res;
struct i915_vma *rsa_data; struct i915_vma *rsa_data;
u32 rsa_size; u32 rsa_size;
u32 ucode_size; u32 ucode_size;
u32 private_data_size; u32 private_data_size;
bool loaded_via_gsc; u32 dma_start_offset;
bool has_gsc_headers;
}; };
/* /*
...@@ -282,12 +290,14 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) ...@@ -282,12 +290,14 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
} }
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_type type); enum intel_uc_fw_type type,
bool needs_ggtt_mapping);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
int intel_uc_fw_init(struct intel_uc_fw *uc_fw); int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw);
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err); int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err);
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
......
...@@ -84,10 +84,4 @@ struct uc_css_header { ...@@ -84,10 +84,4 @@ struct uc_css_header {
} __packed; } __packed;
static_assert(sizeof(struct uc_css_header) == 128); static_assert(sizeof(struct uc_css_header) == 128);
#define HUC_GSC_VERSION_HI_DW 44
#define HUC_GSC_MAJOR_VER_HI_MASK (0xFF << 0)
#define HUC_GSC_MINOR_VER_HI_MASK (0xFF << 16)
#define HUC_GSC_VERSION_LO_DW 45
#define HUC_GSC_PATCH_VER_LO_MASK (0xFF << 0)
#endif /* _INTEL_UC_FW_ABI_H */ #endif /* _INTEL_UC_FW_ABI_H */
...@@ -243,8 +243,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) ...@@ -243,8 +243,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
if (ret < 0) if (ret < 0)
goto err_rootgt; goto err_rootgt;
i915_drm_clients_init(&dev_priv->clients, dev_priv);
i915_gem_init_early(dev_priv); i915_gem_init_early(dev_priv);
/* This must be called before any calls to HAS_PCH_* */ /* This must be called before any calls to HAS_PCH_* */
...@@ -278,7 +276,6 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) ...@@ -278,7 +276,6 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
intel_power_domains_cleanup(dev_priv); intel_power_domains_cleanup(dev_priv);
i915_gem_cleanup_early(dev_priv); i915_gem_cleanup_early(dev_priv);
intel_gt_driver_late_release_all(dev_priv); intel_gt_driver_late_release_all(dev_priv);
i915_drm_clients_fini(&dev_priv->clients);
intel_region_ttm_device_fini(dev_priv); intel_region_ttm_device_fini(dev_priv);
vlv_suspend_cleanup(dev_priv); vlv_suspend_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv);
...@@ -1706,7 +1703,7 @@ static const struct file_operations i915_driver_fops = { ...@@ -1706,7 +1703,7 @@ static const struct file_operations i915_driver_fops = {
.compat_ioctl = i915_ioc32_compat_ioctl, .compat_ioctl = i915_ioc32_compat_ioctl,
.llseek = noop_llseek, .llseek = noop_llseek,
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
.show_fdinfo = i915_drm_client_fdinfo, .show_fdinfo = drm_show_fdinfo,
#endif #endif
}; };
...@@ -1806,6 +1803,7 @@ static const struct drm_driver i915_drm_driver = { ...@@ -1806,6 +1803,7 @@ static const struct drm_driver i915_drm_driver = {
.open = i915_driver_open, .open = i915_driver_open,
.lastclose = i915_driver_lastclose, .lastclose = i915_driver_lastclose,
.postclose = i915_driver_postclose, .postclose = i915_driver_postclose,
.show_fdinfo = i915_drm_client_fdinfo,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle, .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
......
...@@ -17,64 +17,29 @@ ...@@ -17,64 +17,29 @@
#include "i915_gem.h" #include "i915_gem.h"
#include "i915_utils.h" #include "i915_utils.h"
void i915_drm_clients_init(struct i915_drm_clients *clients, struct i915_drm_client *i915_drm_client_alloc(void)
struct drm_i915_private *i915)
{
clients->i915 = i915;
clients->next_id = 0;
xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
}
struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients)
{ {
struct i915_drm_client *client; struct i915_drm_client *client;
struct xarray *xa = &clients->xarray;
int ret;
client = kzalloc(sizeof(*client), GFP_KERNEL); client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client) if (!client)
return ERR_PTR(-ENOMEM); return NULL;
xa_lock_irq(xa);
ret = __xa_alloc_cyclic(xa, &client->id, client, xa_limit_32b,
&clients->next_id, GFP_KERNEL);
xa_unlock_irq(xa);
if (ret < 0)
goto err;
kref_init(&client->kref); kref_init(&client->kref);
spin_lock_init(&client->ctx_lock); spin_lock_init(&client->ctx_lock);
INIT_LIST_HEAD(&client->ctx_list); INIT_LIST_HEAD(&client->ctx_list);
client->clients = clients;
return client; return client;
err:
kfree(client);
return ERR_PTR(ret);
} }
void __i915_drm_client_free(struct kref *kref) void __i915_drm_client_free(struct kref *kref)
{ {
struct i915_drm_client *client = struct i915_drm_client *client =
container_of(kref, typeof(*client), kref); container_of(kref, typeof(*client), kref);
struct xarray *xa = &client->clients->xarray;
unsigned long flags;
xa_lock_irqsave(xa, flags);
__xa_erase(xa, client->id);
xa_unlock_irqrestore(xa, flags);
kfree(client); kfree(client);
} }
void i915_drm_clients_fini(struct i915_drm_clients *clients)
{
GEM_BUG_ON(!xa_empty(&clients->xarray));
xa_destroy(&clients->xarray);
}
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
static const char * const uabi_class_names[] = { static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_RENDER] = "render", [I915_ENGINE_CLASS_RENDER] = "render",
...@@ -101,38 +66,34 @@ static u64 busy_add(struct i915_gem_context *ctx, unsigned int class) ...@@ -101,38 +66,34 @@ static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
} }
static void static void
show_client_class(struct seq_file *m, show_client_class(struct drm_printer *p,
struct drm_i915_private *i915,
struct i915_drm_client *client, struct i915_drm_client *client,
unsigned int class) unsigned int class)
{ {
const struct list_head *list = &client->ctx_list; const unsigned int capacity = i915->engine_uabi_class_count[class];
u64 total = atomic64_read(&client->past_runtime[class]); u64 total = atomic64_read(&client->past_runtime[class]);
const unsigned int capacity =
client->clients->i915->engine_uabi_class_count[class];
struct i915_gem_context *ctx; struct i915_gem_context *ctx;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(ctx, list, client_link) list_for_each_entry_rcu(ctx, &client->ctx_list, client_link)
total += busy_add(ctx, class); total += busy_add(ctx, class);
rcu_read_unlock(); rcu_read_unlock();
if (capacity) if (capacity)
seq_printf(m, "drm-engine-%s:\t%llu ns\n", drm_printf(p, "drm-engine-%s:\t%llu ns\n",
uabi_class_names[class], total); uabi_class_names[class], total);
if (capacity > 1) if (capacity > 1)
seq_printf(m, "drm-engine-capacity-%s:\t%u\n", drm_printf(p, "drm-engine-capacity-%s:\t%u\n",
uabi_class_names[class], uabi_class_names[class],
capacity); capacity);
} }
void i915_drm_client_fdinfo(struct seq_file *m, struct file *f) void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
{ {
struct drm_file *file = f->private_data;
struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_private *i915 = file_priv->i915; struct drm_i915_private *i915 = file_priv->i915;
struct i915_drm_client *client = file_priv->client;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
unsigned int i; unsigned int i;
/* /*
...@@ -141,16 +102,10 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct file *f) ...@@ -141,16 +102,10 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
* ****************************************************************** * ******************************************************************
*/ */
seq_printf(m, "drm-driver:\t%s\n", i915->drm.driver->name);
seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n",
pci_domain_nr(pdev->bus), pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
seq_printf(m, "drm-client-id:\t%u\n", client->id);
if (GRAPHICS_VER(i915) < 8) if (GRAPHICS_VER(i915) < 8)
return; return;
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
show_client_class(m, client, i); show_client_class(p, i915, file_priv->client, i);
} }
#endif #endif
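With the switch to the common drm_show_fdinfo() helper above, the shared keys are emitted by DRM core (the driver-specific seq_printf calls for drm-driver/drm-pdev/drm-client-id are dropped in this hunk), and i915 only contributes its per-engine busyness as drm-engine-<class> and drm-engine-capacity-<class> key/value pairs in /proc/<pid>/fdinfo/<fd>. A minimal userspace sketch of reading those lines back for an already-open DRM file descriptor might look like the following; the helper name is hypothetical and nothing i915-specific is assumed beyond the key prefix shown in the code above:

#include <stdio.h>
#include <string.h>

/* Hypothetical: print the drm-engine-* keys exported for one DRM fd. */
static void dump_engine_busyness(int drm_fd)
{
	char path[64], line[256];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", drm_fd);
	f = fopen(path, "r");
	if (!f)
		return;

	while (fgets(line, sizeof(line), f)) {
		/* Matches both drm-engine-<class> and drm-engine-capacity-<class>. */
		if (!strncmp(line, "drm-engine-", strlen("drm-engine-")))
			fputs(line, stdout);
	}

	fclose(f);
}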
...@@ -9,20 +9,13 @@ ...@@ -9,20 +9,13 @@
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/xarray.h>
#include <uapi/drm/i915_drm.h> #include <uapi/drm/i915_drm.h>
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
struct drm_i915_private; struct drm_file;
struct drm_printer;
struct i915_drm_clients {
struct drm_i915_private *i915;
struct xarray xarray;
u32 next_id;
};
struct i915_drm_client { struct i915_drm_client {
struct kref kref; struct kref kref;
...@@ -32,17 +25,12 @@ struct i915_drm_client { ...@@ -32,17 +25,12 @@ struct i915_drm_client {
spinlock_t ctx_lock; /* For add/remove from ctx_list. */ spinlock_t ctx_lock; /* For add/remove from ctx_list. */
struct list_head ctx_list; /* List of contexts belonging to client. */ struct list_head ctx_list; /* List of contexts belonging to client. */
struct i915_drm_clients *clients;
/** /**
* @past_runtime: Accumulation of pphwsp runtimes from closed contexts. * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
*/ */
atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1]; atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
}; };
void i915_drm_clients_init(struct i915_drm_clients *clients,
struct drm_i915_private *i915);
static inline struct i915_drm_client * static inline struct i915_drm_client *
i915_drm_client_get(struct i915_drm_client *client) i915_drm_client_get(struct i915_drm_client *client)
{ {
...@@ -57,12 +45,10 @@ static inline void i915_drm_client_put(struct i915_drm_client *client) ...@@ -57,12 +45,10 @@ static inline void i915_drm_client_put(struct i915_drm_client *client)
kref_put(&client->kref, __i915_drm_client_free); kref_put(&client->kref, __i915_drm_client_free);
} }
struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients); struct i915_drm_client *i915_drm_client_alloc(void);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
void i915_drm_client_fdinfo(struct seq_file *m, struct file *f); void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
#endif #endif
void i915_drm_clients_fini(struct i915_drm_clients *clients);
#endif /* !__I915_DRM_CLIENT_H__ */ #endif /* !__I915_DRM_CLIENT_H__ */
...@@ -314,7 +314,7 @@ struct drm_i915_private { ...@@ -314,7 +314,7 @@ struct drm_i915_private {
/* /*
* i915->gt[0] == &i915->gt0 * i915->gt[0] == &i915->gt0
*/ */
#define I915_MAX_GT 4 #define I915_MAX_GT 2
struct intel_gt *gt[I915_MAX_GT]; struct intel_gt *gt[I915_MAX_GT];
struct kobject *sysfs_gt; struct kobject *sysfs_gt;
...@@ -348,8 +348,6 @@ struct drm_i915_private { ...@@ -348,8 +348,6 @@ struct drm_i915_private {
struct i915_pmu pmu; struct i915_pmu pmu;
struct i915_drm_clients clients;
/* The TTM device structure. */ /* The TTM device structure. */
struct ttm_device bdev; struct ttm_device bdev;
......
...@@ -1325,11 +1325,9 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) ...@@ -1325,11 +1325,9 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
if (!file_priv) if (!file_priv)
goto err_alloc; goto err_alloc;
client = i915_drm_client_add(&i915->clients); client = i915_drm_client_alloc();
if (IS_ERR(client)) { if (!client)
ret = PTR_ERR(client);
goto err_client; goto err_client;
}
file->driver_priv = file_priv; file->driver_priv = file_priv;
file_priv->i915 = i915; file_priv->i915 = i915;
......
...@@ -100,7 +100,11 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, ...@@ -100,7 +100,11 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = sseu->min_eu_in_pool; value = sseu->min_eu_in_pool;
break; break;
case I915_PARAM_HUC_STATUS: case I915_PARAM_HUC_STATUS:
value = intel_huc_check_status(&to_gt(i915)->uc.huc); /* On platform with a media GT, the HuC is on that GT */
if (i915->media_gt)
value = intel_huc_check_status(&i915->media_gt->uc.huc);
else
value = intel_huc_check_status(&to_gt(i915)->uc.huc);
if (value < 0) if (value < 0)
return value; return value;
break; break;
......
...@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report) ...@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
* (See description of OA_TAIL_MARGIN_NSEC above for further details.) * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
* *
* Besides returning true when there is data available to read() this function * Besides returning true when there is data available to read() this function
* also updates the tail, aging_tail and aging_timestamp in the oa_buffer * also updates the tail in the oa_buffer object.
* object.
* *
* Note: It's safe to read OA config state here unlocked, assuming that this is * Note: It's safe to read OA config state here unlocked, assuming that this is
* only called while the stream is enabled, while the global OA configuration * only called while the stream is enabled, while the global OA configuration
...@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) ...@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{ {
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format->size; int report_size = stream->oa_buffer.format->size;
u32 head, tail, read_tail;
unsigned long flags; unsigned long flags;
bool pollin; bool pollin;
u32 hw_tail; u32 hw_tail;
u64 now;
u32 partial_report_size; u32 partial_report_size;
/* We have to consider the (unlikely) possibility that read() errors /* We have to consider the (unlikely) possibility that read() errors
...@@ -566,64 +565,48 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) ...@@ -566,64 +565,48 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
partial_report_size %= report_size; partial_report_size %= report_size;
/* Subtract partial amount off the tail */ /* Subtract partial amount off the tail */
hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size); hw_tail = OA_TAKEN(hw_tail, partial_report_size);
now = ktime_get_mono_fast_ns(); /* NB: The head we observe here might effectively be a little
* out of date. If a read() is in progress, the head could be
* anywhere between this head and stream->oa_buffer.tail.
*/
head = stream->oa_buffer.head - gtt_offset;
read_tail = stream->oa_buffer.tail - gtt_offset;
if (hw_tail == stream->oa_buffer.aging_tail && tail = hw_tail;
(now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
/* If the HW tail hasn't move since the last check and the HW
* tail has been aging for long enough, declare it the new
* tail.
*/
stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
} else {
u32 head, tail, aged_tail;
/* NB: The head we observe here might effectively be a little /* Walk the stream backward until we find a report with report
* out of date. If a read() is in progress, the head could be * id and timestamp not at 0. Since the circular buffer pointers
* anywhere between this head and stream->oa_buffer.tail. * progress by increments of 64 bytes and that reports can be up
*/ * to 256 bytes long, we can't tell whether a report has fully
head = stream->oa_buffer.head - gtt_offset; * landed in memory before the report id and timestamp of the
aged_tail = stream->oa_buffer.tail - gtt_offset; * following report have effectively landed.
*
hw_tail -= gtt_offset; * This is assuming that the writes of the OA unit land in
tail = hw_tail; * memory in the order they were written to.
* If not : (╯°□°)╯︵ ┻━┻
/* Walk the stream backward until we find a report with report */
* id and timestamp not at 0. Since the circular buffer pointers while (OA_TAKEN(tail, read_tail) >= report_size) {
* progress by increments of 64 bytes and that reports can be up void *report = stream->oa_buffer.vaddr + tail;
* to 256 bytes long, we can't tell whether a report has fully
* landed in memory before the report id and timestamp of the
* following report have effectively landed.
*
* This is assuming that the writes of the OA unit land in
* memory in the order they were written to.
* If not : (╯°□°)╯︵ ┻━┻
*/
while (OA_TAKEN(tail, aged_tail) >= report_size) {
void *report = stream->oa_buffer.vaddr + tail;
if (oa_report_id(stream, report) || if (oa_report_id(stream, report) ||
oa_timestamp(stream, report)) oa_timestamp(stream, report))
break; break;
tail = (tail - report_size) & (OA_BUFFER_SIZE - 1); tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
} }
if (OA_TAKEN(hw_tail, tail) > report_size && if (OA_TAKEN(hw_tail, tail) > report_size &&
__ratelimit(&stream->perf->tail_pointer_race)) __ratelimit(&stream->perf->tail_pointer_race))
drm_notice(&stream->uncore->i915->drm, drm_notice(&stream->uncore->i915->drm,
"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
head, tail, hw_tail); head, tail, hw_tail);
stream->oa_buffer.tail = gtt_offset + tail; stream->oa_buffer.tail = gtt_offset + tail;
stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
stream->oa_buffer.aging_timestamp = now;
}
pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset, pollin = OA_TAKEN(stream->oa_buffer.tail,
stream->oa_buffer.head - gtt_offset) >= report_size; stream->oa_buffer.head) >= report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
...@@ -877,12 +860,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, ...@@ -877,12 +860,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
stream->oa_buffer.last_ctx_id = ctx_id; stream->oa_buffer.last_ctx_id = ctx_id;
} }
/* if (is_power_of_2(report_size)) {
* Clear out the report id and timestamp as a means to detect unlanded /*
* reports. * Clear out the report id and timestamp as a means
*/ * to detect unlanded reports.
oa_report_id_clear(stream, report32); */
oa_timestamp_clear(stream, report32); oa_report_id_clear(stream, report32);
oa_timestamp_clear(stream, report32);
} else {
/* Zero out the entire report */
memset(report32, 0, report_size);
}
} }
if (start_offset != *offset) { if (start_offset != *offset) {
...@@ -1722,7 +1710,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) ...@@ -1722,7 +1710,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset | OABUFFER_SIZE_16M); gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */ /* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset; stream->oa_buffer.tail = gtt_offset;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
...@@ -1774,7 +1761,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) ...@@ -1774,7 +1761,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */ /* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset; stream->oa_buffer.tail = gtt_offset;
/* /*
...@@ -1828,7 +1814,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) ...@@ -1828,7 +1814,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset & GEN12_OAG_OATAILPTR_MASK); gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */ /* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset; stream->oa_buffer.tail = gtt_offset;
/* /*
......
...@@ -312,18 +312,6 @@ struct i915_perf_stream { ...@@ -312,18 +312,6 @@ struct i915_perf_stream {
*/ */
spinlock_t ptr_lock; spinlock_t ptr_lock;
/**
* @aging_tail: The last HW tail reported by HW. The data
* might not have made it to memory yet though.
*/
u32 aging_tail;
/**
* @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
* was read; used to determine when it is old enough to trust.
*/
u64 aging_timestamp;
/** /**
* @head: Although we can always read back the head pointer register, * @head: Although we can always read back the head pointer register,
* we prefer to avoid trusting the HW state, just to avoid any * we prefer to avoid trusting the HW state, just to avoid any
......
...@@ -132,14 +132,14 @@ static u32 frequency_enabled_mask(void) ...@@ -132,14 +132,14 @@ static u32 frequency_enabled_mask(void)
unsigned int i; unsigned int i;
u32 mask = 0; u32 mask = 0;
for (i = 0; i < I915_PMU_MAX_GTS; i++) for (i = 0; i < I915_PMU_MAX_GT; i++)
mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) | mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
config_mask(__I915_PMU_REQUESTED_FREQUENCY(i)); config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
return mask; return mask;
} }
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) static bool pmu_needs_timer(struct i915_pmu *pmu)
{ {
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
u32 enable; u32 enable;
...@@ -157,17 +157,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) ...@@ -157,17 +157,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
*/ */
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK; enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
/*
* When the GPU is idle per-engine counters do not need to be
* running so clear those bits out.
*/
if (!gpu_active)
enable &= ~ENGINE_SAMPLE_MASK;
/* /*
* Also there is software busyness tracking available we do not * Also there is software busyness tracking available we do not
* need the timer for I915_SAMPLE_BUSY counter. * need the timer for I915_SAMPLE_BUSY counter.
*/ */
else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
enable &= ~BIT(I915_SAMPLE_BUSY); enable &= ~BIT(I915_SAMPLE_BUSY);
/* /*
...@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt) ...@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
} }
static unsigned int
__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
return idx;
}
static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample) static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{ {
return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur; return pmu->sample[gt_id][sample].cur;
} }
static void static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val) store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{ {
pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val; pmu->sample[gt_id][sample].cur = val;
} }
static void static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul) add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{ {
pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul); pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
} }
static u64 get_rc6(struct intel_gt *gt) static u64 get_rc6(struct intel_gt *gt)
...@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt) ...@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt)
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{ {
if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
pmu->timer_enabled = true; pmu->timer_enabled = true;
pmu->timer_last = ktime_get(); pmu->timer_last = ktime_get();
hrtimer_start_range_ns(&pmu->timer, hrtimer_start_range_ns(&pmu->timer,
...@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt) ...@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
*/ */
pmu->unparked &= ~BIT(gt->info.id); pmu->unparked &= ~BIT(gt->info.id);
if (pmu->unparked == 0) if (pmu->unparked == 0)
pmu->timer_enabled = pmu_needs_timer(pmu, false); pmu->timer_enabled = false;
spin_unlock_irq(&pmu->lock); spin_unlock_irq(&pmu->lock);
} }
...@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event) ...@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event)
*/ */
if (--pmu->enable_count[bit] == 0) { if (--pmu->enable_count[bit] == 0) {
pmu->enable &= ~BIT(bit); pmu->enable &= ~BIT(bit);
pmu->timer_enabled &= pmu_needs_timer(pmu, true); pmu->timer_enabled &= pmu_needs_timer(pmu);
} }
spin_unlock_irqrestore(&pmu->lock, flags); spin_unlock_irqrestore(&pmu->lock, flags);
......
...@@ -38,7 +38,7 @@ enum { ...@@ -38,7 +38,7 @@ enum {
__I915_NUM_PMU_SAMPLERS __I915_NUM_PMU_SAMPLERS
}; };
#define I915_PMU_MAX_GTS 2 #define I915_PMU_MAX_GT 2
/* /*
* How many different events we track in the global PMU mask. * How many different events we track in the global PMU mask.
...@@ -47,7 +47,7 @@ enum { ...@@ -47,7 +47,7 @@ enum {
*/ */
#define I915_PMU_MASK_BITS \ #define I915_PMU_MASK_BITS \
(I915_ENGINE_SAMPLE_COUNT + \ (I915_ENGINE_SAMPLE_COUNT + \
I915_PMU_MAX_GTS * __I915_PMU_TRACKED_EVENT_COUNT) I915_PMU_MAX_GT * __I915_PMU_TRACKED_EVENT_COUNT)
#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) #define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
...@@ -127,11 +127,11 @@ struct i915_pmu { ...@@ -127,11 +127,11 @@ struct i915_pmu {
* Only global counters are held here, while the per-engine ones are in * Only global counters are held here, while the per-engine ones are in
* struct intel_engine_cs. * struct intel_engine_cs.
*/ */
struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS]; struct i915_pmu_sample sample[I915_PMU_MAX_GT][__I915_NUM_PMU_SAMPLERS];
/** /**
* @sleep_last: Last time GT parked for RC6 estimation. * @sleep_last: Last time GT parked for RC6 estimation.
*/ */
ktime_t sleep_last[I915_PMU_MAX_GTS]; ktime_t sleep_last[I915_PMU_MAX_GT];
/** /**
* @irq_count: Number of interrupts * @irq_count: Number of interrupts
* *
......
...@@ -941,6 +941,9 @@ ...@@ -941,6 +941,9 @@
#define HECI_H_GS1(base) _MMIO((base) + 0xc4c) #define HECI_H_GS1(base) _MMIO((base) + 0xc4c)
#define HECI_H_GS1_ER_PREP REG_BIT(0) #define HECI_H_GS1_ER_PREP REG_BIT(0)
#define HECI_FWSTS5(base) _MMIO((base) + 0xc68)
#define HECI_FWSTS5_HUC_AUTH_DONE (1 << 19)
#define HSW_GTT_CACHE_EN _MMIO(0x4024) #define HSW_GTT_CACHE_EN _MMIO(0x4024)
#define GTT_CACHE_EN_ALL 0xF0007FFF #define GTT_CACHE_EN_ALL 0xF0007FFF
#define GEN7_WR_WATERMARK _MMIO(0x4028) #define GEN7_WR_WATERMARK _MMIO(0x4028)
......
...@@ -11,19 +11,30 @@ ...@@ -11,19 +11,30 @@
/* PXP-Cmd-Op definitions */ /* PXP-Cmd-Op definitions */
#define PXP43_CMDID_START_HUC_AUTH 0x0000003A #define PXP43_CMDID_START_HUC_AUTH 0x0000003A
#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
#define PXP43_CMDID_INIT_SESSION 0x00000036 #define PXP43_CMDID_INIT_SESSION 0x00000036
/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */ /* PXP-Packet sizes for MTL's GSCCS-HECI instruction */
#define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K) #define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K)
/* PXP-Input-Packet: HUC-Authentication */ /* PXP-Packet size for MTL's NEW_HUC_AUTH instruction */
#define PXP43_HUC_AUTH_INOUT_SIZE (SZ_4K)
/* PXP-Input-Packet: HUC Load and Authentication */
struct pxp43_start_huc_auth_in { struct pxp43_start_huc_auth_in {
struct pxp_cmd_header header; struct pxp_cmd_header header;
__le64 huc_base_address; __le64 huc_base_address;
} __packed; } __packed;
/* PXP-Output-Packet: HUC-Authentication */ /* PXP-Input-Packet: HUC Auth-only */
struct pxp43_start_huc_auth_out { struct pxp43_new_huc_auth_in {
struct pxp_cmd_header header;
u64 huc_base_address;
u32 huc_size;
} __packed;
/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */
struct pxp43_huc_auth_out {
struct pxp_cmd_header header; struct pxp_cmd_header header;
} __packed; } __packed;
......
...@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp, ...@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp,
reply_size = header->message_size - sizeof(*header); reply_size = header->message_size - sizeof(*header);
if (reply_size > msg_out_size_max) { if (reply_size > msg_out_size_max) {
drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n", drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%zu)\n",
reply_size, msg_out_size_max); reply_size, msg_out_size_max);
reply_size = msg_out_size_max; reply_size = msg_out_size_max;
} }
...@@ -196,7 +196,7 @@ bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) ...@@ -196,7 +196,7 @@ bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp)
* gsc-proxy init flow (the last set of dependencies that * gsc-proxy init flow (the last set of dependencies that
* are out of order) will suffice. * are out of order) will suffice.
*/ */
if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc) && if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc, INTEL_HUC_AUTH_BY_GSC) &&
intel_gsc_uc_fw_proxy_init_done(&pxp->ctrl_gt->uc.gsc)) intel_gsc_uc_fw_proxy_init_done(&pxp->ctrl_gt->uc.gsc))
return true; return true;
......
...@@ -19,7 +19,7 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp) ...@@ -19,7 +19,7 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp)
struct intel_gt *gt; struct intel_gt *gt;
struct intel_huc *huc; struct intel_huc *huc;
struct pxp43_start_huc_auth_in huc_in = {0}; struct pxp43_start_huc_auth_in huc_in = {0};
struct pxp43_start_huc_auth_out huc_out = {0}; struct pxp43_huc_auth_out huc_out = {0};
dma_addr_t huc_phys_addr; dma_addr_t huc_phys_addr;
u8 client_id = 0; u8 client_id = 0;
u8 fence_id = 0; u8 fence_id = 0;
......
...@@ -674,7 +674,8 @@ typedef struct drm_i915_irq_wait { ...@@ -674,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the * If the IOCTL is successful, the returned parameter will be set to one of the
* following values: * following values:
* * 0 if HuC firmware load is not complete, * * 0 if HuC firmware load is not complete,
* * 1 if HuC firmware is authenticated and running. * * 1 if HuC firmware is loaded and fully authenticated,
* * 2 if HuC firmware is loaded and authenticated for clear media only
*/ */
#define I915_PARAM_HUC_STATUS 42 #define I915_PARAM_HUC_STATUS 42
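For reference, a minimal userspace sketch of querying this parameter is shown below. It assumes libdrm's drmIoctl() and the installed i915 uapi header are available on the include path; the helper name and error handling are illustrative only, not part of this change:

#include <errno.h>

#include <xf86drm.h>   /* drmIoctl() */
#include <i915_drm.h>  /* I915_PARAM_HUC_STATUS, struct drm_i915_getparam */

/* Returns 0, 1 or 2 as documented above, or a negative errno on failure. */
static int query_huc_status(int drm_fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,
		.value = &value,
	};

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -errno;

	return value;
}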
...@@ -3679,9 +3680,13 @@ struct drm_i915_gem_create_ext { ...@@ -3679,9 +3680,13 @@ struct drm_i915_gem_create_ext {
* *
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content. * struct drm_i915_gem_create_ext_protected_content.
*
* For I915_GEM_CREATE_EXT_SET_PAT usage see
* struct drm_i915_gem_create_ext_set_pat.
*/ */
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions; __u64 extensions;
}; };
...@@ -3796,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content { ...@@ -3796,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags; __u32 flags;
}; };
/**
* struct drm_i915_gem_create_ext_set_pat - The
* I915_GEM_CREATE_EXT_SET_PAT extension.
*
* If this extension is provided, the specified caching policy (PAT index) is
* applied to the buffer object.
*
* Below is an example of how to create an object with a specific caching policy:
*
* .. code-block:: C
*
* struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
* .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
* .pat_index = 0,
* };
* struct drm_i915_gem_create_ext create_ext = {
* .size = PAGE_SIZE,
* .extensions = (uintptr_t)&set_pat_ext,
* };
*
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
* if (err) ...
*/
struct drm_i915_gem_create_ext_set_pat {
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/**
* @pat_index: PAT index to be set
* PAT index is a bit field in Page Table Entry to control caching
* behaviors for GPU accesses. The definition of PAT index is
* platform dependent and can be found in hardware specifications.
*/
__u32 pat_index;
/** @rsvd: reserved for future use */
__u32 rsvd;
};
/* ID of the protected content session managed by i915 when PXP is active */ /* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
......