Commit d6219e1c authored by Niranjana Vishwanathapura, committed by Matt Roper

drm/xe: Add Indirect Ring State support

When Indirect Ring State is enabled, the Ring Buffer state and
Batch Buffer state are context save/restored to/from Indirect
Ring State instead of the LRC. The Indirect Ring State is a 4K
page mapped in global GTT at a 4K aligned address. This address
is programmed in the INDIRECT_RING_STATE register of the
corresponding context's LRC.

v2: Fix kernel-doc, add bspec reference
v3: Fix typo in commit text

Bspec: 67296, 67139
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-3-niranjana.vishwanathapura@intel.com
parent 85cfc412
...@@ -125,6 +125,7 @@ ...@@ -125,6 +125,7 @@
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1) #define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1) #define CTX_RING_CTL (0x0a + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1) #define CTX_PDP0_LDW (0x32 + 1)
...@@ -23,4 +24,10 @@ ...@@ -23,4 +24,10 @@
#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
#define INDIRECT_CTX_RING_START (0x06 + 1)
#define INDIRECT_CTX_RING_START_UDW (0x08 + 1)
#define INDIRECT_CTX_RING_CTL (0x0a + 1)
#endif #endif
...@@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ...@@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (q->hwe->class == XE_ENGINE_CLASS_RENDER) if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
/* Big enough to emit all of the context's 3DSTATE */ /* Big enough to emit all of the context's 3DSTATE */
bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
else else
/* Just pick a large BB size */ /* Just pick a large BB size */
bb = xe_bb_new(gt, SZ_4K, false); bb = xe_bb_new(gt, SZ_4K, false);
...@@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) ...@@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
xe_tuning_process_lrc(hwe); xe_tuning_process_lrc(hwe);
default_lrc = drmm_kzalloc(&xe->drm, default_lrc = drmm_kzalloc(&xe->drm,
xe_lrc_size(xe, hwe->class), xe_gt_lrc_size(gt, hwe->class),
GFP_KERNEL); GFP_KERNEL);
if (!default_lrc) if (!default_lrc)
return -ENOMEM; return -ENOMEM;
...@@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) ...@@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
xe_map_memcpy_from(xe, default_lrc, xe_map_memcpy_from(xe, default_lrc,
&q->lrc[0].bo->vmap, &q->lrc[0].bo->vmap,
xe_lrc_pphwsp_offset(&q->lrc[0]), xe_lrc_pphwsp_offset(&q->lrc[0]),
xe_lrc_size(xe, hwe->class)); xe_gt_lrc_size(gt, hwe->class));
gt->default_lrc[hwe->class] = default_lrc; gt->default_lrc[hwe->class] = default_lrc;
put_nop_q: put_nop_q:
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <drm/drm_util.h> #include <drm/drm_util.h>
#include "xe_device.h"
#include "xe_device_types.h" #include "xe_device_types.h"
#include "xe_hw_engine.h" #include "xe_hw_engine.h"
...@@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, ...@@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
u16 instance, u16 instance,
bool logical); bool logical);
static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt)
{
return gt->info.has_indirect_ring_state &&
xe_device_uc_enabled(gt_to_xe(gt));
}
static inline bool xe_gt_is_media_type(struct xe_gt *gt) static inline bool xe_gt_is_media_type(struct xe_gt *gt)
{ {
return gt->info.type == XE_GT_TYPE_MEDIA; return gt->info.type == XE_GT_TYPE_MEDIA;
......
...@@ -110,8 +110,6 @@ struct xe_gt { ...@@ -110,8 +110,6 @@ struct xe_gt {
struct { struct {
/** @info.type: type of GT */ /** @info.type: type of GT */
enum xe_gt_type type; enum xe_gt_type type;
/** @info.id: Unique ID of this GT within the PCI Device */
u8 id;
/** @info.reference_clock: clock frequency */ /** @info.reference_clock: clock frequency */
u32 reference_clock; u32 reference_clock;
/** @info.engine_mask: mask of engines present on GT */ /** @info.engine_mask: mask of engines present on GT */
...@@ -124,6 +122,10 @@ struct xe_gt { ...@@ -124,6 +122,10 @@ struct xe_gt {
u64 __engine_mask; u64 __engine_mask;
/** @info.gmdid: raw GMD_ID value from hardware */ /** @info.gmdid: raw GMD_ID value from hardware */
u32 gmdid; u32 gmdid;
/** @info.id: Unique ID of this GT within the PCI Device */
u8 id;
/** @info.has_indirect_ring_state: GT has indirect ring state support */
u8 has_indirect_ring_state:1;
} info; } info;
/** /**
......
...@@ -267,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) ...@@ -267,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
{ {
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads); struct xe_gt *gt = ads_to_gt(ads);
size_t total_size = 0, alloc_size, real_size; size_t total_size = 0, alloc_size, real_size;
int class; int class;
...@@ -276,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) ...@@ -276,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
if (!engine_enable_mask(gt, class)) if (!engine_enable_mask(gt, class))
continue; continue;
real_size = xe_lrc_size(xe, class); real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size); alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size; total_size += alloc_size;
} }
...@@ -774,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) ...@@ -774,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
xe_gt_assert(gt, gt->default_lrc[class]); xe_gt_assert(gt, gt->default_lrc[class]);
real_size = xe_lrc_size(xe, class); real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size); alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size; total_size += alloc_size;
......
...@@ -677,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) ...@@ -677,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
if (xe_exec_queue_is_parallel(q)) if (xe_exec_queue_is_parallel(q))
wq_item_append(q); wq_item_append(q);
else else
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
return; return;
......
...@@ -34,12 +34,15 @@ ...@@ -34,12 +34,15 @@
#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
struct xe_lrc_snapshot { struct xe_lrc_snapshot {
struct xe_bo *lrc_bo; struct xe_bo *lrc_bo;
void *lrc_snapshot; void *lrc_snapshot;
unsigned long lrc_size, lrc_offset; unsigned long lrc_size, lrc_offset;
u32 context_desc; u32 context_desc;
u32 indirect_context_desc;
u32 head; u32 head;
struct { struct {
u32 internal; u32 internal;
...@@ -55,20 +58,25 @@ lrc_to_xe(struct xe_lrc *lrc) ...@@ -55,20 +58,25 @@ lrc_to_xe(struct xe_lrc *lrc)
return gt_to_xe(lrc->fence_ctx.gt); return gt_to_xe(lrc->fence_ctx.gt);
} }
size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{ {
struct xe_device *xe = gt_to_xe(gt);
size_t size;
switch (class) { switch (class) {
case XE_ENGINE_CLASS_RENDER: case XE_ENGINE_CLASS_RENDER:
if (GRAPHICS_VER(xe) >= 20) if (GRAPHICS_VER(xe) >= 20)
return 4 * SZ_4K; size = 4 * SZ_4K;
else else
return 14 * SZ_4K; size = 14 * SZ_4K;
break;
case XE_ENGINE_CLASS_COMPUTE: case XE_ENGINE_CLASS_COMPUTE:
/* 14 pages since graphics_ver == 11 */ /* 14 pages since graphics_ver == 11 */
if (GRAPHICS_VER(xe) >= 20) if (GRAPHICS_VER(xe) >= 20)
return 3 * SZ_4K; size = 3 * SZ_4K;
else else
return 14 * SZ_4K; size = 14 * SZ_4K;
break;
default: default:
WARN(1, "Unknown engine class: %d", class); WARN(1, "Unknown engine class: %d", class);
fallthrough; fallthrough;
...@@ -76,8 +84,14 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) ...@@ -76,8 +84,14 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_DECODE:
case XE_ENGINE_CLASS_VIDEO_ENHANCE: case XE_ENGINE_CLASS_VIDEO_ENHANCE:
case XE_ENGINE_CLASS_OTHER: case XE_ENGINE_CLASS_OTHER:
return 2 * SZ_4K; size = 2 * SZ_4K;
} }
/* Add indirect ring state page */
if (xe_gt_has_indirect_ring_state(gt))
size += LRC_INDIRECT_RING_STATE_SIZE;
return size;
} }
/* /*
...@@ -508,6 +522,32 @@ static const u8 xe2_xcs_offsets[] = { ...@@ -508,6 +522,32 @@ static const u8 xe2_xcs_offsets[] = {
0 0
}; };
/*
 * Register layout of the Indirect Ring State page, in the encoding consumed
 * by set_offsets(). The bracketed number on each row is the dword index of
 * that entry within the page; each register entry takes two dwords (register
 * offset at [n], value at [n + 1]), which is why the INDIRECT_CTX_RING_*
 * indices are defined as "[n] + 1".
 */
static const u8 xe2_indirect_ring_state_offsets[] = {
NOP(1), /* [0x00] */
LRI(5, POSTED), /* [0x01] */
REG(0x034), /* [0x02] RING_BUFFER_HEAD */
REG(0x030), /* [0x04] RING_BUFFER_TAIL */
REG(0x038), /* [0x06] RING_BUFFER_START */
REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
NOP(5), /* [0x0c] */
LRI(9, POSTED), /* [0x11] */
REG(0x168), /* [0x12] BB_ADDR_UDW */
REG(0x140), /* [0x14] BB_ADDR */
REG(0x110), /* [0x16] BB_STATE */
REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x1a] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x1c] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x1e] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
NOP(12), /* [0x24] */
0
};
#undef REG16 #undef REG16
#undef REG #undef REG
#undef LRI #undef LRI
...@@ -546,6 +586,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) ...@@ -546,6 +586,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
if (xe_gt_has_indirect_ring_state(hwe->gt))
regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
/* TODO: Timestamp */ /* TODO: Timestamp */
} }
...@@ -589,6 +633,11 @@ static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) ...@@ -589,6 +633,11 @@ static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
regs[x + 1] |= STOP_RING << 16; regs[x + 1] |= STOP_RING << 16;
} }
/* Report whether this LRC was set up with XE_LRC_FLAG_INDIRECT_RING_STATE. */
static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return !!(lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE);
}
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{ {
return 0; return 0;
...@@ -643,6 +692,12 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) ...@@ -643,6 +692,12 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
} }
/*
 * Offset of the indirect ring state page. The page occupies the last
 * LRC_INDIRECT_RING_STATE_SIZE bytes of the LRC, so the offset is derived
 * from the total size recorded in lrc->size.
 */
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	u32 total = lrc->size;

	/* Indirect ring state page is at the very end of LRC */
	return total - LRC_INDIRECT_RING_STATE_SIZE;
}
#define DECL_MAP_ADDR_HELPERS(elem) \ #define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \ { \
...@@ -663,6 +718,7 @@ DECL_MAP_ADDR_HELPERS(seqno) ...@@ -663,6 +718,7 @@ DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
#undef DECL_MAP_ADDR_HELPERS #undef DECL_MAP_ADDR_HELPERS
...@@ -671,6 +727,35 @@ u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) ...@@ -671,6 +727,35 @@ u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
return __xe_lrc_pphwsp_ggtt_addr(lrc); return __xe_lrc_pphwsp_ggtt_addr(lrc);
} }
/**
 * xe_lrc_indirect_ring_ggtt_addr() - Address of the LRC's indirect ring state
 * @lrc: the LRC to query
 *
 * Return: the value of __xe_lrc_indirect_ring_ggtt_addr() when the LRC has
 * the indirect ring state flag set, 0 otherwise.
 */
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	return xe_lrc_has_indirect_ring_state(lrc) ?
		__xe_lrc_indirect_ring_ggtt_addr(lrc) : 0;
}
static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
map = __xe_lrc_indirect_ring_map(lrc);
iosys_map_incr(&map, reg_nr * sizeof(u32));
return xe_map_read32(xe, &map);
}
/*
 * Write one 32-bit entry of the indirect ring state page.
 * @reg_nr is a dword index into that page (e.g. INDIRECT_CTX_RING_TAIL);
 * @val is stored there as-is.
 */
static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct iosys_map page = __xe_lrc_indirect_ring_map(lrc);

	/* Advance from the start of the page to the requested dword. */
	iosys_map_incr(&page, reg_nr * sizeof(u32));

	xe_map_write32(lrc_to_xe(lrc), &page, val);
}
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{ {
struct xe_device *xe = lrc_to_xe(lrc); struct xe_device *xe = lrc_to_xe(lrc);
...@@ -693,20 +778,25 @@ void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) ...@@ -693,20 +778,25 @@ void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
static void *empty_lrc_data(struct xe_hw_engine *hwe) static void *empty_lrc_data(struct xe_hw_engine *hwe)
{ {
struct xe_device *xe = gt_to_xe(hwe->gt); struct xe_gt *gt = hwe->gt;
void *data; void *data;
u32 *regs; u32 *regs;
data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
if (!data) if (!data)
return NULL; return NULL;
/* 1st page: Per-Process of HW status Page */ /* 1st page: Per-Process of HW status Page */
regs = data + LRC_PPHWSP_SIZE; regs = data + LRC_PPHWSP_SIZE;
set_offsets(regs, reg_offsets(xe, hwe->class), hwe); set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
set_context_control(regs, hwe); set_context_control(regs, hwe);
set_memory_based_intr(regs, hwe); set_memory_based_intr(regs, hwe);
reset_stop_ring(regs, hwe); reset_stop_ring(regs, hwe);
if (xe_gt_has_indirect_ring_state(gt)) {
regs = data + xe_gt_lrc_size(gt, hwe->class) -
LRC_INDIRECT_RING_STATE_SIZE;
set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
}
return data; return data;
} }
...@@ -731,16 +821,19 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ...@@ -731,16 +821,19 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct iosys_map map; struct iosys_map map;
void *init_data = NULL; void *init_data = NULL;
u32 arb_enable; u32 arb_enable;
u32 lrc_size;
int err; int err;
lrc->flags = 0; lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
/* /*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls. * via VM bind calls.
*/ */
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ring_size + xe_lrc_size(xe, hwe->class),
ttm_bo_type_kernel, ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT |
...@@ -748,6 +841,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ...@@ -748,6 +841,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
if (IS_ERR(lrc->bo)) if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo); return PTR_ERR(lrc->bo);
lrc->size = lrc_size;
lrc->tile = gt_to_tile(hwe->gt); lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size; lrc->ring.size = ring_size;
lrc->ring.tail = 0; lrc->ring.tail = 0;
...@@ -772,10 +866,10 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ...@@ -772,10 +866,10 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
} else { } else {
xe_map_memcpy_to(xe, &map, 0, init_data, xe_map_memcpy_to(xe, &map, 0, init_data,
xe_lrc_size(xe, hwe->class)); xe_gt_lrc_size(gt, hwe->class));
kfree(init_data); kfree(init_data);
} }
...@@ -786,11 +880,25 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ...@@ -786,11 +880,25 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_drm_client_add_bo(vm->xef->client, lrc->bo); xe_drm_client_add_bo(vm->xef->client, lrc->bo);
} }
if (xe_gt_has_indirect_ring_state(gt)) {
xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
__xe_lrc_indirect_ring_ggtt_addr(lrc));
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
__xe_lrc_ring_ggtt_addr(lrc));
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
} else {
xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
RING_CTL_SIZE(lrc->ring.size) | RING_VALID); RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
}
if (xe->info.has_asid && vm) if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
...@@ -834,13 +942,35 @@ void xe_lrc_finish(struct xe_lrc *lrc) ...@@ -834,13 +942,35 @@ void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_put(lrc->bo); xe_bo_put(lrc->bo);
} }
/**
 * xe_lrc_set_ring_tail() - Store the ring tail in the LRC's saved state
 * @lrc: the LRC to update
 * @tail: tail value to write
 *
 * The tail is written to the indirect ring state page when the LRC uses
 * indirect ring state, and to the CTX_RING_TAIL context register otherwise.
 */
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (!xe_lrc_has_indirect_ring_state(lrc)) {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
		return;
	}

	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
}
/**
 * xe_lrc_ring_tail() - Read the ring tail from the LRC's saved state
 * @lrc: the LRC to query
 *
 * Reads from the indirect ring state page when the LRC uses indirect ring
 * state, and from the CTX_RING_TAIL context register otherwise.
 *
 * Return: the stored tail value masked with TAIL_ADDR.
 */
u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	u32 raw;

	if (xe_lrc_has_indirect_ring_state(lrc))
		raw = xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL);
	else
		raw = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);

	return raw & TAIL_ADDR;
}
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{ {
if (xe_lrc_has_indirect_ring_state(lrc))
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
else
xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
} }
u32 xe_lrc_ring_head(struct xe_lrc *lrc) u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{ {
if (xe_lrc_has_indirect_ring_state(lrc))
return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
else
return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
} }
...@@ -1214,7 +1344,7 @@ void xe_lrc_dump_default(struct drm_printer *p, ...@@ -1214,7 +1344,7 @@ void xe_lrc_dump_default(struct drm_printer *p,
* hardware status page. * hardware status page.
*/ */
dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
while (remaining_dw > 0) { while (remaining_dw > 0) {
if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
...@@ -1355,9 +1485,10 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) ...@@ -1355,9 +1485,10 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
return NULL; return NULL;
snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc); snapshot->head = xe_lrc_ring_head(lrc);
snapshot->tail.internal = lrc->ring.tail; snapshot->tail.internal = lrc->ring.tail;
snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); snapshot->tail.memory = xe_lrc_ring_tail(lrc);
snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc);
snapshot->seqno = xe_lrc_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_bo = xe_bo_get(lrc->bo);
...@@ -1405,6 +1536,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer ...@@ -1405,6 +1536,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
return; return;
drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
snapshot->indirect_context_desc);
drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
snapshot->tail.internal, snapshot->tail.memory); snapshot->tail.internal, snapshot->tail.memory);
......
...@@ -21,14 +21,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ...@@ -21,14 +21,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size);
void xe_lrc_finish(struct xe_lrc *lrc); void xe_lrc_finish(struct xe_lrc *lrc);
size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail);
u32 xe_lrc_ring_tail(struct xe_lrc *lrc);
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_head(struct xe_lrc *lrc);
u32 xe_lrc_ring_space(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc);
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc);
......
...@@ -20,10 +20,14 @@ struct xe_lrc { ...@@ -20,10 +20,14 @@ struct xe_lrc {
*/ */
struct xe_bo *bo; struct xe_bo *bo;
/** @size: size of lrc including any indirect ring state page */
u32 size;
/** @tile: tile which this LRC belongs to */ /** @tile: tile which this LRC belongs to */
struct xe_tile *tile; struct xe_tile *tile;
/** @flags: LRC flags */ /** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
u32 flags; u32 flags;
/** @ring: submission ring state */ /** @ring: submission ring state */
......
...@@ -661,6 +661,7 @@ static int xe_info_init(struct xe_device *xe, ...@@ -661,6 +661,7 @@ static int xe_info_init(struct xe_device *xe,
gt = tile->primary_gt; gt = tile->primary_gt;
gt->info.id = xe->info.gt_count++; gt->info.id = xe->info.gt_count++;
gt->info.type = XE_GT_TYPE_MAIN; gt->info.type = XE_GT_TYPE_MAIN;
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
gt->info.__engine_mask = graphics_desc->hw_engine_mask; gt->info.__engine_mask = graphics_desc->hw_engine_mask;
if (MEDIA_VER(xe) < 13 && media_desc) if (MEDIA_VER(xe) < 13 && media_desc)
gt->info.__engine_mask |= media_desc->hw_engine_mask; gt->info.__engine_mask |= media_desc->hw_engine_mask;
...@@ -678,6 +679,7 @@ static int xe_info_init(struct xe_device *xe, ...@@ -678,6 +679,7 @@ static int xe_info_init(struct xe_device *xe,
gt = tile->media_gt; gt = tile->media_gt;
gt->info.type = XE_GT_TYPE_MEDIA; gt->info.type = XE_GT_TYPE_MEDIA;
gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
gt->info.__engine_mask = media_desc->hw_engine_mask; gt->info.__engine_mask = media_desc->hw_engine_mask;
gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
......
...@@ -27,6 +27,7 @@ struct xe_graphics_desc { ...@@ -27,6 +27,7 @@ struct xe_graphics_desc {
u8 has_asid:1; u8 has_asid:1;
u8 has_atomic_enable_pte_bit:1; u8 has_atomic_enable_pte_bit:1;
u8 has_flat_ccs:1; u8 has_flat_ccs:1;
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1; u8 has_range_tlb_invalidation:1;
u8 has_usm:1; u8 has_usm:1;
}; };
...@@ -37,6 +38,8 @@ struct xe_media_desc { ...@@ -37,6 +38,8 @@ struct xe_media_desc {
u8 rel; u8 rel;
u64 hw_engine_mask; /* hardware engines provided by media IP */ u64 hw_engine_mask; /* hardware engines provided by media IP */
u8 has_indirect_ring_state:1;
}; };
struct gmdid_map { struct gmdid_map {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment