Commit 30603b5b authored by Haridhar Kalvala, committed by Rodrigo Vivi

drm/xe/xe2: Update MOCS fields in blitter instructions

Xe2 changes or adds bits for MOCS in a few BLT instructions:
XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET.
Modify the code to handle the new field locations. Unlike Xe1, the MOCS
field in those instructions now holds only the MOCS index, not the full
MEMORY_OBJECT_CONTROL_STATE structure. The PXP bit is now explicitly
documented separately.

Bspec: 57567,57566,57565,57562
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent 4bdd8c2e
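
Below is a minimal sketch (editorial illustration, not part of the patch) of the encoding difference described in the commit message, shown for XY_CTRL_SURF_COPY_BLT. It assumes the kernel's GENMASK()/FIELD_PREP() helpers from <linux/bits.h> and <linux/bitfield.h>; the helper name pack_ctrl_surf_mocs() and the is_xe2 flag are hypothetical stand-ins for the driver's GRAPHICS_VERx100() check.

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define XY_CTRL_SURF_MOCS_MASK                  GENMASK(31, 26) /* pre-Xe2: full MOCS field */
#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK        GENMASK(31, 28) /* Xe2: MOCS index only */

/*
 * Hypothetical helper: pack the uncached MOCS index into the MOCS field of
 * the upper-address dwords of XY_CTRL_SURF_COPY_BLT, mirroring the version
 * check this patch adds in emit_copy_ccs().
 */
static u32 pack_ctrl_surf_mocs(bool is_xe2, u32 mocs_index)
{
        if (is_xe2)
                /* Xe2: only the 4-bit MOCS index, at bits [31:28] */
                return FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, mocs_index);
        /* pre-Xe2: the wider MOCS field at bits [31:26], as programmed before */
        return FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs_index);
}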
@@ -45,6 +45,7 @@
 #define CCS_SIZE_MASK				0x3FF
 #define CCS_SIZE_SHIFT				8
 #define XY_CTRL_SURF_MOCS_MASK			GENMASK(31, 26)
+#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
 #define NUM_CCS_BYTES_PER_BLOCK		256
 #define NUM_BYTES_PER_CCS_BYTE			256
 #define NUM_CCS_BLKS_PER_XFER			1024
@@ -53,12 +54,14 @@
 #define XY_FAST_COLOR_BLT_DEPTH_32		(2 << 19)
 #define XY_FAST_COLOR_BLT_DW			16
 #define XY_FAST_COLOR_BLT_MOCS_MASK		GENMASK(27, 22)
+#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK	GENMASK(27, 24)
 #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT	31

 #define XY_FAST_COPY_BLT_CMD			(2 << 29 | 0x42 << 22)
 #define XY_FAST_COPY_BLT_DEPTH_32		(3<<24)
 #define XY_FAST_COPY_BLT_D1_SRC_TILE4		REG_BIT(31)
 #define XY_FAST_COPY_BLT_D1_DST_TILE4		REG_BIT(30)
+#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK	GENMASK(23, 20)

 #define PVC_MEM_SET_CMD				(2 << 29 | 0x5b << 22)
 #define PVC_MEM_SET_CMD_LEN_DW			7
@@ -66,6 +69,7 @@
 #define PVC_MEM_SET_DATA_FIELD			GENMASK(31, 24)
 /* Bspec lists field as [6:0], but index alone is from [6:1] */
 #define PVC_MEM_SET_MOCS_INDEX_MASK		GENMASK(6, 1)
+#define XE2_MEM_SET_MOCS_INDEX_MASK		GENMASK(6, 3)

 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
@@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
			  u64 src_ofs, bool src_is_indirect,
			  u32 size)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 num_ccs_blks;
-	u32 mocs = gt->mocs.uc_index;
+	u32 mocs;

 	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
				    NUM_CCS_BYTES_PER_BLOCK);
 	xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
+
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
+
 	*cs++ = XY_CTRL_SURF_COPY_BLT |
		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
 	*cs++ = lower_32_bits(src_ofs);
-	*cs++ = upper_32_bits(src_ofs) |
-		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+	*cs++ = upper_32_bits(src_ofs) | mocs;
 	*cs++ = lower_32_bits(dst_ofs);
-	*cs++ = upper_32_bits(dst_ofs) |
-		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+	*cs++ = upper_32_bits(dst_ofs) | mocs;

 	bb->len = cs - bb->cs;
 }
@@ -544,24 +549,27 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
		      unsigned int pitch)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	u32 mocs = 0;
+	u32 tile_y = 0;

 	xe_gt_assert(gt, size / pitch <= S16_MAX);
 	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
 	xe_gt_assert(gt, pitch <= U16_MAX);

-	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+	if (GRAPHICS_VER(xe) >= 20)
+		mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
 	if (GRAPHICS_VERx100(xe) >= 1250)
-		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
-			XY_FAST_COPY_BLT_D1_SRC_TILE4 |
-			XY_FAST_COPY_BLT_D1_DST_TILE4;
-	else
-		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
+		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
+
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
 	bb->cs[bb->len++] = 0;
 	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
 	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
 	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
 	bb->cs[bb->len++] = 0;
-	bb->cs[bb->len++] = pitch;
+	bb->cs[bb->len++] = pitch | mocs;
 	bb->cs[bb->len++] = lower_32_bits(src_ofs);
 	bb->cs[bb->len++] = upper_32_bits(src_ofs);
 }
@@ -812,8 +820,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
				 u32 size, u32 pitch)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
-	u32 mocs = gt->mocs.uc_index;
 	u32 len = PVC_MEM_SET_CMD_LEN_DW;

 	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
@@ -822,7 +830,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
 	*cs++ = pitch - 1;
 	*cs++ = lower_32_bits(src_ofs);
 	*cs++ = upper_32_bits(src_ofs);
-	*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);

 	xe_gt_assert(gt, cs - bb->cs == len + bb->len);

@@ -835,15 +846,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 len = XY_FAST_COLOR_BLT_DW;
-	u32 mocs = gt->mocs.uc_index;

 	if (GRAPHICS_VERx100(xe) < 1250)
		len = 11;

 	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
		(len - 2);
-	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
-		(pitch - 1);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
+	else
+		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
 	*cs++ = 0;
 	*cs++ = (size / pitch) << 16 | pitch / 4;
 	*cs++ = lower_32_bits(src_ofs);
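
As a worked example of the new index-only fields (illustration only, not from the patch): FIELD_PREP() shifts the value up to the mask's least-significant set bit, so an arbitrary MOCS index of 2 lands at a different offset in the MEM_SET MOCS dword depending on which mask is used:

	FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, 2) == 2 << 1 == 0x04	(mask GENMASK(6, 1))
	FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, 2) == 2 << 3 == 0x10	(mask GENMASK(6, 3))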