Commit 3ad7d18c authored by Andrzej Hajda's avatar Andrzej Hajda Committed by Matthew Brost

drm/xe: flush engine buffers before signalling user fence on all engines

Tests show that user fence signalling requires kind of write barrier,
otherwise not all writes performed by the workload will be available
to userspace. It is already done for render and compute, we need it
also for the rest: video, gsc, copy.

Fixes: dd08ebf6 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: default avatarAndrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: default avatarThomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: default avatarMatthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240605-fix_user_fence_posted-v3-2-06e7932f784a@intel.com
parent 3494f5f5
...@@ -80,6 +80,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) ...@@ -80,6 +80,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
return i; return i;
} }
static int emit_flush_dw(u32 *dw, int i)
{
dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW;
dw[i++] = 0;
dw[i++] = 0;
dw[i++] = 0;
return i;
}
static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
u32 *dw, int i) u32 *dw, int i)
{ {
...@@ -234,10 +244,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc ...@@ -234,10 +244,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
if (job->user_fence.used) if (job->user_fence.used) {
i = emit_flush_dw(dw, i);
i = emit_store_imm_ppgtt_posted(job->user_fence.addr, i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
job->user_fence.value, job->user_fence.value,
dw, i); dw, i);
}
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
...@@ -293,10 +305,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, ...@@ -293,10 +305,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
if (job->user_fence.used) if (job->user_fence.used) {
i = emit_flush_dw(dw, i);
i = emit_store_imm_ppgtt_posted(job->user_fence.addr, i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
job->user_fence.value, job->user_fence.value,
dw, i); dw, i);
}
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment