Commit 83f00078 authored by Dave Airlie

Merge tag 'drm-xe-fixes-2024-10-17' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Driver Changes:
- New workaround to Xe2 (Aradhya)
- Fix unbalanced rpm put (Matthew Auld)
- Remove fragile lock optimization (Matthew Brost)
- Fix job release, delegating it to the drm scheduler (Matthew Brost)
- Fix timestamp bit width for Xe2 (Lucas)
- Fix external BO's dma-resv usage (Matthew Brost)
- Fix returning success for timeout in wait_token (Nirmoy)
- Initialize fence to avoid it being detected as signaled (Matthew Auld)
- Improve cache flush for BMG (Matthew Auld)
- Don't allow hflip for tile4 framebuffer on Xe2 (Juha-Pekka)
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/jkldrex5733ldxrla75b4ayvhujjhw2kccmasl5rotoufoacj4@pkvlrrv4orc7
parents 49ff3e79 ffafd126
...@@ -438,6 +438,19 @@ bool intel_fb_needs_64k_phys(u64 modifier) ...@@ -438,6 +438,19 @@ bool intel_fb_needs_64k_phys(u64 modifier)
INTEL_PLANE_CAP_NEED64K_PHYS); INTEL_PLANE_CAP_NEED64K_PHYS);
} }
/**
* intel_fb_is_tile4_modifier: Check if a modifier is a tile4 modifier type
* @modifier: Modifier to check
*
* Looks up the descriptor for @modifier and tests whether its plane
* capabilities include INTEL_PLANE_CAP_TILING_4.
*
* NOTE(review): the result of lookup_modifier() is dereferenced without a
* NULL check; presumably it always returns a valid descriptor - confirm.
*
* Returns:
* Returns %true if @modifier is a tile4 modifier.
*/
bool intel_fb_is_tile4_modifier(u64 modifier)
{
return plane_caps_contain_any(lookup_modifier(modifier)->plane_caps,
INTEL_PLANE_CAP_TILING_4);
}
static bool check_modifier_display_ver_range(const struct intel_modifier_desc *md, static bool check_modifier_display_ver_range(const struct intel_modifier_desc *md,
u8 display_ver_from, u8 display_ver_until) u8 display_ver_from, u8 display_ver_until)
{ {
......
...@@ -35,6 +35,7 @@ bool intel_fb_is_ccs_modifier(u64 modifier); ...@@ -35,6 +35,7 @@ bool intel_fb_is_ccs_modifier(u64 modifier);
bool intel_fb_is_rc_ccs_cc_modifier(u64 modifier); bool intel_fb_is_rc_ccs_cc_modifier(u64 modifier);
bool intel_fb_is_mc_ccs_modifier(u64 modifier); bool intel_fb_is_mc_ccs_modifier(u64 modifier);
bool intel_fb_needs_64k_phys(u64 modifier); bool intel_fb_needs_64k_phys(u64 modifier);
bool intel_fb_is_tile4_modifier(u64 modifier);
bool intel_fb_is_ccs_aux_plane(const struct drm_framebuffer *fb, int color_plane); bool intel_fb_is_ccs_aux_plane(const struct drm_framebuffer *fb, int color_plane);
int intel_fb_rc_ccs_cc_plane(const struct drm_framebuffer *fb); int intel_fb_rc_ccs_cc_plane(const struct drm_framebuffer *fb);
......
...@@ -1591,6 +1591,17 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, ...@@ -1591,6 +1591,17 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state,
return -EINVAL; return -EINVAL;
} }
/*
* Display20 onward tile4 hflip is not supported
*/
if (rotation & DRM_MODE_REFLECT_X &&
intel_fb_is_tile4_modifier(fb->modifier) &&
DISPLAY_VER(dev_priv) >= 20) {
drm_dbg_kms(&dev_priv->drm,
"horizontal flip is not supported with tile4 surface formats\n");
return -EINVAL;
}
if (drm_rotation_90_or_270(rotation)) { if (drm_rotation_90_or_270(rotation)) {
if (!intel_fb_supports_90_270_rotation(to_intel_framebuffer(fb))) { if (!intel_fb_supports_90_270_rotation(to_intel_framebuffer(fb))) {
drm_dbg_kms(&dev_priv->drm, drm_dbg_kms(&dev_priv->drm,
......
...@@ -393,9 +393,6 @@ ...@@ -393,9 +393,6 @@
#define XE2_GLOBAL_INVAL XE_REG(0xb404) #define XE2_GLOBAL_INVAL XE_REG(0xb404)
#define SCRATCH1LPFC XE_REG(0xb474)
#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) #define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604)
#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) #define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608)
......
...@@ -980,13 +980,13 @@ void xe_device_declare_wedged(struct xe_device *xe) ...@@ -980,13 +980,13 @@ void xe_device_declare_wedged(struct xe_device *xe)
return; return;
} }
xe_pm_runtime_get_noresume(xe);
if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
return; return;
} }
xe_pm_runtime_get_noresume(xe);
if (!atomic_xchg(&xe->wedged.flag, 1)) { if (!atomic_xchg(&xe->wedged.flag, 1)) {
xe->needs_flr_on_fini = true; xe->needs_flr_on_fini = true;
drm_err(&xe->drm, drm_err(&xe->drm,
......
...@@ -41,11 +41,6 @@ ...@@ -41,11 +41,6 @@
* user knows an exec writes to a BO and reads from the BO in the next exec, it * user knows an exec writes to a BO and reads from the BO in the next exec, it
* is the user's responsibility to pass in / out fence between the two execs). * is the user's responsibility to pass in / out fence between the two execs).
* *
* Implicit dependencies for external BOs are handled by using the dma-buf
* implicit dependency uAPI (TODO: add link). To make this work each exec must
* install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
* BO mapped in the VM.
*
* We do not allow a user to trigger a bind at exec time rather we have a VM * We do not allow a user to trigger a bind at exec time rather we have a VM
* bind IOCTL which uses the same in / out fence interface as exec. In that * bind IOCTL which uses the same in / out fence interface as exec. In that
* sense, a VM bind is basically the same operation as an exec from the user * sense, a VM bind is basically the same operation as an exec from the user
...@@ -59,8 +54,8 @@ ...@@ -59,8 +54,8 @@
* behind any pending kernel operations on any external BOs in VM or any BOs * behind any pending kernel operations on any external BOs in VM or any BOs
* private to the VM. This is accomplished by the rebinds waiting on BOs * private to the VM. This is accomplished by the rebinds waiting on BOs
* DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs
* slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and
* in DMA_RESV_USAGE_WRITE for external BOs). * for external BOs).
* *
* Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
* mode VMs we use preempt fences and a rebind worker (TODO: add link). * mode VMs we use preempt fences and a rebind worker (TODO: add link).
...@@ -304,7 +299,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ...@@ -304,7 +299,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
xe_sched_job_arm(job); xe_sched_job_arm(job);
if (!xe_vm_in_lr_mode(vm)) if (!xe_vm_in_lr_mode(vm))
drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); DMA_RESV_USAGE_BOOKKEEP,
DMA_RESV_USAGE_BOOKKEEP);
for (i = 0; i < num_syncs; i++) { for (i = 0; i < num_syncs; i++) {
xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished);
......
...@@ -63,7 +63,9 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) ...@@ -63,7 +63,9 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
struct xe_sched_job *job) struct xe_sched_job *job)
{ {
spin_lock(&sched->base.job_list_lock);
list_add(&job->drm.list, &sched->base.pending_list); list_add(&job->drm.list, &sched->base.pending_list);
spin_unlock(&sched->base.job_list_lock);
} }
static inline static inline
......
...@@ -108,7 +108,6 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) ...@@ -108,7 +108,6 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
return; return;
if (!xe_gt_is_media_type(gt)) { if (!xe_gt_is_media_type(gt)) {
xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS; reg |= CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
......
...@@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) ...@@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
return hw_tlb_timeout + 2 * delay; return hw_tlb_timeout + 2 * delay;
} }
/*
* Finalize a TLB invalidation fence: drop one runtime PM reference on the
* device owning fence->gt and clear fence->gt.
*
* fence->gt doubles as the "already finalized" marker, so a second call on
* the same fence triggers a one-time warning and does nothing else.
*
* NOTE(review): presumably pairs with a runtime PM reference taken when the
* fence was initialized - confirm against the init path.
*/
static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
{
/* gt already cleared: warn once and skip the put to keep the PM refcount balanced */
if (WARN_ON_ONCE(!fence->gt))
return;
/* The put must come before gt is cleared, since gt is used to find the device */
xe_pm_runtime_put(gt_to_xe(fence->gt));
fence->gt = NULL; /* fini() should be called once */
}
static void static void
__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
{ {
...@@ -204,7 +213,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, ...@@ -204,7 +213,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
tlb_timeout_jiffies(gt)); tlb_timeout_jiffies(gt));
} }
spin_unlock_irq(&gt->tlb_invalidation.pending_lock); spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
} else if (ret < 0) { } else {
__invalidation_fence_signal(xe, fence); __invalidation_fence_signal(xe, fence);
} }
if (!ret) { if (!ret) {
...@@ -267,10 +276,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) ...@@ -267,10 +276,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_gt_tlb_invalidation_fence_init(gt, &fence, true); xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
ret = xe_gt_tlb_invalidation_guc(gt, &fence); ret = xe_gt_tlb_invalidation_guc(gt, &fence);
if (ret < 0) { if (ret)
xe_gt_tlb_invalidation_fence_fini(&fence);
return ret; return ret;
}
xe_gt_tlb_invalidation_fence_wait(&fence); xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
...@@ -496,7 +503,8 @@ static const struct dma_fence_ops invalidation_fence_ops = { ...@@ -496,7 +503,8 @@ static const struct dma_fence_ops invalidation_fence_ops = {
* @stack: fence is stack variable * @stack: fence is stack variable
* *
* Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
* must be called if fence is not signaled. * will be automatically called when fence is signalled (all fences must signal),
* even on error.
*/ */
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence, struct xe_gt_tlb_invalidation_fence *fence,
...@@ -516,14 +524,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, ...@@ -516,14 +524,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
dma_fence_get(&fence->base); dma_fence_get(&fence->base);
fence->gt = gt; fence->gt = gt;
} }
/**
* xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
* @fence: TLB invalidation fence to finalize
*
* Drop PM ref which fence took during init.
*/
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
{
xe_pm_runtime_put(gt_to_xe(fence->gt));
}
...@@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); ...@@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence, struct xe_gt_tlb_invalidation_fence *fence,
bool stack); bool stack);
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
static inline void static inline void
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
......
...@@ -1030,10 +1030,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) ...@@ -1030,10 +1030,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
/* /*
* TDR has fired before free job worker. Common if exec queue * TDR has fired before free job worker. Common if exec queue
* immediately closed after last fence signaled. * immediately closed after last fence signaled. Add back to pending
* list so job can be freed and kick scheduler ensuring free job is not
* lost.
*/ */
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
guc_exec_queue_free_job(drm_job); xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
return DRM_GPU_SCHED_STAT_NOMINAL; return DRM_GPU_SCHED_STAT_NOMINAL;
} }
......
...@@ -161,6 +161,10 @@ query_engine_cycles(struct xe_device *xe, ...@@ -161,6 +161,10 @@ query_engine_cycles(struct xe_device *xe,
cpu_clock); cpu_clock);
xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (GRAPHICS_VER(xe) >= 20)
resp.width = 64;
else
resp.width = 36; resp.width = 36;
/* Only write to the output fields of user query */ /* Only write to the output fields of user query */
......
...@@ -58,7 +58,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, ...@@ -58,7 +58,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
if (!access_ok(ptr, sizeof(*ptr))) if (!access_ok(ptr, sizeof(*ptr)))
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
if (!ufence) if (!ufence)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
...@@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) ...@@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
&fence[fence_id], vma); &fence[fence_id], vma);
if (ret < 0) { if (ret)
xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
goto wait; goto wait;
}
++fence_id; ++fence_id;
if (!tile->media_gt) if (!tile->media_gt)
...@@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) ...@@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_gt_tlb_invalidation_vma(tile->media_gt, ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
&fence[fence_id], vma); &fence[fence_id], vma);
if (ret < 0) { if (ret)
xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
goto wait; goto wait;
}
++fence_id; ++fence_id;
} }
} }
......
...@@ -710,6 +710,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ...@@ -710,6 +710,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_PARTIAL_AUTOSTRIP | DIS_PARTIAL_AUTOSTRIP |
DIS_AUTOSTRIP)) DIS_AUTOSTRIP))
}, },
{ XE_RTP_NAME("15016589081"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
/* Xe2_HPG */ /* Xe2_HPG */
{ XE_RTP_NAME("15010599737"), { XE_RTP_NAME("15010599737"),
......
...@@ -169,9 +169,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, ...@@ -169,9 +169,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
args->timeout = 0; args->timeout = 0;
} }
if (!timeout && !(err < 0))
err = -ETIME;
if (q) if (q)
xe_exec_queue_put(q); xe_exec_queue_put(q);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment