Commit a5b5ab33 authored by Dave Airlie

Merge tag 'drm-xe-fixes-2024-04-04' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

- Stop using system_unbound_wq for preempt fences,
  as this can cause starvation when reaching more
  than max_active defined by workqueue
- Fix saving unordered rebinding fences by attaching
  them as kernel fences to the vm's resv
- Fix TLB invalidation fences completing out of order
- Move rebind TLB invalidation to the ring ops to reduce
  the latency
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tizan6wdpxu4ayudeikjglxdgzmnhdzj3li3z2pgkierjtozzw@lbfddeg43a7h
parents 4cf09f17 77a01101
...@@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) ...@@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
{ {
struct xe_device *xe = to_xe_device(dev); struct xe_device *xe = to_xe_device(dev);
if (xe->preempt_fence_wq)
destroy_workqueue(xe->preempt_fence_wq);
if (xe->ordered_wq) if (xe->ordered_wq)
destroy_workqueue(xe->ordered_wq); destroy_workqueue(xe->ordered_wq);
...@@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, ...@@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
INIT_LIST_HEAD(&xe->pinned.external_vram); INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted); INIT_LIST_HEAD(&xe->pinned.evicted);
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
if (!xe->ordered_wq || !xe->unordered_wq) { if (!xe->ordered_wq || !xe->unordered_wq ||
!xe->preempt_fence_wq) {
/*
* Cleanup done in xe_device_destroy via
* drmm_add_action_or_reset register above
*/
drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
err = -ENOMEM; err = -ENOMEM;
goto err; goto err;
......
...@@ -363,6 +363,9 @@ struct xe_device { ...@@ -363,6 +363,9 @@ struct xe_device {
/** @ufence_wq: user fence wait queue */ /** @ufence_wq: user fence wait queue */
wait_queue_head_t ufence_wq; wait_queue_head_t ufence_wq;
/** @preempt_fence_wq: used to serialize preempt fences */
struct workqueue_struct *preempt_fence_wq;
/** @ordered_wq: used to serialize compute mode resume */ /** @ordered_wq: used to serialize compute mode resume */
struct workqueue_struct *ordered_wq; struct workqueue_struct *ordered_wq;
......
...@@ -94,48 +94,16 @@ ...@@ -94,48 +94,16 @@
* Unlock all * Unlock all
*/ */
/*
* Add validation and rebinding to the drm_exec locking loop, since both can
* trigger eviction which may require sleeping dma_resv locks.
*/
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{ {
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
struct drm_gem_object *obj;
unsigned long index;
int num_fences;
int ret;
ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
if (ret)
return ret;
/*
* 1 fence slot for the final submit, and 1 more for every per-tile for
* GPU bind and 1 extra for CPU bind. Note that there are potentially
* many vma per object/dma-resv, however the fence slot will just be
* re-used, since they are largely the same timeline and the seqno
* should be in order. In the case of CPU bind there is dummy fence used
* for all CPU binds, so no need to have a per-tile slot for that.
*/
num_fences = 1 + 1 + vm->xe->info.tile_count;
/* /* The fence slot added here is intended for the exec sched job. */
* We don't know upfront exactly how many fence slots we will need at return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
* the start of the exec, since the TTM bo_validate above can consume
* numerous fence slots. Also due to how the dma_resv_reserve_fences()
* works it only ensures that at least that many fence slots are
* available i.e if there are already 10 slots available and we reserve
* two more, it can just noop without reserving anything. With this it
* is quite possible that TTM steals some of the fence slots and then
* when it comes time to do the vma binding and final exec stage we are
* lacking enough fence slots, leading to some nasty BUG_ON() when
* adding the fences. Hence just add our own fences here, after the
* validate stage.
*/
drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
ret = dma_resv_reserve_fences(obj->resv, num_fences);
if (ret)
return ret;
}
return 0;
} }
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
...@@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ...@@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_exec *exec = &vm_exec.exec; struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs = 0, num_ufence = 0; u32 i, num_syncs = 0, num_ufence = 0;
struct xe_sched_job *job; struct xe_sched_job *job;
struct dma_fence *rebind_fence;
struct xe_vm *vm; struct xe_vm *vm;
bool write_locked, skip_retry = false; bool write_locked, skip_retry = false;
ktime_t end = 0; ktime_t end = 0;
...@@ -290,39 +257,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ...@@ -290,39 +257,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_exec; goto err_exec;
} }
/* /* Wait behind rebinds */
* Rebind any invalidated userptr or evicted BOs in the VM, non-compute
* VM mode only.
*/
rebind_fence = xe_vm_rebind(vm, false);
if (IS_ERR(rebind_fence)) {
err = PTR_ERR(rebind_fence);
goto err_put_job;
}
/*
* We store the rebind_fence in the VM so subsequent execs don't get
* scheduled before the rebinds of userptrs / evicted BOs is complete.
*/
if (rebind_fence) {
dma_fence_put(vm->rebind_fence);
vm->rebind_fence = rebind_fence;
}
if (vm->rebind_fence) {
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&vm->rebind_fence->flags)) {
dma_fence_put(vm->rebind_fence);
vm->rebind_fence = NULL;
} else {
dma_fence_get(vm->rebind_fence);
err = drm_sched_job_add_dependency(&job->drm,
vm->rebind_fence);
if (err)
goto err_put_job;
}
}
/* Wait behind munmap style rebinds */
if (!xe_vm_in_lr_mode(vm)) { if (!xe_vm_in_lr_mode(vm)) {
err = drm_sched_job_add_resv_dependencies(&job->drm, err = drm_sched_job_add_resv_dependencies(&job->drm,
xe_vm_resv(vm), xe_vm_resv(vm),
......
...@@ -148,6 +148,11 @@ struct xe_exec_queue { ...@@ -148,6 +148,11 @@ struct xe_exec_queue {
const struct xe_ring_ops *ring_ops; const struct xe_ring_ops *ring_ops;
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
struct drm_sched_entity *entity; struct drm_sched_entity *entity;
/**
* @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
* Protected by @vm's resv. Unused if @vm == NULL.
*/
u64 tlb_flush_seqno;
/** @lrc: logical ring context for this exec queue */ /** @lrc: logical ring context for this exec queue */
struct xe_lrc lrc[]; struct xe_lrc lrc[];
}; };
......
...@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, ...@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
{ {
struct xe_bo *bo = xe_vma_bo(vma); struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma); struct xe_vm *vm = xe_vma_vm(vma);
unsigned int num_shared = 2; /* slots for bind + move */
int err; int err;
err = xe_vm_prepare_vma(exec, vma, num_shared); err = xe_vm_lock_vma(exec, vma);
if (err) if (err)
return err; return err;
......
...@@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) ...@@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences); INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
spin_lock_init(&gt->tlb_invalidation.pending_lock); spin_lock_init(&gt->tlb_invalidation.pending_lock);
spin_lock_init(&gt->tlb_invalidation.lock); spin_lock_init(&gt->tlb_invalidation.lock);
gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr, INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
xe_gt_tlb_fence_timeout); xe_gt_tlb_fence_timeout);
......
...@@ -177,13 +177,6 @@ struct xe_gt { ...@@ -177,13 +177,6 @@ struct xe_gt {
* xe_gt_tlb_fence_timeout after the timeout interval is over. * xe_gt_tlb_fence_timeout after the timeout interval is over.
*/ */
struct delayed_work fence_tdr; struct delayed_work fence_tdr;
/** @tlb_invalidation.fence_context: context for TLB invalidation fences */
u64 fence_context;
/**
* @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by
* tlb_invalidation.lock
*/
u32 fence_seqno;
/** @tlb_invalidation.lock: protects TLB invalidation fences */ /** @tlb_invalidation.lock: protects TLB invalidation fences */
spinlock_t lock; spinlock_t lock;
} tlb_invalidation; } tlb_invalidation;
......
...@@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) ...@@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
struct xe_exec_queue *q = pfence->q; struct xe_exec_queue *q = pfence->q;
pfence->error = q->ops->suspend(q); pfence->error = q->ops->suspend(q);
queue_work(system_unbound_wq, &pfence->preempt_work); queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
return true; return true;
} }
......
...@@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt, ...@@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
spin_lock_irq(&gt->tlb_invalidation.lock); spin_lock_irq(&gt->tlb_invalidation.lock);
dma_fence_init(&ifence->base.base, &invalidation_fence_ops, dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
&gt->tlb_invalidation.lock, &gt->tlb_invalidation.lock,
gt->tlb_invalidation.fence_context, dma_fence_context_alloc(1), 1);
++gt->tlb_invalidation.fence_seqno);
spin_unlock_irq(&gt->tlb_invalidation.lock); spin_unlock_irq(&gt->tlb_invalidation.lock);
INIT_LIST_HEAD(&ifence->base.link); INIT_LIST_HEAD(&ifence->base.link);
...@@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue ...@@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
if (err) if (err)
goto err; goto err;
err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
if (err)
goto err;
xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
...@@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue ...@@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
* non-faulting LR, in particular on user-space batch buffer chaining, * non-faulting LR, in particular on user-space batch buffer chaining,
* it needs to be done here. * it needs to be done here.
*/ */
if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
(!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence) if (!ifence)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} else if (rebind && !xe_vm_in_lr_mode(vm)) {
/* We bump also if batch_invalidate_tlb is true */
vm->tlb_flush_seqno++;
} }
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
...@@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue ...@@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
} }
/* add shared fence now for pagetable delayed destroy */ /* add shared fence now for pagetable delayed destroy */
dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
last_munmap_rebind ? last_munmap_rebind ?
DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP); DMA_RESV_USAGE_BOOKKEEP);
...@@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu ...@@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
struct dma_fence *fence = NULL; struct dma_fence *fence = NULL;
struct invalidation_fence *ifence; struct invalidation_fence *ifence;
struct xe_range_fence *rfence; struct xe_range_fence *rfence;
int err;
LLIST_HEAD(deferred); LLIST_HEAD(deferred);
...@@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu ...@@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
num_entries); num_entries);
err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
if (err)
return ERR_PTR(err);
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence) if (!ifence)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
...@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc ...@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
{ {
u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job); u32 ppgtt_flag = get_ppgtt_flag(job);
struct xe_vm *vm = job->q->vm;
struct xe_gt *gt = job->q->gt; struct xe_gt *gt = job->q->gt;
if (vm && vm->batch_invalidate_tlb) { if (job->ring_ops_flush_tlb) {
dw[i++] = preparser_disable(true); dw[i++] = preparser_disable(true);
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i); seqno, true, dw, i);
...@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, ...@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
struct xe_gt *gt = job->q->gt; struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt); struct xe_device *xe = gt_to_xe(gt);
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
struct xe_vm *vm = job->q->vm;
dw[i++] = preparser_disable(true); dw[i++] = preparser_disable(true);
...@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, ...@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
} }
if (vm && vm->batch_invalidate_tlb) if (job->ring_ops_flush_tlb)
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i); seqno, true, dw, i);
dw[i++] = preparser_disable(false); dw[i++] = preparser_disable(false);
if (!vm || !vm->batch_invalidate_tlb) if (!job->ring_ops_flush_tlb)
i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, dw, i); seqno, dw, i);
...@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, ...@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
struct xe_gt *gt = job->q->gt; struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt); struct xe_device *xe = gt_to_xe(gt);
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
struct xe_vm *vm = job->q->vm;
u32 mask_flags = 0; u32 mask_flags = 0;
dw[i++] = preparser_disable(true); dw[i++] = preparser_disable(true);
...@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, ...@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
/* hsdes: 1809175790 */ /* hsdes: 1809175790 */
if (has_aux_ccs(xe)) if (has_aux_ccs(xe))
......
...@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job) ...@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
void xe_sched_job_arm(struct xe_sched_job *job) void xe_sched_job_arm(struct xe_sched_job *job)
{ {
struct xe_exec_queue *q = job->q;
struct xe_vm *vm = q->vm;
if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
(vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
xe_vm_assert_held(vm);
q->tlb_flush_seqno = vm->tlb_flush_seqno;
job->ring_ops_flush_tlb = true;
}
drm_sched_job_arm(&job->drm); drm_sched_job_arm(&job->drm);
} }
......
...@@ -39,6 +39,8 @@ struct xe_sched_job { ...@@ -39,6 +39,8 @@ struct xe_sched_job {
} user_fence; } user_fence;
/** @migrate_flush_flags: Additional flush flags for migration jobs */ /** @migrate_flush_flags: Additional flush flags for migration jobs */
u32 migrate_flush_flags; u32 migrate_flush_flags;
/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
bool ring_ops_flush_tlb;
/** @batch_addr: batch buffer address of job */ /** @batch_addr: batch buffer address of job */
u64 batch_addr[]; u64 batch_addr[];
}; };
......
...@@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) ...@@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
return 0; return 0;
} }
/**
* xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
* @vm: The vm for which we are rebinding.
* @exec: The struct drm_exec with the locked GEM objects.
* @num_fences: The number of fences to reserve for the operation, not
* including rebinds and validations.
*
* Validates all evicted gem objects and rebinds their vmas. Note that
* rebindings may cause evictions and hence the validation-rebind
* sequence is rerun until there are no more objects to validate.
* Once that loop has finished, @num_fences fence slots are reserved on
* the reservation object of every GEM object locked in @exec.
*
* Return: 0 on success, negative error code on error. In particular,
* may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
* the drm_exec transaction needs to be restarted.
*/
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
unsigned int num_fences)
{
struct drm_gem_object *obj;
unsigned long index;
int ret;
/*
 * Validate, then rebind, and repeat for as long as the gpuvm evict
 * list is non-empty; any error from either step aborts immediately.
 *
 * NOTE(review): xe_vm_rebind() is called with rebind_worker == false,
 * which, as shown in this change, returns 0 without rebinding when the
 * vm is in LR mode. The preempt rebind worker issues its own
 * xe_vm_rebind(vm, true) later.
 */
do {
ret = drm_gpuvm_validate(&vm->gpuvm, exec);
if (ret)
return ret;
ret = xe_vm_rebind(vm, false);
if (ret)
return ret;
} while (!list_empty(&vm->gpuvm.evict.list));
/*
 * Reserve the caller's fence slots only after validation and rebinding
 * are done, on each object currently locked in the drm_exec context.
 */
drm_exec_for_each_locked_object(exec, index, obj) {
ret = dma_resv_reserve_fences(obj->resv, num_fences);
if (ret)
return ret;
}
return 0;
}
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
bool *done) bool *done)
{ {
int err; int err;
/* err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
* 1 fence for each preempt fence plus a fence for each tile from a
* possible rebind
*/
err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
vm->xe->info.tile_count);
if (err) if (err)
return err; return err;
...@@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, ...@@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return 0; return 0;
} }
err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
if (err) if (err)
return err; return err;
...@@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, ...@@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
if (err) if (err)
return err; return err;
return drm_gpuvm_validate(&vm->gpuvm, exec); /*
* Add validation and rebinding to the locking loop since both can
* cause evictions which may require blocking dma_resv locks.
* The fence reservation here is intended for the new preempt fences
* we attach at the end of the rebind work.
*/
return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
} }
static void preempt_rebind_work_func(struct work_struct *w) static void preempt_rebind_work_func(struct work_struct *w)
{ {
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
struct drm_exec exec; struct drm_exec exec;
struct dma_fence *rebind_fence;
unsigned int fence_count = 0; unsigned int fence_count = 0;
LIST_HEAD(preempt_fences); LIST_HEAD(preempt_fences);
ktime_t end = 0; ktime_t end = 0;
...@@ -568,18 +609,11 @@ static void preempt_rebind_work_func(struct work_struct *w) ...@@ -568,18 +609,11 @@ static void preempt_rebind_work_func(struct work_struct *w)
if (err) if (err)
goto out_unlock; goto out_unlock;
rebind_fence = xe_vm_rebind(vm, true); err = xe_vm_rebind(vm, true);
if (IS_ERR(rebind_fence)) { if (err)
err = PTR_ERR(rebind_fence);
goto out_unlock; goto out_unlock;
}
if (rebind_fence) { /* Wait on rebinds and munmap style VM unbinds */
dma_fence_wait(rebind_fence, false);
dma_fence_put(rebind_fence);
}
/* Wait on munmap style VM unbinds */
wait = dma_resv_wait_timeout(xe_vm_resv(vm), wait = dma_resv_wait_timeout(xe_vm_resv(vm),
DMA_RESV_USAGE_KERNEL, DMA_RESV_USAGE_KERNEL,
false, MAX_SCHEDULE_TIMEOUT); false, MAX_SCHEDULE_TIMEOUT);
...@@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, ...@@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs, struct xe_sync_entry *syncs, u32 num_syncs,
bool first_op, bool last_op); bool first_op, bool last_op);
struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{ {
struct dma_fence *fence = NULL; struct dma_fence *fence;
struct xe_vma *vma, *next; struct xe_vma *vma, *next;
lockdep_assert_held(&vm->lock); lockdep_assert_held(&vm->lock);
if (xe_vm_in_lr_mode(vm) && !rebind_worker) if (xe_vm_in_lr_mode(vm) && !rebind_worker)
return NULL; return 0;
xe_vm_assert_held(vm); xe_vm_assert_held(vm);
list_for_each_entry_safe(vma, next, &vm->rebind_list, list_for_each_entry_safe(vma, next, &vm->rebind_list,
...@@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) ...@@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
xe_assert(vm->xe, vma->tile_present); xe_assert(vm->xe, vma->tile_present);
list_del_init(&vma->combined_links.rebind); list_del_init(&vma->combined_links.rebind);
dma_fence_put(fence);
if (rebind_worker) if (rebind_worker)
trace_xe_vma_rebind_worker(vma); trace_xe_vma_rebind_worker(vma);
else else
trace_xe_vma_rebind_exec(vma); trace_xe_vma_rebind_exec(vma);
fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
if (IS_ERR(fence)) if (IS_ERR(fence))
return fence; return PTR_ERR(fence);
dma_fence_put(fence);
} }
return fence; return 0;
} }
static void xe_vma_free(struct xe_vma *vma) static void xe_vma_free(struct xe_vma *vma)
...@@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) ...@@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
} }
/** /**
* xe_vm_prepare_vma() - drm_exec utility to lock a vma * xe_vm_lock_vma() - drm_exec utility to lock a vma
* @exec: The drm_exec object we're currently locking for. * @exec: The drm_exec object we're currently locking for.
* @vma: The vma for which we want to lock the vm resv and any attached * @vma: The vma for which we want to lock the vm resv and any attached
* object's resv. * object's resv.
* @num_shared: The number of dma-fence slots to pre-allocate in the
* objects' reservation objects.
* *
* Return: 0 on success, negative error code on error. In particular * Return: 0 on success, negative error code on error. In particular
* may return -EDEADLK on WW transaction contention and -EINTR if * may return -EDEADLK on WW transaction contention and -EINTR if
* an interruptible wait is terminated by a signal. * an interruptible wait is terminated by a signal.
*/ */
int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
unsigned int num_shared)
{ {
struct xe_vm *vm = xe_vma_vm(vma); struct xe_vm *vm = xe_vma_vm(vma);
struct xe_bo *bo = xe_vma_bo(vma); struct xe_bo *bo = xe_vma_bo(vma);
int err; int err;
XE_WARN_ON(!vm); XE_WARN_ON(!vm);
if (num_shared)
err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
else
err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
if (!err && bo && !bo->vm) { if (!err && bo && !bo->vm)
if (num_shared)
err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
else
err = drm_exec_lock_obj(exec, &bo->ttm.base); err = drm_exec_lock_obj(exec, &bo->ttm.base);
}
return err; return err;
} }
...@@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) ...@@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
drm_exec_init(&exec, 0, 0); drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) { drm_exec_until_all_locked(&exec) {
err = xe_vm_prepare_vma(&exec, vma, 0); err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec); drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err)) if (XE_WARN_ON(err))
break; break;
...@@ -1589,7 +1614,6 @@ static void vm_destroy_work_func(struct work_struct *w) ...@@ -1589,7 +1614,6 @@ static void vm_destroy_work_func(struct work_struct *w)
XE_WARN_ON(vm->pt_root[id]); XE_WARN_ON(vm->pt_root[id]);
trace_xe_vm_free(vm); trace_xe_vm_free(vm);
dma_fence_put(vm->rebind_fence);
kfree(vm); kfree(vm);
} }
...@@ -2512,7 +2536,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, ...@@ -2512,7 +2536,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
lockdep_assert_held_write(&vm->lock); lockdep_assert_held_write(&vm->lock);
err = xe_vm_prepare_vma(exec, vma, 1); err = xe_vm_lock_vma(exec, vma);
if (err) if (err)
return err; return err;
......
...@@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); ...@@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
int xe_vm_userptr_check_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm);
struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
int xe_vm_invalidate_vma(struct xe_vma *vma); int xe_vm_invalidate_vma(struct xe_vma *vma);
...@@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); ...@@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
unsigned int num_shared);
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
unsigned int num_fences);
/** /**
* xe_vm_resv() - Returns the vm's reservation object * xe_vm_resv() - Returns the vm's reservation object
......
...@@ -177,9 +177,6 @@ struct xe_vm { ...@@ -177,9 +177,6 @@ struct xe_vm {
*/ */
struct list_head rebind_list; struct list_head rebind_list;
/** @rebind_fence: rebind fence from execbuf */
struct dma_fence *rebind_fence;
/** /**
* @destroy_work: worker to destroy VM, needed as a dma_fence signaling * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
* from an irq context can be last put and the destroy needs to be able * from an irq context can be last put and the destroy needs to be able
...@@ -264,6 +261,11 @@ struct xe_vm { ...@@ -264,6 +261,11 @@ struct xe_vm {
bool capture_once; bool capture_once;
} error_capture; } error_capture;
/**
* @tlb_flush_seqno: Required TLB flush seqno for the next exec.
* protected by the vm resv.
*/
u64 tlb_flush_seqno;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */ /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb; bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */ /** @xef: XE file handle for tracking this VM's drm client */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment