Commit 5b272bf7 authored by Dave Airlie

Merge tag 'drm-xe-fixes-2024-10-03' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Driver Changes:
- Restore pci state on resume (Rodrigo Vivi)
- Fix locking on submission, queue and vm (Matthew Auld, Matthew Brost)
- Fix UAF on queue destruction (Matthew Auld)
- Fix resource release on freq init error path (He Lugang)
- Use rw_semaphore to reduce contention on ASID->VM lookup (Matthew Brost)
- Fix steering for media on Xe2_HPM (Gustavo Sousa)
- Tuning updates to Xe2 (Gustavo Sousa)
- Resume TDR after GT reset to prevent jobs running forever (Matthew Brost)
- Move id allocation to avoid userspace using a guessed number
  to trigger UAF (Matthew Auld, Matthew Brost); a sketch of this
  pattern follows the commit metadata below
- Fix OA stream close preventing batch buffers from completing (José)
- Fix NPD when migrating memory on LNL (Zhanjun Dong)
- Fix memory leak when aborting binds (Matthew Brost)
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/2fiv63yanlal5mpw3mxtotte6yvkvtex74c7mkjxca4bazlyja@o4iejcfragxy
parents 240ebf1c a6f3b252
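
The id-allocation reordering in the list above follows one rule: once an object's id is published in a per-file xarray, another thread of the same process can look it up and destroy it immediately, so the xa_alloc() that hands out the id must be the last fallible step of the ioctl and nothing may touch the object afterwards on the success path. A minimal sketch of that rule, using hypothetical names (struct foo, foo_xa and foo_create_ioctl are illustrative, not part of the driver):

struct foo {
        struct xe_file *xef;    /* owning DRM file; hypothetical object */
};

static int foo_create_ioctl(struct xe_file *xef, struct foo *f, u32 *id_out)
{
        u32 id;
        int err;

        /* Take every reference the object needs while it is still private. */
        f->xef = xe_file_get(xef);

        /* Publishing the id is the last step that can fail. */
        err = xa_alloc(&xef->foo_xa, &id, f, xa_limit_32b, GFP_KERNEL);
        if (err)
                return err;     /* caller tears down the still-unpublished object */

        /* Userspace may already hold the id; only report it, never touch f again. */
        *id_out = id;
        return 0;
}
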
@@ -169,6 +169,8 @@
#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8)
#define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
@@ -378,6 +380,9 @@
#define L3SQCREG3 XE_REG_MCR(0xb108)
#define COMPPWOVERFETCHEN REG_BIT(28)
#define SCRATCH3_LBCF XE_REG_MCR(0xb154)
#define RWFLUSHALLEN REG_BIT(17)
#define XEHP_L3SQCREG5 XE_REG_MCR(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
@@ -391,6 +396,12 @@
#define SCRATCH1LPFC XE_REG(0xb474)
#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604)
#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608)
#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654)
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
@@ -680,8 +680,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
(ttm->page_flags & TTM_TT_FLAG_SWAPPED));
move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
(!mem_type_is_vram(old_mem_type) && !tt_has_data);
move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
(!mem_type_is_vram(old_mem_type) && !tt_has_data));
needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
(!ttm && ttm_bo->type == ttm_bo_type_device);
@@ -171,10 +171,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
mutex_lock(&xef->vm.lock);
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
mutex_unlock(&xef->vm.lock);
xe_file_put(xef);
@@ -298,6 +296,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
if (xe->unordered_wq)
destroy_workqueue(xe->unordered_wq);
if (xe->destroy_wq)
destroy_workqueue(xe->destroy_wq);
ttm_device_fini(&xe->ttm);
}
@@ -336,9 +337,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
init_waitqueue_head(&xe->ufence_wq);
err = drmm_mutex_init(&xe->drm, &xe->usm.lock);
if (err)
goto err;
init_rwsem(&xe->usm.lock);
xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
@@ -363,8 +362,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
if (!xe->ordered_wq || !xe->unordered_wq ||
!xe->preempt_fence_wq) {
!xe->preempt_fence_wq || !xe->destroy_wq) {
/*
* Cleanup done in xe_device_destroy via
* drmm_add_action_or_reset register above
@@ -369,7 +369,7 @@ struct xe_device {
/** @usm.next_asid: next ASID, used for cyclic ASID allocation */
u32 next_asid;
/** @usm.lock: protects UM state */
struct mutex lock;
struct rw_semaphore lock;
} usm;
/** @pinned: pinned BO state */
@@ -396,6 +396,9 @@ struct xe_device {
/** @unordered_wq: used to serialize unordered work, mostly display */
struct workqueue_struct *unordered_wq;
/** @destroy_wq: used to serialize user destroy work, like queue */
struct workqueue_struct *destroy_wq;
/** @tiles: device tiles */
struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
@@ -567,15 +570,23 @@ struct xe_file {
struct {
/** @vm.xe: xarray to store VMs */
struct xarray xa;
/** @vm.lock: protects file VM state */
/**
* @vm.lock: Protects VM lookup + reference and removal from
* file xarray. Not intended to be an outer lock which does
* things while being held.
*/
struct mutex lock;
} vm;
/** @exec_queue: Submission exec queue state for file */
struct {
/** @exec_queue.xe: xarray to store engines */
/** @exec_queue.xa: xarray to store exec queues */
struct xarray xa;
/** @exec_queue.lock: protects file engine state */
/**
* @exec_queue.lock: Protects exec queue lookup + reference and
* removal from file xarray. Not intended to be an outer
* lock which does things while being held.
*/
struct mutex lock;
} exec_queue;
@@ -283,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
xa_for_each(&xef->exec_queue.xa, i, q)
xa_for_each(&xef->exec_queue.xa, i, q) {
xe_exec_queue_get(q);
mutex_unlock(&xef->exec_queue.lock);
xe_exec_queue_update_run_ticks(q);
mutex_lock(&xef->exec_queue.lock);
xe_exec_queue_put(q);
}
mutex_unlock(&xef->exec_queue.lock);
/* Get the total GPU cycles */
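
The show_run_ticks() hunk above (and the guc_submit_wedged_fini() hunk later in this series) use a similar shape: pin each xarray entry with a reference before dropping the per-file lock, do the work that must not run under that lock, then re-take the lock and drop the temporary reference. A hedged sketch of that shape, with illustrative names (struct bar and the bar_* helpers are not driver code):

struct bar;
void bar_get(struct bar *b);
void bar_put(struct bar *b);
void bar_do_work(struct bar *b);

static void walk_bars(struct xarray *xa, struct mutex *lock)
{
        struct bar *b;
        unsigned long idx;

        mutex_lock(lock);
        xa_for_each(xa, idx, b) {
                bar_get(b);             /* keeps b alive while the lock is dropped */
                mutex_unlock(lock);

                bar_do_work(b);         /* may sleep or take other locks */

                mutex_lock(lock);
                bar_put(b);
                /*
                 * xa_for_each() restarts its lookup from the saved index, so
                 * dropping the lock mid-walk is tolerated; entries added or
                 * removed meanwhile may be skipped or seen again.
                 */
        }
        mutex_unlock(lock);
}
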
@@ -635,14 +635,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
}
}
mutex_lock(&xef->exec_queue.lock);
q->xef = xe_file_get(xef);
/* user id alloc must always be last in ioctl to prevent UAF */
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
mutex_unlock(&xef->exec_queue.lock);
if (err)
goto kill_exec_queue;
args->exec_queue_id = id;
q->xef = xe_file_get(xef);
return 0;
@@ -90,6 +90,11 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
cancel_work_sync(&sched->work_process_msg);
}
void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched)
{
drm_sched_resume_timeout(&sched->base, sched->base.timeout);
}
void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg)
{
@@ -22,6 +22,8 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched);
void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched);
void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg);
void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched,
@@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt)
if (!gt->freq)
return -ENOMEM;
err = devm_add_action(xe->drm.dev, freq_fini, gt->freq);
err = sysfs_create_files(gt->freq, freq_attrs);
if (err)
return err;
err = sysfs_create_files(gt->freq, freq_attrs);
err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq);
if (err)
return err;
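
Both sysfs hunks in this series (xe_gt_freq_init() above and xe_gt_sysfs_init() below) apply the same ordering rule: perform the fallible setup first, then register its teardown with devm_add_action_or_reset(), which runs the action immediately if the registration itself fails. A hedged, generic sketch under that assumption (the foo_* names and the empty attribute list are illustrative):

#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

static const struct attribute *foo_attrs[] = { NULL };  /* illustrative */

static void foo_sysfs_fini(void *arg)
{
        struct kobject *kobj = arg;

        sysfs_remove_files(kobj, foo_attrs);
        kobject_put(kobj);
}

static int foo_sysfs_init(struct device *dev, struct kobject *kobj)
{
        int err;

        /* Do the step that can fail before registering its cleanup. */
        err = sysfs_create_files(kobj, foo_attrs);
        if (err)
                return err;

        /*
         * Unlike devm_add_action(), the _or_reset() variant calls
         * foo_sysfs_fini(kobj) right away if it cannot register the
         * action, so the files created above are never leaked.
         */
        return devm_add_action_or_reset(dev, foo_sysfs_fini, kobj);
}
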
@@ -439,7 +439,7 @@ void xe_gt_mcr_init(struct xe_gt *gt)
if (gt->info.type == XE_GT_TYPE_MEDIA) {
drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
if (MEDIA_VER(xe) >= 20) {
if (MEDIA_VERx100(xe) >= 1301) {
gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
} else {
@@ -185,6 +185,21 @@ static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf,
return err;
}
static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
{
struct xe_vm *vm;
down_read(&xe->usm.lock);
vm = xa_load(&xe->usm.asid_to_vm, asid);
if (vm && xe_vm_in_fault_mode(vm))
xe_vm_get(vm);
else
vm = ERR_PTR(-EINVAL);
up_read(&xe->usm.lock);
return vm;
}
static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -197,16 +212,9 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
if (pf->trva_fault)
return -EFAULT;
/* ASID to VM */
mutex_lock(&xe->usm.lock);
vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
if (vm && xe_vm_in_fault_mode(vm))
xe_vm_get(vm);
else
vm = NULL;
mutex_unlock(&xe->usm.lock);
if (!vm)
return -EINVAL;
vm = asid_to_vm(xe, pf->asid);
if (IS_ERR(vm))
return PTR_ERR(vm);
/*
* TODO: Change to read lock? Using write lock for simplicity.
@@ -548,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
if (acc->access_type != ACC_TRIGGER)
return -EINVAL;
/* ASID to VM */
mutex_lock(&xe->usm.lock);
vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
if (vm)
xe_vm_get(vm);
mutex_unlock(&xe->usm.lock);
if (!vm || !xe_vm_in_fault_mode(vm))
return -EINVAL;
vm = asid_to_vm(xe, acc->asid);
if (IS_ERR(vm))
return PTR_ERR(vm);
down_read(&vm->lock);
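
The new asid_to_vm() helper above is the reader side of the mutex to rw_semaphore conversion listed in the commit message: page-fault and access-counter handlers only look up the ASID, so they can now run concurrently under down_read(), while VM creation and teardown (later in this series, in xe_vm.c) still take down_write() around xa_alloc_cyclic()/xa_erase(). A hedged sketch of that read/write split, with illustrative names:

#include <linux/rwsem.h>
#include <linux/xarray.h>

struct lookup_table {
        struct rw_semaphore lock;
        struct xarray map;
};

static void *lookup(struct lookup_table *t, unsigned long key)
{
        void *obj;

        down_read(&t->lock);    /* many fault handlers may read concurrently */
        obj = xa_load(&t->map, key);
        /*
         * Real code must take a reference on obj before up_read(), exactly
         * as asid_to_vm() does with xe_vm_get(), or obj may be freed once
         * the lock is released.
         */
        up_read(&t->lock);

        return obj;
}

static int publish(struct lookup_table *t, unsigned long key, void *obj)
{
        int err;

        down_write(&t->lock);   /* insertion and removal remain exclusive */
        err = xa_insert(&t->map, key, obj, GFP_KERNEL);
        up_write(&t->lock);

        return err;
}
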
@@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt)
gt->sysfs = &kg->base;
return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt);
return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt);
}
@@ -276,10 +276,26 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
}
#endif
static void xe_guc_submit_fini(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
int ret;
ret = wait_event_timeout(guc->submission_state.fini_wq,
xa_empty(&guc->submission_state.exec_queue_lookup),
HZ * 5);
drain_workqueue(xe->destroy_wq);
xe_gt_assert(gt, ret);
}
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
xe_guc_submit_fini(guc);
xa_destroy(&guc->submission_state.exec_queue_lookup);
free_submit_wq(guc);
}
@@ -290,9 +306,15 @@ static void guc_submit_wedged_fini(void *arg)
struct xe_exec_queue *q;
unsigned long index;
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
if (exec_queue_wedged(q))
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
if (exec_queue_wedged(q)) {
mutex_unlock(&guc->submission_state.lock);
xe_exec_queue_put(q);
mutex_lock(&guc->submission_state.lock);
}
}
mutex_unlock(&guc->submission_state.lock);
}
static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -345,6 +367,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
xa_init(&guc->submission_state.exec_queue_lookup);
init_waitqueue_head(&guc->submission_state.fini_wq);
primelockdep(guc);
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
@@ -361,6 +385,9 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
q->guc->id, q->width);
if (xa_empty(&guc->submission_state.exec_queue_lookup))
wake_up(&guc->submission_state.fini_wq);
}
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -1268,13 +1295,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
/* We must block on kernel engines so slabs are empty on driver unload */
if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
__guc_exec_queue_fini_async(&q->guc->fini_async);
else
queue_work(system_wq, &q->guc->fini_async);
queue_work(xe->destroy_wq, &q->guc->fini_async);
}
static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -1796,6 +1826,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
}
xe_sched_submission_start(sched);
xe_sched_submission_resume_tdr(sched);
}
int xe_guc_submit_start(struct xe_guc *guc)
@@ -81,6 +81,8 @@ struct xe_guc {
#endif
/** @submission_state.enabled: submission is enabled */
bool enabled;
/** @submission_state.fini_wq: submit fini wait queue */
wait_queue_head_t fini_wq;
} submission_state;
/** @hwconfig: Hardware config state */
struct {
@@ -709,8 +709,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
{
RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
regs_offset + CTX_CONTEXT_CONTROL,
_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
_MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE),
},
};
struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
@@ -742,10 +741,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
{
RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
regs_offset + CTX_CONTEXT_CONTROL,
_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
_MASKED_FIELD(CTX_CTRL_RUN_ALONE,
enable ? CTX_CTRL_RUN_ALONE : 0),
_MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) |
_MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
},
};
struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
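
The two OA hunks above change the context-control LRI writes from _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, enable ? ... : 0) to _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), so the bit is now set on stream close as well instead of being cleared, which is what previously kept batch buffers from completing. For readers unfamiliar with masked registers, a hedged illustration of the conventional macro semantics (the definitions and the bit position below are assumptions for illustration, not copied from the driver):

/* Assumed semantics: masked registers carry a write-enable mask in bits 31:16. */
#define MASKED_FIELD(mask, value)       (((mask) << 16) | (value))
#define MASKED_BIT_ENABLE(bit)          MASKED_FIELD((bit), (bit))

#define OAC_CONTEXT_ENABLE_BIT          (1u << 8)       /* illustrative bit position */

/* Old disable path: mask set, value 0, so the hardware cleared the bit. */
static const unsigned int old_disable_write =
        MASKED_FIELD(OAC_CONTEXT_ENABLE_BIT, 0);

/* New write: mask and value both set, so the bit stays enabled on close too. */
static const unsigned int keep_enabled_write =
        MASKED_BIT_ENABLE(OAC_CONTEXT_ENABLE_BIT);
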
@@ -924,6 +924,8 @@ static int xe_pci_resume(struct device *dev)
if (err)
return err;
pci_restore_state(pdev);
err = pci_enable_device(pdev);
if (err)
return err;
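
The resume hunk above restores the saved PCI config space before re-enabling the device. For reference, a hedged sketch of the conventional ordering in a PCI driver's resume callback (the function name is illustrative and the real xe path contains further driver-specific steps not shown here):

#include <linux/pci.h>

static int foo_pci_resume(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        int err;

        /*
         * Bring the function back to D0 and restore saved config space
         * before anything else depends on it.
         */
        pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);

        err = pci_enable_device(pdev);
        if (err)
                return err;

        pci_set_master(pdev);

        return 0;
}
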
@@ -2188,5 +2188,5 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
pt_op->num_entries);
}
xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
xe_pt_update_ops_fini(tile, vops);
}
@@ -42,20 +42,48 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
SET(CCCHKNREG1, L3CMPCTRL))
},
{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
},
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
{ XE_RTP_NAME("Tuning: Stateless compression control"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)),
XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
{ XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
XE_RTP_RULES(GRAPHICS_VERSION(2004)),
XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
},
{ XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
},
{}
};
@@ -1613,7 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
up_write(&vm->lock);
mutex_lock(&xe->usm.lock);
down_write(&xe->usm.lock);
if (vm->usm.asid) {
void *lookup;
@@ -1623,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
xe_assert(xe, lookup == vm);
}
mutex_unlock(&xe->usm.lock);
up_write(&xe->usm.lock);
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
@@ -1765,25 +1765,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(vm))
return PTR_ERR(vm);
mutex_lock(&xef->vm.lock);
err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
mutex_unlock(&xef->vm.lock);
if (err)
goto err_close_and_put;
if (xe->info.has_asid) {
mutex_lock(&xe->usm.lock);
down_write(&xe->usm.lock);
err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
XA_LIMIT(1, XE_MAX_ASID - 1),
&xe->usm.next_asid, GFP_KERNEL);
mutex_unlock(&xe->usm.lock);
up_write(&xe->usm.lock);
if (err < 0)
goto err_free_id;
goto err_close_and_put;
vm->usm.asid = asid;
}
args->vm_id = id;
vm->xef = xe_file_get(xef);
/* Record BO memory for VM pagetable created against client */
@@ -1796,12 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif
/* user id alloc must always be last in ioctl to prevent UAF */
err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
if (err)
goto err_close_and_put;
args->vm_id = id;
return 0;
err_free_id:
mutex_lock(&xef->vm.lock);
xa_erase(&xef->vm.xa, id);
mutex_unlock(&xef->vm.lock);
err_close_and_put:
xe_vm_close_and_put(vm);