Commit 009f0a64 authored by Dave Airlie

Merge tag 'drm-xe-next-fixes-2024-01-16' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Driver Changes:
- Fix for definition of wakeref_t
- Fix for an error code aliasing
- Fix for VM_UNBIND_ALL in the case there are no bound VMAs
- Fixes for a number of __iomem address space mismatches reported by sparse
- Fixes for the assignment of exec_queue priority
- A fix for skip_guc_pc not taking effect
- Workaround for a build problem on GCC 11
- A couple of fixes for error paths
- Fix a Flat CCS compression metadata copy issue
- Fix a misplaced array bounds check
- Don't have display support depend on EXPERT (as discussed on IRC)
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240116102204.106520-1-thomas.hellstrom@linux.intel.com
parents cacea813 bf3ff145
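
Several of the hunks below are sparse __iomem fixes. The rule they follow: __iomem qualifies the pointed-to address space, so it goes after the '*' (void __iomem *regs, not void *__iomem regs), MMIO accessors such as readl() are used for dereferencing, and deliberately converting to a plain CPU pointer needs a __force cast. A minimal sketch of the pattern, with illustrative names that are not part of this patch:

#include <linux/io.h>
#include <linux/types.h>

struct example_mmio {
	void __iomem *regs;	/* qualifier on the pointee, after the '*' */
};

static u32 example_read(struct example_mmio *m, unsigned long offset)
{
	/* readl() takes a __iomem pointer, so sparse stays quiet. */
	return readl(m->regs + offset);
}

static u8 *example_cpu_alias(void __iomem *mapping, resource_size_t offset)
{
	/* Intentional address-space conversion, made explicit with __force. */
	return (u8 __force *)mapping + offset;
}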
@@ -47,7 +47,7 @@ config DRM_XE
 config DRM_XE_DISPLAY
 	bool "Enable display support"
-	depends on DRM_XE && EXPERT && DRM_XE=m
+	depends on DRM_XE && DRM_XE=m
 	select FB_IOMEM_HELPERS
 	select I2C
 	select I2C_ALGOBIT
...
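With the EXPERT dependency dropped, display support only requires the driver to be built as a module; a hedged example .config fragment using the option names from the hunk above:

CONFIG_DRM_XE=m
CONFIG_DRM_XE_DISPLAY=y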
@@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
 subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
 subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
 subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
-subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
 subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
 # The following turn off the warnings enabled by -Wextra
 ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
...
@@ -5,4 +5,4 @@
 #include <linux/types.h>
-typedef bool intel_wakeref_t;
+typedef unsigned long intel_wakeref_t;
@@ -125,14 +125,13 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
 	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
 			       ttm_bo_type_device, bo_flags);
-	xe_bo_lock(bo, false);
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "Failed to create bo.\n");
 		return;
 	}
+	xe_bo_lock(bo, false);
 	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
 	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
 			       test);
...
@@ -331,7 +331,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
 	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
-		 &src_it, XE_PAGE_SIZE, pt);
+		 &src_it, XE_PAGE_SIZE, pt->ttm.resource);
 	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
...
@@ -125,9 +125,9 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_TT,
 		};
@@ -145,6 +145,8 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 	struct xe_mem_region *vram;
 	u64 io_size;
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
 	xe_assert(xe, vram && vram->usable_size);
 	io_size = vram->io_size;
@@ -175,8 +177,6 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 			 u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 	if (bo->props.preferred_gt == XE_GT1) {
 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
@@ -193,9 +193,9 @@ static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_STOLEN,
 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
@@ -442,7 +442,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 		if (vram->mapping &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-			mem->bus.addr = (u8 *)vram->mapping +
+			mem->bus.addr = (u8 __force *)vram->mapping +
 				mem->bus.offset;
 		mem->bus.offset += vram->io_start;
@@ -734,7 +734,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	/* Create a new VMAP once kernel BO back in VRAM */
 	if (!ret && resource_is_vram(new_mem)) {
 		struct xe_mem_region *vram = res_to_mem_region(new_mem);
-		void *new_addr = vram->mapping +
+		void __iomem *new_addr = vram->mapping +
 			(new_mem->start << PAGE_SHIFT);
 		if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
...
@@ -484,7 +484,7 @@ int xe_device_probe(struct xe_device *xe)
 	err = xe_device_set_has_flat_ccs(xe);
 	if (err)
-		return err;
+		goto err_irq_shutdown;
 	err = xe_mmio_probe_vram(xe);
 	if (err)
...
@@ -97,7 +97,7 @@ struct xe_mem_region {
 	 */
 	resource_size_t actual_physical_size;
 	/** @mapping: pointer to VRAM mappable space */
-	void *__iomem mapping;
+	void __iomem *mapping;
 };
 /**
@@ -146,7 +146,7 @@ struct xe_tile {
 		size_t size;
 		/** @regs: pointer to tile's MMIO space (starting with registers) */
-		void *regs;
+		void __iomem *regs;
 	} mmio;
 	/**
@@ -159,7 +159,7 @@ struct xe_tile {
 		size_t size;
 		/** @regs: pointer to tile's additional MMIO-extension space */
-		void *regs;
+		void __iomem *regs;
 	} mmio_ext;
 	/** @mem: memory management info for tile */
@@ -301,7 +301,7 @@ struct xe_device {
 		/** @size: size of MMIO space for device */
 		size_t size;
 		/** @regs: pointer to MMIO space for device */
-		void *regs;
+		void __iomem *regs;
 	} mmio;
 	/** @mem: memory info for device */
...
@@ -115,7 +115,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_sched_job *job;
 	struct dma_fence *rebind_fence;
 	struct xe_vm *vm;
-	bool write_locked;
+	bool write_locked, skip_retry = false;
 	ktime_t end = 0;
 	int err = 0;
@@ -227,7 +227,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}
 	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
-		err = -EWOULDBLOCK;
+		err = -EWOULDBLOCK;	/* Aliased to -EAGAIN */
+		skip_retry = true;
 		goto err_exec;
 	}
@@ -337,7 +338,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		up_write(&vm->lock);
 	else
 		up_read(&vm->lock);
-	if (err == -EAGAIN)
+	if (err == -EAGAIN && !skip_retry)
 		goto retry;
 err_syncs:
 	for (i = 0; i < num_syncs; i++)
...
@@ -67,6 +67,11 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
 	q->sched_props.preempt_timeout_us =
 				hwe->eclass->sched_props.preempt_timeout_us;
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
+	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+	else
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
...
@@ -52,8 +52,6 @@ struct xe_exec_queue {
 	struct xe_vm *vm;
 	/** @class: class of this exec queue */
 	enum xe_engine_class class;
-	/** @priority: priority of this exec queue */
-	enum xe_exec_queue_priority priority;
 	/**
 	 * @logical_mask: logical mask of where job submitted to exec queue can run
 	 */
@@ -84,6 +82,8 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+/* kernel exec_queue only, set priority to highest level */
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(6)
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
@@ -142,6 +142,8 @@ struct xe_exec_queue {
 		u32 timeslice_us;
 		/** @preempt_timeout_us: preemption timeout in micro-seconds */
 		u32 preempt_timeout_us;
+		/** @priority: priority of this exec queue */
+		enum xe_exec_queue_priority priority;
 	} sched_props;
 	/** @compute: compute exec queue state */
...
@@ -196,6 +196,9 @@ void xe_gt_freq_init(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
+	if (xe->info.skip_guc_pc)
+		return;
+
 	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
 	if (!gt->freq) {
 		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
...
@@ -60,7 +60,12 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
 static u32 guc_ctl_feature_flags(struct xe_guc *guc)
 {
-	return GUC_CTL_ENABLE_SLPC;
+	u32 flags = 0;
+
+	if (!guc_to_xe(guc)->info.skip_guc_pc)
+		flags |= GUC_CTL_ENABLE_SLPC;
+
+	return flags;
 }
 static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
...
@@ -421,7 +421,7 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	struct exec_queue_policy policy;
 	struct xe_device *xe = guc_to_xe(guc);
-	enum xe_exec_queue_priority prio = q->priority;
+	enum xe_exec_queue_priority prio = q->sched_props.priority;
 	u32 timeslice_us = q->sched_props.timeslice_us;
 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
@@ -1231,7 +1231,6 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	err = xe_sched_entity_init(&ge->entity, sched);
 	if (err)
 		goto err_sched;
-	q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 	if (xe_exec_queue_is_lr(q))
 		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
@@ -1301,15 +1300,15 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
 {
 	struct xe_sched_msg *msg;
-	if (q->priority == priority || exec_queue_killed_or_banned(q))
+	if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
 		return 0;
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
+	q->sched_props.priority = priority;
 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
-	q->priority = priority;
 	return 0;
 }
...
@@ -62,6 +62,8 @@ struct xe_migrate {
 	 * out of the pt_bo.
 	 */
 	struct drm_suballoc_manager vm_update_sa;
+	/** @min_chunk_size: For dgfx, Minimum chunk size */
+	u64 min_chunk_size;
 };
 #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
@@ -344,7 +346,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
-					    EXEC_QUEUE_FLAG_PERMANENT);
+					    EXEC_QUEUE_FLAG_PERMANENT |
+					    EXEC_QUEUE_FLAG_HIGH_PRIORITY);
 	} else {
 		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
 						  XE_ENGINE_CLASS_COPY,
@@ -355,8 +358,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		xe_vm_close_and_put(vm);
 		return ERR_CAST(m->q);
 	}
-	if (xe->info.has_usm)
-		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
 	mutex_init(&m->job_mutex);
@@ -364,6 +365,19 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	if (err)
 		return ERR_PTR(err);
+	if (IS_DGFX(xe)) {
+		if (xe_device_has_flat_ccs(xe))
+			/* min chunk size corresponds to 4K of CCS Metadata */
+			m->min_chunk_size = SZ_4K * SZ_64K /
+				xe_device_ccs_bytes(xe, SZ_64K);
+		else
+			/* Somewhat arbitrary to avoid a huge amount of blits */
+			m->min_chunk_size = SZ_64K;
+		m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size);
+		drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n",
+			(unsigned long long)m->min_chunk_size);
+	}
+
 	return m;
 }
@@ -375,16 +389,35 @@ static u64 max_mem_transfer_per_pass(struct xe_device *xe)
 	return MAX_PREEMPTDISABLE_TRANSFER;
 }
-static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur)
+static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur)
 {
-	/*
-	 * For VRAM we use identity mapped pages so we are limited to current
-	 * cursor size. For system we program the pages ourselves so we have no
-	 * such limitation.
-	 */
-	return min_t(u64, max_mem_transfer_per_pass(xe),
-		     mem_type_is_vram(cur->mem_type) ? cur->size :
-		     cur->remaining);
+	struct xe_device *xe = tile_to_xe(m->tile);
+	u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining);
+
+	if (mem_type_is_vram(cur->mem_type)) {
+		/*
+		 * VRAM we want to blit in chunks with sizes aligned to
+		 * min_chunk_size in order for the offset to CCS metadata to be
+		 * page-aligned. If it's the last chunk it may be smaller.
+		 *
+		 * Another constraint is that we need to limit the blit to
+		 * the VRAM block size, unless size is smaller than
+		 * min_chunk_size.
+		 */
+		u64 chunk = max_t(u64, cur->size, m->min_chunk_size);
+
+		size = min_t(u64, size, chunk);
+		if (size > m->min_chunk_size)
+			size = round_down(size, m->min_chunk_size);
+	}
+
+	return size;
+}
+
+static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
+{
+	/* If the chunk is not fragmented, allow identity map. */
+	return cur->size >= size;
 }
@@ -397,7 +430,12 @@ static u32 pte_update_size(struct xe_migrate *m,
 	u32 cmds = 0;
 	*L0_pt = pt_ofs;
-	if (!is_vram) {
+	if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
+		/* Offset into identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
+					      cur->start + vram_region_gpu_offset(res));
+		cmds += cmd_size;
+	} else {
 		/* Clip L0 to available size */
 		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
 		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
@@ -413,11 +451,6 @@ static u32 pte_update_size(struct xe_migrate *m,
 		/* Each chunk has a single blit command */
 		cmds += cmd_size;
-	} else {
-		/* Offset into identity map. */
-		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
-					      cur->start + vram_region_gpu_offset(res));
-		cmds += cmd_size;
 	}
 	return cmds;
@@ -427,10 +460,10 @@ static void emit_pte(struct xe_migrate *m,
 		     struct xe_bb *bb, u32 at_pt,
 		     bool is_vram, bool is_comp_pte,
 		     struct xe_res_cursor *cur,
-		     u32 size, struct xe_bo *bo)
+		     u32 size, struct ttm_resource *res)
 {
 	struct xe_device *xe = tile_to_xe(m->tile);
+	struct xe_vm *vm = m->q->vm;
 	u16 pat_index;
 	u32 ptes;
 	u64 ofs = at_pt * XE_PAGE_SIZE;
@@ -443,13 +476,6 @@ static void emit_pte(struct xe_migrate *m,
 	else
 		pat_index = xe->pat.idx[XE_CACHE_WB];
-	/*
-	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
-	 * we're only emitting VRAM PTEs during sanity tests, so when
-	 * that's moved to a Kunit test, we should condition VRAM PTEs
-	 * on running tests.
-	 */
-
 	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 	while (ptes) {
@@ -469,18 +495,20 @@ static void emit_pte(struct xe_migrate *m,
 			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
-				/* Is this a 64K PTE entry? */
-				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
-				    !(cur_ofs & (16 * 8 - 1))) {
-					xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K));
+				if (vm->flags & XE_VM_FLAG_64K) {
+					u64 va = cur_ofs * XE_PAGE_SIZE / 8;
+
+					xe_assert(xe, (va & (SZ_64K - 1)) ==
+						  (addr & (SZ_64K - 1)));
+
 					flags |= XE_PTE_PS64;
 				}
-				addr += vram_region_gpu_offset(bo->ttm.resource);
+				addr += vram_region_gpu_offset(res);
 				devmem = true;
 			}
-			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
-								 addr, pat_index,
-								 0, devmem, flags);
+			addr = vm->pt_ops->pte_encode_addr(m->tile->xe,
							   addr, pat_index,
							   0, devmem, flags);
 			bb->cs[bb->len++] = lower_32_bits(addr);
@@ -694,8 +722,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		bool usm = xe->info.has_usm;
 		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
-		src_L0 = xe_migrate_res_sizes(xe, &src_it);
-		dst_L0 = xe_migrate_res_sizes(xe, &dst_it);
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
 		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
 			pass++, src_L0, dst_L0);
@@ -716,6 +744,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 						      &ccs_ofs, &ccs_pt, 0,
 						      2 * avail_pts,
 						      avail_pts);
+			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
 		}
 		/* Add copy commands size here */
@@ -728,20 +757,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 			goto err_sync;
 		}
-		if (!src_is_vram)
-			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
-				 src_bo);
-		else
+		if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
 			xe_res_next(&src_it, src_L0);
-		if (!dst_is_vram)
-			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
-				 dst_bo);
 		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
+				 src);
+
+		if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
 			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
+				 dst);
 		if (copy_system_ccs)
-			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo);
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
@@ -950,7 +979,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		bool usm = xe->info.has_usm;
 		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
-		clear_L0 = xe_migrate_res_sizes(xe, &src_it);
+		clear_L0 = xe_migrate_res_sizes(m, &src_it);
 		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
@@ -977,12 +1006,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		size -= clear_L0;
 		/* Preemption is enabled again by the ring ops. */
-		if (!clear_vram) {
-			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
-				 bo);
-		} else {
+		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
 			xe_res_next(&src_it, clear_L0);
-		}
+		else
+			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
+				 dst);
+
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
...
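As a rough worked example of the min_chunk_size computation above, assuming a platform where xe_device_ccs_bytes(xe, SZ_64K) evaluates to 256 (one byte of CCS metadata per 256 bytes of main memory): SZ_4K * SZ_64K / 256 = 4096 * 65536 / 256 = 1 MiB, already a power of two, so roundup_pow_of_two() leaves it unchanged. VRAM blits are then carved on 1 MiB boundaries, keeping each chunk's CCS metadata offset page-aligned as the comment in xe_migrate_res_sizes() describes.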
@@ -303,7 +303,7 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 	u8 id, tile_count = xe->info.tile_count;
 	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	struct xe_tile *tile;
-	void *regs;
+	void __iomem *regs;
 	u32 mtcfg;
 	if (tile_count == 1)
...
@@ -31,7 +31,7 @@ struct xe_ttm_stolen_mgr {
 	/* GPU base offset */
 	resource_size_t stolen_base;
-	void *__iomem mapping;
+	void __iomem *mapping;
 };
 static inline struct xe_ttm_stolen_mgr *
@@ -275,7 +275,7 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
 	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
 	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
-		mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset;
+		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
 	mem->bus.offset += mgr->io_base;
 	mem->bus.is_iomem = true;
...
@@ -335,13 +335,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	down_write(&vm->lock);
 	err = drm_gpuvm_exec_lock(&vm_exec);
 	if (err)
-		return err;
+		goto out_up_write;
 	pfence = xe_preempt_fence_create(q, q->compute.context,
 					 ++q->compute.seqno);
 	if (!pfence) {
 		err = -ENOMEM;
-		goto out_unlock;
+		goto out_fini;
 	}
 	list_add(&q->compute.link, &vm->preempt.exec_queues);
@@ -364,8 +364,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	up_read(&vm->userptr.notifier_lock);
-out_unlock:
+out_fini:
 	drm_exec_fini(exec);
+out_up_write:
 	up_write(&vm->lock);
 	return err;
@@ -2063,9 +2064,11 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		if (err)
 			return ERR_PTR(err);
-		vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
-		if (!vm_bo)
-			break;
+		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
+		if (IS_ERR(vm_bo)) {
+			xe_bo_unlock(bo);
+			return ERR_CAST(vm_bo);
+		}
 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
 		drm_gpuvm_bo_put(vm_bo);
...