Commit 0aea5e4a authored by Alex Deucher

drm/radeon: use an interval tree to manage the VMA v2

Scales much better than scanning the address range linearly.

v2: store pfn instead of address
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c265f24d
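
The core of the change: radeon_vm previously kept its mappings on a sorted linked list and walked it linearly on every radeon_vm_bo_set_addr() call; this patch switches the VM to the kernel's generic interval tree (an rb-tree augmented with subtree end points), so the overlap check becomes O(log n). Below is a minimal sketch of that pattern against the <linux/interval_tree.h> API of this era (plain struct rb_root roots, inclusive 'last' bounds); example_map_range is a hypothetical helper for illustration, not part of the patch:

#include <linux/interval_tree.h>
#include <linux/errno.h>

/* Hypothetical helper showing the pattern the patch adopts: refuse a
 * new mapping if [start, last] intersects anything already in the
 * tree, otherwise insert it.  Bounds are page frame numbers (v2). */
static int example_map_range(struct rb_root *root,
			     struct interval_tree_node *node,
			     unsigned long start, unsigned long last)
{
	/* O(log n) overlap query instead of a linear list walk */
	if (interval_tree_iter_first(root, start, last))
		return -EINVAL;	/* range already (partially) mapped */

	node->start = start;
	node->last = last;	/* inclusive, hence "eoffset - 1" below */
	interval_tree_insert(node, root);
	return 0;
}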
--- a/drivers/gpu/drm/radeon/Kconfig
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -114,6 +114,7 @@ config DRM_RADEON
 	select POWER_SUPPLY
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
+	select INTERVAL_TREE
 	help
 	  Choose this option if you have an ATI Radeon graphics card. There
 	  are both PCI and AGP versions. You don't need to choose this to
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -64,6 +64,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/interval_tree.h>
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -447,14 +448,12 @@ struct radeon_mman {
 struct radeon_bo_va {
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
-	uint64_t			soffset;
-	uint64_t			eoffset;
 	uint32_t			flags;
 	uint64_t			addr;
 	unsigned			ref_count;
 
 	/* protected by vm mutex */
-	struct list_head		vm_list;
+	struct interval_tree_node	it;
 	struct list_head		vm_status;
 
 	/* constant after initialization */
@@ -877,7 +876,7 @@ struct radeon_vm_pt {
 };
 
 struct radeon_vm {
-	struct list_head		va;
+	struct rb_root			va;
 	unsigned			id;
 
 	/* BOs moved, but not yet updated in the PT */
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 	switch (args->operation) {
 	case RADEON_VA_MAP:
-		if (bo_va->soffset) {
+		if (bo_va->it.start) {
 			args->operation = RADEON_VA_RESULT_VA_EXIST;
-			args->offset = bo_va->soffset;
+			args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
 			goto out;
 		}
 		r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
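
Per the v2 note above ("store pfn instead of address"), the interval tree now holds GPU page frame numbers rather than byte offsets, which is why the ioctl path multiplies it.start by RADEON_GPU_PAGE_SIZE before reporting an existing mapping back to userspace.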
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
 	    ),
 
 	    TP_fast_assign(
-			   __entry->soffset = bo_va->soffset;
-			   __entry->eoffset = bo_va->eoffset;
+			   __entry->soffset = bo_va->it.start;
+			   __entry->eoffset = bo_va->it.last + 1;
 			   __entry->flags = bo_va->flags;
 			   ),
 	    TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 	}
 	bo_va->vm = vm;
 	bo_va->bo = bo;
-	bo_va->soffset = 0;
-	bo_va->eoffset = 0;
+	bo_va->it.start = 0;
+	bo_va->it.last = 0;
 	bo_va->flags = 0;
 	bo_va->addr = 0;
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->vm_list);
 	INIT_LIST_HEAD(&bo_va->vm_status);
 
 	mutex_lock(&vm->mutex);
-	list_add(&bo_va->vm_list, &vm->va);
 	list_add_tail(&bo_va->bo_list, &bo->va);
 	mutex_unlock(&vm->mutex);
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			  uint32_t flags)
 {
 	uint64_t size = radeon_bo_size(bo_va->bo);
-	uint64_t eoffset, last_offset = 0;
 	struct radeon_vm *vm = bo_va->vm;
-	struct radeon_bo_va *tmp;
-	struct list_head *head;
 	unsigned last_pfn, pt_idx;
+	uint64_t eoffset;
 	int r;
 
 	if (soffset) {
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	}
 
 	mutex_lock(&vm->mutex);
-	head = &vm->va;
-	last_offset = 0;
-	list_for_each_entry(tmp, &vm->va, vm_list) {
-		if (bo_va == tmp) {
-			/* skip over currently modified bo */
-			continue;
-		}
-		if (soffset >= last_offset && eoffset <= tmp->soffset) {
-			/* bo can be added before this one */
-			break;
-		}
-		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
-			/* bo and tmp overlap, invalid offset */
-			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
-			mutex_unlock(&vm->mutex);
-			return -EINVAL;
-		}
-		last_offset = tmp->eoffset;
-		head = &tmp->vm_list;
-	}
-
-	if (bo_va->soffset) {
-		/* add a clone of the bo_va to clear the old address */
-		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-		if (!tmp) {
-			mutex_unlock(&vm->mutex);
-			return -ENOMEM;
-		}
-		tmp->soffset = bo_va->soffset;
-		tmp->eoffset = bo_va->eoffset;
-		tmp->vm = vm;
-		list_add(&tmp->vm_status, &vm->freed);
-	}
-
-	bo_va->soffset = soffset;
-	bo_va->eoffset = eoffset;
+	if (bo_va->it.start || bo_va->it.last) {
+		if (bo_va->addr) {
+			/* add a clone of the bo_va to clear the old address */
+			struct radeon_bo_va *tmp;
+			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+			tmp->it.start = bo_va->it.start;
+			tmp->it.last = bo_va->it.last;
+			tmp->vm = vm;
+			tmp->addr = bo_va->addr;
+			list_add(&tmp->vm_status, &vm->freed);
+		}
+
+		interval_tree_remove(&bo_va->it, &vm->va);
+		bo_va->it.start = 0;
+		bo_va->it.last = 0;
+	}
+
+	soffset /= RADEON_GPU_PAGE_SIZE;
+	eoffset /= RADEON_GPU_PAGE_SIZE;
+
+	if (soffset || eoffset) {
+		struct interval_tree_node *it;
+		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+		if (it) {
+			struct radeon_bo_va *tmp;
+			tmp = container_of(it, struct radeon_bo_va, it);
+			/* bo and tmp overlap, invalid offset */
+			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+				soffset, tmp->bo, tmp->it.start, tmp->it.last);
+			mutex_unlock(&vm->mutex);
+			return -EINVAL;
+		}
+		bo_va->it.start = soffset;
+		bo_va->it.last = eoffset - 1;
+		interval_tree_insert(&bo_va->it, &vm->va);
+	}
+
 	bo_va->flags = flags;
 	bo_va->addr = 0;
-	list_move(&bo_va->vm_list, head);
 
-	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
-	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+	soffset >>= radeon_vm_block_size;
+	eoffset >>= radeon_vm_block_size;
 
 	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
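
Worth noting in the hunk above: struct interval_tree_node takes an inclusive upper bound, so after dividing soffset and eoffset down to page frame numbers the mapping is stored as [soffset, eoffset - 1]; a single-page mapping at pfn N is the degenerate interval [N, N], and the unmapped state is encoded as start = last = 0.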
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 	unsigned count = 0;
 	uint64_t addr;
 
-	start = start / RADEON_GPU_PAGE_SIZE;
-	end = end / RADEON_GPU_PAGE_SIZE;
-
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	uint64_t addr;
 	int r;
 
-	if (!bo_va->soffset) {
+	if (!bo_va->it.start) {
 		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
 			bo_va->bo, vm);
 		return -EINVAL;
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	trace_radeon_vm_bo_update(bo_va);
 
-	nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+	nptes = bo_va->it.last - bo_va->it.start + 1;
 
 	/* padding, etc. */
 	ndw = 64;
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 	ib.length_dw = 0;
 
-	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-			      addr, radeon_vm_page_flags(bo_va->flags));
+	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+			      bo_va->it.last + 1, addr,
+			      radeon_vm_page_flags(bo_va->flags));
 
 	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
 	list_del(&bo_va->bo_list);
 
 	mutex_lock(&vm->mutex);
-	list_del(&bo_va->vm_list);
+	interval_tree_remove(&bo_va->it, &vm->va);
 	list_del(&bo_va->vm_status);
 
 	if (bo_va->addr) {
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	vm->last_flush = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
-	INIT_LIST_HEAD(&vm->va);
+	vm->va = RB_ROOT;
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->freed);
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 	struct radeon_bo_va *bo_va, *tmp;
 	int i, r;
 
-	if (!list_empty(&vm->va)) {
+	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(rdev->dev, "still active bo inside vm\n");
 	}
-	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-		list_del_init(&bo_va->vm_list);
+	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+		interval_tree_remove(&bo_va->it, &vm->va);
 		r = radeon_bo_reserve(bo_va->bo, false);
 		if (!r) {
 			list_del_init(&bo_va->bo_list);