Commit c3b7fe8b authored by Christian König, committed by Dave Airlie

drm/radeon: multiple ring allocator v3

A fresh start with a new idea for a multiple ring allocator.
It should perform as well as a normal ring allocator as long
as only one ring does something, but falls back to a more
complex algorithm once several rings are active at the same time.

We store the last allocated bo in "hole" and always try to allocate
right after it. The principle is that, in a linear GPU ring
progression, whatever comes after the last allocation is the oldest bo
we allocated and thus the first one that should no longer be in use by
the GPU.

If that is not the case, we skip over the bo after last to the closest
completed bo, if one exists. If none exists and we are not asked to
block, we report failure to allocate.

If we are asked to block, we collect the oldest fence of each ring
and wait for any one of those fences to complete.
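
To make the wrap-around handling concrete, here is a minimal userspace
sketch (not part of the patch; ring_distance() and the sample offsets
are illustrative only) of the same trick radeon_sa_bo_next_hole uses to
pick the closest completed bo after the hole when offsets wrap past the
end of the buffer:

    #include <stdio.h>

    /* distance from the end of the hole to a bo start, on a circular buffer */
    static unsigned ring_distance(unsigned hole_end, unsigned soffset,
                                  unsigned manager_size)
    {
        unsigned d = soffset;
        if (d < hole_end)
            d += manager_size;      /* wrapped around, pretend it's after */
        return d - hole_end;
    }

    int main(void)
    {
        unsigned size = 1024;                       /* manager size */
        unsigned hole_end = 900;                    /* end of last allocation */
        unsigned candidates[] = { 100, 950, 400 };  /* signaled bo offsets */
        unsigned best = size * 2, best_idx = 0;     /* "best" starts past any distance */

        for (unsigned i = 0; i < 3; ++i) {
            unsigned d = ring_distance(hole_end, candidates[i], size);
            printf("bo at %u -> distance %u\n", candidates[i], d);
            if (d < best) {
                best = d;
                best_idx = i;
            }
        }
        printf("closest after hole: bo at %u\n", candidates[best_idx]);
        return 0;
    }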

v2: We need to be able to let hole point to the list_head, otherwise
    try_free will never free the first allocation of the list. Also
    stop calling radeon_fence_signaled more than necessary.

v3: Don't free allocations without considering them as a hole,
    otherwise we might lose holes. Also return ENOMEM instead of ENOENT
    when running out of fences to wait for. Limit the number of holes
    we try for each ring to 3.
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
parent 0085c950
@@ -385,7 +385,9 @@ struct radeon_bo_list {
 struct radeon_sa_manager {
 	spinlock_t		lock;
 	struct radeon_bo	*bo;
-	struct list_head	sa_bo;
+	struct list_head	*hole;
+	struct list_head	flist[RADEON_NUM_RINGS];
+	struct list_head	olist;
 	unsigned		size;
 	uint64_t		gpu_addr;
 	void			*cpu_ptr;
@@ -396,7 +398,8 @@ struct radeon_sa_bo;
 
 /* sub-allocation buffer */
 struct radeon_sa_bo {
-	struct list_head		list;
+	struct list_head		olist;
+	struct list_head		flist;
 	struct radeon_sa_manager	*manager;
 	unsigned			soffset;
 	unsigned			eoffset;
@@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	struct radeon_sa_manager tmp;
 	int i, r;
 
-	r = radeon_sa_bo_manager_init(rdev, &tmp,
-				      RADEON_IB_POOL_SIZE*64*1024,
-				      RADEON_GEM_DOMAIN_GTT);
-	if (r) {
-		return r;
-	}
-
 	radeon_mutex_lock(&rdev->ib_pool.mutex);
 	if (rdev->ib_pool.ready) {
 		radeon_mutex_unlock(&rdev->ib_pool.mutex);
-		radeon_sa_bo_manager_fini(rdev, &tmp);
 		return 0;
 	}
 
-	rdev->ib_pool.sa_manager = tmp;
-	INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
+	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+				      RADEON_IB_POOL_SIZE*64*1024,
+				      RADEON_GEM_DOMAIN_GTT);
+	if (r) {
+		radeon_mutex_unlock(&rdev->ib_pool.mutex);
+		return r;
+	}
+
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		rdev->ib_pool.ibs[i].fence = NULL;
 		rdev->ib_pool.ibs[i].idx = i;
@@ -27,21 +27,42 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole", we always try to allocate
+ * after the last allocated bo. Principle is that in a linear GPU ring
+ * progression was is after last is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If it's not the case we skip over the bo after last to the closest
+ * done bo if such one exist. If none exist and we are not asked to
+ * block we report failure to allocate.
+ *
+ * If we are asked to block we wait on all the oldest fence of all
+ * rings. We just wait for any of those fence to complete.
+ */
 #include "drmP.h"
 #include "drm.h"
 #include "radeon.h"
 
+static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
+
 int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 			      struct radeon_sa_manager *sa_manager,
 			      unsigned size, u32 domain)
 {
-	int r;
+	int i, r;
 
 	spin_lock_init(&sa_manager->lock);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
 	sa_manager->domain = domain;
-	INIT_LIST_HEAD(&sa_manager->sa_bo);
+	sa_manager->hole = &sa_manager->olist;
+	INIT_LIST_HEAD(&sa_manager->olist);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		INIT_LIST_HEAD(&sa_manager->flist[i]);
+	}
 
 	r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
@@ -58,11 +79,15 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
 {
 	struct radeon_sa_bo *sa_bo, *tmp;
 
-	if (!list_empty(&sa_manager->sa_bo)) {
-		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+	if (!list_empty(&sa_manager->olist)) {
+		sa_manager->hole = &sa_manager->olist,
+		radeon_sa_bo_try_free(sa_manager);
+		if (!list_empty(&sa_manager->olist)) {
+			dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		}
 	}
-	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
-		list_del_init(&sa_bo->list);
+	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
+		radeon_sa_bo_remove_locked(sa_bo);
 	}
 	radeon_bo_unref(&sa_manager->bo);
 	sa_manager->size = 0;
@@ -114,111 +139,203 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 	return r;
 }
 
-/*
- * Principe is simple, we keep a list of sub allocation in offset
- * order (first entry has offset == 0, last entry has the highest
- * offset).
- *
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
- *
- * Alignment can't be bigger than page size
- */
-
 static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
 {
-	list_del(&sa_bo->list);
+	struct radeon_sa_manager *sa_manager = sa_bo->manager;
+	if (sa_manager->hole == &sa_bo->olist) {
+		sa_manager->hole = sa_bo->olist.prev;
+	}
+	list_del_init(&sa_bo->olist);
+	list_del_init(&sa_bo->flist);
 	radeon_fence_unref(&sa_bo->fence);
 	kfree(sa_bo);
 }
 
-int radeon_sa_bo_new(struct radeon_device *rdev,
-		     struct radeon_sa_manager *sa_manager,
-		     struct radeon_sa_bo **sa_bo,
-		     unsigned size, unsigned align, bool block)
-{
-	struct radeon_fence *fence = NULL;
-	struct radeon_sa_bo *tmp, *next;
-	struct list_head *head;
-	unsigned offset = 0, wasted = 0;
-	int r;
-
-	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
-	BUG_ON(size > sa_manager->size);
-
-	*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
-
-retry:
-
-	spin_lock(&sa_manager->lock);
-
-	/* no one ? */
-	head = sa_manager->sa_bo.prev;
-	if (list_empty(&sa_manager->sa_bo)) {
-		goto out;
-	}
-
-	/* look for a hole big enough */
-	offset = 0;
-	list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) {
-		/* try to free this object */
-		if (tmp->fence) {
-			if (radeon_fence_signaled(tmp->fence)) {
-				radeon_sa_bo_remove_locked(tmp);
-				continue;
-			} else {
-				fence = tmp->fence;
-			}
-		}
-
-		/* room before this object ? */
-		if (offset < tmp->soffset && (tmp->soffset - offset) >= size) {
-			head = tmp->list.prev;
-			goto out;
-		}
-		offset = tmp->eoffset;
-		wasted = offset % align;
-		if (wasted) {
-			wasted = align - wasted;
-		}
-		offset += wasted;
-	}
-	/* room at the end ? */
-	head = sa_manager->sa_bo.prev;
-	tmp = list_entry(head, struct radeon_sa_bo, list);
-	offset = tmp->eoffset;
-	wasted = offset % align;
-	if (wasted) {
-		wasted = align - wasted;
-	}
-	offset += wasted;
-	if ((sa_manager->size - offset) < size) {
-		/* failed to find somethings big enough */
-		spin_unlock(&sa_manager->lock);
-		if (block && fence) {
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-			goto retry;
-		}
-		kfree(*sa_bo);
-		*sa_bo = NULL;
-		return -ENOMEM;
-	}
-
-out:
-	(*sa_bo)->manager = sa_manager;
-	(*sa_bo)->soffset = offset;
-	(*sa_bo)->eoffset = offset + size;
-	list_add(&(*sa_bo)->list, head);
-	spin_unlock(&sa_manager->lock);
-	return 0;
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
+{
+	struct radeon_sa_bo *sa_bo, *tmp;
+
+	if (sa_manager->hole->next == &sa_manager->olist)
+		return;
+
+	sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
+	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
+		if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
+			return;
+		}
+		radeon_sa_bo_remove_locked(sa_bo);
+	}
+}
+
+static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+
+	if (hole != &sa_manager->olist) {
+		return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
+	}
+	return 0;
+}
+
+static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+
+	if (hole->next != &sa_manager->olist) {
+		return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
+	}
+	return sa_manager->size;
+}
+
+static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
+				   struct radeon_sa_bo *sa_bo,
+				   unsigned size, unsigned align)
+{
+	unsigned soffset, eoffset, wasted;
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
+	wasted = (align - (soffset % align)) % align;
+
+	if ((eoffset - soffset) >= (size + wasted)) {
+		soffset += wasted;
+
+		sa_bo->manager = sa_manager;
+		sa_bo->soffset = soffset;
+		sa_bo->eoffset = soffset + size;
+		list_add(&sa_bo->olist, sa_manager->hole);
+		INIT_LIST_HEAD(&sa_bo->flist);
+		sa_manager->hole = &sa_bo->olist;
+		return true;
+	}
+	return false;
+}
+
+static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
+				   struct radeon_fence **fences,
+				   unsigned *tries)
+{
+	struct radeon_sa_bo *best_bo = NULL;
+	unsigned i, soffset, best, tmp;
+
+	/* if hole points to the end of the buffer */
+	if (sa_manager->hole->next == &sa_manager->olist) {
+		/* try again with its beginning */
+		sa_manager->hole = &sa_manager->olist;
+		return true;
+	}
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	/* to handle wrap around we add sa_manager->size */
+	best = sa_manager->size * 2;
+	/* go over all fence list and try to find the closest sa_bo
+	 * of the current last
+	 */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_sa_bo *sa_bo;
+
+		if (list_empty(&sa_manager->flist[i])) {
+			continue;
+		}
+
+		sa_bo = list_first_entry(&sa_manager->flist[i],
+					 struct radeon_sa_bo, flist);
+		if (!radeon_fence_signaled(sa_bo->fence)) {
+			fences[i] = sa_bo->fence;
+			continue;
+		}
+
+		/* limit the number of tries each ring gets */
+		if (tries[i] > 2) {
+			continue;
+		}
+
+		tmp = sa_bo->soffset;
+		if (tmp < soffset) {
+			/* wrap around, pretend it's after */
+			tmp += sa_manager->size;
+		}
+		tmp -= soffset;
+		if (tmp < best) {
+			/* this sa bo is the closest one */
+			best = tmp;
+			best_bo = sa_bo;
+		}
+	}
+
+	if (best_bo) {
+		++tries[best_bo->fence->ring];
+		sa_manager->hole = best_bo->olist.prev;
+
+		/* we knew that this one is signaled,
+		   so it's save to remote it */
+		radeon_sa_bo_remove_locked(best_bo);
+		return true;
+	}
+	return false;
+}
+
+int radeon_sa_bo_new(struct radeon_device *rdev,
+		     struct radeon_sa_manager *sa_manager,
+		     struct radeon_sa_bo **sa_bo,
+		     unsigned size, unsigned align, bool block)
+{
+	struct radeon_fence *fences[RADEON_NUM_RINGS];
+	unsigned tries[RADEON_NUM_RINGS];
+	int i, r = -ENOMEM;
+
+	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
+	BUG_ON(size > sa_manager->size);
+
+	*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
+	if ((*sa_bo) == NULL) {
+		return -ENOMEM;
+	}
+	(*sa_bo)->manager = sa_manager;
+	(*sa_bo)->fence = NULL;
+	INIT_LIST_HEAD(&(*sa_bo)->olist);
+	INIT_LIST_HEAD(&(*sa_bo)->flist);
+
+	spin_lock(&sa_manager->lock);
+	do {
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			fences[i] = NULL;
+			tries[i] = 0;
+		}
+
+		do {
+			radeon_sa_bo_try_free(sa_manager);
+
+			if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
+						   size, align)) {
+				spin_unlock(&sa_manager->lock);
+				return 0;
+			}
+
+			/* see if we can skip over some allocations */
+		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
+
+		if (block) {
+			spin_unlock(&sa_manager->lock);
+			r = radeon_fence_wait_any(rdev, fences, false);
+			spin_lock(&sa_manager->lock);
+			if (r) {
+				/* if we have nothing to wait for we
+				   are practically out of memory */
+				if (r == -ENOENT) {
+					r = -ENOMEM;
+				}
+				goto out_err;
+			}
+		}
+	} while (block);
+
+out_err:
+	spin_unlock(&sa_manager->lock);
+	kfree(*sa_bo);
+	*sa_bo = NULL;
+	return r;
 }
 
 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
@@ -226,13 +343,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 {
 	struct radeon_sa_manager *sa_manager;
 
-	if (!sa_bo || !*sa_bo)
+	if (sa_bo == NULL || *sa_bo == NULL) {
 		return;
+	}
 
 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->lock);
 	if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		(*sa_bo)->fence = radeon_fence_ref(fence);
+		list_add_tail(&(*sa_bo)->flist,
+			      &sa_manager->flist[fence->ring]);
 	} else {
 		radeon_sa_bo_remove_locked(*sa_bo);
 	}
@@ -247,15 +367,19 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 	struct radeon_sa_bo *i;
 
 	spin_lock(&sa_manager->lock);
-	list_for_each_entry(i, &sa_manager->sa_bo, list) {
-		seq_printf(m, "[%08x %08x] size %4d (%p)",
-			   i->soffset, i->eoffset, i->eoffset - i->soffset, i);
-		if (i->fence) {
-			seq_printf(m, " protected by %Ld (%p) on ring %d\n",
-				   i->fence->seq, i->fence, i->fence->ring);
+	list_for_each_entry(i, &sa_manager->olist, olist) {
+		if (&i->olist == sa_manager->hole) {
+			seq_printf(m, ">");
 		} else {
-			seq_printf(m, "\n");
+			seq_printf(m, " ");
 		}
+		seq_printf(m, "[0x%08x 0x%08x] size %8d",
+			   i->soffset, i->eoffset, i->eoffset - i->soffset);
+		if (i->fence) {
+			seq_printf(m, " protected by 0x%016llx on ring %d",
+				   i->fence->seq, i->fence->ring);
+		}
+		seq_printf(m, "\n");
 	}
 	spin_unlock(&sa_manager->lock);
 }