Commit 9bff18d1 authored by Christian König

drm/ttm: use per BO cleanup workers

Instead of a single worker going over the list of deleted BOs at regular
intervals, use a per-BO worker which blocks on the resv object and on
locking the BO.

This not only simplifies the handling massively, but also results in a
much better response time when cleaning up buffers.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221125102137.1801-3-christian.koenig@amd.com
parent cd3a8a59
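
For orientation, the new per-BO flow, condensed from the ttm_bo.c hunks below, looks roughly like this. This is a sketch only: the names come from the patch itself, but headers, error handling and the surrounding ttm_bo_release() logic are left out.

/* Sketch of the per-BO cleanup worker; condensed from the diff below. */
static void ttm_bo_delayed_delete(struct work_struct *work)
{
        struct ttm_buffer_object *bo =
                container_of(work, typeof(*bo), delayed_delete);

        /* Block until all fences on the reservation object have signaled. */
        dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
                              MAX_SCHEDULE_TIMEOUT);

        /* The BO is now idle: lock it, free its backing storage and drop
         * the final reference. */
        dma_resv_lock(bo->base.resv, NULL);
        ttm_bo_cleanup_memtype_use(bo);
        dma_resv_unlock(bo->base.resv);
        ttm_bo_put(bo);
}

/* In ttm_bo_release(), a still-busy BO is no longer parked on the shared
 * ddestroy list; it queues its own work item on the device workqueue: */
INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
queue_work(bdev->wq, &bo->delayed_delete);

Because every BO carries its own work item, cleanup runs as soon as that particular BO becomes idle rather than waiting for the next pass of the periodic worker, which is where the better response time mentioned above comes from.
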
@@ -3984,7 +3984,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 	amdgpu_fence_driver_hw_fini(adev);
 
 	if (adev->mman.initialized)
-		flush_delayed_work(&adev->mman.bdev.wq);
+		drain_workqueue(adev->mman.bdev.wq);
 
 	if (adev->pm_sysfs_en)
 		amdgpu_pm_sysfs_fini(adev);
@@ -1099,7 +1099,7 @@ void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	while (atomic_read(&i915->mm.free_count)) {
 		flush_work(&i915->mm.free_work);
-		flush_delayed_work(&i915->bdev.wq);
+		drain_workqueue(i915->bdev.wq);
 		rcu_barrier();
 	}
 }
@@ -132,7 +132,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
 			break;
 
 		msleep(20);
-		flush_delayed_work(&mem->i915->bdev.wq);
+		drain_workqueue(mem->i915->bdev.wq);
 	}
 
 	/* If we leaked objects, Don't free the region causing use after free */
@@ -280,14 +280,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		ret = 0;
 	}
 
-	if (ret || unlikely(list_empty(&bo->ddestroy))) {
+	if (ret) {
 		if (unlock_resv)
 			dma_resv_unlock(bo->base.resv);
 		spin_unlock(&bo->bdev->lru_lock);
 		return ret;
 	}
 
-	list_del_init(&bo->ddestroy);
 	spin_unlock(&bo->bdev->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
@@ -300,47 +299,21 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 }
 
 /*
- * Traverse the delayed list, and call ttm_bo_cleanup_refs on all
- * encountered buffers.
+ * Block for the dma_resv object to become idle, lock the buffer and clean up
+ * the resource and tt object.
  */
-bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
+static void ttm_bo_delayed_delete(struct work_struct *work)
 {
-	struct list_head removed;
-	bool empty;
-
-	INIT_LIST_HEAD(&removed);
-
-	spin_lock(&bdev->lru_lock);
-	while (!list_empty(&bdev->ddestroy)) {
-		struct ttm_buffer_object *bo;
-
-		bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
-				      ddestroy);
-		list_move_tail(&bo->ddestroy, &removed);
-		if (!ttm_bo_get_unless_zero(bo))
-			continue;
-
-		if (remove_all || bo->base.resv != &bo->base._resv) {
-			spin_unlock(&bdev->lru_lock);
-			dma_resv_lock(bo->base.resv, NULL);
-
-			spin_lock(&bdev->lru_lock);
-			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
-
-		} else if (dma_resv_trylock(bo->base.resv)) {
-			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
-		} else {
-			spin_unlock(&bdev->lru_lock);
-		}
+	struct ttm_buffer_object *bo;
 
-		ttm_bo_put(bo);
-		spin_lock(&bdev->lru_lock);
-	}
-	list_splice_tail(&removed, &bdev->ddestroy);
-	empty = list_empty(&bdev->ddestroy);
-	spin_unlock(&bdev->lru_lock);
+	bo = container_of(work, typeof(*bo), delayed_delete);
 
-	return empty;
+	dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
+			      MAX_SCHEDULE_TIMEOUT);
+	dma_resv_lock(bo->base.resv, NULL);
+	ttm_bo_cleanup_memtype_use(bo);
+	dma_resv_unlock(bo->base.resv);
+	ttm_bo_put(bo);
 }
@@ -369,44 +342,40 @@ static void ttm_bo_release(struct kref *kref)
 		drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
 		ttm_mem_io_free(bdev, bo->resource);
-	}
 
-	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
-	    !dma_resv_trylock(bo->base.resv)) {
-		/* The BO is not idle, resurrect it for delayed destroy */
-		ttm_bo_flush_all_fences(bo);
-		bo->deleted = true;
-
-		spin_lock(&bo->bdev->lru_lock);
+		if (!dma_resv_test_signaled(bo->base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP) ||
+		    !dma_resv_trylock(bo->base.resv)) {
+			/* The BO is not idle, resurrect it for delayed destroy */
+			ttm_bo_flush_all_fences(bo);
+			bo->deleted = true;
 
-		/*
-		 * Make pinned bos immediately available to
-		 * shrinkers, now that they are queued for
-		 * destruction.
-		 *
-		 * FIXME: QXL is triggering this. Can be removed when the
-		 * driver is fixed.
-		 */
-		if (bo->pin_count) {
-			bo->pin_count = 0;
-			ttm_resource_move_to_lru_tail(bo->resource);
-		}
+			spin_lock(&bo->bdev->lru_lock);
 
-		kref_init(&bo->kref);
-		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
-		spin_unlock(&bo->bdev->lru_lock);
+			/*
+			 * Make pinned bos immediately available to
+			 * shrinkers, now that they are queued for
+			 * destruction.
+			 *
+			 * FIXME: QXL is triggering this. Can be removed when the
+			 * driver is fixed.
+			 */
+			if (bo->pin_count) {
+				bo->pin_count = 0;
+				ttm_resource_move_to_lru_tail(bo->resource);
+			}
 
-		schedule_delayed_work(&bdev->wq,
-				      ((HZ / 100) < 1) ? 1 : HZ / 100);
-		return;
-	}
+			kref_init(&bo->kref);
+			spin_unlock(&bo->bdev->lru_lock);
 
-	spin_lock(&bo->bdev->lru_lock);
-	list_del(&bo->ddestroy);
-	spin_unlock(&bo->bdev->lru_lock);
+			INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
+			queue_work(bdev->wq, &bo->delayed_delete);
+			return;
+		}
 
-	ttm_bo_cleanup_memtype_use(bo);
-	dma_resv_unlock(bo->base.resv);
+		ttm_bo_cleanup_memtype_use(bo);
+		dma_resv_unlock(bo->base.resv);
+	}
 
 	atomic_dec(&ttm_glob.bo_count);
 	bo->destroy(bo);
@@ -946,7 +915,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
 	int ret;
 
 	kref_init(&bo->kref);
-	INIT_LIST_HEAD(&bo->ddestroy);
 	bo->bdev = bdev;
 	bo->type = type;
 	bo->page_alignment = alignment;
@@ -230,7 +230,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	 */
 	atomic_inc(&ttm_glob.bo_count);
-	INIT_LIST_HEAD(&fbo->base.ddestroy);
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 
 	kref_init(&fbo->base.kref);
@@ -175,16 +175,6 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 }
 EXPORT_SYMBOL(ttm_device_swapout);
 
-static void ttm_device_delayed_workqueue(struct work_struct *work)
-{
-	struct ttm_device *bdev =
-		container_of(work, struct ttm_device, wq.work);
-
-	if (!ttm_bo_delayed_delete(bdev, false))
-		schedule_delayed_work(&bdev->wq,
-				      ((HZ / 100) < 1) ? 1 : HZ / 100);
-}
-
 /**
  * ttm_device_init
  *
@@ -215,15 +205,19 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 	if (ret)
 		return ret;
 
+	bdev->wq = alloc_workqueue("ttm", WQ_MEM_RECLAIM | WQ_HIGHPRI, 16);
+	if (!bdev->wq) {
+		ttm_global_release();
+		return -ENOMEM;
+	}
+
 	bdev->funcs = funcs;
 
 	ttm_sys_man_init(bdev);
 	ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
 
 	bdev->vma_manager = vma_manager;
-	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
 	spin_lock_init(&bdev->lru_lock);
-	INIT_LIST_HEAD(&bdev->ddestroy);
 	INIT_LIST_HEAD(&bdev->pinned);
 	bdev->dev_mapping = mapping;
 	mutex_lock(&ttm_global_mutex);
@@ -247,10 +241,8 @@ void ttm_device_fini(struct ttm_device *bdev)
 	list_del(&bdev->device_list);
 	mutex_unlock(&ttm_global_mutex);
 
-	cancel_delayed_work_sync(&bdev->wq);
-
-	if (ttm_bo_delayed_delete(bdev, true))
-		pr_debug("Delayed destroy list was clean\n");
+	drain_workqueue(bdev->wq);
+	destroy_workqueue(bdev->wq);
 
 	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
@@ -92,7 +92,6 @@ struct ttm_tt;
  * @ttm: TTM structure holding system pages.
  * @evicted: Whether the object was evicted without user-space knowing.
  * @deleted: True if the object is only a zombie and already deleted.
- * @ddestroy: List head for the delayed destroy list.
  * @swap: List head for swap LRU list.
  * @offset: The current GPU offset, which can have different meanings
  * depending on the memory type. For SYSTEM type memory, it should be 0.
@@ -135,19 +134,14 @@ struct ttm_buffer_object {
 	struct ttm_tt *ttm;
 	bool deleted;
 	struct ttm_lru_bulk_move *bulk_move;
+	unsigned priority;
+	unsigned pin_count;
 
 	/**
-	 * Members protected by the bdev::lru_lock.
-	 */
-
-	struct list_head ddestroy;
-
-	/**
-	 * Members protected by a bo reservation.
+	 * @delayed_delete: Work item used when we can't delete the BO
+	 * immediately
 	 */
-
-	unsigned priority;
-	unsigned pin_count;
+	struct work_struct delayed_delete;
 
 	/**
 	 * Special members that are protected by the reserve lock
@@ -448,8 +442,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma);
 int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
 		     void *buf, int len, int write);
-
-bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
 vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
 
 #endif
@@ -251,11 +251,6 @@ struct ttm_device {
 	 */
 	spinlock_t lru_lock;
 
-	/**
-	 * @ddestroy: Destroyed but not yet cleaned up buffer objects.
-	 */
-	struct list_head ddestroy;
-
 	/**
 	 * @pinned: Buffer objects which are pinned and so not on any LRU list.
 	 */
@@ -270,7 +265,7 @@ struct ttm_device {
 	/**
 	 * @wq: Work queue structure for the delayed delete workqueue.
 	 */
-	struct delayed_work wq;
+	struct workqueue_struct *wq;
 };
 
 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);