Commit 5255e146 authored by Christian König, committed by Alex Deucher

drm/amdgpu: rework TLB flushing

Instead of tracking the VM updates through the dependencies, just use a
sequence counter for page table updates which indicates the need to
flush the TLB.

This drastically reduces the need to flush the TLB.

v2: squash in NULL check fix (Christian)
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e997b827
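Background for the diff below: the commit replaces fence-based tracking of VM updates with a monotonic per-VM sequence counter. A page table update bumps the counter once its work has finished, and a VMID only needs a TLB flush if the counter has moved since that VMID last flushed. The following is a minimal, self-contained C11 sketch of that pattern with hypothetical names (vm_pt_update_done, vmid_grab_needs_flush); it is an illustration of the idea, not the amdgpu code itself:

/* tlb_seq_sketch.c - illustrative only; hypothetical names, not amdgpu code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vm {
        /* Bumped once per completed page table update. */
        atomic_uint_fast64_t tlb_seq;
};

struct vmid {
        /* tlb_seq value observed at this VMID's last TLB flush. */
        uint64_t flushed_updates;
};

/* In the driver this happens from a dma_fence callback once the update landed. */
static void vm_pt_update_done(struct vm *vm)
{
        atomic_fetch_add(&vm->tlb_seq, 1);
}

/* A job needs a TLB flush only if the VM changed since this VMID last flushed. */
static bool vmid_grab_needs_flush(struct vm *vm, struct vmid *id)
{
        uint64_t updates = atomic_load(&vm->tlb_seq);

        if (id->flushed_updates == updates)
                return false;

        id->flushed_updates = updates;
        return true;
}

int main(void)
{
        struct vm vm = { 0 };
        struct vmid id = { 0 };

        printf("%d\n", vmid_grab_needs_flush(&vm, &id));  /* 0: nothing changed */
        vm_pt_update_done(&vm);
        printf("%d\n", vmid_grab_needs_flush(&vm, &id));  /* 1: flush once */
        printf("%d\n", vmid_grab_needs_flush(&vm, &id));  /* 0: already flushed */
        return 0;
}

In the driver itself the counter is vm->tlb_seq, incremented from a dma_fence callback (amdgpu_vm_tlb_seq_cb in the hunks below) and read through amdgpu_vm_tlb_seq() when a VMID is grabbed.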
@@ -810,7 +810,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+	r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 	if (r)
 		return r;
@@ -821,7 +821,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -840,7 +840,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -853,7 +853,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+	r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
 	if (r)
 		return r;
...
@@ -277,7 +277,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	unsigned vmhub = ring->funcs->vmhub;
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	bool needs_flush = vm->use_cpu_for_update;
-	uint64_t updates = sync->last_vm_update;
+	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
 	*id = vm->reserved_vmid[vmhub];
@@ -338,7 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
-	uint64_t updates = sync->last_vm_update;
+	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
 	job->vm_needs_flush = vm->use_cpu_for_update;
@@ -426,7 +426,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		id->flushed_updates = sync->last_vm_update;
+		id->flushed_updates = amdgpu_vm_tlb_seq(vm);
 		job->vm_needs_flush = true;
 	}
...
@@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab;
 void amdgpu_sync_create(struct amdgpu_sync *sync)
 {
 	hash_init(sync->fences);
-	sync->last_vm_update = 0;
 }
 
 /**
@@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
 	return 0;
 }
 
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
-{
-	if (!fence)
-		return 0;
-
-	sync->last_vm_update = max(sync->last_vm_update, fence->seqno);
-	return amdgpu_sync_fence(sync, fence);
-}
-
 /* Determine based on the owner and mode if we should sync to a fence or not */
 static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
 				   enum amdgpu_sync_mode mode,
@@ -376,8 +358,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 		}
 	}
 
-	clone->last_vm_update = source->last_vm_update;
-
 	return 0;
 }
...
@@ -43,12 +43,10 @@ enum amdgpu_sync_mode {
  */
 struct amdgpu_sync {
 	DECLARE_HASHTABLE(fences, 4);
-	uint64_t last_vm_update;
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
 int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
 		     void *owner);
...
@@ -88,6 +88,21 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
+/**
+ * amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence
+ */
+struct amdgpu_vm_tlb_seq_cb {
+	/**
+	 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+	 */
+	struct amdgpu_vm *vm;
+
+	/**
+	 * @cb: callback
+	 */
+	struct dma_fence_cb cb;
+};
+
 /**
  * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
  *
@@ -760,6 +775,23 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 	return r;
 }
 
+/**
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+				 struct dma_fence_cb *cb)
+{
+	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
+
+	tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+	atomic64_inc(&tlb_cb->vm->tlb_seq);
+	kfree(tlb_cb);
+}
+
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
@@ -795,6 +827,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				bool *table_freed)
 {
 	struct amdgpu_vm_update_params params;
+	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
 	struct amdgpu_res_cursor cursor;
 	enum amdgpu_sync_mode sync_mode;
 	int r, idx;
@@ -802,6 +835,12 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
 		return -ENODEV;
 
+	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+	if (!tlb_cb) {
+		r = -ENOMEM;
+		goto error_unlock;
+	}
+
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
@@ -820,7 +859,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
-		goto error_unlock;
+		goto error_free;
 	}
 
 	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
@@ -833,7 +872,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->prepare(&params, resv, sync_mode);
 	if (r)
-		goto error_unlock;
+		goto error_free;
 
 	amdgpu_res_first(pages_addr ? NULL : res, offset,
 			 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
@@ -882,7 +921,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		tmp = start + num_entries;
 		r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
 		if (r)
-			goto error_unlock;
+			goto error_free;
 
 		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
 		start = tmp;
@@ -890,9 +929,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
+	if (!unlocked && (!(flags & AMDGPU_PTE_VALID) || params.table_freed)) {
+		tlb_cb->vm = vm;
+		if (!fence || !*fence ||
+		    dma_fence_add_callback(*fence, &tlb_cb->cb,
+					   amdgpu_vm_tlb_seq_cb))
+			amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+		tlb_cb = NULL;
+	}
+
 	if (table_freed)
 		*table_freed = *table_freed || params.table_freed;
 
+error_free:
+	kfree(tlb_cb);
+
 error_unlock:
 	amdgpu_vm_eviction_unlock(vm);
 	drm_dev_exit(idx);
...
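A note on the commit path above: the sequence counter is only bumped once the page table update has actually completed, so the TLB is not considered stale too early. dma_fence_add_callback() returns a non-zero error (-ENOENT) when the fence has already signaled, in which case the code runs the callback directly, and likewise when there is no fence at all. A hedged, kernel-context sketch of that pattern, with run_after_fence() as a made-up helper name rather than an amdgpu function:

#include <linux/dma-fence.h>

/* Sketch of the "run now or when the fence signals" pattern used above. */
static void run_after_fence(struct dma_fence *fence, struct dma_fence_cb *cb,
			    dma_fence_func_t func)
{
	/* dma_fence_add_callback() fails with -ENOENT if the fence already
	 * signaled; with no fence there is nothing to wait for either way. */
	if (!fence || dma_fence_add_callback(fence, cb, func))
		func(fence, cb);
}

Note also how the error labels split: an allocation failure takes error_unlock, while later failures take error_free so the preallocated tlb_cb is not leaked; once ownership of tlb_cb passes to the fence callback it is set to NULL, making the kfree() on the fall-through path a harmless no-op.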
@@ -284,6 +284,9 @@ struct amdgpu_vm {
 	struct drm_sched_entity immediate;
 	struct drm_sched_entity delayed;
 
+	/* Last finished delayed update */
+	atomic64_t tlb_seq;
+
 	/* Last unlocked submission to the scheduler entities */
 	struct dma_fence *last_unlocked;
@@ -478,4 +481,16 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
 #endif
 
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number which indicates that the VM TLBs needs
+ * to be invalidated whenever the sequence number change.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+	return atomic64_read(&vm->tlb_seq);
+}
+
 #endif