Commit 31f33243 authored by Eric Huang, committed by Alex Deucher

drm/amdkfd: Make TLB flush conditional on mapping

This optimizes memory mapping latency and also avoids
a page fault in the corner case of changing a valid PDE into
a PTE.
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 075e8080
...@@ -269,7 +269,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ...@@ -269,7 +269,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
uint64_t *size); uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory( int amdgpu_amdkfd_gpuvm_sync_memory(
......
...@@ -1070,7 +1070,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, ...@@ -1070,7 +1070,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
static int update_gpuvm_pte(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem,
struct kfd_mem_attachment *entry, struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync) struct amdgpu_sync *sync,
bool *table_freed)
{ {
struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_device *adev = entry->adev; struct amdgpu_device *adev = entry->adev;
...@@ -1081,7 +1082,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, ...@@ -1081,7 +1082,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret; return ret;
/* Update the page tables */ /* Update the page tables */
ret = amdgpu_vm_bo_update(adev, bo_va, false); ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed);
if (ret) { if (ret) {
pr_err("amdgpu_vm_bo_update failed\n"); pr_err("amdgpu_vm_bo_update failed\n");
return ret; return ret;
...@@ -1093,7 +1094,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem, ...@@ -1093,7 +1094,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
static int map_bo_to_gpuvm(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry, struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync, struct amdgpu_sync *sync,
bool no_update_pte) bool no_update_pte,
bool *table_freed)
{ {
int ret; int ret;
...@@ -1110,7 +1112,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, ...@@ -1110,7 +1112,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
if (no_update_pte) if (no_update_pte)
return 0; return 0;
ret = update_gpuvm_pte(mem, entry, sync); ret = update_gpuvm_pte(mem, entry, sync, table_freed);
if (ret) { if (ret) {
pr_err("update_gpuvm_pte() failed\n"); pr_err("update_gpuvm_pte() failed\n");
goto update_gpuvm_pte_failed; goto update_gpuvm_pte_failed;
...@@ -1608,7 +1610,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ...@@ -1608,7 +1610,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
} }
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) struct kgd_dev *kgd, struct kgd_mem *mem,
void *drm_priv, bool *table_freed)
{ {
struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
...@@ -1696,7 +1699,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ...@@ -1696,7 +1699,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size, entry); entry->va, entry->va + bo_size, entry);
ret = map_bo_to_gpuvm(mem, entry, ctx.sync, ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
is_invalid_userptr); is_invalid_userptr, table_freed);
if (ret) { if (ret) {
pr_err("Failed to map bo to gpuvm\n"); pr_err("Failed to map bo to gpuvm\n");
goto out_unreserve; goto out_unreserve;
...@@ -2146,7 +2149,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ...@@ -2146,7 +2149,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
continue; continue;
kfd_mem_dmaunmap_attachment(mem, attachment); kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync); ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
if (ret) { if (ret) {
pr_err("%s: update PTE failed\n", __func__); pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */ /* make sure this gets validated again */
...@@ -2352,7 +2355,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) ...@@ -2352,7 +2355,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
continue; continue;
kfd_mem_dmaunmap_attachment(mem, attachment); kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj); ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
if (ret) { if (ret) {
pr_debug("Memory eviction: update PTE failed. Try again\n"); pr_debug("Memory eviction: update PTE failed. Try again\n");
goto validate_map_fail; goto validate_map_fail;
......
...@@ -1393,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, ...@@ -1393,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
long err = 0; long err = 0;
int i; int i;
uint32_t *devices_arr = NULL; uint32_t *devices_arr = NULL;
bool table_freed = false;
dev = kfd_device_by_id(GET_GPU_ID(args->handle)); dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev) if (!dev)
...@@ -1450,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, ...@@ -1450,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed; goto get_mem_obj_from_handle_failed;
} }
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); peer->kgd, (struct kgd_mem *)mem,
peer_pdd->drm_priv, &table_freed);
if (err) { if (err) {
pr_err("Failed to map to gpu %d/%d\n", pr_err("Failed to map to gpu %d/%d\n",
i, args->n_devices); i, args->n_devices);
...@@ -1468,6 +1470,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, ...@@ -1468,6 +1470,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
} }
/* Flush TLBs after waiting for the page table updates to complete */ /* Flush TLBs after waiting for the page table updates to complete */
if (table_freed) {
for (i = 0; i < args->n_devices; i++) { for (i = 0; i < args->n_devices; i++) {
peer = kfd_device_by_id(devices_arr[i]); peer = kfd_device_by_id(devices_arr[i]);
if (WARN_ON_ONCE(!peer)) if (WARN_ON_ONCE(!peer))
...@@ -1477,7 +1480,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, ...@@ -1477,7 +1480,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
continue; continue;
kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
} }
}
kfree(devices_arr); kfree(devices_arr);
return err; return err;
......
...@@ -672,7 +672,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, ...@@ -672,7 +672,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
if (err) if (err)
goto err_alloc_mem; goto err_alloc_mem;
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv); err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
pdd->drm_priv, NULL);
if (err) if (err)
goto err_map_mem; goto err_map_mem;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment