Commit b2fe31cf authored by xinhui pan's avatar xinhui pan Committed by Alex Deucher

drm/amdgpu: Put drm_dev_enter/exit outside hot codepath

We hit a soft hang while doing a memory pressure test on one NUMA system.
After a quick look, this is because kfd invalidates/validates userptr memory
frequently with the process_info lock held.
It looks like updating page table mappings uses too much CPU time.

perf top says below,
75.81%  [kernel]       [k] __srcu_read_unlock
 6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
 3.56%  [kernel]       [k] __srcu_read_lock
 2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
 2.20%  [kernel]       [k] __sg_page_iter_dma_next
 2.15%  [drm]          [k] drm_dev_enter
 1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
 1.18%  [kernel]       [k] __sg_alloc_table_from_pages
 1.09%  [drm]          [k] drm_dev_exit

So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.
Signed-off-by: default avatarxinhui pan <xinhui.pan@amd.com>
Reviewed-and-tested-by: default avatarAndrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 006c26a0
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <asm/set_memory.h> #include <asm/set_memory.h>
#endif #endif
#include "amdgpu.h" #include "amdgpu.h"
#include <drm/drm_drv.h>
/* /*
* GART * GART
...@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, ...@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
u64 page_base; u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */ /* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0; uint64_t flags = 0;
int idx;
if (!adev->gart.ready) { if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n"); WARN(1, "trying to unbind memory from uninitialized GART !\n");
return -EINVAL; return -EINVAL;
} }
if (!drm_dev_enter(&adev->ddev, &idx))
return 0;
t = offset / AMDGPU_GPU_PAGE_SIZE; t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) { for (i = 0; i < pages; i++, p++) {
...@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, ...@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < adev->num_vmhubs; i++) for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
drm_dev_exit(idx);
return 0; return 0;
} }
...@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, ...@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
{ {
uint64_t page_base; uint64_t page_base;
unsigned i, j, t; unsigned i, j, t;
int idx;
if (!adev->gart.ready) { if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n"); WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL; return -EINVAL;
} }
if (!drm_dev_enter(&adev->ddev, &idx))
return 0;
t = offset / AMDGPU_GPU_PAGE_SIZE; t = offset / AMDGPU_GPU_PAGE_SIZE;
for (i = 0; i < pages; i++) { for (i = 0; i < pages; i++) {
...@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, ...@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE; page_base += AMDGPU_GPU_PAGE_SIZE;
} }
} }
drm_dev_exit(idx);
return 0; return 0;
} }
......
...@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, ...@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
{ {
void __iomem *ptr = (void *)cpu_pt_addr; void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value; uint64_t value;
int idx;
if (!drm_dev_enter(&adev->ddev, &idx))
return 0;
/* /*
* The following is for PTE only. GART does not have PDEs. * The following is for PTE only. GART does not have PDEs.
...@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, ...@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
value |= flags; value |= flags;
writeq(value, ptr + (gpu_page_idx * 8)); writeq(value, ptr + (gpu_page_idx * 8));
drm_dev_exit(idx);
return 0; return 0;
} }
...@@ -749,6 +743,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) ...@@ -749,6 +743,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
u64 vram_end = vram_addr + vram_size; u64 vram_end = vram_addr + vram_size;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo); u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
int idx;
if (!drm_dev_enter(&adev->ddev, &idx))
return;
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
flags |= AMDGPU_PTE_WRITEABLE; flags |= AMDGPU_PTE_WRITEABLE;
...@@ -770,6 +768,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) ...@@ -770,6 +768,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED; flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
/* Requires gart_ptb_gpu_pa to be 4K aligned */ /* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags); amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
drm_dev_exit(idx);
} }
/** /**
......
...@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ...@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo = &vmbo->bo; struct amdgpu_bo *bo = &vmbo->bo;
unsigned entries, ats_entries; unsigned entries, ats_entries;
uint64_t addr; uint64_t addr;
int r; int r, idx;
/* Figure out our place in the hierarchy */ /* Figure out our place in the hierarchy */
if (ancestor->parent) { if (ancestor->parent) {
...@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ...@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
return r; return r;
} }
if (!drm_dev_enter(&adev->ddev, &idx))
return -ENODEV;
r = vm->update_funcs->map_table(vmbo); r = vm->update_funcs->map_table(vmbo);
if (r) if (r)
return r; goto exit;
memset(&params, 0, sizeof(params)); memset(&params, 0, sizeof(params));
params.adev = adev; params.adev = adev;
...@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ...@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
if (r) if (r)
return r; goto exit;
addr = 0; addr = 0;
if (ats_entries) { if (ats_entries) {
...@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ...@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries, r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
value, flags); value, flags);
if (r) if (r)
return r; goto exit;
addr += ats_entries * 8; addr += ats_entries * 8;
} }
...@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ...@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(&params, vmbo, addr, 0, entries, r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
value, flags); value, flags);
if (r) if (r)
return r; goto exit;
} }
return vm->update_funcs->commit(&params, NULL); r = vm->update_funcs->commit(&params, NULL);
exit:
drm_dev_exit(idx);
return r;
} }
/** /**
...@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, ...@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate) struct amdgpu_vm *vm, bool immediate)
{ {
struct amdgpu_vm_update_params params; struct amdgpu_vm_update_params params;
int r; int r, idx;
if (list_empty(&vm->relocated)) if (list_empty(&vm->relocated))
return 0; return 0;
if (!drm_dev_enter(&adev->ddev, &idx))
return -ENODEV;
memset(&params, 0, sizeof(params)); memset(&params, 0, sizeof(params));
params.adev = adev; params.adev = adev;
params.vm = vm; params.vm = vm;
...@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, ...@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
if (r) if (r)
return r; goto exit;
while (!list_empty(&vm->relocated)) { while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *entry; struct amdgpu_vm_bo_base *entry;
...@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, ...@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, &vm->last_update); r = vm->update_funcs->commit(&params, &vm->last_update);
if (r) if (r)
goto error; goto error;
drm_dev_exit(idx);
return 0; return 0;
error: error:
amdgpu_vm_invalidate_pds(adev, vm); amdgpu_vm_invalidate_pds(adev, vm);
exit:
drm_dev_exit(idx);
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment