Commit 3798e9a6 authored by Christian König, committed by Alex Deucher

drm/amdgpu: use new scheduler load balancing for VMs

Instead of using a fixed round robin assignment, let the scheduler
balance the load of page table updates.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent aa5873dc
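
As background, here is a minimal sketch of what the new load balancing buys, not the in-tree drm_sched code: once an entity is initialized with a list of run queues instead of a single ring's run queue, the scheduler core can place work on the run queue whose scheduler currently has the least queued jobs, instead of the VM being pinned at creation time to one SDMA ring picked round-robin. The helper name pick_least_loaded_rq() is hypothetical, and the rq->sched back-pointer plus the per-scheduler num_jobs counter are assumptions about the scheduler internals of this kernel generation.

/*
 * Illustration only -- a simplified stand-in for the selection the DRM
 * scheduler performs internally.  Assumes each run queue points back at
 * its scheduler and each scheduler tracks its queued jobs in num_jobs.
 */
static struct drm_sched_rq *
pick_least_loaded_rq(struct drm_sched_rq **rq_list, unsigned int num_rqs)
{
	struct drm_sched_rq *best = NULL;
	unsigned int min_jobs = UINT_MAX;
	unsigned int i;

	for (i = 0; i < num_rqs; i++) {
		unsigned int jobs = atomic_read(&rq_list[i]->sched->num_jobs);

		/* prefer the run queue whose scheduler has the fewest jobs */
		if (jobs < min_jobs) {
			min_jobs = jobs;
			best = rq_list[i];
		}
	}

	return best;
}

With that selection living in the scheduler core, the driver side below only has to hand the whole vm_pte_rqs array to drm_sched_entity_init() and can drop its own round-robin bookkeeping (vm_pte_next_ring).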
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -2348,7 +2348,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_num_rings = 0;
+	adev->vm_manager.vm_pte_num_rqs = 0;
 	adev->gmc.gmc_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -2569,9 +2569,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct amdgpu_bo *root;
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 				   AMDGPU_VM_PTE_COUNT(adev) * 8);
-	unsigned ring_instance;
-	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
 	unsigned long size;
 	uint64_t flags;
 	int r, i;
@@ -2587,12 +2584,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
-	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
-	ring_instance %= adev->vm_manager.vm_pte_num_rings;
-	ring = adev->vm_manager.vm_pte_rings[ring_instance];
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
-	r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
+	r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
+				  adev->vm_manager.vm_pte_num_rqs, NULL);
 	if (r)
 		return r;
@@ -2901,7 +2894,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
-	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -265,10 +265,9 @@ struct amdgpu_vm_manager {
 	/* vram base address for page table entry */
 	u64					vram_base_offset;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs	*vm_pte_funcs;
-	struct amdgpu_ring			*vm_pte_rings[AMDGPU_MAX_RINGS];
-	unsigned				vm_pte_num_rings;
-	atomic_t				vm_pte_next_ring;
+	struct drm_sched_rq			*vm_pte_rqs[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_rqs;
 
 	/* partial resident texture handling */
 	spinlock_t				prt_lock;
drivers/gpu/drm/amd/amdgpu/cik_sdma.c

@@ -1386,15 +1386,17 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		}
+		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 	}
 }
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c

@@ -1312,15 +1312,17 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 
 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		}
+		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 	}
 }
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

@@ -1752,15 +1752,17 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 
 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		}
+		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 	}
 }
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

@@ -1796,15 +1796,17 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
 
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		}
+		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 	}
 }
drivers/gpu/drm/amd/amdgpu/si_dma.c

@@ -879,15 +879,17 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
 
 static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			sched = &adev->sdma.instance[i].ring.sched;
+			adev->vm_manager.vm_pte_rqs[i] =
+				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+		}
+		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 	}
 }