Commit dfcd99f6 authored by Christian König, committed by Alex Deucher

drm/amdgpu: meld together VM fragment and huge page handling

This optimizes PTE generation by walking the page table hierarchy only once
per range and making changes as necessary.

It allows both huge (2MB) as well as giant (1GB) pages to be used on
Vega and Raven.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Acked-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent dfa70550
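
The reworked amdgpu_vm_update_ptes() below drives its walk from a fragment size
computed by amdgpu_vm_fragment(): the largest power-of-two block permitted by the
alignment of the start address and by the remaining length, capped at the adapter's
configured fragment size. A minimal standalone sketch of that selection rule
(pick_frag(), MAX_FRAG and the GCC builtins are illustrative stand-ins for the
driver's ffs()/fls64() helpers and vm_manager.fragment_size; not part of the commit):

/*
 * Sketch of the fragment selection rule, under the assumption that
 * MAX_FRAG mirrors vm_manager.fragment_size (9 -> 2^9 pages = 2MB
 * fragments with 4KB pages).
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_FRAG 9	/* assumed fragment_size */

static unsigned int pick_frag(uint64_t start, uint64_t npages)
{
	/* lowest set bit of start bounds the alignment,
	 * highest set bit of npages bounds what is left to map */
	unsigned int align = __builtin_ffsll(start) - 1;	/* ffs() - 1  */
	unsigned int len   = 63 - __builtin_clzll(npages);	/* fls64() - 1 */
	unsigned int frag  = align < len ? align : len;

	return frag > MAX_FRAG ? MAX_FRAG : frag;
}

int main(void)
{
	/* a 2MB-aligned start (PFN 0x200) with 768 pages (3MB) left to map */
	unsigned int frag = pick_frag(0x200, 768);

	printf("fragment: 2^%u pages (%llu KB)\n", frag, 4ULL << frag);
	return 0;
}

With a 2MB-aligned start and at least 2MB remaining, this picks frag = 9, which is
what lets the walk below stop at the PDE level and emit a single huge-page entry
instead of 512 individual PTEs.
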
@@ -1488,46 +1488,76 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ * amdgpu_vm_update_huge - figure out parameters for PTE updates
  *
- * @p: see amdgpu_pte_update_params definition
- * @entry: vm_pt entry to check
- * @parent: parent entry
- * @nptes: number of PTEs updated with this operation
- * @dst: destination address where the PTEs should point to
- * @flags: access flags fro the PTEs
- *
- * Check if we can update the PD with a huge page.
+ * Make sure to set the right flags for the PTEs at the desired level.
  */
-static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
-					struct amdgpu_vm_pt *entry,
-					struct amdgpu_vm_pt *parent,
-					unsigned nptes, uint64_t dst,
-					uint64_t flags)
+static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
+				  struct amdgpu_bo *bo, unsigned level,
+				  uint64_t pe, uint64_t addr,
+				  unsigned count, uint32_t incr,
+				  uint64_t flags)
+
 {
-	uint64_t pde;
-
-	/* In the case of a mixed PT the PDE must point to it*/
-	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
-	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
-		/* Set the huge page flag to stop scanning at this PDE */
+	if (level != AMDGPU_VM_PTB) {
 		flags |= AMDGPU_PDE_PTE;
+		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
 	}
 
-	if (!(flags & AMDGPU_PDE_PTE)) {
-		if (entry->huge) {
-			/* Add the entry to the relocated list to update it. */
-			entry->huge = false;
-			amdgpu_vm_bo_relocated(&entry->base);
-		}
+	amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
+}
+
+/**
+ * amdgpu_vm_fragment - get fragment for PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
+ *
+ * Returns the first possible fragment for the start and end address.
+ */
+static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params,
+			       uint64_t start, uint64_t end, uint64_t flags,
+			       unsigned int *frag, uint64_t *frag_end)
+{
+	/**
+	 * The MC L1 TLB supports variable sized pages, based on a fragment
+	 * field in the PTE. When this field is set to a non-zero value, page
+	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+	 * flags are considered valid for all PTEs within the fragment range
+	 * and corresponding mappings are assumed to be physically contiguous.
+	 *
+	 * The L1 TLB can store a single PTE for the whole fragment,
+	 * significantly increasing the space available for translation
+	 * caching. This leads to large improvements in throughput when the
+	 * TLB is under pressure.
+	 *
+	 * The L2 TLB distributes small and large fragments into two
+	 * asymmetric partitions. The large fragment cache is significantly
+	 * larger. Thus, we try to use large fragments wherever possible.
+	 * Userspace can support this by aligning virtual base address and
+	 * allocation size to the fragment size.
+	 */
+	unsigned max_frag = params->adev->vm_manager.fragment_size;
+
+	/* system pages are non continuously */
+	if (params->src || !(flags & AMDGPU_PTE_VALID)) {
+		*frag = 0;
+		*frag_end = end;
 		return;
 	}
 
-	entry->huge = true;
-	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
-
-	pde = (entry - parent->entries) * 8;
-	amdgpu_vm_update_func(p, parent->base.bo, pde, dst, 1, 0, flags);
+	/* This intentionally wraps around if no bit is set */
+	*frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
+	if (*frag >= max_frag) {
+		*frag = max_frag;
+		*frag_end = end & ~((1ULL << max_frag) - 1);
+	} else {
+		*frag_end = start + (1 << *frag);
+	}
 }
 
 /**
@@ -1545,108 +1575,105 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint64_t flags)
 {
 	struct amdgpu_device *adev = params->adev;
-	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 	struct amdgpu_vm_pt_cursor cursor;
+	uint64_t frag_start = start, frag_end;
+	unsigned int frag;
 
-	/* walk over the address space and update the page tables */
-	for_each_amdgpu_vm_pt_leaf(adev, params->vm, start, end - 1, cursor) {
+	/* figure out the initial fragment */
+	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
+
+	/* walk over the address space and update the PTs */
+	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+	while (cursor.pfn < end) {
 		struct amdgpu_bo *pt = cursor.entry->base.bo;
-		uint64_t pe_start;
-		unsigned nptes;
+		unsigned shift, parent_shift, num_entries;
+		uint64_t incr, entry_end, pe_start;
 
-		if (!pt || cursor.level != AMDGPU_VM_PTB)
+		if (!pt)
 			return -ENOENT;
 
-		if ((cursor.pfn & ~mask) == (end & ~mask))
-			nptes = end - cursor.pfn;
-		else
-			nptes = AMDGPU_VM_PTE_COUNT(adev) - (cursor.pfn & mask);
-
-		amdgpu_vm_handle_huge_pages(params, cursor.entry, cursor.parent,
-					    nptes, dst, flags);
-		/* We don't need to update PTEs for huge pages */
-		if (cursor.entry->huge) {
-			dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+		/* The root level can't be a huge page */
+		if (cursor.level == adev->vm_manager.root_level) {
+			if (!amdgpu_vm_pt_descendant(adev, &cursor))
+				return -ENOENT;
 			continue;
 		}
 
-		pe_start = (cursor.pfn & mask) * 8;
-		amdgpu_vm_update_func(params, pt, pe_start, dst, nptes,
-				      AMDGPU_GPU_PAGE_SIZE, flags);
-		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
-	}
-
-	return 0;
-}
-
-/*
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
- *
- * @params: see amdgpu_pte_update_params definition
- * @vm: requested vm
- * @start: first PTE to handle
- * @end: last PTE to handle
- * @dst: addr those PTEs should point to
- * @flags: hw mapping flags
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
-			       uint64_t start, uint64_t end,
-			       uint64_t dst, uint64_t flags)
-{
-	/**
-	 * The MC L1 TLB supports variable sized pages, based on a fragment
-	 * field in the PTE. When this field is set to a non-zero value, page
-	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
-	 * flags are considered valid for all PTEs within the fragment range
-	 * and corresponding mappings are assumed to be physically contiguous.
-	 *
-	 * The L1 TLB can store a single PTE for the whole fragment,
-	 * significantly increasing the space available for translation
-	 * caching. This leads to large improvements in throughput when the
-	 * TLB is under pressure.
-	 *
-	 * The L2 TLB distributes small and large fragments into two
-	 * asymmetric partitions. The large fragment cache is significantly
-	 * larger. Thus, we try to use large fragments wherever possible.
-	 * Userspace can support this by aligning virtual base address and
-	 * allocation size to the fragment size.
-	 */
-	unsigned max_frag = params->adev->vm_manager.fragment_size;
-	int r;
-
-	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID))
-		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
-
-	while (start != end) {
-		uint64_t frag_flags, frag_end;
-		unsigned frag;
-
-		/* This intentionally wraps around if no bit is set */
-		frag = min((unsigned)ffs(start) - 1,
-			   (unsigned)fls64(end - start) - 1);
-		if (frag >= max_frag) {
-			frag_flags = AMDGPU_PTE_FRAG(max_frag);
-			frag_end = end & ~((1ULL << max_frag) - 1);
-		} else {
-			frag_flags = AMDGPU_PTE_FRAG(frag);
-			frag_end = start + (1 << frag);
+		/* First check if the entry is already handled */
+		if (cursor.pfn < frag_start) {
+			cursor.entry->huge = true;
+			amdgpu_vm_pt_next(adev, &cursor);
+			continue;
+		}
+
+		/* If it isn't already handled it can't be a huge page */
+		if (cursor.entry->huge) {
+			/* Add the entry to the relocated list to update it. */
+			cursor.entry->huge = false;
+			amdgpu_vm_bo_relocated(&cursor.entry->base);
 		}
 
-		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
-					  flags | frag_flags);
-		if (r)
-			return r;
+		shift = amdgpu_vm_level_shift(adev, cursor.level);
+		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
+		if (adev->asic_type < CHIP_VEGA10) {
+			/* No huge page support before GMC v9 */
+			if (cursor.level != AMDGPU_VM_PTB) {
+				if (!amdgpu_vm_pt_descendant(adev, &cursor))
+					return -ENOENT;
+				continue;
+			}
+		} else if (frag < shift) {
+			/* We can't use this level when the fragment size is
+			 * smaller than the address shift. Go to the next
+			 * child entry and try again.
+			 */
+			if (!amdgpu_vm_pt_descendant(adev, &cursor))
+				return -ENOENT;
+			continue;
+		} else if (frag >= parent_shift) {
+			/* If the fragment size is even larger than the parent
+			 * shift we should go up one level and check it again.
+			 */
+			if (!amdgpu_vm_pt_ancestor(&cursor))
+				return -ENOENT;
+			continue;
+		}
 
-		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
-		start = frag_end;
+		/* Looks good so far, calculate parameters for the update */
+		incr = AMDGPU_GPU_PAGE_SIZE << shift;
+		num_entries = amdgpu_vm_num_entries(adev, cursor.level);
+		pe_start = ((cursor.pfn >> shift) & (num_entries - 1)) * 8;
+		entry_end = num_entries << shift;
+		entry_end += cursor.pfn & ~(entry_end - 1);
+		entry_end = min(entry_end, end);
+
+		do {
+			uint64_t upd_end = min(entry_end, frag_end);
+			unsigned nptes = (upd_end - frag_start) >> shift;
+
+			amdgpu_vm_update_huge(params, pt, cursor.level,
+					      pe_start, dst, nptes, incr,
+					      flags | AMDGPU_PTE_FRAG(frag));
+
+			pe_start += nptes * 8;
+			dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+
+			frag_start = upd_end;
+			if (frag_start >= frag_end) {
+				/* figure out the next fragment */
+				amdgpu_vm_fragment(params, frag_start, end,
+						   flags, &frag, &frag_end);
+				if (frag < shift)
+					break;
+			}
+		} while (frag_start < entry_end);
+
+		if (frag >= shift)
+			amdgpu_vm_pt_next(adev, &cursor);
 	}
 
 	return 0;
@@ -1708,8 +1735,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		params.func = amdgpu_vm_cpu_set_ptes;
 		params.pages_addr = pages_addr;
 
-		return amdgpu_vm_frag_ptes(&params, start, last + 1,
-					   addr, flags);
+		return amdgpu_vm_update_ptes(&params, start, last + 1,
+					     addr, flags);
 	}
 
 	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
@@ -1788,7 +1815,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
 	if (r)
 		goto error_free;
 