Commit a546a276 authored by Xiaogang Chen, committed by Alex Deucher

drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM

This patch implements partial migration/mapping for GPU/CPU page faults in SVM
according to the migration granularity (default 2MB). An svm range may now include
pages from both system RAM and the VRAM of one GPU. These changes are expected to
improve migration performance and reduce MMU callback and TLB flush workloads.
Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent fa745b55
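
Note: the core of this change is clamping the faulting page to a migration-granularity-aligned block inside the range, instead of splitting the range. Below is a minimal user-space sketch (not part of this patch) of that clamp, mirroring the alignment added in svm_migrate_to_ram() and svm_range_restore_pages(); the ALIGN/ALIGN_DOWN macros are modeled after the kernel ones, and prange_start, prange_last and granularity are illustrative stand-ins.

```c
#include <stdio.h>

/* simplified versions of the kernel alignment helpers (power-of-two 'a') */
#define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))
#define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))
#define MAX(a, b)         ((a) > (b) ? (a) : (b))
#define MIN(a, b)         ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long prange_start = 0x1000;  /* first page of the range (hypothetical) */
	unsigned long prange_last  = 0x1fff;  /* last page of the range (hypothetical)  */
	unsigned long granularity  = 9;       /* log2 pages: 512 * 4KB = 2MB default    */
	unsigned long addr         = 0x1234;  /* faulting page number                   */

	unsigned long size  = 1UL << granularity;
	/* clamp the aligned block to the range boundaries */
	unsigned long start = MAX(ALIGN_DOWN(addr, size), prange_start);
	unsigned long last  = MIN(ALIGN(addr + 1, size) - 1, prange_last);

	/* only [start, last] is migrated/mapped instead of the whole range */
	printf("migrate pages [0x%lx 0x%lx]\n", start, last);
	return 0;
}
```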
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);

 	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
 	migrate_vma_pages(&migrate);
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start_mgr, unsigned long last_mgr,
 			struct mm_struct *mm, uint32_t trigger)
 {
 	unsigned long addr, start, end;
@@ -498,10 +501,10 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	unsigned long cpages = 0;
 	long r = 0;

-	if (prange->actual_loc == best_loc) {
-		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-			 prange->svms, prange->start, prange->last, best_loc);
-		return 0;
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
 	}

 	node = svm_range_get_node_by_id(prange, best_loc);
@@ -510,18 +513,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 		return -ENODEV;
 	}

-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-		 prange->start, prange->last, best_loc);
+	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+		 prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+		 best_loc);

-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;

 	r = svm_range_vram_node_new(node, prange, true);
 	if (r) {
 		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
 		return r;
 	}
-	ttm_res_offset = prange->offset << PAGE_SHIFT;
+	ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;

 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -544,8 +548,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	if (cpages) {
 		prange->actual_loc = best_loc;
-		svm_range_dma_unmap(prange);
-	} else {
+		prange->vram_pages = prange->vram_pages + cpages;
+	} else if (!prange->actual_loc) {
+		/* if no page migrated and all pages from prange are at
+		 * sys ram drop svm_bo got from svm_range_vram_node_new
+		 */
 		svm_range_vram_node_free(prange);
 	}
@@ -663,9 +670,8 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- * 0 - success with all pages migrated
  * negative values - indicate error
- * positive values - partial migration, number of pages not migrated
+ * positive values or zero - number of pages got migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
@@ -676,6 +682,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 	uint64_t npages = (end - start) >> PAGE_SHIFT;
 	unsigned long upages = npages;
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	struct amdgpu_device *adev = node->adev;
 	struct kfd_process_device *pdd;
 	struct dma_fence *mfence = NULL;
@@ -725,10 +732,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);

 	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
 				    scratch, npages);
@@ -751,17 +758,21 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 	kvfree(buf);
 out:
 	if (!r && cpages) {
+		mpages = cpages - upages;
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+			WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
 	}
-	return r ? r : upages;
+
+	return r ? r : mpages;
 }

 /**
  * svm_migrate_vram_to_ram - migrate svm range from device to system
  * @prange: range structure
  * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
  * @trigger: reason of migration
  * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
  *
@@ -771,6 +782,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
  * 0 - OK, otherwise error code
  */
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			unsigned long start_mgr, unsigned long last_mgr,
 			uint32_t trigger, struct page *fault_page)
 {
 	struct kfd_node *node;
@@ -778,26 +790,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 	unsigned long addr;
 	unsigned long start;
 	unsigned long end;
-	unsigned long upages = 0;
+	unsigned long mpages = 0;
 	long r = 0;

+	/* this pragne has no any vram page to migrate to sys ram */
 	if (!prange->actual_loc) {
 		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
 			 prange->start, prange->last);
 		return 0;
 	}

+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
+	}
+
 	node = svm_range_get_node_by_id(prange, prange->actual_loc);
 	if (!node) {
 		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
 		return -ENODEV;
 	}
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
-		 prange->svms, prange, prange->start, prange->last,
+		 prange->svms, prange, start_mgr, last_mgr,
 		 prange->actual_loc);

-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;

 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -816,14 +835,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed %ld to migrate prange %p\n", r, prange);
 			break;
 		} else {
-			upages += r;
+			mpages += r;
 		}
 		addr = next;
 	}

-	if (r >= 0 && !upages) {
-		svm_range_vram_node_free(prange);
-		prange->actual_loc = 0;
+	if (r >= 0) {
+		prange->vram_pages -= mpages;
+
+		/* prange does not have vram page set its actual_loc to system
+		 * and drop its svm_bo ref
+		 */
+		if (prange->vram_pages == 0 && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
 	}

 	return r < 0 ? r : 0;
@@ -833,17 +859,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
  * svm_migrate_vram_to_vram - migrate svm range from device to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start: start page need be migrated to sys ram
+ * @last: last page need be migrated to sys ram
  * @mm: process mm, use current->mm if NULL
  * @trigger: reason of migration
  *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
+ * migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ *
 * Return:
 * 0 - OK, otherwise error code
 */
 static int
 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
-			 struct mm_struct *mm, uint32_t trigger)
+			 unsigned long start, unsigned long last,
+			 struct mm_struct *mm, uint32_t trigger)
 {
 	int r, retries = 3;
@@ -855,7 +887,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

 	do {
-		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+					    trigger, NULL);
 		if (r)
 			return r;
 	} while (prange->actual_loc && --retries);
@@ -863,17 +896,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	if (prange->actual_loc)
 		return -EDEADLK;

-	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
 }

 int
 svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+		    unsigned long start, unsigned long last,
 		    struct mm_struct *mm, uint32_t trigger)
 {
-	if (!prange->actual_loc)
-		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	if (!prange->actual_loc || prange->actual_loc == best_loc)
+		return svm_migrate_ram_to_vram(prange, best_loc, start, last,
+					       mm, trigger);
 	else
-		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
+		return svm_migrate_vram_to_vram(prange, best_loc, start, last,
+						mm, trigger);
 }
@@ -889,10 +926,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
 */
 static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 {
+	unsigned long start, last, size;
 	unsigned long addr = vmf->address;
 	struct svm_range_bo *svm_bo;
-	enum svm_work_list_ops op;
-	struct svm_range *parent;
 	struct svm_range *prange;
 	struct kfd_process *p;
 	struct mm_struct *mm;
@@ -929,51 +965,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 	mutex_lock(&p->svms.lock);

-	prange = svm_range_from_addr(&p->svms, addr, &parent);
+	prange = svm_range_from_addr(&p->svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
 		r = -EFAULT;
 		goto out_unlock_svms;
 	}

-	mutex_lock(&parent->migrate_mutex);
-	if (prange != parent)
-		mutex_lock_nested(&prange->migrate_mutex, 1);
+	mutex_lock(&prange->migrate_mutex);

 	if (!prange->actual_loc)
 		goto out_unlock_prange;

-	svm_range_lock(parent);
-	if (prange != parent)
-		mutex_lock_nested(&prange->lock, 1);
-	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
-	if (prange != parent)
-		mutex_unlock(&prange->lock);
-	svm_range_unlock(parent);
-	if (r) {
-		pr_debug("failed %d to split range by granularity\n", r);
-		goto out_unlock_prange;
-	}
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max(ALIGN_DOWN(addr, size), prange->start);
+	last = min(ALIGN(addr + 1, size) - 1, prange->last);

-	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
-				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
-				    vmf->page);
+	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last,
+				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
 	if (r)
 		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
-			 r, prange->svms, prange, prange->start, prange->last);
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && parent == prange)
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
-	else
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
-	svm_range_add_list_work(&p->svms, parent, mm, op);
-	schedule_deferred_list_work(&p->svms);
+			 r, prange->svms, prange, start, last);

 out_unlock_prange:
-	if (prange != parent)
-		mutex_unlock(&prange->migrate_mutex);
-	mutex_unlock(&parent->migrate_mutex);
+	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
 	mutex_unlock(&p->svms.lock);
 out_unref_process:
......
@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
 };

 int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start, unsigned long last,
 			struct mm_struct *mm, uint32_t trigger);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			unsigned long start, unsigned long last,
 			uint32_t trigger, struct page *fault_page);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
......
@@ -158,12 +158,13 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		      unsigned long offset, unsigned long npages,
-		      unsigned long *hmm_pfns, uint32_t gpuidx)
+		      unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
 	dma_addr_t *addr = prange->dma_addr[gpuidx];
 	struct device *dev = adev->dev;
 	struct page *page;
+	uint64_t vram_pages_dev;
 	int i, r;

 	if (!addr) {
@@ -173,6 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}

+	vram_pages_dev = 0;
 	addr += offset;
 	for (i = 0; i < npages; i++) {
 		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -182,6 +184,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		if (is_zone_device_page(page)) {
 			struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;

+			vram_pages_dev++;
 			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
 				  bo_adev->vm_manager.vram_base_offset -
 				  bo_adev->kfd.pgmap.range.start;
@@ -198,13 +201,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}

+	*vram_pages = vram_pages_dev;
 	return 0;
 }

 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		  unsigned long offset, unsigned long npages,
-		  unsigned long *hmm_pfns)
+		  unsigned long *hmm_pfns, uint64_t *vram_pages)
 {
 	struct kfd_process *p;
 	uint32_t gpuidx;
@@ -223,7 +227,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}

 		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
-					  hmm_pfns, gpuidx);
+					  hmm_pfns, gpuidx, vram_pages);
 		if (r)
 			break;
 	}
@@ -349,6 +353,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
+	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
@@ -395,6 +400,8 @@ static void svm_range_bo_release(struct kref *kref)
 			 prange->start, prange->last);
 		mutex_lock(&prange->lock);
 		prange->svm_bo = NULL;
+		/* prange should not hold vram page now */
+		WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
 		mutex_unlock(&prange->lock);

 		spin_lock(&svm_bo->list_lock);
@@ -975,6 +982,11 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	new->svm_bo = svm_range_bo_ref(old->svm_bo);
 	new->ttm_res = old->ttm_res;

+	/* set new's vram_pages as old range's now, the acurate vram_pages
+	 * will be updated during mapping
+	 */
+	new->vram_pages = min(old->vram_pages, new->npages);
+
 	spin_lock(&new->svm_bo->list_lock);
 	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
 	spin_unlock(&new->svm_bo->list_lock);
@@ -1135,66 +1147,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
 	list_add_tail(&pchild->child_list, &prange->child_list);
 }

-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange)
-{
-	struct svm_range *head, *tail;
-	unsigned long start, last, size;
-	int r;
-
-	/* Align splited range start and size to granularity size, then a single
-	 * PTE will be used for whole range, this reduces the number of PTE
-	 * updated and the L1 TLB space used for translation.
-	 */
-	size = 1UL << prange->granularity;
-	start = ALIGN_DOWN(addr, size);
-	last = ALIGN(addr + 1, size) - 1;
-
-	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-		 prange->svms, prange->start, prange->last, start, last, size);
-
-	if (start > prange->start) {
-		r = svm_range_split(prange, start, prange->last, &head);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-	}
-
-	if (last < prange->last) {
-		r = svm_range_split(prange, prange->start, last, &tail);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-	}
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
-		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
-		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
-			 prange, prange->start, prange->last,
-			 SVM_OP_ADD_RANGE_AND_MAP);
-	}
-	return 0;
-}
-
 static bool
 svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 {
@@ -1614,12 +1566,14 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
 * 5. Release page table (and SVM BO) reservation
 */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+				      unsigned long map_start, unsigned long map_last,
 				      struct svm_range *prange, int32_t gpuidx,
 				      bool intr, bool wait, bool flush_tlb)
 {
 	struct svm_validate_context *ctx;
 	unsigned long start, end, addr;
 	struct kfd_process *p;
+	uint64_t vram_pages;
 	void *owner;
 	int32_t idx;
 	int r = 0;
@@ -1688,11 +1642,15 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}

+	vram_pages = 0;
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
 	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
+		unsigned long map_start_vma;
+		unsigned long map_last_vma;
 		struct vm_area_struct *vma;
+		uint64_t vram_pages_vma;
 		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
@@ -1721,9 +1679,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		if (!r) {
 			offset = (addr - start) >> PAGE_SHIFT;
 			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-					      hmm_range->hmm_pfns);
+					      hmm_range->hmm_pfns, &vram_pages_vma);
 			if (r)
 				pr_debug("failed %d to dma map range\n", r);
+			else
+				vram_pages += vram_pages_vma;
 		}

 		svm_range_lock(prange);
@@ -1737,9 +1697,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			r = -EAGAIN;
 		}

-		if (!r)
-			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-						  ctx->bitmap, wait, flush_tlb);
+		if (!r) {
+			map_start_vma = max(map_start, prange->start + offset);
+			map_last_vma = min(map_last, prange->start + offset + npages - 1);
+			if (map_start_vma <= map_last_vma) {
+				offset = map_start_vma - prange->start;
+				npages = map_last_vma - map_start_vma + 1;
+				r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+							  ctx->bitmap, wait, flush_tlb);
+			}
+		}

 		if (!r && next == end)
 			prange->mapped_to_gpu = true;
@@ -1749,6 +1716,19 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		addr = next;
 	}

+	if (addr == end) {
+		prange->vram_pages = vram_pages;
+
+		/* if prange does not include any vram page and it
+		 * has not released svm_bo drop its svm_bo reference
+		 * and set its actaul_loc to sys ram
+		 */
+		if (!vram_pages && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
+	}
+
 	svm_range_unreserve_bos(ctx);
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
@@ -1832,8 +1812,8 @@ static void svm_range_restore_work(struct work_struct *work)
 		 */
 		mutex_lock(&prange->migrate_mutex);

-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       false, true, false);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, false, true, false);
 		if (r)
 			pr_debug("failed %d to map 0x%lx to gpus\n", r,
 				 prange->start);
@@ -2001,6 +1981,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
+	new->vram_pages = old->vram_pages;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
@@ -2908,6 +2889,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
 			uint64_t addr, bool write_fault)
 {
+	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
 	struct svm_range_list *svms;
 	struct svm_range *prange;
@@ -3043,40 +3025,44 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 				       write_fault, timestamp);

-	if (prange->actual_loc != best_loc) {
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+	last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+
+	if (prange->actual_loc != 0 || best_loc != 0) {
 		migration = true;
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+			r = svm_migrate_to_vram(prange, best_loc, start, last,
+					mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);
 				/* Fallback to system memory if migration to
 				 * VRAM failed
 				 */
-				if (prange->actual_loc)
-					r = svm_migrate_vram_to_ram(prange, mm,
-						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-						NULL);
+				if (prange->actual_loc && prange->actual_loc != best_loc)
+					r = svm_migrate_vram_to_ram(prange, mm, start, last,
+						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 				else
 					r = 0;
 			}
 		} else {
-			r = svm_migrate_vram_to_ram(prange, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					NULL);
+			r = svm_migrate_vram_to_ram(prange, mm, start, last,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 		}
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
-				 r, svms, prange->start, prange->last);
+				 r, svms, start, last);
 			goto out_unlock_range;
 		}
 	}

-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+	r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+				       false, false);
 	if (r)
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-			 r, svms, prange->start, prange->last);
+			 r, svms, start, last);

 	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 				     migration);
@@ -3422,18 +3408,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 	*migrated = false;
 	best_loc = svm_range_best_prefetch_location(prange);

-	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
-	    best_loc == prange->actual_loc)
+	/* when best_loc is a gpu node and same as prange->actual_loc
+	 * we still need do migration as prange->actual_loc !=0 does
+	 * not mean all pages in prange are vram. hmm migrate will pick
+	 * up right pages during migration.
+	 */
+	if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+	    (best_loc == 0 && prange->actual_loc == 0))
 		return 0;

 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm,
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
 					    KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
 		*migrated = !r;
 		return r;
 	}

-	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+	r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
+				mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;

 	return r;
@@ -3488,7 +3480,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 		mutex_lock(&prange->migrate_mutex);

 		do {
+			/* migrate all vram pages in this prange to sys ram
+			 * after that prange->actual_loc should be zero
+			 */
 			r = svm_migrate_vram_to_ram(prange, mm,
+						    prange->start, prange->last,
 						    KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
 		} while (!r && prange->actual_loc && --retries);
@@ -3612,8 +3608,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;

-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, flush_tlb);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d to map svm range\n", r);
@@ -3627,8 +3623,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 			pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
 				 prange, prange->start, prange->last);
 			mutex_lock(&prange->migrate_mutex);

-			r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-						       true, true, prange->mapped_to_gpu);
+			r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+						       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
 			if (r)
 				pr_debug("failed %d on remap svm range\n", r);
 			mutex_unlock(&prange->migrate_mutex);
......
@@ -78,6 +78,7 @@ struct svm_work_list_item {
 * @update_list:link list node used to add to update_list
 * @mapping: bo_va mapping structure to create and update GPU page table
 * @npages: number of pages
+ * @vram_pages: vram pages number in this svm_range
 * @dma_addr: dma mapping address on each GPU for system memory physical page
 * @ttm_res: vram ttm resource map
 * @offset: range start offset within mm_nodes
@@ -88,7 +89,9 @@ struct svm_work_list_item {
 * @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
 * @perferred_loc: perferred location, 0 for CPU, or GPU id
 * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
- * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @actual_loc: this svm_range location. 0: all pages are from sys ram;
+ *              GPU id: this svm_range may include vram pages from GPU with
+ *              id actual_loc.
 * @granularity:migration granularity, log2 num pages
 * @invalid: not 0 means cpu page table is invalidated
 * @validate_timestamp: system timestamp when range is validated
@@ -112,6 +115,7 @@ struct svm_range {
 	struct list_head list;
 	struct list_head update_list;
 	uint64_t npages;
+	uint64_t vram_pages;
 	dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
 	struct ttm_resource *ttm_res;
 	uint64_t offset;
@@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
 int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			    bool clear);
 void svm_range_vram_node_free(struct svm_range *prange);
-int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-				   unsigned long addr, struct svm_range *parent,
-				   struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			    uint32_t vmid, uint32_t node_id, uint64_t addr,
 			    bool write_fault);
......