Commit 1a3b2b5d authored by Felix Kuehling's avatar Felix Kuehling Committed by Alex Deucher

drm/amdkfd: multiple gpu migrate vram to vram

If prefetch range to gpu with acutal location is another gpu, or GPU
retry fault restore pages to migrate the range with acutal location is
gpu, then migrate from one gpu to another gpu.

Use system memory as bridge because sdma engine may not able to access
another gpu vram, use sdma of source gpu to migrate to system memory,
then use sdma of destination gpu to migrate from system memory to gpu.

Print out gpuid or gpuidx in debug messages.
Signed-off-by: default avatarPhilip Yang <Philip.Yang@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 564d2b92
...@@ -487,7 +487,8 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -487,7 +487,8 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
* Return: * Return:
* 0 - OK, otherwise error code * 0 - OK, otherwise error code
*/ */
int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm) struct mm_struct *mm)
{ {
unsigned long addr, start, end; unsigned long addr, start, end;
...@@ -742,6 +743,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) ...@@ -742,6 +743,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
return r; return r;
} }
/**
* svm_migrate_vram_to_vram - migrate svm range from device to device
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: process mm, use current->mm if NULL
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
int r;
/*
* TODO: for both devices with PCIe large bar or on same xgmi hive, skip
* system memory as migration bridge
*/
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
r = svm_migrate_vram_to_ram(prange, mm);
if (r)
return r;
return svm_migrate_ram_to_vram(prange, best_loc, mm);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
if (!prange->actual_loc)
return svm_migrate_ram_to_vram(prange, best_loc, mm);
else
return svm_migrate_vram_to_vram(prange, best_loc, mm);
}
/** /**
* svm_migrate_to_ram - CPU page fault handler * svm_migrate_to_ram - CPU page fault handler
* @vmf: CPU vm fault vma, address * @vmf: CPU vm fault vma, address
......
...@@ -38,7 +38,7 @@ enum MIGRATION_COPY_DIR { ...@@ -38,7 +38,7 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM FROM_VRAM_TO_RAM
}; };
int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm); struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
unsigned long unsigned long
......
...@@ -348,8 +348,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) ...@@ -348,8 +348,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(&svm_bo->kref, svm_range_bo_release); kref_put(&svm_bo->kref, svm_range_bo_release);
} }
static bool svm_range_validate_svm_bo(struct svm_range *prange) static bool
svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
{ {
struct amdgpu_device *bo_adev;
mutex_lock(&prange->lock); mutex_lock(&prange->lock);
if (!prange->svm_bo) { if (!prange->svm_bo) {
mutex_unlock(&prange->lock); mutex_unlock(&prange->lock);
...@@ -361,6 +364,22 @@ static bool svm_range_validate_svm_bo(struct svm_range *prange) ...@@ -361,6 +364,22 @@ static bool svm_range_validate_svm_bo(struct svm_range *prange)
return true; return true;
} }
if (svm_bo_ref_unless_zero(prange->svm_bo)) { if (svm_bo_ref_unless_zero(prange->svm_bo)) {
/*
* Migrate from GPU to GPU, remove range from source bo_adev
* svm_bo range list, and return false to allocate svm_bo from
* destination adev.
*/
bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
if (bo_adev != adev) {
mutex_unlock(&prange->lock);
spin_lock(&prange->svm_bo->list_lock);
list_del_init(&prange->svm_bo_list);
spin_unlock(&prange->svm_bo->list_lock);
svm_range_bo_unref(prange->svm_bo);
return false;
}
if (READ_ONCE(prange->svm_bo->evicting)) { if (READ_ONCE(prange->svm_bo->evicting)) {
struct dma_fence *f; struct dma_fence *f;
struct svm_range_bo *svm_bo; struct svm_range_bo *svm_bo;
...@@ -434,7 +453,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -434,7 +453,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
prange->start, prange->last); prange->start, prange->last);
if (svm_range_validate_svm_bo(prange)) if (svm_range_validate_svm_bo(adev, prange))
return 0; return 0;
svm_bo = svm_range_bo_new(); svm_bo = svm_range_bo_new();
...@@ -1173,6 +1192,7 @@ static int svm_range_map_to_gpus(struct svm_range *prange, ...@@ -1173,6 +1192,7 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
p = container_of(prange->svms, struct kfd_process, svms); p = container_of(prange->svms, struct kfd_process, svms);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
pdd = kfd_process_device_from_gpuidx(p, gpuidx); pdd = kfd_process_device_from_gpuidx(p, gpuidx);
if (!pdd) { if (!pdd) {
pr_debug("failed to find device idx %d\n", gpuidx); pr_debug("failed to find device idx %d\n", gpuidx);
...@@ -2198,7 +2218,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ...@@ -2198,7 +2218,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
if (prange->actual_loc != best_loc) { if (prange->actual_loc != best_loc) {
if (best_loc) { if (best_loc) {
r = svm_migrate_ram_to_vram(prange, best_loc, mm); r = svm_migrate_to_vram(prange, best_loc, mm);
if (r) { if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr); r, addr);
...@@ -2406,6 +2426,11 @@ svm_range_best_prefetch_location(struct svm_range *prange) ...@@ -2406,6 +2426,11 @@ svm_range_best_prefetch_location(struct svm_range *prange)
goto out; goto out;
bo_adev = svm_range_get_adev_by_id(prange, best_loc); bo_adev = svm_range_get_adev_by_id(prange, best_loc);
if (!bo_adev) {
WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
best_loc = 0;
goto out;
}
bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
MAX_GPU_INSTANCE); MAX_GPU_INSTANCE);
...@@ -2493,20 +2518,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, ...@@ -2493,20 +2518,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
best_loc == prange->actual_loc) best_loc == prange->actual_loc)
return 0; return 0;
/*
* Prefetch to GPU without host access flag, set actual_loc to gpu, then
* validate on gpu and map to gpus will be handled afterwards.
*/
if (best_loc && !prange->actual_loc && if (best_loc && !prange->actual_loc &&
!(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
prange->actual_loc = best_loc;
return 0; return 0;
}
if (best_loc) { if (!best_loc) {
pr_debug("migrate from ram to vram\n");
r = svm_migrate_ram_to_vram(prange, best_loc, mm);
} else {
pr_debug("migrate from vram to ram\n");
r = svm_migrate_vram_to_ram(prange, mm); r = svm_migrate_vram_to_ram(prange, mm);
*migrated = !r;
return r;
} }
if (!r) r = svm_migrate_to_vram(prange, best_loc, mm);
*migrated = true; *migrated = !r;
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment