Commit 50ea50cf authored by Philip Yang's avatar Philip Yang Committed by Alex Deucher

drm/amdkfd: copy memory through gart table

Use sdma linear copy to migrate data between ram and vram. The sdma
linear copy command uses kernel buffer function queue to access system
memory through gart table.

Use reserved gart table window 0 to map system page address, and vram
page address is direct mapping. Use the same kernel buffer function to
fill in gart table mapping, so this is serialized with memory copy by
sdma job submit. We only need wait for the last memory copy sdma fence
for larger buffer migration.
Signed-off-by: default avatarPhilip Yang <Philip.Yang@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent b53fa124
......@@ -33,6 +33,178 @@
#include "kfd_svm.h"
#include "kfd_migrate.h"
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
uint64_t src_addr, dst_addr;
uint64_t pte_flags;
void *cpu_addr;
int r;
/* use gart window 0 */
*gart_addr = adev->gmc.gart_start;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = npages * 8;
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job);
if (r)
return r;
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, false);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
pte_flags |= AMDGPU_PTE_WRITEABLE;
pte_flags |= adev->gart.gart_pte_flags;
cpu_addr = &job->ibs[0].ptr[num_dw];
r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
if (r)
goto error_free;
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
goto error_free;
dma_fence_put(fence);
return r;
error_free:
amdgpu_job_free(job);
return r;
}
/**
* svm_migrate_copy_memory_gart - sdma copy data between ram and vram
*
* @adev: amdgpu device the sdma ring running
* @src: source page address array
* @dst: destination page address array
* @npages: number of pages to copy
* @direction: enum MIGRATION_COPY_DIR
* @mfence: output, sdma fence to signal after sdma is done
*
* ram address uses GART table continuous entries mapping to ram pages,
* vram address uses direct mapping of vram pages, which must have npages
* number of continuous pages.
* GART update and sdma uses same buf copy function ring, sdma is splited to
* multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
* the last sdma finish fence which is returned to check copy memory is done.
*
* Context: Process context, takes and releases gtt_window_lock
*
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
uint64_t *vram, uint64_t npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
uint64_t gart_s, gart_d;
struct dma_fence *next;
uint64_t size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
while (npages) {
size = min(GTT_MAX_PAGES, npages);
if (direction == FROM_VRAM_TO_RAM) {
gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
} else if (direction == FROM_RAM_TO_VRAM) {
r = svm_migrate_gart_map(ring, size, sys, &gart_s,
KFD_IOCTL_SVM_FLAG_GPU_RO);
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
}
if (r) {
pr_debug("failed %d to create gart mapping\n", r);
goto out_unlock;
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
NULL, &next, false, true, false);
if (r) {
pr_debug("failed %d to copy memory\n", r);
goto out_unlock;
}
dma_fence_put(*mfence);
*mfence = next;
npages -= size;
if (npages) {
sys += size;
vram += size;
}
}
out_unlock:
mutex_unlock(&adev->mman.gtt_window_lock);
return r;
}
/**
* svm_migrate_copy_done - wait for memory copy sdma is done
*
* @adev: amdgpu device the sdma memory copy is executing on
* @mfence: migrate fence
*
* Wait for dma fence is signaled, if the copy ssplit into multiple sdma
* operations, this is the last sdma operation fence.
*
* Context: called after svm_migrate_copy_memory
*
* Return:
* 0 - success
* otherwise - error code from dma fence signal
*/
int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
int r = 0;
if (mfence) {
r = dma_fence_wait(mfence, false);
dma_fence_put(mfence);
pr_debug("sdma copy memory fence done\n");
}
return r;
}
static void svm_migrate_page_free(struct page *page)
{
}
......
......@@ -33,6 +33,11 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
enum MIGRATION_COPY_DIR {
FROM_RAM_TO_VRAM = 0,
FROM_VRAM_TO_RAM
};
#if defined(CONFIG_DEVICE_PRIVATE)
int svm_migrate_init(struct amdgpu_device *adev);
void svm_migrate_fini(struct amdgpu_device *adev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment