drm/amdgpu: improve debug VRAM access performance using sdma

For better performance during VRAM access for debugged processes, do read/write copies over SDMA. In order to fulfill post mortem debugging on a broken device, fallback to stable MMIO access when gpu recovery is disabled or when job submission time outs are set to max. Failed SDMA access should automatically fall back to MMIO access. Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid page-table updates and TLB flushes on access. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

drm/amdgpu: improve debug VRAM access performance using sdma
For better performance during VRAM access for debugged processes, do read/write copies over SDMA. In order to fulfill post mortem debugging on a broken device, fallback to stable MMIO access when gpu recovery is disabled or when job submission time outs are set to max. Failed SDMA access should automatically fall back to MMIO access. Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid page-table updates and TLB flushes on access. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
cb5cc4f5 · Jonathan Kim · Alex Deucher · 7389a5b8 · cb5cc4f5 · cb5cc4f5
Commit cb5cc4f5 authored Dec 09, 2021 by Jonathan Kim Committed by Alex Deucher Jan 14, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 82 additions and 0 deletions

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +78 -0

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +4 -0

No files found.
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -50,6 +50,7 @@
 #include <drm/ttm/ttm_range_manager.h>

 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>

 #include "amdgpu.h"
 #include "amdgpu_object.h"
@@ -1433,6 +1434,70 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
 	}
 }

+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
+					unsigned long offset, void *buf, int len, int write)
+{
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct amdgpu_job *job;
+	struct dma_fence *fence;
+	uint64_t src_addr, dst_addr;
+	unsigned int num_dw;
+	int r, idx;
+
+	if (len != PAGE_SIZE)
+		return -EINVAL;
+
+	if (!adev->mman.sdma_access_ptr)
+		return -EACCES;
+
+	r = drm_dev_enter(adev_to_drm(adev), &idx);
+	if (r)
+		return r;
+
+	if (write)
+		memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
+	if (r)
+		goto out;
+
+	src_addr = write ? amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo) :
+			amdgpu_bo_gpu_offset(abo);
+	dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
+			amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);
+
+	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
+
+	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r) {
+		amdgpu_job_free(job);
+		goto out;
+	}
+
+	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+		r = -ETIMEDOUT;
+	dma_fence_put(fence);
+
+	if (!(r || write))
+		memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+	drm_dev_exit(idx);
+	return r;
+}
+
+static inline bool amdgpu_ttm_allow_post_mortem_debug(struct amdgpu_device *adev)
+{
+	return amdgpu_gpu_recovery == 0 ||
+		adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+		adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+		adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+		adev->video_timeout == MAX_SCHEDULE_TIMEOUT;
+}
+
 /**
 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
 *
@@ -1457,6 +1522,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->resource->mem_type != TTM_PL_VRAM)
 		return -EIO;

+	if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
+			!amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
+		return len;
+
 	amdgpu_res_first(bo->resource, offset, len, &cursor);
 	while (cursor.remaining) {
 		size_t count, size = cursor.size;
@@ -1797,6 +1866,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}

+	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				AMDGPU_GEM_DOMAIN_GTT,
+				&adev->mman.sdma_access_bo, NULL,
+				adev->mman.sdma_access_ptr))
+		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
+
 	return 0;
 }

@@ -1837,6 +1912,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
 	ttm_device_fini(&adev->mman.bdev);
 	adev->mman.initialized = false;
+	if (adev->mman.sdma_access_ptr)
+		amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
+					&adev->mman.sdma_access_ptr);
 	DRM_INFO("amdgpu: ttm finalized\n");
 }


--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -98,6 +98,10 @@ struct amdgpu_mman {
 	u64		fw_vram_usage_size;
 	struct amdgpu_bo	*fw_vram_usage_reserved_bo;
 	void		*fw_vram_usage_va;
+
+	/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
+	struct amdgpu_bo	*sdma_access_bo;
+	void			*sdma_access_ptr;
 };

 struct amdgpu_copy_mem {