Commit 7141394e authored by Felix Kuehling, committed by Alex Deucher

drm/amdgpu: Simplify AQL queue mapping

Do AQL queue double-mapping with a single attach call. That will make it
easier to create per-GPU BOs later, to be shared between the two BO VA
mappings on the same GPU.
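
For illustration only, the shape of that single attach call (one loop that creates both mappings and unwinds backwards on failure) can be sketched outside the kernel. Every name below (attach_one, mem_attach, struct attachment) is a hypothetical stand-in invented for the sketch, not a kernel API:

/* Hypothetical sketch of the single-call double-mapping pattern;
 * none of these names are kernel APIs. */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct attachment {
	uint64_t va;
};

/* Stand-in for the real attach work; fails by returning NULL. */
static struct attachment *attach_one(uint64_t va)
{
	struct attachment *a = malloc(sizeof(*a));

	if (a)
		a->va = va;
	return a;
}

/* Attach once, or twice for AQL queues; the second mapping lands at
 * va + bo_size. On failure, undo only what this call created. */
static int mem_attach(uint64_t va, uint64_t bo_size, bool is_aql,
		      struct attachment *out[2])
{
	int i;

	for (i = 0; i <= is_aql; i++) {
		out[i] = attach_one(va);
		if (!out[i])
			goto unwind;
		va += bo_size;
	}
	return 0;

unwind:
	for (; i >= 0; i--) {	/* walk back over the partial state */
		free(out[i]);	/* free(NULL) is a no-op */
		out[i] = NULL;
	}
	return -1;
}

int main(void)
{
	struct attachment *att[2] = {NULL, NULL};

	if (!mem_attach(0x1000, 0x2000, true, att))
		printf("mapped at 0x%" PRIx64 " and 0x%" PRIx64 "\n",
		       att[0]->va, att[1]->va);
	free(att[0]);
	free(att[1]);
	return 0;
}

In the real function the unwind additionally removes the bo_va and drops the GEM reference for each slot that was partially set up, as the diff below shows.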

Freeing the attachments is not necessary if map_to_gpu fails. These will be
cleaned up when the kgd_mem object is destroyed in
amdgpu_amdkfd_gpuvm_free_memory_of_gpu.
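
That ownership argument can be sketched the same way: once an attachment is linked into the memory object's list, the destroy path will reclaim it, so a failed map can simply leave it in place. Again, all names here (kgd_mem_sketch, mem_destroy, map_to_gpu) are hypothetical stand-ins, not kernel code:

/* Hypothetical sketch: attachments linked into the owning object's
 * list are reclaimed by the destroy path, so a failed map needs no
 * explicit detach. */
#include <stdlib.h>

struct attachment {
	struct attachment *next;
};

struct kgd_mem_sketch {
	struct attachment *attachments;	/* list of all attachments */
};

static void mem_destroy(struct kgd_mem_sketch *mem)
{
	while (mem->attachments) {	/* frees mapped and unmapped alike */
		struct attachment *a = mem->attachments;

		mem->attachments = a->next;
		free(a);
	}
}

static int map_to_gpu(struct kgd_mem_sketch *mem)
{
	(void)mem;
	return -1;			/* pretend the mapping failed */
}

int main(void)
{
	struct kgd_mem_sketch mem = { NULL };
	struct attachment *a = calloc(1, sizeof(*a));

	if (!a)
		return 1;
	a->next = mem.attachments;	/* attach: the list owns 'a' now */
	mem.attachments = a;

	if (map_to_gpu(&mem)) {
		/* no detach here; mem_destroy() below reclaims it */
	}
	mem_destroy(&mem);
	return 0;
}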
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oak Zeng <Oak.Zeng@amd.com>
Acked-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4e94272f
@@ -488,70 +488,76 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
  * 4a. Validate new page tables and directories
  */
 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
-		struct amdgpu_vm *vm, bool is_aql,
-		struct kfd_mem_attachment **p_attachment)
+		struct amdgpu_vm *vm, bool is_aql)
 {
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	uint64_t va = mem->va;
-	struct kfd_mem_attachment *attachment;
-	struct amdgpu_bo *bo;
-	int ret;
+	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
+	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	int i, ret;
 
 	if (!va) {
 		pr_err("Invalid VA when adding BO to VM\n");
 		return -EINVAL;
 	}
 
-	if (is_aql)
-		va += bo_size;
-
-	attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
-	if (!attachment)
-		return -ENOMEM;
+	for (i = 0; i <= is_aql; i++) {
+		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
+		if (unlikely(!attachment[i])) {
+			ret = -ENOMEM;
+			goto unwind;
+		}
 
-	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-			va + bo_size, vm);
+		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+			 va + bo_size, vm);
 
-	/* FIXME: For now all attachments use the same BO. This is incorrect
-	 * because one BO can only have one DMA mapping for one GPU. We need
-	 * one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
-	 * will be addressed one BO-type at a time in subsequent patches.
-	 */
-	bo = mem->bo;
-	drm_gem_object_get(&bo->tbo.base);
+		/* FIXME: For now all attachments use the same BO. This is
+		 * incorrect because one BO can only have one DMA mapping
+		 * for one GPU. We need one BO per GPU, e.g. a DMABuf
+		 * import with dynamic attachment. This will be addressed
+		 * one BO-type at a time in subsequent patches.
+		 */
+		bo[i] = mem->bo;
+		drm_gem_object_get(&bo[i]->tbo.base);
 
-	/* Add BO to VM internal data structures*/
-	attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
-	if (!attachment->bo_va) {
-		ret = -EINVAL;
-		pr_err("Failed to add BO object to VM. ret == %d\n",
-				ret);
-		goto err_vmadd;
-	}
+		/* Add BO to VM internal data structures */
+		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		if (unlikely(!attachment[i]->bo_va)) {
+			ret = -ENOMEM;
+			pr_err("Failed to add BO object to VM. ret == %d\n",
+			       ret);
+			goto unwind;
+		}
 
-	attachment->va = va;
-	attachment->pte_flags = get_pte_flags(adev, mem);
-	attachment->adev = adev;
-	list_add(&attachment->list, &mem->attachments);
+		attachment[i]->va = va;
+		attachment[i]->pte_flags = get_pte_flags(adev, mem);
+		attachment[i]->adev = adev;
+		list_add(&attachment[i]->list, &mem->attachments);
 
-	if (p_attachment)
-		*p_attachment = attachment;
+		va += bo_size;
+	}
 
 	/* Allocate validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
 	if (unlikely(ret)) {
 		pr_err("validate_pt_pd_bos() failed\n");
-		goto err_alloc_pts;
+		goto unwind;
 	}
 
 	return 0;
 
-err_alloc_pts:
-	amdgpu_vm_bo_rmv(adev, attachment->bo_va);
-	list_del(&attachment->list);
-err_vmadd:
-	drm_gem_object_put(&bo->tbo.base);
-	kfree(attachment);
+unwind:
+	for (; i >= 0; i--) {
+		if (!attachment[i])
+			continue;
+		if (attachment[i]->bo_va) {
+			amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
+			list_del(&attachment[i]->list);
+		}
+		if (bo[i])
+			drm_gem_object_put(&bo[i]->tbo.base);
+		kfree(attachment[i]);
+	}
 	return ret;
 }
@@ -1384,8 +1390,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	uint32_t domain;
 	struct kfd_mem_attachment *entry;
 	struct bo_vm_reservation_context ctx;
-	struct kfd_mem_attachment *attachment = NULL;
-	struct kfd_mem_attachment *attachment_aql = NULL;
 	unsigned long bo_size;
 	bool is_invalid_userptr = false;
 
@@ -1435,15 +1439,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		is_invalid_userptr = true;
 
 	if (!kfd_mem_is_attached(avm, mem)) {
-		ret = kfd_mem_attach(adev, mem, avm, false, &attachment);
+		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
 		if (ret)
 			goto attach_failed;
-		if (mem->aql_queue) {
-			ret = kfd_mem_attach(adev, mem, avm, true,
-					     &attachment_aql);
-			if (ret)
-				goto attach_failed_aql;
-		}
 	} else {
 		ret = vm_validate_pt_pd_bos(avm);
 		if (unlikely(ret))
@@ -1498,11 +1496,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	goto out;
 
 map_bo_to_gpuvm_failed:
-	if (attachment_aql)
-		kfd_mem_detach(attachment_aql);
-attach_failed_aql:
-	if (attachment)
-		kfd_mem_detach(attachment);
 attach_failed:
 	unreserve_bo_and_vms(&ctx, false, false);
 out: