Commit c3186665, authored by Shashank Sharma, committed by Alex Deucher

drm/amdgpu: use doorbell mgr for kfd kernel doorbells

This patch:
- adds a doorbell BO to the kfd device structure.
- creates a doorbell page for KFD kernel usage.
- updates the kfd_get_kernel_doorbell and kfd_release_kernel_doorbell
  functions accordingly.

V2: Do not use a wrapper API; call amdgpu_bo_create_kernel() directly (Alex)
V3:
 - Move single variable declaration below (Christian)
 - Add a to-do item to reuse the KGD kernel level doorbells for
   KFD for non-MES cases, instead of reserving one page (Felix)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 76bd3478
...@@ -455,8 +455,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) ...@@ -455,8 +455,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
atomic_set(&kfd->compute_profile, 0); atomic_set(&kfd->compute_profile, 0);
mutex_init(&kfd->doorbell_mutex); mutex_init(&kfd->doorbell_mutex);
memset(&kfd->doorbell_available_index, 0,
sizeof(kfd->doorbell_available_index));
ida_init(&kfd->doorbell_ida); ida_init(&kfd->doorbell_ida);
......
...@@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) ...@@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
/* Doorbell calculations for device init. */ /* Doorbell calculations for device init. */
int kfd_doorbell_init(struct kfd_dev *kfd) int kfd_doorbell_init(struct kfd_dev *kfd)
{ {
size_t doorbell_start_offset; int size = PAGE_SIZE;
size_t doorbell_aperture_size; int r;
size_t doorbell_process_limit;
/* /*
* With MES enabled, just set the doorbell base as it is needed * Todo: KFD kernel level operations need only one doorbell for
* to calculate doorbell physical address. * ring test/HWS. So instead of reserving a whole page here for
*/ * kernel, reserve and consume a doorbell from existing KGD kernel
if (kfd->shared_resources.enable_mes) { * doorbell page.
kfd->doorbell_base =
kfd->shared_resources.doorbell_physical_address;
return 0;
}
/*
* We start with calculations in bytes because the input data might
* only be byte-aligned.
* Only after we have done the rounding can we assume any alignment.
*/ */
doorbell_start_offset = /* Bitmap to dynamically allocate doorbells from kernel page */
roundup(kfd->shared_resources.doorbell_start_offset, kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
kfd_doorbell_process_slice(kfd)); if (!kfd->doorbell_bitmap) {
DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size,
kfd_doorbell_process_slice(kfd));
if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) /
kfd_doorbell_process_slice(kfd);
else
return -ENOSPC;
if (!kfd->max_doorbell_slices ||
doorbell_process_limit < kfd->max_doorbell_slices)
kfd->max_doorbell_slices = doorbell_process_limit;
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
kfd_doorbell_process_slice(kfd));
if (!kfd->doorbell_kernel_ptr)
return -ENOMEM; return -ENOMEM;
}
pr_debug("Doorbell initialization:\n"); /* Alloc a doorbell page for KFD kernel usages */
pr_debug("doorbell base == 0x%08lX\n", r = amdgpu_bo_create_kernel(kfd->adev,
(uintptr_t)kfd->doorbell_base); size,
PAGE_SIZE,
pr_debug("doorbell_base_dw_offset == 0x%08lX\n", AMDGPU_GEM_DOMAIN_DOORBELL,
kfd->doorbell_base_dw_offset); &kfd->doorbells,
NULL,
pr_debug("doorbell_process_limit == 0x%08lX\n", (void **)&kfd->doorbell_kernel_ptr);
doorbell_process_limit); if (r) {
pr_err("failed to allocate kernel doorbells\n");
pr_debug("doorbell_kernel_offset == 0x%08lX\n", bitmap_free(kfd->doorbell_bitmap);
(uintptr_t)kfd->doorbell_base); return r;
}
pr_debug("doorbell aperture size == 0x%08lX\n",
kfd->shared_resources.doorbell_aperture_size);
pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
return 0; return 0;
} }
void kfd_doorbell_fini(struct kfd_dev *kfd) void kfd_doorbell_fini(struct kfd_dev *kfd)
{ {
if (kfd->doorbell_kernel_ptr) bitmap_free(kfd->doorbell_bitmap);
iounmap(kfd->doorbell_kernel_ptr); amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
(void **)&kfd->doorbell_kernel_ptr);
} }
int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process, int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
...@@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, ...@@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
u32 inx; u32 inx;
mutex_lock(&kfd->doorbell_mutex); mutex_lock(&kfd->doorbell_mutex);
inx = find_first_zero_bit(kfd->doorbell_available_index, inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
__set_bit(inx, kfd->doorbell_available_index); __set_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex); mutex_unlock(&kfd->doorbell_mutex);
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL; return NULL;
inx *= kfd->device_info.doorbell_size / sizeof(u32); *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
/*
* Calculating the kernel doorbell offset using the first
* doorbell page.
*/
*doorbell_off = kfd->doorbell_base_dw_offset + inx;
pr_debug("Get kernel queue doorbell\n" pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n" " doorbell offset == 0x%08X\n"
...@@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) ...@@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{ {
unsigned int inx; unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
* sizeof(u32) / kfd->device_info.doorbell_size;
mutex_lock(&kfd->doorbell_mutex); mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index); __clear_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex); mutex_unlock(&kfd->doorbell_mutex);
} }
......
...@@ -385,6 +385,12 @@ struct kfd_dev { ...@@ -385,6 +385,12 @@ struct kfd_dev {
/* Track per device allocated watch points */ /* Track per device allocated watch points */
uint32_t alloc_watch_ids; uint32_t alloc_watch_ids;
spinlock_t watch_points_lock; spinlock_t watch_points_lock;
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;
/* bitmap for dynamic doorbell allocation from doorbell object */
unsigned long *doorbell_bitmap;
}; };
enum kfd_mempool { enum kfd_mempool {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment