Commit 19f6d2a6 authored by Oded Gabbay's avatar Oded Gabbay

amdkfd: Add basic modules to amdkfd

This patch adds the process module and three helper modules:

- kfd_process, which handles processes that open /dev/kfd

- kfd_doorbell, which provides helper functions for doorbell allocation,
  release and mapping to userspace

- kfd_pasid, which provides helper functions for pasid allocation and release

- kfd_aperture, which provides helper functions for managing the LDS, Local GPU
  memory and Scratch memory apertures of the process

This patch only contains the basic kfd_process module, which doesn't contain
the reference to the queue scheduler. This was done to allow easier code review.

Also, this patch doesn't contain the calls to the IOMMU driver for binding the
pasid to the device. Again, this was done to allow easier code review

The kfd_process object is created when a process opens /dev/kfd and is closed
when the mm_struct of that process is torn down.

v3:

Removed kfd_vidmem.c file
Replaced direct mmput call with mmu_notifier release
Removed typedefs
Moved bool field to end of the structure
Added new kernel params for gart usage limitation
Added initialization of sa manager
Fixed debug messages
Remove support for LDS in 32 bit
Changed code to support mmap of doorbell pages from userspace
Added documentation for apertures

v4: Replaced RCU by SRCU for kfd_process list management

v5:

Move amdkfd from drm/radeon/ to drm/amd/
Rename kfd_aperture.c to kfd_flat_memory.c
Protect against multiple init calls
MQD size is H/W dependent so moved it to device info structure
Rename kfd_mem_obj structure's members
Use delayed function for process tear-down
Signed-off-by: default avatarOded Gabbay <oded.gabbay@amd.com>
parent 5b5c4e40
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o obj-$(CONFIG_HSA_AMD) += amdkfd.o
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
static long kfd_ioctl(struct file *, unsigned int, unsigned long); static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *); static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);
static const char kfd_dev_name[] = "kfd"; static const char kfd_dev_name[] = "kfd";
...@@ -46,6 +47,7 @@ static const struct file_operations kfd_fops = { ...@@ -46,6 +47,7 @@ static const struct file_operations kfd_fops = {
.unlocked_ioctl = kfd_ioctl, .unlocked_ioctl = kfd_ioctl,
.compat_ioctl = kfd_ioctl, .compat_ioctl = kfd_ioctl,
.open = kfd_open, .open = kfd_open,
.mmap = kfd_mmap,
}; };
static int kfd_char_dev_major = -1; static int kfd_char_dev_major = -1;
...@@ -98,9 +100,22 @@ struct device *kfd_chardev(void) ...@@ -98,9 +100,22 @@ struct device *kfd_chardev(void)
static int kfd_open(struct inode *inode, struct file *filep) static int kfd_open(struct inode *inode, struct file *filep)
{ {
struct kfd_process *process;
if (iminor(inode) != 0) if (iminor(inode) != 0)
return -ENODEV; return -ENODEV;
process = kfd_create_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
process->is_32bit_user_mode = is_compat_task();
dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
process->pasid, process->is_32bit_user_mode);
kfd_init_apertures(process);
return 0; return 0;
} }
...@@ -156,8 +171,9 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) ...@@ -156,8 +171,9 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
"ioctl cmd 0x%x (#%d), arg 0x%lx\n", "ioctl cmd 0x%x (#%d), arg 0x%lx\n",
cmd, _IOC_NR(cmd), arg); cmd, _IOC_NR(cmd), arg);
/* TODO: add function that retrieves process */ process = kfd_get_process(current);
process = NULL; if (IS_ERR(process))
return PTR_ERR(process);
switch (cmd) { switch (cmd) {
case KFD_IOC_GET_VERSION: case KFD_IOC_GET_VERSION:
...@@ -208,3 +224,14 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) ...@@ -208,3 +224,14 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
return err; return err;
} }
/*
 * mmap handler for /dev/kfd.
 *
 * Looks up the kfd_process of the calling task and, when the lookup does
 * not yield an error pointer, forwards the VMA to kfd_doorbell_mmap().
 * Returns the lookup error, or whatever kfd_doorbell_mmap() returns.
 */
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *p = kfd_get_process(current);

	if (!IS_ERR(p))
		return kfd_doorbell_mmap(p, vma);

	return PTR_ERR(p);
}
...@@ -26,8 +26,11 @@ ...@@ -26,8 +26,11 @@
#include <linux/slab.h> #include <linux/slab.h>
#include "kfd_priv.h" #include "kfd_priv.h"
#define MQD_SIZE_ALIGNED 768
static const struct kfd_device_info kaveri_device_info = { static const struct kfd_device_info kaveri_device_info = {
.max_pasid_bits = 16, .max_pasid_bits = 16,
.mqd_size_aligned = MQD_SIZE_ALIGNED
}; };
struct kfd_deviceid { struct kfd_deviceid {
...@@ -92,6 +95,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev) ...@@ -92,6 +95,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
kfd->kgd = kgd; kfd->kgd = kgd;
kfd->device_info = device_info; kfd->device_info = device_info;
kfd->pdev = pdev; kfd->pdev = pdev;
kfd->init_complete = false;
return kfd; return kfd;
} }
...@@ -99,23 +103,53 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev) ...@@ -99,23 +103,53 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
bool kgd2kfd_device_init(struct kfd_dev *kfd, bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources) const struct kgd2kfd_shared_resources *gpu_resources)
{ {
unsigned int size;
kfd->shared_resources = *gpu_resources; kfd->shared_resources = *gpu_resources;
if (kfd_topology_add_device(kfd) != 0) /* calculate max size of mqds needed for queues */
return false; size = max_num_of_processes *
max_num_of_queues_per_process *
kfd->device_info->mqd_size_aligned;
/* add another 512KB for all other allocations on gart */
size += 512 * 1024;
if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
dev_err(kfd_device,
"Error initializing sa manager for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
goto out;
}
kfd_doorbell_init(kfd);
if (kfd_topology_add_device(kfd) != 0) {
dev_err(kfd_device,
"Error adding device (%x:%x) to topology\n",
kfd->pdev->vendor, kfd->pdev->device);
goto kfd_topology_add_device_error;
}
kfd->init_complete = true; kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device); kfd->pdev->device);
return true; goto out;
kfd_topology_add_device_error:
kfd2kgd->fini_sa_manager(kfd->kgd);
dev_err(kfd_device,
"device (%x:%x) NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
out:
return kfd->init_complete;
} }
void kgd2kfd_device_exit(struct kfd_dev *kfd) void kgd2kfd_device_exit(struct kfd_dev *kfd)
{ {
int err = kfd_topology_remove_device(kfd); kfd_topology_remove_device(kfd);
BUG_ON(err != 0);
kfree(kfd); kfree(kfd);
} }
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "kfd_priv.h"
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
/*
* This extension supports kernel-level doorbell management for
* the kernel queues.
* Basically, the last doorbell page is devoted to kernel queues,
* which ensures that no user process can get access to the
* kernel doorbell page.
*/
static DEFINE_MUTEX(doorbell_mutex);
static unsigned long doorbell_available_index[
DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)] = { 0 };
#define KERNEL_DOORBELL_PASID 1
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
* receives 32-bit writes that are passed to queues as wptr values.
* The doorbells are intended to be written by applications as part
* of queueing work on user-mode queues.
* We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
* We map the doorbell address space into user-mode when a process creates
* its first queue on each device.
* Although the mapping is done by KFD, it is equivalent to an mmap of
* the /dev/kfd with the particular device encoded in the mmap offset.
* There will be other uses for mmap of /dev/kfd, so only a range of
* offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
*/
/*
 * Number of doorbell bytes reserved for each process: one doorbell of
 * KFD_SIZE_OF_DOORBELL_IN_BYTES per possible queue, rounded up to a
 * multiple of PAGE_SIZE.
 */
static inline size_t doorbell_process_allocation(void)
{
	const size_t raw_bytes = KFD_SIZE_OF_DOORBELL_IN_BYTES *
				 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS;

	return roundup(raw_bytes, PAGE_SIZE);
}
/*
 * Doorbell calculations for device init.
 *
 * Derives, from the shared resources handed over by KGD, where the KFD
 * doorbells start (kfd->doorbell_base), the doorbell index offset
 * (kfd->doorbell_id_offset) and how many per-process doorbell slots fit
 * (kfd->doorbell_process_limit), then maps one per-process-sized chunk at
 * doorbell_base for kernel use (kfd->doorbell_kernel_ptr).
 *
 * The function cannot report failure; an ioremap() failure is fatal.
 */
void kfd_doorbell_init(struct kfd_dev *kfd)
{
	size_t doorbell_start_offset;
	size_t doorbell_aperture_size;
	size_t doorbell_process_limit;

	/*
	 * We start with calculations in bytes because the input data might
	 * only be byte-aligned.
	 * Only after we have done the rounding can we assume any alignment.
	 */
	doorbell_start_offset =
			roundup(kfd->shared_resources.doorbell_start_offset,
					doorbell_process_allocation());

	doorbell_aperture_size =
			rounddown(kfd->shared_resources.doorbell_aperture_size,
					doorbell_process_allocation());

	if (doorbell_aperture_size > doorbell_start_offset)
		doorbell_process_limit =
			(doorbell_aperture_size - doorbell_start_offset) /
						doorbell_process_allocation();
	else
		doorbell_process_limit = 0;

	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
				doorbell_start_offset;

	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);

	/*
	 * One slot is subtracted from the number available to processes
	 * (presumably the chunk mapped below for kernel doorbells - confirm).
	 * The limit is a size_t, so guard against wrapping to SIZE_MAX when
	 * the aperture has no room at all.
	 */
	kfd->doorbell_process_limit = doorbell_process_limit ?
					doorbell_process_limit - 1 : 0;

	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
						doorbell_process_allocation());

	BUG_ON(!kfd->doorbell_kernel_ptr);

	pr_debug("kfd: doorbell initialization:\n");
	pr_debug("kfd: doorbell base == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_base);

	/* size_t values are printed with %z so 32-bit builds stay correct */
	pr_debug("kfd: doorbell_id_offset == 0x%08zX\n",
			kfd->doorbell_id_offset);

	pr_debug("kfd: doorbell_process_limit == 0x%08zX\n",
			doorbell_process_limit);

	pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_base);

	pr_debug("kfd: doorbell aperture size == 0x%08lX\n",
			kfd->shared_resources.doorbell_aperture_size);

	pr_debug("kfd: doorbell kernel address == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_kernel_ptr);
}
/*
 * Map the calling process's doorbell allocation on one device into the
 * given user VMA.
 *
 * The VMA must be exactly doorbell_process_allocation() bytes long and its
 * page offset (vm_pgoff) is interpreted as the gpu id of the device.
 *
 * Returns -EINVAL when the size is wrong, the device is unknown, or the
 * process has no per-device data for that device; otherwise the result of
 * io_remap_pfn_range().
 */
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
{
	phys_addr_t address;
	struct kfd_dev *dev;

	/*
	 * For simplicity we only allow mapping of the entire doorbell
	 * allocation of a single device & process.
	 */
	if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
		return -EINVAL;

	/* Find kfd device according to gpu id */
	dev = kfd_device_by_id(vma->vm_pgoff);
	if (dev == NULL)
		return -EINVAL;

	/* Find if pdd exists for combination of process and gpu id */
	if (!kfd_get_process_device_data(dev, process, 0))
		return -EINVAL;

	/* Calculate physical address of doorbell */
	address = kfd_get_process_doorbells(dev, process);

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
				VM_DONTDUMP | VM_PFNMAP;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	/*
	 * phys_addr_t is not guaranteed to be 64 bits wide, so it is cast
	 * explicitly for %llX; the size is a size_t and uses %zX.
	 */
	pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n"
		 " target user address == 0x%08llX\n"
		 " physical address == 0x%08llX\n"
		 " vm_flags == 0x%04lX\n"
		 " size == 0x%04zX\n",
		 (unsigned long long) vma->vm_start,
		 (unsigned long long) address, vma->vm_flags,
		 doorbell_process_allocation());

	return io_remap_pfn_range(vma,
				vma->vm_start,
				address >> PAGE_SHIFT,
				doorbell_process_allocation(),
				vma->vm_page_prot);
}
/*
 * Get a kernel iomem pointer for a doorbell.
 *
 * Reserves the first free index in doorbell_available_index and returns
 * the matching address inside kfd->doorbell_kernel_ptr. The doorbell
 * offset to program is stored in *doorbell_off.
 *
 * Returns NULL (and leaves *doorbell_off untouched) when all
 * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS kernel doorbells are in use.
 */
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off)
{
	u32 inx;

	BUG_ON(!kfd || !doorbell_off);

	mutex_lock(&doorbell_mutex);
	inx = find_first_zero_bit(doorbell_available_index,
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	/*
	 * Only mark the slot when one was actually found; when the bitmap is
	 * full inx equals the bitmap size and setting it would write past
	 * the end of doorbell_available_index.
	 */
	if (inx < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		__set_bit(inx, doorbell_available_index);
	mutex_unlock(&doorbell_mutex);

	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return NULL;

	/*
	 * Calculating the kernel doorbell offset using "faked" kernel
	 * pasid that allocated for kernel queues only
	 */
	*doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() /
							sizeof(u32)) + inx;

	/* the offset is printed in hex to match its "0x" prefix */
	pr_debug("kfd: get kernel queue doorbell\n"
		 " doorbell offset == 0x%08X\n"
		 " kernel address == 0x%08lX\n",
		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));

	return kfd->doorbell_kernel_ptr + inx;
}
/*
 * Give back a kernel doorbell obtained from kfd_get_kernel_doorbell().
 * The slot index is recovered from the distance between db_addr and the
 * start of the kernel doorbell mapping, then cleared in the bitmap.
 */
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
	unsigned int slot;

	BUG_ON(!kfd || !db_addr);

	slot = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);

	mutex_lock(&doorbell_mutex);
	__clear_bit(slot, doorbell_available_index);
	mutex_unlock(&doorbell_mutex);
}
/*
 * Write a 32-bit value to a kernel doorbell and log it.
 * A NULL doorbell pointer is silently ignored.
 */
inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
{
	if (!db)
		return;

	writel(value, db);
	pr_debug("writing %d to doorbell address 0x%p\n", value, db);
}
/*
* queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
* to doorbells with the process's doorbell page
*/
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int queue_id)
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
* pasid * doorbell_process_allocation/sizeof(u32) adjusts
* to the process's doorbells
*/
return kfd->doorbell_id_offset +
process->pasid * (doorbell_process_allocation()/sizeof(u32)) +
queue_id;
}
/*
 * Number of per-process doorbell chunks between the raw doorbell start
 * offset and the end of the aperture, plus one.
 * Uses the unrounded values from kfd->shared_resources.
 */
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
	return (kfd->shared_resources.doorbell_aperture_size -
		kfd->shared_resources.doorbell_start_offset) /
			doorbell_process_allocation() + 1;
}
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process)
{
return dev->doorbell_base +
process->pasid * doorbell_process_allocation();
}
This diff is collapsed.
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/moduleparam.h> #include <linux/moduleparam.h>
#include <linux/device.h> #include <linux/device.h>
#include "kfd_priv.h" #include "kfd_priv.h"
...@@ -46,6 +45,16 @@ static const struct kgd2kfd_calls kgd2kfd = { ...@@ -46,6 +45,16 @@ static const struct kgd2kfd_calls kgd2kfd = {
.resume = kgd2kfd_resume, .resume = kgd2kfd_resume,
}; };
int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
module_param(max_num_of_processes, int, 0444);
MODULE_PARM_DESC(max_num_of_processes,
"Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
module_param(max_num_of_queues_per_process, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_process,
"Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
bool kgd2kfd_init(unsigned interface_version, bool kgd2kfd_init(unsigned interface_version,
const struct kfd2kgd_calls *f2g, const struct kfd2kgd_calls *f2g,
const struct kgd2kfd_calls **g2f) const struct kgd2kfd_calls **g2f)
...@@ -57,6 +66,10 @@ bool kgd2kfd_init(unsigned interface_version, ...@@ -57,6 +66,10 @@ bool kgd2kfd_init(unsigned interface_version,
if (interface_version != KFD_INTERFACE_VERSION) if (interface_version != KFD_INTERFACE_VERSION)
return false; return false;
/* Protection against multiple amd kgd loads */
if (kfd2kgd)
return true;
kfd2kgd = f2g; kfd2kgd = f2g;
*g2f = &kgd2kfd; *g2f = &kgd2kfd;
...@@ -72,6 +85,26 @@ static int __init kfd_module_init(void) ...@@ -72,6 +85,26 @@ static int __init kfd_module_init(void)
{ {
int err; int err;
kfd2kgd = NULL;
/* Verify module parameters */
if ((max_num_of_processes < 0) ||
(max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
return -1;
}
if ((max_num_of_queues_per_process < 0) ||
(max_num_of_queues_per_process >
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
return -1;
}
err = kfd_pasid_init();
if (err < 0)
goto err_pasid;
err = kfd_chardev_init(); err = kfd_chardev_init();
if (err < 0) if (err < 0)
goto err_ioctl; goto err_ioctl;
...@@ -80,6 +113,8 @@ static int __init kfd_module_init(void) ...@@ -80,6 +113,8 @@ static int __init kfd_module_init(void)
if (err < 0) if (err < 0)
goto err_topology; goto err_topology;
kfd_process_create_wq();
dev_info(kfd_device, "Initialized module\n"); dev_info(kfd_device, "Initialized module\n");
return 0; return 0;
...@@ -87,13 +122,17 @@ static int __init kfd_module_init(void) ...@@ -87,13 +122,17 @@ static int __init kfd_module_init(void)
err_topology: err_topology:
kfd_chardev_exit(); kfd_chardev_exit();
err_ioctl: err_ioctl:
kfd_pasid_exit();
err_pasid:
return err; return err;
} }
static void __exit kfd_module_exit(void) static void __exit kfd_module_exit(void)
{ {
kfd_process_destroy_wq();
kfd_topology_shutdown(); kfd_topology_shutdown();
kfd_chardev_exit(); kfd_chardev_exit();
kfd_pasid_exit();
dev_info(kfd_device, "Removed module\n"); dev_info(kfd_device, "Removed module\n");
} }
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/types.h>
#include "kfd_priv.h"
static unsigned long *pasid_bitmap;
static unsigned int pasid_limit;
static DEFINE_MUTEX(pasid_mutex);
int kfd_pasid_init(void)
{
pasid_limit = max_num_of_processes;
pasid_bitmap = kzalloc(DIV_ROUND_UP(pasid_limit, BITS_PER_BYTE),
GFP_KERNEL);
if (!pasid_bitmap)
return -ENOMEM;
set_bit(0, pasid_bitmap); /* PASID 0 is reserved. */
return 0;
}
/* Release the PASID bitmap allocated by kfd_pasid_init(). */
void kfd_pasid_exit(void)
{
kfree(pasid_bitmap);
}
/*
 * Try to lower the PASID limit to new_limit.
 *
 * A value that is not below the current limit is accepted without any
 * change. Otherwise the limit is lowered only if no PASID in
 * [new_limit, pasid_limit) is currently allocated.
 *
 * Returns true when the request is satisfied, false when PASIDs at or
 * above new_limit are still in use.
 */
bool kfd_set_pasid_limit(unsigned int new_limit)
{
	bool shrink_ok;

	if (new_limit >= pasid_limit)
		return true;

	mutex_lock(&pasid_mutex);

	/* ensure that no pasids >= new_limit are in-use */
	shrink_ok = find_next_bit(pasid_bitmap, pasid_limit, new_limit) ==
								pasid_limit;
	if (shrink_ok)
		pasid_limit = new_limit;

	mutex_unlock(&pasid_mutex);

	return shrink_ok;
}
/* Return the current PASID limit; read without taking pasid_mutex. */
inline unsigned int kfd_get_pasid_limit(void)
{
return pasid_limit;
}
/*
 * Allocate the lowest free PASID below pasid_limit.
 * Returns the PASID, or 0 when none is available.
 */
unsigned int kfd_pasid_alloc(void)
{
	unsigned int pasid = 0;
	unsigned int slot;

	mutex_lock(&pasid_mutex);

	slot = find_first_zero_bit(pasid_bitmap, pasid_limit);
	if (slot != pasid_limit) {
		set_bit(slot, pasid_bitmap);
		pasid = slot;
	}

	mutex_unlock(&pasid_mutex);

	return pasid;
}
/*
* Return a PASID to the pool.
* Passing 0 or a value at or above the current limit is treated as a
* fatal programming error. The bit is cleared without taking pasid_mutex.
*/
void kfd_pasid_free(unsigned int pasid)
{
BUG_ON(pasid == 0 || pasid >= pasid_limit);
clear_bit(pasid, pasid_bitmap);
}
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <kgd_kfd_interface.h> #include <kgd_kfd_interface.h>
#define KFD_SYSFS_FILE_MODE 0444 #define KFD_SYSFS_FILE_MODE 0444
...@@ -41,9 +42,26 @@ ...@@ -41,9 +42,26 @@
#define kfd_alloc_struct(ptr_to_struct) \ #define kfd_alloc_struct(ptr_to_struct) \
((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
/* Kernel module parameter to specify maximum number of supported processes */
extern int max_num_of_processes;
#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
#define KFD_MAX_NUM_OF_PROCESSES 512
/*
* Kernel module parameter to specify maximum number of supported queues
* per process
*/
extern int max_num_of_queues_per_process;
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
struct kfd_device_info { struct kfd_device_info {
unsigned int max_pasid_bits; unsigned int max_pasid_bits;
size_t ih_ring_entry_size; size_t ih_ring_entry_size;
uint16_t mqd_size_aligned;
}; };
struct kfd_dev { struct kfd_dev {
...@@ -54,6 +72,21 @@ struct kfd_dev { ...@@ -54,6 +72,21 @@ struct kfd_dev {
unsigned int id; /* topology stub index */ unsigned int id; /* topology stub index */
phys_addr_t doorbell_base; /* Start of actual doorbells used by
* KFD. It is aligned for mapping
* into user mode
*/
size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell
* to HW doorbell, GFX reserved some
* at the start)
*/
size_t doorbell_process_limit; /* Number of processes we have doorbell
* space for.
*/
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
* page used by kernel queue
*/
struct kgd2kfd_shared_resources shared_resources; struct kgd2kfd_shared_resources shared_resources;
bool init_complete; bool init_complete;
...@@ -69,15 +102,122 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd); ...@@ -69,15 +102,122 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd);
extern const struct kfd2kgd_calls *kfd2kgd; extern const struct kfd2kgd_calls *kfd2kgd;
/*
* Descriptor of a memory object used by KFD.
* NOTE(review): nothing in this patch reads these fields; the meanings
* below are taken from the field names only - confirm.
*/
struct kfd_mem_obj {
void *bo; /* presumably an opaque buffer-object handle */
uint64_t gpu_addr; /* presumably the GPU-visible address */
uint32_t *cpu_ptr; /* presumably the CPU mapping of the object */
};
/*
* Memory pool selectors. Values start at 1, so 0 is not a valid pool.
* NOTE(review): users of this enum are not part of this patch - confirm
* the intended meaning of each pool against the allocator.
*/
enum kfd_mempool {
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
KFD_MEMPOOL_FRAMEBUFFER = 3,
};
/* Character device interface */ /* Character device interface */
int kfd_chardev_init(void); int kfd_chardev_init(void);
void kfd_chardev_exit(void); void kfd_chardev_exit(void);
struct device *kfd_chardev(void); struct device *kfd_chardev(void);
/*
* Data that is per-process-per device.
* Instances are created on demand by kfd_get_process_device_data() and
* freed when the owning kfd_process is released.
*/
struct kfd_process_device {
/*
* List of all per-device data for a process.
* Starts from kfd_process.per_device_data.
*/
struct list_head per_device_list;
/* The device that owns this data. */
struct kfd_dev *dev;
/*
* Apertures: base/limit pairs for the LDS, GPUVM and scratch ranges.
* NOTE(review): the code that fills these in is not visible here.
*/
uint64_t lds_base;
uint64_t lds_limit;
uint64_t gpuvm_base;
uint64_t gpuvm_limit;
uint64_t scratch_base;
uint64_t scratch_limit;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
bool bound;
};
/* Process data */ /* Process data */
struct kfd_process { struct kfd_process {
/*
* kfd_process are stored in an mm_struct*->kfd_process*
* hash table (kfd_processes in kfd_process.c)
*/
struct hlist_node kfd_processes;
struct mm_struct *mm;
struct mutex mutex;
/*
* In any process, the thread that started main() is the lead
* thread and outlives the rest.
* It is here because amd_iommu_bind_pasid wants a task_struct.
*/
struct task_struct *lead_thread;
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
/* Use for delayed freeing of kfd_process structure */
struct rcu_head rcu;
unsigned int pasid;
/*
* List of kfd_process_device structures,
* one for each device the process is using.
*/
struct list_head per_device_data;
/* The process's queues. */
size_t queue_array_size;
/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
struct kfd_queue **queues;
unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
}; };
void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(const struct task_struct *);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p,
int create_pdd);
/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
bool kfd_set_pasid_limit(unsigned int new_limit);
unsigned int kfd_get_pasid_limit(void);
unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);
/* Doorbells */
void kfd_doorbell_init(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(u32 __iomem *db, u32 value);
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int queue_id);
extern struct device *kfd_device; extern struct device *kfd_device;
/* Topology */ /* Topology */
...@@ -96,4 +236,11 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry); ...@@ -96,4 +236,11 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry);
void kgd2kfd_suspend(struct kfd_dev *dev); void kgd2kfd_suspend(struct kfd_dev *dev);
int kgd2kfd_resume(struct kfd_dev *dev); int kgd2kfd_resume(struct kfd_dev *dev);
/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
#endif #endif
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/notifier.h>
struct mm_struct;
#include "kfd_priv.h"
/*
* Initial size for the array of queues.
* The allocated size is doubled each time
* it is exceeded up to MAX_PROCESS_QUEUES.
*/
#define INITIAL_QUEUE_ARRAY_SIZE 16
/*
* List of struct kfd_process (field kfd_process).
* Unique/indexed by mm_struct*
*/
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_STATIC_SRCU(kfd_processes_srcu);
static struct workqueue_struct *kfd_process_wq;
/*
* Work item used to free a kfd_process from the kfd_process_wq workqueue.
* kfd_work must remain the first member: the code in this file converts
* between a pointer to this structure and a pointer to its work_struct
* with plain casts.
*/
struct kfd_process_release_work {
struct work_struct kfd_work;
struct kfd_process *p; /* process to be released */
};
static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);
/*
 * Create the workqueue used for delayed kfd_process release.
 * Does nothing if the workqueue already exists. The result of
 * create_workqueue() is stored as-is and not checked here.
 */
void kfd_process_create_wq(void)
{
	if (kfd_process_wq)
		return;

	kfd_process_wq = create_workqueue("kfd_process_wq");
}
/*
 * Flush and destroy the process-release workqueue, if it exists, and
 * reset the pointer so a later kfd_process_create_wq() starts fresh.
 */
void kfd_process_destroy_wq(void)
{
	if (!kfd_process_wq)
		return;

	flush_workqueue(kfd_process_wq);
	destroy_workqueue(kfd_process_wq);
	kfd_process_wq = NULL;
}
struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
struct kfd_process *process;
BUG_ON(!kfd_process_wq);
if (thread->mm == NULL)
return ERR_PTR(-EINVAL);
/* Only the pthreads threading model is supported. */
if (thread->group_leader->mm != thread->mm)
return ERR_PTR(-EINVAL);
/* Take mmap_sem because we call __mmu_notifier_register inside */
down_write(&thread->mm->mmap_sem);
/*
* take kfd processes mutex before starting of process creation
* so there won't be a case where two threads of the same process
* create two kfd_process structures
*/
mutex_lock(&kfd_processes_mutex);
/* A prior open of /dev/kfd could have already created the process. */
process = find_process(thread);
if (process)
pr_debug("kfd: process already found\n");
if (!process)
process = create_process(thread);
mutex_unlock(&kfd_processes_mutex);
up_write(&thread->mm->mmap_sem);
return process;
}
/*
 * Look up the kfd_process attached to the mm of @thread.
 *
 * Never returns NULL: callers test the result with IS_ERR() only, so a
 * missing process is reported as ERR_PTR(-EINVAL), the same error used
 * for a thread without an mm or one that does not share its group
 * leader's mm.
 */
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (thread->mm == NULL)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
struct kfd_process *process;
hash_for_each_possible_rcu(kfd_processes_table, process,
kfd_processes, (uintptr_t)mm)
if (process->mm == mm)
return process;
return NULL;
}
/*
 * Find the kfd_process for the mm of @thread, or NULL if there is none.
 * The table lookup runs inside a kfd_processes_srcu read-side section;
 * the pointer is returned after that section has ended.
 */
static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *found;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kfd_processes_srcu);
	found = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, srcu_idx);

	return found;
}
static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process_release_work *my_work;
struct kfd_process_device *pdd, *temp;
struct kfd_process *p;
my_work = (struct kfd_process_release_work *) work;
p = my_work->p;
mutex_lock(&p->mutex);
list_for_each_entry_safe(pdd, temp, &p->per_device_data,
per_device_list) {
list_del(&pdd->per_device_list);
kfree(pdd);
}
kfd_pasid_free(p->pasid);
mutex_unlock(&p->mutex);
mutex_destroy(&p->mutex);
kfree(p->queues);
kfree(p);
kfree((void *)work);
}
/*
* Callback registered through mmu_notifier_call_srcu() by
* kfd_process_notifier_release().
*
* Drops the mm reference held for the process and defers the actual
* freeing to kfd_process_wq via a freshly allocated work item.
*
* If the work item cannot be allocated, nothing is queued and the
* kfd_process is not freed by this function.
* NOTE(review): confirm that GFP_KERNEL is permitted in the context in
* which this callback is invoked.
*/
static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
struct kfd_process_release_work *work;
struct kfd_process *p;
BUG_ON(!kfd_process_wq);
/* recover the process from its embedded rcu_head */
p = container_of(rcu, struct kfd_process, rcu);
BUG_ON(atomic_read(&p->mm->mm_count) <= 0);
/* pairs with the extra mm_count taken in kfd_process_notifier_release */
mmdrop(p->mm);
work = (struct kfd_process_release_work *)
kmalloc(sizeof(struct kfd_process_release_work), GFP_KERNEL);
if (work) {
/* the casts rely on kfd_work being the first member of the work item */
INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
work->p = p;
queue_work(kfd_process_wq, (struct work_struct *) work);
}
}
/*
* mmu_notifier .release callback for a kfd_process.
*
* Removes the process from the hash table, waits for kfd_processes_srcu
* readers to finish, unregisters the notifier and schedules
* kfd_process_destroy_delayed() to tear the process down.
*/
static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kfd_process *p;
/*
* The kfd_process structure cannot be freed here because the
* mmu_notifier srcu is read locked
*/
p = container_of(mn, struct kfd_process, mmu_notifier);
BUG_ON(p->mm != mm);
/* unpublish the process so new lookups can no longer find it */
mutex_lock(&kfd_processes_mutex);
hash_del_rcu(&p->kfd_processes);
mutex_unlock(&kfd_processes_mutex);
/* wait for lookups that started before the removal */
synchronize_srcu(&kfd_processes_srcu);
/*
* Because we drop mm_count inside kfd_process_destroy_delayed
* and because the mmu_notifier_unregister function also drop
* mm_count we need to take an extra count here.
*/
atomic_inc(&p->mm->mm_count);
mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}
/* mmu_notifier callbacks for a kfd_process; only .release is provided. */
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
};
/*
 * Allocate and register a new kfd_process for the mm of @thread.
 *
 * Allocates the process object and its initial queue array, obtains a
 * PASID, registers an mmu_notifier on the mm and finally adds the process
 * to the hash table.
 *
 * The caller (kfd_create_process) holds the mm's mmap_sem for writing and
 * kfd_processes_mutex.
 *
 * Returns the new process, ERR_PTR(-ENOMEM) when an allocation or the
 * PASID allocation fails, or the error from __mmu_notifier_register().
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
					sizeof(process->queues[0]), GFP_KERNEL);
	if (!process->queues)
		goto err_alloc_queues;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	mutex_init(&process->mutex);

	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
	INIT_LIST_HEAD(&process->per_device_data);

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	/*
	 * Publish the process only once every field is initialized: lookups
	 * walk the table without taking kfd_processes_mutex, so an entry
	 * added earlier could be seen with an uninitialized per-device list.
	 */
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	return process;

err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process->queues);
err_alloc_queues:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}
/*
 * Find the per-device data of process @p for device @dev.
 *
 * When no entry exists and @create_pdd is non-zero, a new zeroed entry is
 * allocated, tied to @dev and added to the process's list.
 *
 * Returns the entry, or NULL when it does not exist and was not requested
 * to be created, or when the allocation failed.
 */
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p,
							int create_pdd)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	/*
	 * After a full traversal the cursor does not point at a real entry
	 * (and is not NULL either), so it must not be handed back.
	 */
	if (!create_pdd)
		return NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (pdd != NULL) {
		pdd->dev = dev;
		list_add(&pdd->per_device_list, &p->per_device_data);
	}

	return pdd;
}
/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * In this version only the bookkeeping is done: the per-device data is
 * looked up (and created if missing) and marked as bound.
 *
 * Assumes that the process lock is held.
 *
 * Returns the per-device data, or ERR_PTR(-ENOMEM) if it could not be
 * obtained.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	pdd = kfd_get_process_device_data(dev, p, 1);
	if (!pdd)
		return ERR_PTR(-ENOMEM);

	if (!pdd->bound)
		pdd->bound = true;

	return pdd;
}
/*
 * Mark the process identified by @pasid as no longer bound to @dev.
 *
 * The process is looked up by scanning the whole process table. If no
 * process carries that pasid, a warning is emitted and nothing else is
 * done.
 */
void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int idx, i;

	BUG_ON(dev == NULL);

	idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
		if (p->pasid == pasid)
			break;

	srcu_read_unlock(&kfd_processes_srcu, idx);

	/*
	 * The table iterator leaves p NULL when no entry matched; check it
	 * before touching p instead of dereferencing a NULL pointer.
	 */
	if (WARN_ON(p == NULL))
		return;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p, 0);

	/*
	 * Just mark pdd as unbound, because we still need it to call
	 * amd_iommu_unbind_pasid() in when the process exits.
	 * We don't call amd_iommu_unbind_pasid() here
	 * because the IOMMU called us.
	 */
	if (pdd)
		pdd->bound = false;

	mutex_unlock(&p->mutex);
}
/*
* Return the first per-device data entry of process p.
* No emptiness check is made here.
* NOTE(review): callers presumably need to check
* kfd_has_process_device_data() first - confirm against list_first_entry().
*/
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
{
return list_first_entry(&p->per_device_data,
struct kfd_process_device,
per_device_list);
}
/*
 * Return the per-device data entry that follows @pdd in the list of
 * process @p, or NULL when @pdd is the last entry.
 */
struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (!list_is_last(&pdd->per_device_list, &p->per_device_data))
		return list_next_entry(pdd, per_device_list);

	return NULL;
}
/* Return true when process p has at least one per-device data entry. */
bool kfd_has_process_device_data(struct kfd_process *p)
{
return !(list_empty(&p->per_device_data));
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment