Commit cea405b1 authored by Xihan Zhang's avatar Xihan Zhang Committed by Oded Gabbay

drm/amdkfd: Add multiple kgd support

The current code can only support one kgd instance. We have to
support multiple kgd instances in one system. i.e two amdgpu or two
radeon or one amdgpu + one radeon or more than two kgd instances.
Signed-off-by: default avatarXihan Zhang <xihan.zhang@amd.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@amd.com>
parent affa7d86
......@@ -442,7 +442,8 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
return -EINVAL;
/* Reading GPU clock counter from KGD */
args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
args->gpu_clock_counter =
dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
/* No access to rdtsc. Using raw monotonic time */
getrawmonotonic64(&time);
......
......@@ -94,7 +94,8 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did)
return NULL;
}
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
struct kfd_dev *kfd;
......@@ -112,6 +113,11 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
kfd->device_info = device_info;
kfd->pdev = pdev;
kfd->init_complete = false;
kfd->kfd2kgd = f2g;
mutex_init(&kfd->doorbell_mutex);
memset(&kfd->doorbell_available_index, 0,
sizeof(kfd->doorbell_available_index));
return kfd;
}
......@@ -200,8 +206,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* add another 512KB for all other allocations on gart (HPD, fences) */
size += 512 * 1024;
if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
if (kfd->kfd2kgd->init_gtt_mem_allocation(
kfd->kgd, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
dev_err(kfd_device,
"Could not allocate %d bytes for device (%x:%x)\n",
size, kfd->pdev->vendor, kfd->pdev->device);
......@@ -270,7 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_topology_add_device_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
dev_err(kfd_device,
"device (%x:%x) NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
......@@ -285,7 +292,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
amd_iommu_free_device(kfd->pdev);
kfd_topology_remove_device(kfd);
kfd_gtt_sa_fini(kfd);
kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
}
kfree(kfd);
......
......@@ -82,7 +82,8 @@ static inline unsigned int get_pipes_num_cpsch(void)
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
return dqm->dev->kfd2kgd->program_sh_mem_settings(
dqm->dev->kgd, qpd->vmid,
qpd->sh_mem_config,
qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit,
......@@ -457,9 +458,12 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
{
uint32_t pasid_mapping;
pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID_PASID_MAPPING_VALID;
return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
pasid_mapping = (pasid == 0) ? 0 :
(uint32_t)pasid |
ATC_VMID_PASID_MAPPING_VALID;
return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->dev->kgd, pasid_mapping,
vmid);
}
......@@ -511,7 +515,7 @@ int init_pipelines(struct device_queue_manager *dqm,
pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
/* = log2(bytes/4)-1 */
kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
}
......
......@@ -32,9 +32,6 @@
* and that's assures that any user process won't get access to the
* kernel doorbells page
*/
static DEFINE_MUTEX(doorbell_mutex);
static unsigned long doorbell_available_index[
DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)] = { 0 };
#define KERNEL_DOORBELL_PASID 1
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
......@@ -170,12 +167,12 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
BUG_ON(!kfd || !doorbell_off);
mutex_lock(&doorbell_mutex);
inx = find_first_zero_bit(doorbell_available_index,
mutex_lock(&kfd->doorbell_mutex);
inx = find_first_zero_bit(kfd->doorbell_available_index,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
__set_bit(inx, doorbell_available_index);
mutex_unlock(&doorbell_mutex);
__set_bit(inx, kfd->doorbell_available_index);
mutex_unlock(&kfd->doorbell_mutex);
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
......@@ -203,9 +200,9 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
mutex_lock(&doorbell_mutex);
__clear_bit(inx, doorbell_available_index);
mutex_unlock(&doorbell_mutex);
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index);
mutex_unlock(&kfd->doorbell_mutex);
}
inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
......
......@@ -34,7 +34,6 @@
#define KFD_DRIVER_MINOR 7
#define KFD_DRIVER_PATCHLEVEL 1
const struct kfd2kgd_calls *kfd2kgd;
static const struct kgd2kfd_calls kgd2kfd = {
.exit = kgd2kfd_exit,
.probe = kgd2kfd_probe,
......@@ -55,9 +54,7 @@ module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
bool kgd2kfd_init(unsigned interface_version,
const struct kfd2kgd_calls *f2g,
const struct kgd2kfd_calls **g2f)
bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
{
/*
* Only one interface version is supported,
......@@ -66,11 +63,6 @@ bool kgd2kfd_init(unsigned interface_version,
if (interface_version != KFD_INTERFACE_VERSION)
return false;
/* Protection against multiple amd kgd loads */
if (kfd2kgd)
return true;
kfd2kgd = f2g;
*g2f = &kgd2kfd;
return true;
......@@ -85,8 +77,6 @@ static int __init kfd_module_init(void)
{
int err;
kfd2kgd = NULL;
/* Verify module parameters */
if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
......
......@@ -151,14 +151,15 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr)
{
return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
return mm->dev->kfd2kgd->hqd_load
(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr)
{
return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
}
static int update_mqd(struct mqd_manager *mm, void *mqd,
......@@ -245,7 +246,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
pipe_id, queue_id);
}
......@@ -258,7 +259,7 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
}
static bool is_occupied(struct mqd_manager *mm, void *mqd,
......@@ -266,7 +267,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
pipe_id, queue_id);
}
......@@ -275,7 +276,7 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
{
return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
}
/*
......
......@@ -148,6 +148,11 @@ struct kfd_dev {
struct kgd2kfd_shared_resources shared_resources;
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
unsigned long doorbell_available_index[DIV_ROUND_UP(
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
void *gtt_start_cpu_ptr;
......@@ -164,13 +169,12 @@ struct kfd_dev {
/* KGD2KFD callbacks */
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources);
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
extern const struct kfd2kgd_calls *kfd2kgd;
enum kfd_mempool {
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
......
......@@ -726,13 +726,14 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
}
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
kfd2kgd->get_max_engine_clock_in_mhz(
dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
dev->gpu->kgd));
sysfs_show_64bit_prop(buffer, "local_mem_size",
kfd2kgd->get_vmem_size(dev->gpu->kgd));
dev->gpu->kfd2kgd->get_vmem_size(
dev->gpu->kgd));
sysfs_show_32bit_prop(buffer, "fw_version",
kfd2kgd->get_fw_version(
dev->gpu->kfd2kgd->get_fw_version(
dev->gpu->kgd,
KGD_ENGINE_MEC1));
}
......@@ -1099,8 +1100,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
buf[2] = gpu->pdev->subsystem_device;
buf[3] = gpu->pdev->device;
buf[4] = gpu->pdev->bus->number;
buf[5] = (uint32_t)(kfd2kgd->get_vmem_size(gpu->kgd) & 0xffffffff);
buf[6] = (uint32_t)(kfd2kgd->get_vmem_size(gpu->kgd) >> 32);
buf[5] = (uint32_t)(gpu->kfd2kgd->get_vmem_size(gpu->kgd)
& 0xffffffff);
buf[6] = (uint32_t)(gpu->kfd2kgd->get_vmem_size(gpu->kgd) >> 32);
for (i = 0, hashout = 0; i < 7; i++)
hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
......
......@@ -76,37 +76,6 @@ struct kgd2kfd_shared_resources {
size_t doorbell_start_offset;
};
/**
* struct kgd2kfd_calls
*
* @exit: Notifies amdkfd that kgd module is unloaded
*
* @probe: Notifies amdkfd about a probe done on a device in the kgd driver.
*
* @device_init: Initialize the newly probed device (if it is a device that
* amdkfd supports)
*
* @device_exit: Notifies amdkfd about a removal of a kgd device
*
* @suspend: Notifies amdkfd about a suspend action done to a kgd device
*
* @resume: Notifies amdkfd about a resume action done to a kgd device
*
* This structure contains function callback pointers so the kgd driver
* will notify to the amdkfd about certain status changes.
*
*/
struct kgd2kfd_calls {
void (*exit)(void);
struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev);
bool (*device_init)(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources);
void (*device_exit)(struct kfd_dev *kfd);
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
void (*suspend)(struct kfd_dev *kfd);
int (*resume)(struct kfd_dev *kfd);
};
/**
* struct kfd2kgd_calls
*
......@@ -196,8 +165,39 @@ struct kfd2kgd_calls {
enum kgd_engine_type type);
};
/**
* struct kgd2kfd_calls
*
* @exit: Notifies amdkfd that kgd module is unloaded
*
* @probe: Notifies amdkfd about a probe done on a device in the kgd driver.
*
* @device_init: Initialize the newly probed device (if it is a device that
* amdkfd supports)
*
* @device_exit: Notifies amdkfd about a removal of a kgd device
*
* @suspend: Notifies amdkfd about a suspend action done to a kgd device
*
* @resume: Notifies amdkfd about a resume action done to a kgd device
*
* This structure contains function callback pointers so the kgd driver
* will notify to the amdkfd about certain status changes.
*
*/
struct kgd2kfd_calls {
void (*exit)(void);
struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev,
const struct kfd2kgd_calls *f2g);
bool (*device_init)(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources);
void (*device_exit)(struct kfd_dev *kfd);
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
void (*suspend)(struct kfd_dev *kfd);
int (*resume)(struct kfd_dev *kfd);
};
bool kgd2kfd_init(unsigned interface_version,
const struct kfd2kgd_calls *f2g,
const struct kgd2kfd_calls **g2f);
#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
......@@ -103,15 +103,14 @@ static const struct kgd2kfd_calls *kgd2kfd;
bool radeon_kfd_init(void)
{
#if defined(CONFIG_HSA_AMD_MODULE)
bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*,
const struct kgd2kfd_calls**);
bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
return false;
if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) {
if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
symbol_put(kgd2kfd_init);
kgd2kfd = NULL;
......@@ -120,7 +119,7 @@ bool radeon_kfd_init(void)
return true;
#elif defined(CONFIG_HSA_AMD)
if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) {
if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
kgd2kfd = NULL;
return false;
......@@ -143,7 +142,8 @@ void radeon_kfd_fini(void)
void radeon_kfd_device_probe(struct radeon_device *rdev)
{
if (kgd2kfd)
rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, rdev->pdev);
rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
rdev->pdev, &kfd2kgd);
}
void radeon_kfd_device_init(struct radeon_device *rdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment