Merge tag 'drm-amdkfd-fixes-2015-01-26' of...

Merge tag 'drm-amdkfd-fixes-2015-01-26' of git://people.freedesktop.org/~gabbayo/linux into drm-fixes A couple of fixes for -rc7 in amdkfd: - Forgot to free resources when creation of queue has failed - Initialization of pipelines was incorrect (3 patches) In addition, The patch "drm/amdkfd: Allow user to limit only queues per device" is not a fix, but I would like to push it for 3.19 as it changes the ABI between amdkfd and userspace (by changing the module parameters). I would prefer *not* to support the two deprecated module parameters if I don't have too, as amdkfd hasn't been released yet. * tag 'drm-amdkfd-fixes-2015-01-26' of git://people.freedesktop.org/~gabbayo/linux: drm/amdkfd: Fix bug in call to init_pipelines() drm/amdkfd: Fix bug in pipelines initialization drm/radeon: Don't increment pipe_id in kgd_init_pipeline drm/amdkfd: Allow user to limit only queues per device drm/amdkfd: PQM handle queue creation fault

Merge tag 'drm-amdkfd-fixes-2015-01-26' of...
Merge tag 'drm-amdkfd-fixes-2015-01-26' of git://people.freedesktop.org/~gabbayo/linux into drm-fixes A couple of fixes for -rc7 in amdkfd: - Forgot to free resources when creation of queue has failed - Initialization of pipelines was incorrect (3 patches) In addition, The patch "drm/amdkfd: Allow user to limit only queues per device" is not a fix, but I would like to push it for 3.19 as it changes the ABI between amdkfd and userspace (by changing the module parameters). I would prefer *not* to support the two deprecated module parameters if I don't have too, as amdkfd hasn't been released yet. * tag 'drm-amdkfd-fixes-2015-01-26' of git://people.freedesktop.org/~gabbayo/linux: drm/amdkfd: Fix bug in call to init_pipelines() drm/amdkfd: Fix bug in pipelines initialization drm/radeon: Don't increment pipe_id in kgd_init_pipeline drm/amdkfd: Allow user to limit only queues per device drm/amdkfd: PQM handle queue creation fault
db9098ba · Dave Airlie · 22cbbcef · 9fa843e7 · db9098ba · db9098ba
Commit db9098ba authored Jan 27, 2015 by Dave Airlie
8 changed files
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
 #define MQD_SIZE_ALIGNED 768
@@ -169,9 +170,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->shared_resources = *gpu_resources;
 	/* calculate max size of mqds needed for queues */
-	size = max_num_of_processes *
+	size = max_num_of_queues_per_device *
-		max_num_of_queues_per_process *
+			kfd->device_info->mqd_size_aligned;
-		kfd->device_info->mqd_size_aligned;
 	/* add another 512KB for all other allocations on gart */
 	size += 512 * 1024;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -183,6 +183,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	mutex_lock(&dqm->lock);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
 	if (list_empty(&qpd->queues_list)) {
 		retval = allocate_vmid(dqm, qpd, q);
 		if (retval != 0) {
@@ -207,6 +214,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	list_add(&q->list, &qpd->queues_list);
 	dqm->queue_count++;
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	mutex_unlock(&dqm->lock);
 	return 0;
 }
@@ -326,6 +341,15 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 	if (list_empty(&qpd->queues_list))
 		deallocate_vmid(dqm, qpd, q);
 	dqm->queue_count--;
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -541,10 +565,14 @@ static int init_pipelines(struct device_queue_manager *dqm,
 	for (i = 0; i < pipes_num; i++) {
 		inx = i + first_pipe;
+		/*
+		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
+		 * space in GTT for pipelines we don't initialize
+		 */
 		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
 		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
 		/* = log2(bytes/4)-1 */
-		kfd2kgd->init_pipeline(dqm->dev->kgd, i,
+		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
 				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
 	}
@@ -560,7 +588,7 @@ static int init_scheduler(struct device_queue_manager *dqm)
 	pr_debug("kfd: In %s\n", __func__);
-	retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
+	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
 	if (retval != 0)
 		return retval;
@@ -752,6 +780,21 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	pr_debug("kfd: In func %s\n", __func__);
 	mutex_lock(&dqm->lock);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	list_add(&kq->list, &qpd->priv_queue_list);
 	dqm->queue_count++;
 	qpd->is_debug = true;
@@ -775,6 +818,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	dqm->queue_count--;
 	qpd->is_debug = false;
 	execute_queues_cpsch(dqm, false);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	mutex_unlock(&dqm->lock);
 }
@@ -793,6 +843,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	mutex_lock(&dqm->lock);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out;
+	}
 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
 	if (mqd == NULL) {
 		mutex_unlock(&dqm->lock);
@@ -810,6 +867,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		retval = execute_queues_cpsch(dqm, false);
 	}
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -930,6 +996,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	mutex_unlock(&dqm->lock);
 	return 0;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -130,6 +130,7 @@ struct device_queue_manager {
 	struct list_head	queues;
 	unsigned int		processes_count;
 	unsigned int		queue_count;
+	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
 	unsigned int		vmid_bitmap;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -50,15 +50,10 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
 	"Kernel cmdline parameter that defines the amdkfd scheduling policy");
-int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
-module_param(max_num_of_processes, int, 0444);
+module_param(max_num_of_queues_per_device, int, 0444);
-MODULE_PARM_DESC(max_num_of_processes,
+MODULE_PARM_DESC(max_num_of_queues_per_device,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
+	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
-int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
-module_param(max_num_of_queues_per_process, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_process,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
 bool kgd2kfd_init(unsigned interface_version,
 		  const struct kfd2kgd_calls *f2g,
@@ -100,16 +95,10 @@ static int __init kfd_module_init(void)
 	}
 	/* Verify module parameters */
-	if ((max_num_of_processes < 0) ||
+	if ((max_num_of_queues_per_device < 0) ||
-		(max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
+		(max_num_of_queues_per_device >
-		pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
+			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
-		return -1;
+		pr_err("kfd: max_num_of_queues_per_device must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
-	}
-	if ((max_num_of_queues_per_process < 0) ||
-		(max_num_of_queues_per_process >
-			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
-		pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
 		return -1;
 	}

--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -30,7 +30,7 @@ static DEFINE_MUTEX(pasid_mutex);
 int kfd_pasid_init(void)
 {
-	pasid_limit = max_num_of_processes;
+	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
 	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
 	if (!pasid_bitmap)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -52,20 +52,19 @@
 #define kfd_alloc_struct(ptr_to_struct)	\
 	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
-/* Kernel module parameter to specify maximum number of supported processes */
-extern int max_num_of_processes;
-#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
 #define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
 /*
- * Kernel module parameter to specify maximum number of supported queues
+ * Kernel module parameter to specify maximum number of supported queues per
- * per process
+ * device
 */
-extern int max_num_of_queues_per_process;
+extern int max_num_of_queues_per_device;
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
+	(KFD_MAX_NUM_OF_PROCESSES *			\
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 #define KFD_KERNEL_QUEUE_SIZE 2048

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -54,11 +54,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 	pr_debug("kfd: in %s\n", __func__);
 	found = find_first_zero_bit(pqm->queue_slot_bitmap,
-			max_num_of_queues_per_process);
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 	pr_debug("kfd: the new slot id %lu\n", found);
-	if (found >= max_num_of_queues_per_process) {
+	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
 		pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
 				pqm->process->pasid);
 		return -ENOMEM;
@@ -76,7 +76,7 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 	INIT_LIST_HEAD(&pqm->queues);
 	pqm->queue_slot_bitmap =
-			kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process,
+			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 					BITS_PER_BYTE), GFP_KERNEL);
 	if (pqm->queue_slot_bitmap == NULL)
 		return -ENOMEM;
@@ -203,6 +203,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		pqn->kq = NULL;
 		retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
 						&q->properties.vmid);
+		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
@@ -222,7 +223,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	}
 	if (retval != 0) {
-		pr_err("kfd: error dqm create queue\n");
+		pr_debug("Error dqm create queue\n");
 		goto err_create_queue;
 	}
@@ -241,7 +242,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 err_create_queue:
 	kfree(pqn);
 err_allocate_pqn:
+	/* check if queues list is empty unregister process from device */
 	clear_bit(*qid, pqm->queue_slot_bitmap);
+	if (list_empty(&pqm->queues))
+		dev->dqm->unregister_process(dev->dqm, &pdd->qpd);
 	return retval;
 }

--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -436,7 +436,7 @@ static int kgd_init_memory(struct kgd_dev *kgd)
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+	uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
 	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
 	lock_srbm(kgd, mec, pipe, 0, 0);