Commit b8cbab04 authored by Oded Gabbay

drm/amdkfd: Allow user to limit only queues per device

This patch replaces the two current amdkfd module parameters with a new one.

The current parameters that are being replaced are:

- Maximum number of HSA processes
- Maximum number of queues per process

The new parameter that replaces them is called "Maximum queues per device".

This replacement achieves two goals:

- Allows the user to create as many HSA processes as they want (up to
  a maximum of 512 HSA processes on Kaveri).

- Removes the per-process limit on the number of queues. E.g. the user can now
  have some processes with only one queue and others with hundreds of queues,
  whereas previously no process could have more than 128 queues (by default).

The default value of the new parameter is 4096 (32 * 128, the product of the
old parameters' defaults), so the default case requires almost no additional
GART memory. As a reminder, this number of queues requires a little under 4 MB
of GART memory.
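As a sanity check on that estimate, here is a small standalone C program
(illustrative only; the constants are taken from the diff below, where
MQD_SIZE_ALIGNED is 768 bytes per queue and the driver reserves another 512KB
of GART for everything else):

#include <stdio.h>

/* Constants as they appear in the diff below. */
#define MQD_SIZE_ALIGNED 768			/* bytes of GART per MQD */
#define MAX_QUEUES_PER_DEVICE_DEFAULT 4096	/* new module param default */
#define OTHER_GART_ALLOCS (512 * 1024)		/* "another 512KB" */

int main(void)
{
	unsigned long size = (unsigned long)MAX_QUEUES_PER_DEVICE_DEFAULT *
			     MQD_SIZE_ALIGNED + OTHER_GART_ALLOCS;

	/* 4096 * 768 + 524288 = 3670016 bytes = 3.5 MiB */
	printf("%lu bytes (%.1f MiB)\n", size, size / (1024.0 * 1024.0));
	return 0;
}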

v2:
In addition, this patch adds a dedicated queue-accounting counter to the DQM
structure. This is needed because the existing counter only counts active
queues, which would let the user create more queues than the
max_num_of_queues_per_device module parameter allows.

However, the existing counter is still needed when building the runlist
packet, so the solution is a dedicated counter for this accounting.
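To make the distinction concrete, here is a minimal userspace sketch of the
two-counter scheme. The field names mirror the real DQM structure, but
create_queue() is a hypothetical helper that condenses the checks this patch
adds (locking and the real queue setup are omitted):

#include <errno.h>
#include <stdio.h>

/* Simplified stand-ins for the two DQM counters touched by this patch. */
struct dqm_counters {
	unsigned int queue_count;	/* active queues only; sizes the runlist */
	unsigned int total_queue_count;	/* every created queue, active or not */
};

static int max_num_of_queues_per_device = 4096;

/* Hypothetical helper condensing the admission check added by this patch. */
static int create_queue(struct dqm_counters *dqm, int active)
{
	/* The limit applies to created queues, not just active ones. */
	if (dqm->total_queue_count >= (unsigned int)max_num_of_queues_per_device)
		return -EPERM;

	dqm->total_queue_count++;	/* unconditional accounting */
	if (active)
		dqm->queue_count++;	/* only active queues feed the runlist */
	return 0;
}

int main(void)
{
	struct dqm_counters dqm = { 0, 0 };

	create_queue(&dqm, 1);	/* one active queue */
	create_queue(&dqm, 0);	/* one inactive queue: still accounted */
	printf("active=%u total=%u\n", dqm.queue_count, dqm.total_queue_count);
	return 0;
}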
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
Reviewed-by: Ben Goz <ben.goz@amd.com>
parent f046bfdf
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"

 #define MQD_SIZE_ALIGNED 768
@@ -169,9 +170,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->shared_resources = *gpu_resources;

 	/* calculate max size of mqds needed for queues */
-	size = max_num_of_processes *
-			max_num_of_queues_per_process *
-			kfd->device_info->mqd_size_aligned;
+	size = max_num_of_queues_per_device *
+			kfd->device_info->mqd_size_aligned;

 	/* add another 512KB for all other allocations on gart */
 	size += 512 * 1024;
...
@@ -183,6 +183,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	mutex_lock(&dqm->lock);

+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
 	if (list_empty(&qpd->queues_list)) {
 		retval = allocate_vmid(dqm, qpd, q);
 		if (retval != 0) {
@@ -207,6 +214,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	list_add(&q->list, &qpd->queues_list);
 	dqm->queue_count++;

+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	mutex_unlock(&dqm->lock);
 	return 0;
 }
@@ -326,6 +341,15 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 	if (list_empty(&qpd->queues_list))
 		deallocate_vmid(dqm, qpd, q);
 	dqm->queue_count--;
+
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -752,6 +776,21 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	pr_debug("kfd: In func %s\n", __func__);

 	mutex_lock(&dqm->lock);
+
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	list_add(&kq->list, &qpd->priv_queue_list);
 	dqm->queue_count++;
 	qpd->is_debug = true;
@@ -775,6 +814,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	dqm->queue_count--;
 	qpd->is_debug = false;
 	execute_queues_cpsch(dqm, false);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type.
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	mutex_unlock(&dqm->lock);
 }
@@ -793,6 +839,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	mutex_lock(&dqm->lock);

+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out;
+	}
+
 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
 	if (mqd == NULL) {
 		mutex_unlock(&dqm->lock);
@@ -810,6 +863,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		retval = execute_queues_cpsch(dqm, false);
 	}

+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -930,6 +992,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	mutex_unlock(&dqm->lock);
 	return 0;
...
@@ -130,6 +130,7 @@ struct device_queue_manager {
 	struct list_head	queues;
 	unsigned int		processes_count;
 	unsigned int		queue_count;
+	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
 	unsigned int		vmid_bitmap;
...
@@ -50,15 +50,10 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
 	"Kernel cmdline parameter that defines the amdkfd scheduling policy");

-int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
-module_param(max_num_of_processes, int, 0444);
-MODULE_PARM_DESC(max_num_of_processes,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
-
-int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
-module_param(max_num_of_queues_per_process, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_process,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");

 bool kgd2kfd_init(unsigned interface_version,
 		const struct kfd2kgd_calls *f2g,
@@ -100,16 +95,10 @@ static int __init kfd_module_init(void)
 	}

 	/* Verify module parameters */
-	if ((max_num_of_processes < 0) ||
-		(max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
-		pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
-		return -1;
-	}
-
-	if ((max_num_of_queues_per_process < 0) ||
-		(max_num_of_queues_per_process >
-			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
-		pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
+	if ((max_num_of_queues_per_device < 0) ||
+		(max_num_of_queues_per_device >
+			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+		pr_err("kfd: max_num_of_queues_per_device must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
 		return -1;
 	}
...
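A note on usage: because the parameter is registered with mode 0444, it is
read-only at runtime and would be set at load time, e.g.
"modprobe amdkfd max_num_of_queues_per_device=8192" (assuming the driver is
built as the amdkfd module), or on the kernel command line as
"amdkfd.max_num_of_queues_per_device=8192"; 8192 is just an example value.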
@@ -30,7 +30,7 @@ static DEFINE_MUTEX(pasid_mutex);

 int kfd_pasid_init(void)
 {
-	pasid_limit = max_num_of_processes;
+	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;

 	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
 	if (!pasid_bitmap)
...
@@ -52,20 +52,19 @@
 #define kfd_alloc_struct(ptr_to_struct)	\
 	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))

-/* Kernel module parameter to specify maximum number of supported processes */
-extern int max_num_of_processes;
-
-#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
 #define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

 /*
- * Kernel module parameter to specify maximum number of supported queues
- * per process
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
 */
-extern int max_num_of_queues_per_process;
+extern int max_num_of_queues_per_device;

-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
+	(KFD_MAX_NUM_OF_PROCESSES *			\
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

 #define KFD_KERNEL_QUEUE_SIZE 2048
...
@@ -54,11 +54,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 	pr_debug("kfd: in %s\n", __func__);

 	found = find_first_zero_bit(pqm->queue_slot_bitmap,
-			max_num_of_queues_per_process);
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

 	pr_debug("kfd: the new slot id %lu\n", found);

-	if (found >= max_num_of_queues_per_process) {
+	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
 		pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
 				pqm->process->pasid);
 		return -ENOMEM;
@@ -76,7 +76,7 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 	INIT_LIST_HEAD(&pqm->queues);
 	pqm->queue_slot_bitmap =
-			kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process,
+			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 					BITS_PER_BYTE), GFP_KERNEL);
 	if (pqm->queue_slot_bitmap == NULL)
 		return -ENOMEM;
@@ -203,6 +203,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		pqn->kq = NULL;
 		retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
 						&q->properties.vmid);
+		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
@@ -222,7 +223,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	}

 	if (retval != 0) {
-		pr_err("kfd: error dqm create queue\n");
+		pr_debug("Error dqm create queue\n");
 		goto err_create_queue;
 	}
...