Commit 7a1c5c67, authored by Jonathan Kim, committed by Alex Deucher

drm/amdkfd: enable cooperative groups for gfx11

MES can concurrently schedule queues on the device that require
exclusive device access if marked exclusively_scheduled without the
requirement of GWS.  Similar to the F32 HWS, MES will manage
quality of service for these queues.
Use this for cooperative groups since cooperative groups are device
occupancy limited.

Since some GFX11 devices can only be debugged with partial CUs, do not
allow the debugging of cooperative groups on these devices as the CU
occupancy limit will change on attach.

In addition, zero initialize the MES add queue submission vector for MES
initialization tests as we do not want these to be cooperative
dispatches.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 83f24a8f
...@@ -642,6 +642,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, ...@@ -642,6 +642,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
unsigned long flags; unsigned long flags;
int r; int r;
memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
/* allocate the mes queue buffer */ /* allocate the mes queue buffer */
queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
if (!queue) { if (!queue) {
......
...@@ -224,6 +224,7 @@ struct mes_add_queue_input { ...@@ -224,6 +224,7 @@ struct mes_add_queue_input {
uint32_t is_kfd_process; uint32_t is_kfd_process;
uint32_t is_aql_queue; uint32_t is_aql_queue;
uint32_t queue_size; uint32_t queue_size;
uint32_t exclusively_scheduled;
}; };
struct mes_remove_queue_input { struct mes_remove_queue_input {
......
...@@ -214,6 +214,8 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, ...@@ -214,6 +214,8 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
mes_add_queue_pkt.gds_size = input->queue_size; mes_add_queue_pkt.gds_size = input->queue_size;
mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;
return mes_v11_0_submit_pkt_and_poll_completion(mes, return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt), &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status)); offsetof(union MESAPI__ADD_QUEUE, api_status));
......
...@@ -1487,7 +1487,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, ...@@ -1487,7 +1487,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
goto out_unlock; goto out_unlock;
} }
if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) { if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
kfd_dbg_has_cwsr_workaround(dev))) {
retval = -EBUSY; retval = -EBUSY;
goto out_unlock; goto out_unlock;
} }
......
...@@ -753,7 +753,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd, ...@@ -753,7 +753,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
if (!KFD_IS_SOC15(pdd->dev)) if (!KFD_IS_SOC15(pdd->dev))
return -ENODEV; return -ENODEV;
if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws) if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
kfd_dbg_has_cwsr_workaround(pdd->dev)))
return -EBUSY; return -EBUSY;
} }
......
...@@ -508,6 +508,7 @@ static int kfd_gws_init(struct kfd_node *node) ...@@ -508,6 +508,7 @@ static int kfd_gws_init(struct kfd_node *node)
{ {
int ret = 0; int ret = 0;
struct kfd_dev *kfd = node->kfd; struct kfd_dev *kfd = node->kfd;
uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return 0; return 0;
...@@ -524,7 +525,10 @@ static int kfd_gws_init(struct kfd_node *node) ...@@ -524,7 +525,10 @@ static int kfd_gws_init(struct kfd_node *node)
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b)))) && kfd->mec2_fw_version >= 0x6b) ||
(KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
&& mes_rev >= 68))))
ret = amdgpu_amdkfd_alloc_gws(node->adev, ret = amdgpu_amdkfd_alloc_gws(node->adev,
node->adev->gds.gws_size, &node->gws); node->adev->gds.gws_size, &node->gws);
......
...@@ -237,10 +237,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, ...@@ -237,10 +237,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
} }
queue_input.queue_type = (uint32_t)queue_type; queue_input.queue_type = (uint32_t)queue_type;
if (q->gws) { queue_input.exclusively_scheduled = q->properties.is_gws;
queue_input.gws_base = 0;
queue_input.gws_size = qpd->num_gws;
}
amdgpu_mes_lock(&adev->mes); amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
...@@ -250,7 +247,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, ...@@ -250,7 +247,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
q->properties.doorbell_off); q->properties.doorbell_off);
pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
kfd_hws_hang(dqm); kfd_hws_hang(dqm);
} }
return r; return r;
} }
......
...@@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, ...@@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0) if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL; return -EINVAL;
if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
if (gws) if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem); gws, &mem);
...@@ -136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, ...@@ -136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
} else { } else {
/* /*
* Intentionally set GWS to a non-NULL value * Intentionally set GWS to a non-NULL value
* for GFX 9.4.3. * for devices that do not use GWS for global wave
* synchronization but require the formality
* of setting GWS for cooperative groups.
*/ */
pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL; pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
} }
...@@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm) ...@@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q && pqn->q->gws && if (pqn->q && pqn->q->gws &&
KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
!pqn->q->device->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws); pqn->q->gws);
kfd_procfs_del_queue(pqn->q); kfd_procfs_del_queue(pqn->q);
...@@ -455,7 +458,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) ...@@ -455,7 +458,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
} }
if (pqn->q->gws) { if (pqn->q->gws) {
if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
!dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process( amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info, pqm->process->kgd_process_info,
pqn->q->gws); pqn->q->gws);
......
...@@ -275,7 +275,9 @@ union MESAPI__ADD_QUEUE { ...@@ -275,7 +275,9 @@ union MESAPI__ADD_QUEUE {
uint32_t trap_en : 1; uint32_t trap_en : 1;
uint32_t is_aql_queue : 1; uint32_t is_aql_queue : 1;
uint32_t skip_process_ctx_clear : 1; uint32_t skip_process_ctx_clear : 1;
uint32_t reserved : 19; uint32_t map_legacy_kq : 1;
uint32_t exclusively_scheduled : 1;
uint32_t reserved : 17;
}; };
struct MES_API_STATUS api_status; struct MES_API_STATUS api_status;
uint64_t tma_addr; uint64_t tma_addr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment