Commit cc009e61 authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Add KFD support for soc21 v3

Add initial support for soc21 in KFD compute
driver (Mukul)
- Add new definition for soc21 device.
- Add new file for amdgpu-kfd interface for GFX11 family.
- Add new file for queue management, interrupt handling,
  mqd management for GFX11 family in KFD driver.
- Related changes/updates for soc21 device in
  KFD driver.
- Repurpose last 2 entries of SDMA MQD for driver use.

v2: Add an optional argument into update queue operation (Mukul)

v3: Switch to ip version check, replace kgd_dev with
    amdgpu_device (Hawking)
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3b9186fa
@@ -215,7 +215,8 @@ amdgpu-y += \
 	amdgpu_amdkfd_arcturus.o \
 	amdgpu_amdkfd_aldebaran.o \
 	amdgpu_amdkfd_gfx_v10.o \
-	amdgpu_amdkfd_gfx_v10_3.o
+	amdgpu_amdkfd_gfx_v10_3.o \
+	amdgpu_amdkfd_gfx_v11.o
 
 ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
 amdgpu-y += amdgpu_amdkfd_gfx_v7.o
......
@@ -100,7 +100,18 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 	 * The first num_doorbells are used by amdgpu.
 	 * amdkfd takes whatever's left in the aperture.
 	 */
-	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
+	if (adev->enable_mes) {
+		/*
+		 * With MES enabled, we only need to initialize
+		 * the base address. The size and offset are
+		 * not initialized as AMDGPU manages the whole
+		 * doorbell space.
+		 */
+		*aperture_base = adev->doorbell.base;
+		*aperture_size = 0;
+		*start_offset = 0;
+	} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
+						sizeof(u32)) {
 		*aperture_base = adev->doorbell.base;
 		*aperture_size = adev->doorbell.size;
 		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
@@ -128,7 +139,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 						AMDGPU_GMC_HOLE_START),
 			.drm_render_minor = adev_to_drm(adev)->render->index,
 			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
-
+			.enable_mes = adev->enable_mes,
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
......
This diff is collapsed.
@@ -37,6 +37,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
 		$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
 		$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue.o \
 		$(AMDKFD_PATH)/kfd_packet_manager.o \
 		$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
@@ -47,10 +48,12 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
 		$(AMDKFD_PATH)/kfd_interrupt.o \
 		$(AMDKFD_PATH)/kfd_events.o \
 		$(AMDKFD_PATH)/cik_event_interrupt.o \
 		$(AMDKFD_PATH)/kfd_int_process_v9.o \
+		$(AMDKFD_PATH)/kfd_int_process_v11.o \
 		$(AMDKFD_PATH)/kfd_smi_events.o \
 		$(AMDKFD_PATH)/kfd_crat.o
......
@@ -1315,6 +1315,8 @@ static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
 	return 1;
 }
 
+#define KFD_MAX_CACHE_TYPES 6
+
 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 						struct kfd_gpu_cache_info *pcache_info)
 {
@@ -1408,6 +1410,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
 			int *num_of_entries)
 {
 	struct kfd_gpu_cache_info *pcache_info;
+	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
 	int num_of_cache_types = 0;
 	int i, j, k;
 	int ct = 0;
@@ -1516,6 +1519,11 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
 		pcache_info = yellow_carp_cache_info;
 		num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
 		break;
+	case IP_VERSION(11, 0, 0):
+		pcache_info = cache_info;
+		num_of_cache_types =
+			kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
+		break;
 	default:
 		return -EINVAL;
 	}
......
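Note: unlike the per-ASIC static tables above (e.g. yellow_carp_cache_info), the IP_VERSION(11, 0, 0) branch fills a scratch table at runtime and uses the returned count. A toy analogue of that fill-and-count pattern, with the kfd_gpu_cache_info layout assumed from kfd_crat.h and invented values, not real GFX11 cache sizes:

    #include <stdio.h>
    #include <stdint.h>

    #define KFD_MAX_CACHE_TYPES 6

    struct kfd_gpu_cache_info {
        uint32_t cache_size;    /* KiB */
        uint32_t cache_level;
        uint32_t flags;         /* CRAT_CACHE_FLAGS_* in the real driver */
        uint32_t num_cu_shared;
    };

    /* Stand-in for kfd_fill_gpu_cache_info_from_gfx_config(): write
     * entries into the caller's scratch table and return how many. */
    static int fill_cache_info_example(struct kfd_gpu_cache_info *t)
    {
        int n = 0;

        t[n++] = (struct kfd_gpu_cache_info){ 16, 1, 0, 1 };   /* made-up L1 */
        t[n++] = (struct kfd_gpu_cache_info){ 4096, 2, 0, 0 }; /* made-up L2 */
        return n; /* becomes num_of_cache_types */
    }

    int main(void)
    {
        struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
        int i, n = fill_cache_info_example(cache_info);

        for (i = 0; i < n; i++)
            printf("L%u: %u KiB\n", (unsigned)cache_info[i].cache_level,
                   (unsigned)cache_info[i].cache_size);
        return 0;
    }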
@@ -53,6 +53,7 @@ extern const struct kfd2kgd_calls arcturus_kfd2kgd;
 extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
 extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
 extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
 
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
 				unsigned int chunk_size);
@@ -60,7 +61,7 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
 
 static int kfd_resume(struct kfd_dev *kfd);
 
-static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
+static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
 {
 	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
@@ -85,6 +86,7 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
 	case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
 	case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
 	case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+	case IP_VERSION(6, 0, 0):
 		kfd->device_info.num_sdma_queues_per_engine = 8;
 		break;
 	default:
@@ -93,6 +95,17 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
 			sdma_version);
 		kfd->device_info.num_sdma_queues_per_engine = 8;
 	}
+
+	switch (sdma_version) {
+	case IP_VERSION(6, 0, 0):
+		/* Reserve 1 for paging and 1 for gfx */
+		kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+		/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
+		kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
+		break;
+	default:
+		break;
+	}
 }
 
 static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
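Note: the bitmap comment above implies an engine-interleaved layout, i.e. bit index = queue * num_engines + engine. A minimal userspace sketch decoding the 0xF value, assuming 2 SDMA engines for SDMA 6.0.0 (the engine count is not stated in this hunk); it reports queues 0 and 1 as reserved on both engines, consistent with num_reserved_sdma_queues_per_engine = 2:

    #include <stdio.h>
    #include <stdint.h>

    /* Bit layout per the comment above: engines interleave first. */
    static int sdma_queue_is_reserved(uint64_t bitmap, unsigned int num_engines,
                                      unsigned int engine, unsigned int queue)
    {
        return !!(bitmap & (1ULL << (queue * num_engines + engine)));
    }

    int main(void)
    {
        const uint64_t bitmap = 0xFULL;     /* reserved_sdma_queues_bitmap above */
        const unsigned int num_engines = 2; /* assumed, not from this diff */
        unsigned int e, q;

        for (e = 0; e < num_engines; e++)
            for (q = 0; q < 4; q++)
                printf("engine %u queue %u: %s\n", e, q,
                       sdma_queue_is_reserved(bitmap, num_engines, e, q)
                       ? "reserved" : "user");
        return 0;
    }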
@@ -121,6 +134,9 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
 	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
 		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
 		break;
+	case IP_VERSION(11, 0, 0):
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+		break;
 	default:
 		dev_warn(kfd_device, "v9 event interrupt handler is set due to "
 			"mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
@@ -145,7 +161,7 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
 		kfd->device_info.supports_cwsr = true;
 
-		kfd_device_info_set_sdma_queue_num(kfd);
+		kfd_device_info_set_sdma_info(kfd);
 
 		kfd_device_info_set_event_interrupt_class(kfd);
@@ -346,6 +362,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 			if (!vf)
 				f2g = &gfx_v10_3_kfd2kgd;
 			break;
+		case IP_VERSION(11, 0, 0):
+			gfx_target_version = 110000;
+			f2g = &gfx_v11_kfd2kgd;
+			break;
 		default:
 			break;
 		}
......
@@ -35,6 +35,9 @@
 
 #define VMID_NUM 16
 
+#define KFD_MES_PROCESS_QUANTUM		100000
+#define KFD_MES_GANG_QUANTUM		10000
+
 struct device_process_node {
 	struct qcm_process_device *qpd;
 	struct list_head list;
@@ -267,6 +270,8 @@ void device_queue_manager_init_v9(
 		struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_v10_navi10(
 		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v11(
+		struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd);
 unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
......
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_device_queue_manager.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "soc21_enum.h"
static int update_qpd_v11(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->update_qpd = update_qpd_v11;
asic_ops->init_sdma_vm = init_sdma_vm_v11;
asic_ops->mqd_manager_init = mqd_manager_init_v11;
}
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
{
uint32_t shared_base = pdd->lds_base >> 48;
uint32_t private_base = pdd->scratch_base >> 48;
return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
private_base;
}
static int update_qpd_v11(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
return 0;
}
static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
/* Not needed on SDMAv4 onwards any more */
q->properties.sdma_vm_addr = 0;
}
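Note: compute_sh_mem_bases_64bit() above packs bits 63:48 of the LDS and scratch apertures into one 32-bit SH_MEM_BASES value. A standalone sketch of the same packing, assuming SH_MEM_BASES__SHARED_BASE__SHIFT is 16 as on earlier GC generations; the aperture values are made up:

    #include <stdio.h>
    #include <stdint.h>

    #define SHARED_BASE_SHIFT 16 /* assumed SH_MEM_BASES__SHARED_BASE__SHIFT */

    int main(void)
    {
        uint64_t lds_base     = 0x7F00ULL << 48; /* hypothetical LDS aperture */
        uint64_t scratch_base = 0x7F01ULL << 48; /* hypothetical scratch aperture */

        uint32_t shared_base  = lds_base >> 48;
        uint32_t private_base = scratch_base >> 48;
        uint32_t sh_mem_bases = (shared_base << SHARED_BASE_SHIFT) | private_base;

        printf("SH_MEM_BASES = 0x%08X\n", sh_mem_bases); /* prints 0x7F007F01 */
        return 0;
    }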
@@ -49,9 +49,13 @@
 /* # of doorbell bytes allocated for each process. */
 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
 {
-	return roundup(kfd->device_info.doorbell_size *
-			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
-			PAGE_SIZE);
+	if (!kfd->shared_resources.enable_mes)
+		return roundup(kfd->device_info.doorbell_size *
+				KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+				PAGE_SIZE);
+	else
+		return amdgpu_mes_doorbell_process_slice(
+				(struct amdgpu_device *)kfd->adev);
 }
 
 /* Doorbell calculations for device init. */
@@ -61,6 +65,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
 	size_t doorbell_aperture_size;
 	size_t doorbell_process_limit;
 
+	/*
+	 * With MES enabled, just set the doorbell base as it is needed
+	 * to calculate doorbell physical address.
+	 */
+	if (kfd->shared_resources.enable_mes) {
+		kfd->doorbell_base =
+			kfd->shared_resources.doorbell_physical_address;
+		return 0;
+	}
+
 	/*
 	 * We start with calculations in bytes because the input data might
 	 * only be byte-aligned.
@@ -237,10 +251,16 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
 	 * the process's doorbells. The offset returned is in dword
 	 * units regardless of the ASIC-dependent doorbell size.
 	 */
-	return kfd->doorbell_base_dw_offset +
-		pdd->doorbell_index
-		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
-		doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
+	if (!kfd->shared_resources.enable_mes)
+		return kfd->doorbell_base_dw_offset +
+			pdd->doorbell_index
+			* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+			doorbell_id *
+			kfd->device_info.doorbell_size / sizeof(u32);
+	else
+		return amdgpu_mes_get_doorbell_dw_offset_in_bar(
+				(struct amdgpu_device *)kfd->adev,
+				pdd->doorbell_index, doorbell_id);
 }
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
@@ -261,8 +281,16 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
 
 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
 {
-	int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
-				GFP_KERNEL);
+	int r = 0;
+
+	if (!kfd->shared_resources.enable_mes)
+		r = ida_simple_get(&kfd->doorbell_ida, 1,
+				kfd->max_doorbell_slices, GFP_KERNEL);
+	else
+		r = amdgpu_mes_alloc_process_doorbells(
+				(struct amdgpu_device *)kfd->adev,
+				doorbell_index);
+
 	if (r > 0)
 		*doorbell_index = r;
@@ -271,6 +299,12 @@ int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_inde
 
 void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
 {
-	if (doorbell_index)
-		ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+	if (doorbell_index) {
+		if (!kfd->shared_resources.enable_mes)
+			ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+		else
+			amdgpu_mes_free_process_doorbells(
+					(struct amdgpu_device *)kfd->adev,
+					doorbell_index);
+	}
 }
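Note: a worked example of the non-MES doorbell math above, under assumed constants that do not appear in this diff (8-byte doorbells, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS = 1024, 4 KiB pages):

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        const size_t doorbell_size = 8;    /* assumed bytes per doorbell */
        const size_t max_queues    = 1024; /* assumed per-process queue cap */
        const size_t page_size     = 4096;

        /* kfd_doorbell_process_slice(), non-MES path: round up to a page */
        size_t slice = (doorbell_size * max_queues + page_size - 1)
                        / page_size * page_size; /* 8192 bytes */

        /* kfd_get_doorbell_dw_offset_in_bar(), non-MES path */
        size_t base_dw        = 0; /* hypothetical doorbell_base_dw_offset */
        size_t doorbell_index = 3; /* which process slice */
        size_t doorbell_id    = 5; /* which doorbell inside the slice */

        size_t off_dw = base_dw + doorbell_index * slice / 4
                        + doorbell_id * doorbell_size / 4;

        /* prints: slice = 8192 bytes, dword offset = 6154 */
        printf("slice = %zu bytes, dword offset = %zu\n", slice, off_dw);
        return 0;
    }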
This diff is collapsed.
@@ -90,7 +90,7 @@ enum SQ_INTERRUPT_ERROR_TYPE {
 #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
 #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
 
-static void event_interrupt_poison_consumption(struct kfd_dev *dev,
+static void event_interrupt_poison_consumption_v9(struct kfd_dev *dev,
 				uint16_t pasid, uint16_t client_id)
 {
 	int old_poison, ret = -EINVAL;
@@ -316,7 +316,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
 					sq_intr_err);
 				if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
 				    sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
-					event_interrupt_poison_consumption(dev, pasid, client_id);
+					event_interrupt_poison_consumption_v9(dev, pasid, client_id);
 					return;
 				}
 				break;
@@ -337,7 +337,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
 		if (source_id == SOC15_INTSRC_SDMA_TRAP) {
 			kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
 		} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
-			event_interrupt_poison_consumption(dev, pasid, client_id);
+			event_interrupt_poison_consumption_v9(dev, pasid, client_id);
 			return;
 		}
 	} else if (client_id == SOC15_IH_CLIENTID_VMC ||
@@ -348,7 +348,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
 
 		if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
 		    amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
-			event_interrupt_poison_consumption(dev, pasid, client_id);
+			event_interrupt_poison_consumption_v9(dev, pasid, client_id);
 			return;
 		}
......
@@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 {
 	struct kfd_cu_info cu_info;
 	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
-	int i, se, sh, cu;
+	int i, se, sh, cu, cu_bitmap_sh_mul;
 
 	amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
@@ -120,6 +120,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 			cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
 		return;
 	}
 
+	cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
+			    KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1;
+
 	/* Count active CUs per SH.
 	 *
 	 * Some CUs in an SH may be disabled. HW expects disabled CUs to be
@@ -129,10 +133,12 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 	 * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
 	 *
 	 * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+	 * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
 	 */
 	for (se = 0; se < cu_info.num_shader_engines; se++)
 		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
-			cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
+			cu_per_sh[se][sh] = hweight32(
+				cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
 
 	/* Symmetrically map cu_mask to all SEs & SHs:
 	 * se_mask programs up to 2 SH in the upper and lower 16 bits.
......
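Note: the cu_bitmap indexing change above only matters on parts with more than 4 shader engines, where SEs past the fourth wrap into later bitmap columns; on GFX11 each wrap now advances the SH index by cu_bitmap_sh_mul = 2 instead of 1. A sketch enumerating the mapping for a hypothetical 6-SE, 2-SH-per-SE configuration:

    #include <stdio.h>

    int main(void)
    {
        int num_se = 6, num_sh = 2; /* hypothetical GFX11-like config */
        int cu_bitmap_sh_mul = 2;   /* GFX11 value from the hunk above */
        int se, sh;

        for (se = 0; se < num_se; se++)
            for (sh = 0; sh < num_sh; sh++)
                printf("se %d sh %d -> cu_bitmap[%d][%d]\n",
                       se, sh, se % 4, sh + (se / 4) * cu_bitmap_sh_mul);
        return 0;
    }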
This diff is collapsed.
@@ -228,6 +228,8 @@ struct kfd_device_info {
 	bool needs_pci_atomics;
 	uint32_t no_atomic_fw_version;
 	unsigned int num_sdma_queues_per_engine;
+	unsigned int num_reserved_sdma_queues_per_engine;
+	uint64_t reserved_sdma_queues_bitmap;
 };
 
 unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
@@ -564,6 +566,10 @@ struct queue {
 	/* procfs */
 	struct kobject kobj;
+
+	void *gang_ctx_bo;
+	uint64_t gang_ctx_gpu_addr;
+	void *gang_ctx_cpu_ptr;
 };
 
 enum KFD_MQD_TYPE {
@@ -779,6 +785,10 @@ struct kfd_process_device {
 	 * checkpointed node to refer to this device.
 	 */
 	uint32_t user_gpu_id;
+
+	void *proc_ctx_bo;
+	uint64_t proc_ctx_gpu_addr;
+	void *proc_ctx_cpu_ptr;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1170,6 +1180,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev);
 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev);
 
 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -1292,6 +1304,7 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
 extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
 extern const struct kfd_device_global_init_class device_global_init_class_cik;
......
@@ -1041,6 +1041,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 			kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
 
+		if (pdd->dev->shared_resources.enable_mes)
+			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+						   pdd->proc_ctx_bo);
 		/*
 		 * before destroying pdd, make sure to report availability
 		 * for auto suspend
@@ -1484,6 +1487,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p)
 {
 	struct kfd_process_device *pdd = NULL;
+	int retval = 0;
 
 	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
 		return NULL;
@@ -1516,6 +1520,21 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
+
+	if (dev->shared_resources.enable_mes) {
+		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+						AMDGPU_MES_PROC_CTX_SIZE,
+						&pdd->proc_ctx_bo,
+						&pdd->proc_ctx_gpu_addr,
+						&pdd->proc_ctx_cpu_ptr,
+						false);
+		if (retval) {
+			pr_err("failed to allocate process context bo\n");
+			goto err_free_pdd;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	p->pdds[p->n_pdds++] = pdd;
 
 	/* Init idr used for memory handle translation */
......
@@ -198,8 +198,26 @@ static int init_user_queue(struct process_queue_manager *pqm,
 	(*q)->device = dev;
 	(*q)->process = pqm->process;
 
+	if (dev->shared_resources.enable_mes) {
+		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+						AMDGPU_MES_GANG_CTX_SIZE,
+						&(*q)->gang_ctx_bo,
+						&(*q)->gang_ctx_gpu_addr,
+						&(*q)->gang_ctx_cpu_ptr,
+						false);
+		if (retval) {
+			pr_err("failed to allocate gang context bo\n");
+			goto cleanup;
+		}
+		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+	}
+
 	pr_debug("PQM After init queue");
+	return 0;
 
-	return 0;
+cleanup:
+	if (dev->shared_resources.enable_mes)
+		uninit_queue(*q);
+	return retval;
 }
@@ -418,6 +436,9 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 			pdd->qpd.num_gws = 0;
 		}
 
+		if (dev->shared_resources.enable_mes)
+			amdgpu_amdkfd_free_gtt_mem(dev->adev,
+						   pqn->q->gang_ctx_bo);
 		uninit_queue(pqn->q);
 	}
......
@@ -1412,7 +1412,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_xgmi_engines =
 				kfd_get_num_xgmi_sdma_engines(gpu);
 	dev->node_props.num_sdma_queues_per_engine =
-				gpu->device_info.num_sdma_queues_per_engine;
+				gpu->device_info.num_sdma_queues_per_engine -
+				gpu->device_info.num_reserved_sdma_queues_per_engine;
 	dev->node_props.num_gws = (dev->gpu->gws &&
 		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		dev->gpu->adev->gds.gws_size : 0;
......
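Note: with the soc21 values set earlier in this patch (8 SDMA queues per engine, 2 of them reserved: 1 for paging and 1 for gfx), topology now reports 8 - 2 = 6 user-mode SDMA queues per engine; devices that leave num_reserved_sdma_queues_per_engine at 0 are unaffected.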
@@ -31,7 +31,8 @@
 #define SOC15_INTSRC_VMC_FAULT		0
 #define SOC15_INTSRC_SDMA_TRAP		224
 #define SOC15_INTSRC_SDMA_ECC		220
+#define SOC21_INTSRC_SDMA_TRAP		49
+#define SOC21_INTSRC_SDMA_ECC		62
 
 #define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
 #define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
......
@@ -152,6 +152,7 @@ struct kgd2kfd_shared_resources {
 	/* Minor device number of the render node */
 	int drm_render_minor;
 
+	bool enable_mes;
 };
 
 struct tile_config {
......