Commit 517fff22 authored by Philip Yang's avatar Philip Yang Committed by Alex Deucher

drm/amdkfd: Store queue cwsr area size to node properties

Use the queue eop buffer size, cwsr area size, ctl stack size
calculation from Thunk, store the value to KFD node properties.

Those will be used to validate queue eop buffer size, cwsr area size,
ctl stack size when creating KFD user compute queue.

Those will be exposed to user space via sysfs KFD node properties, to
remove the duplicate calculation code from Thunk.
Signed-off-by: default avatarPhilip Yang <Philip.Yang@amd.com>
Reviewed-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 47c0388b
......@@ -1295,6 +1295,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_
void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
......
......@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_svm.h"
void print_queue_properties(struct queue_properties *q)
......@@ -305,3 +306,77 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope
properties->ctx_save_restore_area_size);
return 0;
}
#define SGPR_SIZE_PER_CU 0x4000
#define LDS_SIZE_PER_CU 0x10000
#define HWREG_SIZE_PER_CU 0x1000
#define DEBUGGER_BYTES_ALIGN 64
#define DEBUGGER_BYTES_PER_WAVE 32
static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
{
u32 vgpr_size = 0x40000;
if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */
gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */
gfxv == 90008) /* GFX_VERSION_ARCTURUS */
vgpr_size = 0x80000;
else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */
gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */
gfxv == 120000 || /* GFX_VERSION_GFX1200 */
gfxv == 120001) /* GFX_VERSION_GFX1201 */
vgpr_size = 0x60000;
return vgpr_size;
}
#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \
(kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
{
struct kfd_node_properties *props = &dev->node_props;
u32 gfxv = props->gfx_target_version;
u32 ctl_stack_size;
u32 wg_data_size;
u32 wave_num;
u32 cu_num;
if (gfxv < 80001) /* GFX_VERSION_CARRIZO */
return;
cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */
min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
: cu_num * 32;
wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE);
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
PAGE_SIZE);
if ((gfxv / 10000 * 10000) == 100000) {
/* HW design limits control stack size to 0x7000.
* This is insufficient for theoretical PM4 cases
* but sufficient for AQL, limited by SPI events.
*/
ctl_stack_size = min(ctl_stack_size, 0x7000);
}
props->ctl_stack_size = ctl_stack_size;
props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
props->cwsr_size = ctl_stack_size + wg_data_size;
if (gfxv == 80002) /* GFX_VERSION_TONGA */
props->eop_buffer_size = 0x8000;
else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */
props->eop_buffer_size = 4096;
else if (gfxv >= 80000)
props->eop_buffer_size = 4096;
}
......@@ -2120,6 +2120,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
kfd_queue_ctx_save_restore_size(dev);
kfd_debug_print_topology();
kfd_notify_gpu_change(gpu_id, 1);
......
......@@ -74,6 +74,10 @@ struct kfd_node_properties {
uint32_t num_sdma_xgmi_engines;
uint32_t num_sdma_queues_per_engine;
uint32_t num_cp_queues;
uint32_t cwsr_size;
uint32_t ctl_stack_size;
uint32_t eop_buffer_size;
uint32_t debug_memory_size;
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment