Commit 315e29ec authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Move local_mem_info to kfd_node

We need to track memory usage on a per partition basis. To do
that, store the local memory information in KFD node instead
of kfd device.

v2: squash in fix ("amdkfd: Use mem_id to access mem_partition info")
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b125b80b
...@@ -428,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, ...@@ -428,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
} }
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
struct kfd_local_mem_info *mem_info) struct kfd_local_mem_info *mem_info,
uint8_t xcp_id)
{ {
memset(mem_info, 0, sizeof(*mem_info)); memset(mem_info, 0, sizeof(*mem_info));
mem_info->local_mem_size_public = adev->gmc.visible_vram_size; if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
mem_info->local_mem_size_private = adev->gmc.real_vram_size - if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
mem_info->local_mem_size_public =
KFD_XCP_MEMORY_SIZE(adev, xcp_id);
else
mem_info->local_mem_size_private =
KFD_XCP_MEMORY_SIZE(adev, xcp_id);
} else {
mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
mem_info->local_mem_size_private = adev->gmc.real_vram_size -
adev->gmc.visible_vram_size; adev->gmc.visible_vram_size;
}
mem_info->vram_width = adev->gmc.vram_width; mem_info->vram_width = adev->gmc.vram_width;
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
......
...@@ -231,7 +231,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); ...@@ -231,7 +231,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type); enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
struct kfd_local_mem_info *mem_info); struct kfd_local_mem_info *mem_info,
uint8_t xcp_id);
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
...@@ -334,10 +335,11 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, ...@@ -334,10 +335,11 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
((adev)->xcp_mgr && (xcp_id) >= 0 ?\ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
(adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1) (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
#define KFD_XCP_MEMORY_SIZE(n) ((n)->adev->gmc.num_mem_partitions ?\ #define KFD_XCP_MEMORY_SIZE(adev, xcp_id)\
(n)->adev->gmc.mem_partitions[(n)->xcp->mem_id].size /\ ((adev)->gmc.num_mem_partitions && (xcp_id) >= 0 ?\
(n)->adev->xcp_mgr->num_xcp_per_mem_partition :\ (adev)->gmc.mem_partitions[KFD_XCP_MEM_ID((adev), (xcp_id))].size /\
(n)->adev->gmc.real_vram_size) (adev)->xcp_mgr->num_xcp_per_mem_partition :\
(adev)->gmc.real_vram_size)
#if IS_ENABLED(CONFIG_HSA_AMD) #if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
......
...@@ -1023,11 +1023,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) ...@@ -1023,11 +1023,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
if (dev->kfd->use_iommu_v2) if (dev->kfd->use_iommu_v2)
return false; return false;
if (dev->kfd->local_mem_info.local_mem_size_private == 0 && if (dev->local_mem_info.local_mem_size_private == 0 &&
dev->kfd->local_mem_info.local_mem_size_public > 0) dev->local_mem_info.local_mem_size_public > 0)
return true; return true;
if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) { if (dev->local_mem_info.local_mem_size_public == 0 &&
dev->kfd->adev->gmc.is_app_apu) {
pr_debug("APP APU, Consider like a large bar system\n"); pr_debug("APP APU, Consider like a large bar system\n");
return true; return true;
} }
......
...@@ -2191,7 +2191,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, ...@@ -2191,7 +2191,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single * report the total FB size (public+private) as a single
* private heap. * private heap.
*/ */
local_mem_info = kdev->kfd->local_mem_info; local_mem_info = kdev->local_mem_info;
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length); sub_type_hdr->length);
......
...@@ -726,7 +726,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -726,7 +726,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_migrate_init(kfd->adev); svm_migrate_init(kfd->adev);
amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n", dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
kfd->num_nodes); kfd->num_nodes);
...@@ -756,7 +755,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -756,7 +755,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
if (node->xcp) { if (node->xcp) {
dev_info(kfd_device, "KFD node %d partition %d size %lldM\n", dev_info(kfd_device, "KFD node %d partition %d size %lldM\n",
node->node_id, node->xcp->mem_id, node->node_id, node->xcp->mem_id,
KFD_XCP_MEMORY_SIZE(node) >> 20); KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
} }
if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) && if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
...@@ -783,6 +782,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -783,6 +782,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} }
node->max_proc_per_quantum = max_proc_per_quantum; node->max_proc_per_quantum = max_proc_per_quantum;
atomic_set(&node->sram_ecc_flag, 0); atomic_set(&node->sram_ecc_flag, 0);
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp->id);
/* Initialize the KFD node */ /* Initialize the KFD node */
if (kfd_init_node(node)) { if (kfd_init_node(node)) {
dev_err(kfd_device, "Error initializing KFD node\n"); dev_err(kfd_device, "Error initializing KFD node\n");
......
...@@ -313,6 +313,8 @@ struct kfd_node { ...@@ -313,6 +313,8 @@ struct kfd_node {
unsigned int compute_vmid_bitmap; unsigned int compute_vmid_bitmap;
struct kfd_local_mem_info local_mem_info;
struct kfd_dev *kfd; struct kfd_dev *kfd;
}; };
...@@ -335,7 +337,6 @@ struct kfd_dev { ...@@ -335,7 +337,6 @@ struct kfd_dev {
*/ */
struct kgd2kfd_shared_resources shared_resources; struct kgd2kfd_shared_resources shared_resources;
struct kfd_local_mem_info local_mem_info;
const struct kfd2kgd_calls *kfd2kgd; const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex; struct mutex doorbell_mutex;
......
...@@ -1152,8 +1152,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) ...@@ -1152,8 +1152,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
if (!gpu) if (!gpu)
return 0; return 0;
local_mem_size = gpu->kfd->local_mem_info.local_mem_size_private + local_mem_size = gpu->local_mem_info.local_mem_size_private +
gpu->kfd->local_mem_info.local_mem_size_public; gpu->local_mem_info.local_mem_size_public;
buf[0] = gpu->adev->pdev->devfn; buf[0] = gpu->adev->pdev->devfn;
buf[1] = gpu->adev->pdev->subsystem_vendor | buf[1] = gpu->adev->pdev->subsystem_vendor |
(gpu->adev->pdev->subsystem_device << 16); (gpu->adev->pdev->subsystem_device << 16);
...@@ -1234,7 +1234,8 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) ...@@ -1234,7 +1234,8 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
* for APUs - If CRAT from ACPI reports more than one bank, then * for APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information * all the banks will report the same mem_clk_max information
*/ */
amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info,
dev->gpu->xcp->id);
list_for_each_entry(mem, &dev->mem_props, list) list_for_each_entry(mem, &dev->mem_props, list)
mem->mem_clk_max = local_mem_info.mem_clk_max; mem->mem_clk_max = local_mem_info.mem_clk_max;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment