Commit 74c5b85d authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Add spatial partitioning support in KFD

This patch introduces multi-partition support in KFD.
It includes:
- Support for maximum 8 spatial partitions in KFD.
- Initialize one HIQ per partition.
- Management of VMID range depending on partition mode (see the
  sketch after this list).
- Management of doorbell aperture space between all
  partitions.
- Each partition does its own queue management, interrupt
  handling, SMI event reporting.
- IOMMU, if enabled with multiple partitions, will only work
  on the first partition.
- SPM is only supported on the first partition.
- Currently, there is no support for resetting individual
  partitions. All partitions will reset together.
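
To make the VMID handling concrete, the following stand-alone
user-space sketch (not part of the patch, added here purely for
illustration) mirrors the arithmetic kgd2kfd_device_init() applies
for GFX9.4.3 in CPX mode. It assumes the 13 KFD VMIDs (3-15)
mentioned above, expressed as an example bitmap of 0xfff8, and a
two-way split:

  /* Hypothetical illustration only; builds with gcc or clang. */
  #include <stdio.h>
  #include <strings.h>	/* ffs() */

  int main(void)
  {
  	unsigned int bitmap = 0xfff8;		/* assumed: VMIDs 3-15 for KFD */
  	int first = ffs(bitmap) - 1;		/* 3 */
  	int last = 31 - __builtin_clz(bitmap);	/* 15 (gcc/clang builtin) */
  	int num = last - first + 1;		/* 13 */
  	int i;

  	/* CPX with multiple partitions: halve the range and drop VMID 3
  	 * so both halves get the same number of VMIDs.
  	 */
  	num /= 2;				/* 6 */
  	first = last + 1 - num * 2;		/* 4 */

  	for (i = 0; i < 2; i++) {
  		int node_first = (i % 2 == 0) ? first : first + num;
  		int node_last = (i % 2 == 0) ? last - num : last;

  		printf("partition %d: VMIDs %d-%d\n", i, node_first, node_last);
  	}
  	return 0;	/* prints 4-9 for the first XCD, 10-15 for the second */
  }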
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8dc1db31
@@ -567,23 +567,27 @@ static int kfd_init_node(struct kfd_node *node)
 	return err;
 }
 
-static void kfd_cleanup_node(struct kfd_dev *kfd)
+static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
 {
-	struct kfd_node *knode = kfd->node;
-
-	device_queue_manager_uninit(knode->dqm);
-	kfd_interrupt_exit(knode);
-	kfd_topology_remove_device(knode);
-	if (knode->gws)
-		amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
-	kfree(knode);
-	kfd->node = NULL;
+	struct kfd_node *knode;
+	unsigned int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		knode = kfd->nodes[i];
+		device_queue_manager_uninit(knode->dqm);
+		kfd_interrupt_exit(knode);
+		kfd_topology_remove_device(knode);
+		if (knode->gws)
+			amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
+		kfree(knode);
+		kfd->nodes[i] = NULL;
+	}
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
-	unsigned int size, map_process_packet_size;
+	unsigned int size, map_process_packet_size, i;
 	struct kfd_node *node;
 	uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
 	unsigned int max_proc_per_quantum;
@@ -596,9 +600,18 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 						KGD_ENGINE_SDMA1);
 	kfd->shared_resources = *gpu_resources;
 
-	first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
-	last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
-	vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+	if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 ||
+	    kfd->adev->gfx.num_xcc_per_xcp == 0)
+		kfd->num_nodes = 1;
+	else
+		kfd->num_nodes =
+			kfd->adev->gfx.num_xcd/kfd->adev->gfx.num_xcc_per_xcp;
+
+	if (kfd->num_nodes == 0) {
+		dev_err(kfd_device,
+			"KFD num nodes cannot be 0, GC inst: %d, num_xcc_in_node: %d\n",
+			kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp);
+		goto out;
+	}
 
 	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
 	 * 32 and 64-bit requests are possible and must be
@@ -617,6 +630,26 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		return false;
 	}
 
+	first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+	last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+	vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+
+	/* For GFX9.4.3, we need special handling for VMIDs depending on
+	 * partition mode.
+	 * In CPX mode, the VMID range needs to be shared between XCDs.
+	 * Additionally, there are 13 VMIDs (3-15) available for KFD. To
+	 * divide them equally, we change starting VMID to 4 and not use
+	 * VMID 3.
+	 * If the VMID range changes for GFX9.4.3, then this code MUST be
+	 * revisited.
+	 */
+	if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
+	    kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+	    kfd->num_nodes != 1) {
+		vmid_num_kfd /= 2;
+		first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2;
+	}
+
 	/* Verify module parameters regarding mapped process number*/
 	if (hws_max_conc_proc >= 0)
 		max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd);
@@ -682,6 +715,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd_cwsr_init(kfd);
 
+	/* TODO: Needs to be updated for memory partitioning */
 	svm_migrate_init(kfd->adev);
 
 	/* Allocate the KFD node */
@@ -700,12 +734,51 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	node->max_proc_per_quantum = max_proc_per_quantum;
 	atomic_set(&node->sram_ecc_flag, 0);
 
-	/* Initialize the KFD node */
-	if (kfd_init_node(node)) {
-		dev_err(kfd_device, "Error initializing KFD node\n");
-		goto node_init_error;
+	dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
+		 kfd->num_nodes);
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
+		if (!node)
+			goto node_alloc_error;
+
+		node->adev = kfd->adev;
+		node->kfd = kfd;
+		node->kfd2kgd = kfd->kfd2kgd;
+		node->vm_info.vmid_num_kfd = vmid_num_kfd;
+		node->num_xcc_per_node = max(1U, kfd->adev->gfx.num_xcc_per_xcp);
+		node->start_xcc_id = node->num_xcc_per_node * i;
+
+		if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
+		    kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+		    kfd->num_nodes != 1) {
+			/* For GFX9.4.3 and CPX mode, first XCD gets VMID range
+			 * 4-9 and second XCD gets VMID range 10-15.
+			 */
+			node->vm_info.first_vmid_kfd = (i%2 == 0) ?
+						first_vmid_kfd :
+						first_vmid_kfd+vmid_num_kfd;
+			node->vm_info.last_vmid_kfd = (i%2 == 0) ?
+						last_vmid_kfd-vmid_num_kfd :
+						last_vmid_kfd;
+			node->compute_vmid_bitmap =
+				((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) -
+				((0x1 << (node->vm_info.first_vmid_kfd)) - 1);
+		} else {
+			node->vm_info.first_vmid_kfd = first_vmid_kfd;
+			node->vm_info.last_vmid_kfd = last_vmid_kfd;
+			node->compute_vmid_bitmap =
+				gpu_resources->compute_vmid_bitmap;
+		}
+		node->max_proc_per_quantum = max_proc_per_quantum;
+		atomic_set(&node->sram_ecc_flag, 0);
+
+		/* Initialize the KFD node */
+		if (kfd_init_node(node)) {
+			dev_err(kfd_device, "Error initializing KFD node\n");
+			goto node_init_error;
+		}
+		kfd->nodes[i] = node;
 	}
-	kfd->node = node;
 
 	if (kfd_resume_iommu(kfd))
 		goto kfd_resume_iommu_error;
@@ -722,9 +795,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	goto out;
 
 kfd_resume_iommu_error:
-	kfd_cleanup_node(kfd);
 node_init_error:
 node_alloc_error:
+	kfd_cleanup_nodes(kfd, i);
 device_iommu_error:
 	kfd_doorbell_fini(kfd);
 kfd_doorbell_error:
@@ -742,7 +815,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 void kgd2kfd_device_exit(struct kfd_dev *kfd)
 {
 	if (kfd->init_complete) {
-		kfd_cleanup_node(kfd);
+		/* Cleanup KFD nodes */
+		kfd_cleanup_nodes(kfd, kfd->num_nodes);
+		/* Cleanup common/shared resources */
 		kfd_doorbell_fini(kfd);
 		ida_destroy(&kfd->doorbell_ida);
 		kfd_gtt_sa_fini(kfd);
@@ -754,18 +829,23 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 {
-	struct kfd_node *node = kfd->node;
+	struct kfd_node *node;
+	int i;
 
 	if (!kfd->init_complete)
 		return 0;
 
-	kfd_smi_event_update_gpu_reset(node, false);
-
-	node->dqm->ops.pre_reset(node->dqm);
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		kfd_smi_event_update_gpu_reset(node, false);
+		node->dqm->ops.pre_reset(node->dqm);
+	}
 
 	kgd2kfd_suspend(kfd, false);
 
-	kfd_signal_reset_event(node);
+	for (i = 0; i < kfd->num_nodes; i++)
+		kfd_signal_reset_event(kfd->nodes[i]);
+
 	return 0;
 }
@@ -778,19 +858,25 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 int kgd2kfd_post_reset(struct kfd_dev *kfd)
 {
 	int ret;
-	struct kfd_node *node = kfd->node;
+	struct kfd_node *node;
+	int i;
 
 	if (!kfd->init_complete)
 		return 0;
 
-	ret = kfd_resume(node);
-	if (ret)
-		return ret;
-	atomic_dec(&kfd_locked);
+	for (i = 0; i < kfd->num_nodes; i++) {
+		ret = kfd_resume(kfd->nodes[i]);
+		if (ret)
+			return ret;
+	}
 
-	atomic_set(&node->sram_ecc_flag, 0);
+	atomic_dec(&kfd_locked);
 
-	kfd_smi_event_update_gpu_reset(node, true);
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		atomic_set(&node->sram_ecc_flag, 0);
+		kfd_smi_event_update_gpu_reset(node, true);
+	}
 
 	return 0;
 }
@@ -802,7 +888,8 @@ bool kfd_is_locked(void)
 
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
-	struct kfd_node *node = kfd->node;
+	struct kfd_node *node;
+	int i;
 
 	if (!kfd->init_complete)
 		return;
@@ -814,21 +901,25 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 		kfd_suspend_all_processes();
 	}
 
-	node->dqm->ops.stop(node->dqm);
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		node->dqm->ops.stop(node->dqm);
+	}
+
 	kfd_iommu_suspend(kfd);
 }
 
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
-	int ret, count;
-	struct kfd_node *node = kfd->node;
+	int ret, count, i;
 
 	if (!kfd->init_complete)
 		return 0;
 
-	ret = kfd_resume(node);
-	if (ret)
-		return ret;
+	for (i = 0; i < kfd->num_nodes; i++) {
+		ret = kfd_resume(kfd->nodes[i]);
+		if (ret)
+			return ret;
+	}
 
 	/* for runtime resume, skip unlocking kfd */
 	if (!run_pm) {
@@ -892,10 +983,10 @@ static inline void kfd_queue_work(struct workqueue_struct *wq,
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
-	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
+	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE], i;
 	bool is_patched = false;
 	unsigned long flags;
-	struct kfd_node *node = kfd->node;
+	struct kfd_node *node;
 
 	if (!kfd->init_complete)
 		return;
@@ -905,16 +996,22 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 		return;
 	}
 
-	spin_lock_irqsave(&node->interrupt_lock, flags);
-
-	if (node->interrupts_active
-	    && interrupt_is_wanted(node, ih_ring_entry,
-				   patched_ihre, &is_patched)
-	    && enqueue_ih_ring_entry(node,
-				     is_patched ? patched_ihre : ih_ring_entry))
-		kfd_queue_work(node->ih_wq, &node->interrupt_work);
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		spin_lock_irqsave(&node->interrupt_lock, flags);
+
+		if (node->interrupts_active
+		    && interrupt_is_wanted(node, ih_ring_entry,
+					   patched_ihre, &is_patched)
+		    && enqueue_ih_ring_entry(node,
+					     is_patched ? patched_ihre : ih_ring_entry)) {
+			kfd_queue_work(node->ih_wq, &node->interrupt_work);
+			spin_unlock_irqrestore(&node->interrupt_lock, flags);
+			return;
+		}
 
-	spin_unlock_irqrestore(&node->interrupt_lock, flags);
+		spin_unlock_irqrestore(&node->interrupt_lock, flags);
+	}
 }
 
 int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
@@ -1181,8 +1278,13 @@ int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj)
 
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
 {
+	/*
+	 * TODO: Currently update SRAM ECC flag for first node.
+	 * This needs to be updated later when we can
+	 * identify SRAM ECC error on other nodes also.
+	 */
 	if (kfd)
-		atomic_inc(&kfd->node->sram_ecc_flag);
+		atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
 }
 
 void kfd_inc_compute_active(struct kfd_node *node)
@@ -1202,8 +1304,14 @@ void kfd_dec_compute_active(struct kfd_node *node)
 
 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
 {
+	/*
+	 * TODO: For now, raise the throttling event only on first node.
+	 * This will need to change after we are able to determine
+	 * which node raised the throttling event.
+	 */
 	if (kfd && kfd->init_complete)
-		kfd_smi_event_update_thermal_throttling(kfd->node, throttle_bitmask);
+		kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
+							throttle_bitmask);
 }
 
 /* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and
......
@@ -1426,7 +1426,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 	int i, mec;
 	struct scheduling_resources res;
 
-	res.vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
+	res.vmid_mask = dqm->dev->compute_vmid_bitmap;
 
 	res.queue_mask = 0;
 	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
......
@@ -121,6 +121,12 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
 		return -EINVAL;
 	}
 
+	if (!kfd_is_first_node(dev)) {
+		dev_warn_once(kfd_device,
+				"IOMMU supported only on first node\n");
+		return 0;
+	}
+
 	err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread);
 	if (!err)
 		pdd->bound = PDD_BOUND;
@@ -138,7 +144,8 @@ void kfd_iommu_unbind_process(struct kfd_process *p)
 	int i;
 
 	for (i = 0; i < p->n_pdds; i++)
-		if (p->pdds[i]->bound == PDD_BOUND)
+		if ((p->pdds[i]->bound == PDD_BOUND) &&
+		    (kfd_is_first_node((p->pdds[i]->dev))))
 			amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev,
 					       p->pasid);
 }
@@ -281,7 +288,7 @@ void kfd_iommu_suspend(struct kfd_dev *kfd)
 	if (!kfd->use_iommu_v2)
 		return;
 
-	kfd_unbind_processes_from_device(kfd->node);
+	kfd_unbind_processes_from_device(kfd->nodes[0]);
 
 	amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
 	amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
@@ -312,7 +319,7 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
 	amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev,
 				     iommu_invalid_ppr_cb);
 
-	err = kfd_bind_processes_to_device(kfd->node);
+	err = kfd_bind_processes_to_device(kfd->nodes[0]);
 	if (err) {
 		amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
 		amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
......
@@ -423,7 +423,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 
 	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
 				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-				      0, adev->kfd.dev->node->id, prange->prefetch_loc,
+				      0, adev->kfd.dev->nodes[0]->id, prange->prefetch_loc,
 				      prange->preferred_loc, trigger);
 
 	r = migrate_vma_setup(&migrate);
@@ -456,7 +456,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 
 	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
 				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-				    0, adev->kfd.dev->node->id, trigger);
+				    0, adev->kfd.dev->nodes[0]->id, trigger);
 
 	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
 	svm_range_free_dma_mappings(prange);
@@ -701,7 +701,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 
 	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
 				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-				      adev->kfd.dev->node->id, 0, prange->prefetch_loc,
+				      adev->kfd.dev->nodes[0]->id, 0, prange->prefetch_loc,
 				      prange->preferred_loc, trigger);
 
 	r = migrate_vma_setup(&migrate);
@@ -737,7 +737,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 
 	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
 				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-				    adev->kfd.dev->node->id, 0, trigger);
+				    adev->kfd.dev->nodes[0]->id, 0, trigger);
 
 	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
......
@@ -255,6 +255,8 @@ struct kfd_vmid_info {
 	uint32_t vmid_num_kfd;
 };
 
+#define MAX_KFD_NODES	8
+
 struct kfd_dev;
 
 struct kfd_node {
@@ -267,6 +269,10 @@ struct kfd_node {
 	 */
 	struct kfd_vmid_info vm_info;
 	unsigned int id;		/* topology stub index */
+	unsigned int num_xcc_per_node;
+	unsigned int start_xcc_id;	/* Starting XCC instance
+					 * number for the node
+					 */
 	/* Interrupts */
 	struct kfifo ih_fifo;
 	struct workqueue_struct *ih_wq;
@@ -300,6 +306,8 @@ struct kfd_node {
 	/* Maximum process number mapped to HW scheduler */
 	unsigned int max_proc_per_quantum;
 
+	unsigned int compute_vmid_bitmap;
+
 	struct kfd_dev *kfd;
 };
@@ -368,7 +376,8 @@ struct kfd_dev {
 	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
 	struct dev_pagemap pgmap;
 
-	struct kfd_node *node;
+	struct kfd_node *nodes[MAX_KFD_NODES];
+	unsigned int num_nodes;
 };
 
 enum kfd_mempool {
@@ -1397,6 +1406,11 @@ static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
 #endif
 }
 
+static inline bool kfd_is_first_node(struct kfd_node *node)
+{
+	return (node == node->kfd->nodes[0]);
+}
+
 /* Debugfs */
 
 #if defined(CONFIG_DEBUG_FS)
......
@@ -254,17 +254,17 @@ void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
 				    unsigned long address, bool write_fault,
 				    ktime_t ts)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_PAGE_FAULT_START,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_PAGE_FAULT_START,
 			  "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
-			  address, dev->node->id, write_fault ? 'W' : 'R');
+			  address, dev->nodes[0]->id, write_fault ? 'W' : 'R');
 }
 
 void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
 				  unsigned long address, bool migration)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_PAGE_FAULT_END,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_PAGE_FAULT_END,
 			  "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
-			  pid, address, dev->node->id, migration ? 'M' : 'U');
+			  pid, address, dev->nodes[0]->id, migration ? 'M' : 'U');
 }
 
 void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
@@ -273,7 +273,7 @@ void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
 				   uint32_t prefetch_loc, uint32_t preferred_loc,
 				   uint32_t trigger)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_MIGRATE_START,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_MIGRATE_START,
 			  "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
 			  ktime_get_boottime_ns(), pid, start, end - start,
 			  from, to, prefetch_loc, preferred_loc, trigger);
@@ -283,7 +283,7 @@ void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
 				 unsigned long start, unsigned long end,
 				 uint32_t from, uint32_t to, uint32_t trigger)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_MIGRATE_END,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_MIGRATE_END,
 			  "%lld -%d @%lx(%lx) %x->%x %d\n",
 			  ktime_get_boottime_ns(), pid, start, end - start,
 			  from, to, trigger);
@@ -292,16 +292,16 @@ void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
 void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
 				  uint32_t trigger)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_QUEUE_EVICTION,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_QUEUE_EVICTION,
 			  "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
-			  dev->node->id, trigger);
+			  dev->nodes[0]->id, trigger);
 }
 
 void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_QUEUE_RESTORE,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_QUEUE_RESTORE,
 			  "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
-			  dev->node->id);
+			  dev->nodes[0]->id);
 }
 
 void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -328,9 +328,9 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
 				  unsigned long address, unsigned long last,
 				  uint32_t trigger)
 {
-	kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+	kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_UNMAP_FROM_GPU,
 			  "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
-			  pid, address, last - address + 1, dev->node->id, trigger);
+			  pid, address, last - address + 1, dev->nodes[0]->id, trigger);
 }
 
 int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
......
@@ -555,7 +555,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			      dev->gpu->kfd->sdma_fw_version);
 		sysfs_show_64bit_prop(buffer, offs, "unique_id",
 			      dev->gpu->adev->unique_id);
-
+		sysfs_show_32bit_prop(buffer, offs, "num_xcc",
+			      dev->gpu->num_xcc_per_node);
 	}
 
 	return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
@@ -1160,7 +1161,7 @@ void kfd_topology_shutdown(void)
 static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
 {
 	uint32_t hashout;
-	uint32_t buf[7];
+	uint32_t buf[8];
 	uint64_t local_mem_size;
 	int i;
@@ -1177,8 +1178,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
 	buf[4] = gpu->adev->pdev->bus->number;
 	buf[5] = lower_32_bits(local_mem_size);
 	buf[6] = upper_32_bits(local_mem_size);
+	buf[7] = gpu->start_xcc_id | (gpu->num_xcc_per_node << 16);
 
-	for (i = 0, hashout = 0; i < 7; i++)
+	for (i = 0, hashout = 0; i < 8; i++)
 		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
 
 	return hashout;
......