Commit ecd7963f authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2018-07-28' of git://people.freedesktop.org/~gabbayo/linux into drm-next

This is amdkfd pull for 4.19. The major changes are:

- Add Raven support. Raven refers to Ryzen APUs with integrated GFXv9 GPU.
- Integrate GPU reset support

In addition, there are a couple of small fixes and improvements, such as:

- Better handling and reporting to user of VM faults
- Fix race upon context restore
- Allow the user to use specific Compute Units
- Basic power management
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180728122306.GA5235@ogabbay-vm
parents 6d52aacd b5aa3f4a
...@@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev) ...@@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
return r; return r;
} }
/*
 * amdgpu_amdkfd_pre_reset - forward the pre-reset notification to KFD
 *
 * @adev: amdgpu device that is about to be reset
 *
 * When a KFD device is attached to @adev, invokes the kgd2kfd pre_reset
 * callback on it and returns that callback's result. Returns 0 without
 * calling anything when no KFD device is attached.
 */
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	/* Nothing to notify if KFD never attached to this device. */
	if (!adev->kfd)
		return 0;

	return kgd2kfd->pre_reset(adev->kfd);
}
/*
 * amdgpu_amdkfd_post_reset - forward the post-reset notification to KFD
 *
 * @adev: amdgpu device that has just been reset
 *
 * When a KFD device is attached to @adev, invokes the kgd2kfd post_reset
 * callback on it and returns that callback's result. Returns 0 without
 * calling anything when no KFD device is attached.
 */
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	/* Nothing to notify if KFD never attached to this device. */
	if (!adev->kfd)
		return 0;

	return kgd2kfd->post_reset(adev->kfd);
}
/*
 * amdgpu_amdkfd_gpu_reset - request a GPU recovery on behalf of KFD
 *
 * @kgd: opaque device handle; cast directly to struct amdgpu_device
 *
 * Calls amdgpu_device_gpu_recover() with no job (NULL) and the last
 * argument set to false. The recovery result is discarded.
 */
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	amdgpu_device_gpu_recover((struct amdgpu_device *)kgd, NULL, false);
}
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr, void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr) void **cpu_ptr)
...@@ -461,6 +488,14 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, ...@@ -461,6 +488,14 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
return ret; return ret;
} }
/*
 * amdgpu_amdkfd_set_compute_idle - toggle the compute power profile
 *
 * @kgd:  opaque device handle; cast directly to struct amdgpu_device
 * @idle: true when compute is idle, false when it is busy
 *
 * Switches PP_SMC_POWER_PROFILE_COMPUTE on when @idle is false and off
 * when @idle is true.
 */
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	/* The profile is enabled exactly when compute is NOT idle. */
	bool compute_busy = !idle;

	amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE,
					compute_busy);
}
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{ {
if (adev->kfd) { if (adev->kfd) {
......
...@@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); ...@@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr, uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len); uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
...@@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); ...@@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
/* Shared API */ /* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr, void **mem_obj, uint64_t *gpu_addr,
...@@ -183,6 +190,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, ...@@ -183,6 +190,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef); struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
......
...@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, ...@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base); uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
/* Because of REG_GET_FIELD() being used, we put this function in the /* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file. * asic specific file.
...@@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
...@@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, ...@@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
unsigned long flags, end_jiffies; unsigned long flags, end_jiffies;
int retry; int retry;
if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
...@@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) ...@@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid; int vmid;
unsigned int tmp; unsigned int tmp;
if (adev->in_gpu_reset)
return -EIO;
for (vmid = 0; vmid < 16; vmid++) { for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue; continue;
...@@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) ...@@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
RREG32(mmVM_INVALIDATE_RESPONSE); RREG32(mmVM_INVALIDATE_RESPONSE);
return 0; return 0;
} }
/**
 * read_vmid_from_vmfault_reg - read the faulting VMID from the fault status register
 *
 * @kgd: opaque device handle, converted with get_amdgpu_device()
 *
 * Reads mmVM_CONTEXT1_PROTECTION_FAULT_STATUS and extracts its VMID field
 * (CIK).
 *
 * Return: the VMID field of the fault status register.
 */
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
{
/* NOTE(review): adev is not referenced explicitly below; presumably the
 * RREG32() macro picks it up by name - confirm before renaming/removing.
 */
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
...@@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
...@@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, ...@@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry; int retry;
struct vi_mqd *m = get_mqd(mqd); struct vi_mqd *m = get_mqd(mqd);
if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0) if (m->cp_hqd_vmid == 0)
...@@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) ...@@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid; int vmid;
unsigned int tmp; unsigned int tmp;
if (adev->in_gpu_reset)
return -EIO;
for (vmid = 0; vmid < 16; vmid++) { for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue; continue;
......
...@@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
...@@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, ...@@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp; uint32_t temp;
struct v9_mqd *m = get_mqd(mqd); struct v9_mqd *m = get_mqd(mqd);
if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0) if (m->cp_hqd_vmid == 0)
...@@ -866,6 +871,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) ...@@ -866,6 +871,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid; int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
if (adev->in_gpu_reset)
return -EIO;
if (ring->ready) if (ring->ready)
return invalidate_tlbs_with_kiq(adev, pasid); return invalidate_tlbs_with_kiq(adev, pasid);
......
...@@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, ...@@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
return ret; return ret;
} }
/*
 * amdgpu_amdkfd_gpuvm_get_vm_fault_info - hand the cached VM fault record to KFD
 *
 * @kgd: opaque device handle; cast directly to struct amdgpu_device
 * @mem: destination for the fault record; written only when a record is
 *       pending, otherwise left untouched
 *
 * If the vm_fault_info_updated flag reads 1, copies adev->gmc.vm_fault_info
 * into @mem and then clears the flag. Always returns 0.
 */
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
					  struct kfd_vm_fault_info *mem)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* No pending fault record: leave *mem as the caller prepared it. */
	if (atomic_read(&adev->gmc.vm_fault_info_updated) != 1)
		return 0;

	*mem = *adev->gmc.vm_fault_info;
	/* Keep the copy ahead of the flag clear below. */
	mb();
	atomic_set(&adev->gmc.vm_fault_info_updated, 0);

	return 0;
}
/* Evict a userptr BO by stopping the queues if necessary /* Evict a userptr BO by stopping the queues if necessary
* *
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
......
...@@ -3307,6 +3307,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3307,6 +3307,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
atomic_inc(&adev->gpu_reset_counter); atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1; adev->in_gpu_reset = 1;
/* Block kfd */
amdgpu_amdkfd_pre_reset(adev);
/* block TTM */ /* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
...@@ -3322,7 +3325,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3322,7 +3325,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (job && job->base.sched == &ring->sched) if (job && job->base.sched == &ring->sched)
continue; continue;
drm_sched_hw_job_reset(&ring->sched, &job->base); drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */ /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring); amdgpu_fence_driver_force_completion(ring);
...@@ -3363,6 +3366,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3363,6 +3366,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
} }
/*unlock kfd */
amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev); amdgpu_vf_error_trans_all(adev);
adev->in_gpu_reset = 0; adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset); mutex_unlock(&adev->lock_reset);
......
...@@ -105,6 +105,8 @@ struct amdgpu_gmc { ...@@ -105,6 +105,8 @@ struct amdgpu_gmc {
/* protects concurrent invalidation */ /* protects concurrent invalidation */
spinlock_t invalidate_lock; spinlock_t invalidate_lock;
bool translate_further; bool translate_further;
struct kfd_vm_fault_info *vm_fault_info;
atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs; const struct amdgpu_gmc_funcs *gmc_funcs;
}; };
......
...@@ -495,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, ...@@ -495,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
eaddr = eaddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1);
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->root.base.bo->shadow)
flags |= AMDGPU_GEM_CREATE_SHADOW;
if (vm->use_cpu_for_update) if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else else
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
AMDGPU_GEM_CREATE_SHADOW);
/* walk over the address space and allocate the page tables */ /* walk over the address space and allocate the page tables */
for (pt_idx = from; pt_idx <= to; ++pt_idx) { for (pt_idx = from; pt_idx <= to; ++pt_idx) {
...@@ -2587,7 +2588,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -2587,7 +2588,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->use_cpu_for_update) if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
flags |= AMDGPU_GEM_CREATE_SHADOW; flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
...@@ -2662,8 +2663,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -2662,8 +2663,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* - pasid (old PASID is released, because compute manages its own PASIDs) * - pasid (old PASID is released, because compute manages its own PASIDs)
* *
* Reinitializes the page directory to reflect the changed ATS * Reinitializes the page directory to reflect the changed ATS
* setting. May leave behind an unused shadow BO for the page * setting.
* directory when switching from SDMA updates to CPU updates.
* *
* Returns: * Returns:
* 0 for success, -errno for errors. * 0 for success, -errno for errors.
...@@ -2713,6 +2713,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -2713,6 +2713,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pasid = 0; vm->pasid = 0;
} }
/* Free the shadow bo for compute VM */
amdgpu_bo_unref(&vm->root.base.bo->shadow);
error: error:
amdgpu_bo_unreserve(vm->root.base.bo); amdgpu_bo_unreserve(vm->root.base.bo);
return r; return r;
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "cik.h" #include "cik.h"
#include "gmc_v7_0.h" #include "gmc_v7_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h"
#include "bif/bif_4_1_d.h" #include "bif/bif_4_1_d.h"
#include "bif/bif_4_1_sh_mask.h" #include "bif/bif_4_1_sh_mask.h"
...@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle) ...@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0; adev->vm_manager.vram_base_offset = 0;
} }
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
return 0; return 0;
} }
...@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle) ...@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev); amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev); amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
gmc_v7_0_gart_fini(adev); gmc_v7_0_gart_fini(adev);
amdgpu_bo_fini(adev); amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw); release_firmware(adev->gmc.fw);
...@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u32 addr, status, mc_client; u32 addr, status, mc_client, vmid;
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
...@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid); entry->pasid);
} }
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
info->vmid = vmid;
info->mc_id = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
info->status = status;
info->page_addr = addr;
info->prot_valid = protections & 0x7 ? true : false;
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}
return 0; return 0;
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "gmc_v8_0.h" #include "gmc_v8_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h"
#include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h" #include "gmc/gmc_8_1_sh_mask.h"
...@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle) ...@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0; adev->vm_manager.vram_base_offset = 0;
} }
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
return 0; return 0;
} }
...@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle) ...@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev); amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev); amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
gmc_v8_0_gart_fini(adev); gmc_v8_0_gart_fini(adev);
amdgpu_bo_fini(adev); amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw); release_firmware(adev->gmc.fw);
...@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u32 addr, status, mc_client; u32 addr, status, mc_client, vmid;
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
...@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid); entry->pasid);
} }
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
info->vmid = vmid;
info->mc_id = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
info->status = status;
info->page_addr = addr;
info->prot_valid = protections & 0x7 ? true : false;
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}
return 0; return 0;
} }
......
...@@ -25,12 +25,39 @@ ...@@ -25,12 +25,39 @@
#include "cik_int.h" #include "cik_int.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev, static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry) const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
bool *patched_flag)
{ {
const struct cik_ih_ring_entry *ihre = const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry; (const struct cik_ih_ring_entry *)ih_ring_entry;
const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
unsigned int vmid, pasid; unsigned int vmid, pasid;
/* This workaround is due to HW/FW limitation on Hawaii that
* VMID and PASID are not written into ih_ring_entry
*/
if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
dev->device_info->asic_family == CHIP_HAWAII) {
struct cik_ih_ring_entry *tmp_ihre =
(struct cik_ih_ring_entry *)patched_ihre;
*patched_flag = true;
*tmp_ihre = *ihre;
vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
tmp_ihre->ring_id &= 0x000000ff;
tmp_ihre->ring_id |= vmid << 8;
tmp_ihre->ring_id |= pasid << 16;
return (pasid != 0) &&
vmid >= dev->vm_info.first_vmid_kfd &&
vmid <= dev->vm_info.last_vmid_kfd;
}
/* Only handle interrupts from KFD VMIDs */ /* Only handle interrupts from KFD VMIDs */
vmid = (ihre->ring_id & 0x0000ff00) >> 8; vmid = (ihre->ring_id & 0x0000ff00) >> 8;
if (vmid < dev->vm_info.first_vmid_kfd || if (vmid < dev->vm_info.first_vmid_kfd ||
...@@ -48,18 +75,19 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, ...@@ -48,18 +75,19 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT;
} }
static void cik_event_interrupt_wq(struct kfd_dev *dev, static void cik_event_interrupt_wq(struct kfd_dev *dev,
const uint32_t *ih_ring_entry) const uint32_t *ih_ring_entry)
{ {
unsigned int pasid;
const struct cik_ih_ring_entry *ihre = const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry; (const struct cik_ih_ring_entry *)ih_ring_entry;
uint32_t context_id = ihre->data & 0xfffffff; uint32_t context_id = ihre->data & 0xfffffff;
unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8;
pasid = (ihre->ring_id & 0xffff0000) >> 16; unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0) if (pasid == 0)
return; return;
...@@ -72,6 +100,22 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ...@@ -72,6 +100,22 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
kfd_signal_hw_exception_event(pasid); kfd_signal_hw_exception_event(pasid);
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
struct kfd_vm_fault_info info;
kfd_process_vm_fault(dev->dqm, pasid);
memset(&info, 0, sizeof(info));
dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info);
if (!info.page_addr && !info.status)
return;
if (info.vmid == vmid)
kfd_signal_vm_fault_event(dev, pasid, &info);
else
kfd_signal_vm_fault_event(dev, pasid, NULL);
}
} }
const struct kfd_event_interrupt_class event_interrupt_class_cik = { const struct kfd_event_interrupt_class event_interrupt_class_cik = {
......
...@@ -20,8 +20,8 @@ ...@@ -20,8 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef HSA_RADEON_CIK_INT_H_INCLUDED #ifndef CIK_INT_H_INCLUDED
#define HSA_RADEON_CIK_INT_H_INCLUDED #define CIK_INT_H_INCLUDED
#include <linux/types.h> #include <linux/types.h>
...@@ -34,9 +34,10 @@ struct cik_ih_ring_entry { ...@@ -34,9 +34,10 @@ struct cik_ih_ring_entry {
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5 #define CIK_INTSRC_CP_END_OF_PIPE 0xB5
#define CIK_INTSRC_CP_BAD_OPCODE 0xB7 #define CIK_INTSRC_CP_BAD_OPCODE 0xB7
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_SDMA_TRAP 0xE0 #define CIK_INTSRC_SDMA_TRAP 0xE0
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
#define CIK_INTSRC_GFX_PAGE_INV_FAULT 0x92
#define CIK_INTSRC_GFX_MEM_PROT_FAULT 0x93
#endif #endif
...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 ...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
...@@ -251,7 +255,7 @@ if (!EMU_RUN_HACK) ...@@ -251,7 +255,7 @@ if (!EMU_RUN_HACK)
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
s_or_b32 ttmp7, ttmp8, ttmp9 s_or_b32 ttmp7, ttmp8, ttmp9
s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler
L_NO_NEXT_TRAP: L_NO_NEXT_TRAP:
...@@ -262,7 +266,7 @@ L_NO_NEXT_TRAP: ...@@ -262,7 +266,7 @@ L_NO_NEXT_TRAP:
s_addc_u32 ttmp1, ttmp1, 0 s_addc_u32 ttmp1, ttmp1, 0
L_EXCP_CASE: L_EXCP_CASE:
s_and_b32 ttmp1, ttmp1, 0xFFFF s_and_b32 ttmp1, ttmp1, 0xFFFF
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
s_rfe_b64 [ttmp0, ttmp1] s_rfe_b64 [ttmp0, ttmp1]
end end
// ********* End handling of non-CWSR traps ******************* // ********* End handling of non-CWSR traps *******************
...@@ -1053,7 +1057,7 @@ end ...@@ -1053,7 +1057,7 @@ end
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
...@@ -1134,3 +1138,11 @@ end ...@@ -1134,3 +1138,11 @@ end
function get_hwreg_size_bytes function get_hwreg_size_bytes
return 128 //HWREG size 128 bytes return 128 //HWREG size 128 bytes
end end
// Write a saved STATUS value back to HW_REG_STATUS in two pieces so that the
// bits between them are never written.
//   status: register holding the saved STATUS value (not modified here)
//   tmp:    scratch register, overwritten with status >> POST_SPI_PRIO_SHIFT
function set_status_without_spi_prio(status, tmp)
// Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
// Upper piece: field at offset POST_SPI_PRIO_SHIFT, width POST_SPI_PRIO_SIZE.
s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
// Lower piece: field at offset PRE_SPI_PRIO_SHIFT, width PRE_SPI_PRIO_SIZE.
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 ...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000 var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
...@@ -317,7 +321,7 @@ L_EXCP_CASE: ...@@ -317,7 +321,7 @@ L_EXCP_CASE:
// Restore SQ_WAVE_STATUS. // Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status set_status_without_spi_prio(s_save_status, ttmp2)
s_rfe_b64 [ttmp0, ttmp1] s_rfe_b64 [ttmp0, ttmp1]
end end
...@@ -1120,7 +1124,7 @@ end ...@@ -1120,7 +1124,7 @@ end
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
...@@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround ...@@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
end end
end end
// Write `status` into HW_REG_STATUS using two partial s_setreg_b32 writes
// (the POST_SPI_PRIO range, then the PRE_SPI_PRIO range) instead of a
// single full-register write. `tmp` is a scratch register and is
// overwritten.
function set_status_without_spi_prio(status, tmp)
// Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
// Bring the bits at and above POST_SPI_PRIO_SHIFT down to bit 0 of tmp;
// presumably s_setreg_b32 takes the field value from the low bits of its
// source operand -- confirm against the ISA documentation.
s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
// The PRE_SPI_PRIO range is written straight from `status`, unshifted.
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
...@@ -122,6 +122,9 @@ static int kfd_open(struct inode *inode, struct file *filep) ...@@ -122,6 +122,9 @@ static int kfd_open(struct inode *inode, struct file *filep)
if (IS_ERR(process)) if (IS_ERR(process))
return PTR_ERR(process); return PTR_ERR(process);
if (kfd_is_locked())
return -EAGAIN;
dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
process->pasid, process->is_32bit_user_mode); process->pasid, process->is_32bit_user_mode);
...@@ -389,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, ...@@ -389,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
return retval; return retval;
} }
/*
 * kfd_ioctl_set_cu_mask - ioctl handler that sets the CU mask of a queue
 * @filp: file the ioctl was issued on (unused here)
 * @p:    calling KFD process; its mutex is held around the queue update
 * @data: kernel copy of struct kfd_ioctl_set_cu_mask_args
 *
 * Copies args->num_cu_mask bits (must be a non-zero multiple of 32) of CU
 * mask from user space and hands them to pqm_set_cu_mask() for queue
 * args->queue_id. Masks longer than max_num_cus bits are truncated to the
 * first max_num_cus bits rather than rejected.
 *
 * Returns 0 on success, -EINVAL for a bad mask length, -ENOMEM if the
 * kernel buffer cannot be allocated, -EFAULT if the user copy fails, or
 * the error returned by pqm_set_cu_mask().
 *
 * The mask buffer is freed here only on failure; on success
 * pqm_set_cu_mask() presumably takes ownership of it -- confirm against
 * its implementation.
 */
static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32\n",
				args->num_cu_mask);
		return -EINVAL;
	}

	/* Only the CU mask fields are filled in below. Zero the rest so the
	 * callee never sees uninitialised stack contents.
	 */
	memset(&properties, 0, sizeof(properties));

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0\n");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits\n");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace\n");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}
static int kfd_ioctl_set_memory_policy(struct file *filep, static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data) struct kfd_process *p, void *data)
{ {
...@@ -754,7 +812,6 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, ...@@ -754,7 +812,6 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
{ {
struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_ioctl_get_clock_counters_args *args = data;
struct kfd_dev *dev; struct kfd_dev *dev;
struct timespec64 time;
dev = kfd_device_by_id(args->gpu_id); dev = kfd_device_by_id(args->gpu_id);
if (dev) if (dev)
...@@ -766,11 +823,8 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, ...@@ -766,11 +823,8 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
args->gpu_clock_counter = 0; args->gpu_clock_counter = 0;
/* No access to rdtsc. Using raw monotonic time */ /* No access to rdtsc. Using raw monotonic time */
getrawmonotonic64(&time); args->cpu_clock_counter = ktime_get_raw_ns();
args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time); args->system_clock_counter = ktime_get_boot_ns();
get_monotonic_boottime64(&time);
args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
/* Since the counter is in nano-seconds we use 1GHz frequency */ /* Since the counter is in nano-seconds we use 1GHz frequency */
args->system_clock_freq = 1000000000; args->system_clock_freq = 1000000000;
...@@ -1558,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { ...@@ -1558,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
kfd_ioctl_unmap_memory_from_gpu, 0), kfd_ioctl_unmap_memory_from_gpu, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
kfd_ioctl_set_cu_mask, 0),
}; };
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
......
...@@ -189,6 +189,21 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, ...@@ -189,6 +189,21 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
return 0; return 0;
} }
/*
 * find_subtype_mem - look up a memory bank entry on a topology device
 * @heap_type: heap type to match
 * @flags:     flags to match (compared for exact equality)
 * @width:     width to match
 * @dev:       topology device whose mem_props list is searched
 *
 * Returns the first entry on dev->mem_props whose heap_type, flags and
 * width all equal the given values, or NULL when no entry matches.
 */
static struct kfd_mem_properties *
find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
		struct kfd_topology_device *dev)
{
	struct kfd_mem_properties *bank;

	list_for_each_entry(bank, &dev->mem_props, list) {
		if (bank->heap_type != heap_type)
			continue;
		if (bank->flags != flags)
			continue;
		if (bank->width != width)
			continue;

		return bank;
	}

	return NULL;
}
/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
* topology device present in the device_list * topology device present in the device_list
*/ */
...@@ -197,36 +212,56 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, ...@@ -197,36 +212,56 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
{ {
struct kfd_mem_properties *props; struct kfd_mem_properties *props;
struct kfd_topology_device *dev; struct kfd_topology_device *dev;
uint32_t heap_type;
uint64_t size_in_bytes;
uint32_t flags = 0;
uint32_t width;
pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
mem->proximity_domain); mem->proximity_domain);
list_for_each_entry(dev, device_list, list) { list_for_each_entry(dev, device_list, list) {
if (mem->proximity_domain == dev->proximity_domain) { if (mem->proximity_domain == dev->proximity_domain) {
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
/* We're on GPU node */ /* We're on GPU node */
if (dev->node_props.cpu_cores_count == 0) { if (dev->node_props.cpu_cores_count == 0) {
/* APU */ /* APU */
if (mem->visibility_type == 0) if (mem->visibility_type == 0)
props->heap_type = heap_type =
HSA_MEM_HEAP_TYPE_FB_PRIVATE; HSA_MEM_HEAP_TYPE_FB_PRIVATE;
/* dGPU */ /* dGPU */
else else
props->heap_type = mem->visibility_type; heap_type = mem->visibility_type;
} else } else
props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; flags |= HSA_MEM_FLAGS_NON_VOLATILE;
props->size_in_bytes = size_in_bytes =
((uint64_t)mem->length_high << 32) + ((uint64_t)mem->length_high << 32) +
mem->length_low; mem->length_low;
props->width = mem->width; width = mem->width;
/* Multiple banks of the same type are aggregated into
* one. User mode doesn't care about multiple physical
* memory segments. It's managed as a single virtual
* heap for user mode.
*/
props = find_subtype_mem(heap_type, flags, width, dev);
if (props) {
props->size_in_bytes += size_in_bytes;
break;
}
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
props->heap_type = heap_type;
props->flags = flags;
props->size_in_bytes = size_in_bytes;
props->width = width;
dev->node_props.mem_banks_count++; dev->node_props.mem_banks_count++;
list_add_tail(&props->list, &dev->mem_props); list_add_tail(&props->list, &dev->mem_props);
......
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
#include "kfd_dbgmgr.h" #include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h" #include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h" #include "kfd_device_queue_manager.h"
#include "../../radeon/cik_reg.h"
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{ {
......
...@@ -60,6 +60,9 @@ enum { ...@@ -60,6 +60,9 @@ enum {
SH_REG_SIZE = SH_REG_END - SH_REG_BASE SH_REG_SIZE = SH_REG_END - SH_REG_BASE
}; };
/* SQ_CMD definitions */
#define SQ_CMD 0x8DEC
enum SQ_IND_CMD_CMD { enum SQ_IND_CMD_CMD {
SQ_IND_CMD_CMD_NULL = 0x00000000, SQ_IND_CMD_CMD_NULL = 0x00000000,
SQ_IND_CMD_CMD_HALT = 0x00000001, SQ_IND_CMD_CMD_HALT = 0x00000001,
...@@ -190,4 +193,38 @@ union ULARGE_INTEGER { ...@@ -190,4 +193,38 @@ union ULARGE_INTEGER {
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
enum DBGDEV_TYPE type); enum DBGDEV_TYPE type);
/*
 * 32-bit control word viewed either as named bitfields or as a raw value.
 * The field widths add up to exactly 32 bits (24 + 4 + 1 + 2 + 1).
 * Presumably this mirrors the hardware TCP_WATCH_CNTL register -- confirm
 * against the register specification.
 */
union TCP_WATCH_CNTL_BITS {
struct {
/* Bit positions below assume LSB-first bitfield allocation. */
uint32_t mask:24;	/* bits 0-23 */
uint32_t vmid:4;	/* bits 24-27 */
uint32_t atc:1;	/* bit 28 */
uint32_t mode:2;	/* bits 29-30 */
uint32_t valid:1;	/* bit 31 */
} bitfields, bits;	/* two names for the same bitfield view */
uint32_t u32All;	/* whole word, unsigned */
signed int i32All;	/* whole word, signed */
float f32All;	/* whole word reinterpreted as float */
};
/* Masks and shifts used when building address-watch register values. */
enum {
/* Bit 28 set */
ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
/* Low 24 bits set */
ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
/* Bits 24-25 set; together with the default mask this covers 26 bits */
ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
/* extend the mask to 26 bits in order to match the low address field */
ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
/* Low 16 bits set */
ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};
/* Debugger limits. */
enum {
MAX_TRAPID = 8, /* 3 bits in the bitfield. */
/* Presumably the number of address-watch slots per device -- confirm. */
MAX_WATCH_ADDRESSES = 4
};
/*
 * Consecutive indices starting at 0 for the address-watch registers;
 * ADDRESS_WATCH_REG_MAX (3) is the number of entries, not a register.
 */
enum {
ADDRESS_WATCH_REG_ADDR_HI = 0,
ADDRESS_WATCH_REG_ADDR_LO,
ADDRESS_WATCH_REG_CNTL,
ADDRESS_WATCH_REG_MAX
};
#endif /* KFD_DBGDEV_H_ */ #endif /* KFD_DBGDEV_H_ */
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
*/ */
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/uaccess.h>
#include "kfd_priv.h" #include "kfd_priv.h"
static struct dentry *debugfs_root; static struct dentry *debugfs_root;
...@@ -32,6 +34,38 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file) ...@@ -32,6 +34,38 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file)
return single_open(file, show, NULL); return single_open(file, show, NULL);
} }
/*
 * kfd_debugfs_hang_hws_write - debugfs write handler that hangs the HWS
 * @file:     debugfs file being written (unused)
 * @user_buf: user buffer holding a decimal GPU id
 * @size:     number of bytes in @user_buf; must be smaller than 16
 * @ppos:     file position (unused)
 *
 * Parses the GPU id, looks up the matching KFD device and calls
 * kfd_debugfs_hang_hws() on it.
 *
 * Returns @size on success, -EINVAL if the input is too long, is not an
 * unsigned decimal number or names no known device, -EFAULT if the user
 * copy fails, or the error returned by kfd_debugfs_hang_hws().
 */
static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
	const char __user *user_buf, size_t size, loff_t *ppos)
{
	struct kfd_dev *dev;
	char tmp[16];
	uint32_t gpu_id;
	int ret;

	/* Keep at least one byte for the NUL terminator the parser needs. */
	if (size >= sizeof(tmp)) {
		pr_err("Invalid input for gpu id.\n");
		return -EINVAL;
	}

	memset(tmp, 0, sizeof(tmp));
	if (copy_from_user(tmp, user_buf, size))
		return -EFAULT;

	/* gpu_id is unsigned, so parse it with the unsigned variant. */
	if (kstrtouint(tmp, 10, &gpu_id)) {
		pr_err("Invalid input for gpu id.\n");
		return -EINVAL;
	}

	dev = kfd_device_by_id(gpu_id);
	if (!dev) {
		pr_err("Cannot find device %u.\n", gpu_id);
		return -EINVAL;
	}

	/* Report a failed injection instead of claiming success. */
	ret = kfd_debugfs_hang_hws(dev);
	if (ret)
		return ret;

	return size;
}
static const struct file_operations kfd_debugfs_fops = { static const struct file_operations kfd_debugfs_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = kfd_debugfs_open, .open = kfd_debugfs_open,
...@@ -40,6 +74,15 @@ static const struct file_operations kfd_debugfs_fops = { ...@@ -40,6 +74,15 @@ static const struct file_operations kfd_debugfs_fops = {
.release = single_release, .release = single_release,
}; };
/*
 * File operations for the "hang_hws" debugfs entry: the same seq_file
 * based open/read/llseek/release set as kfd_debugfs_fops, plus a write
 * handler.
 *
 * NOTE(review): kfd_debugfs_open() hands a show callback to single_open();
 * if that callback comes from the inode's private data, which is NULL for
 * this entry, reading the file would call through a NULL pointer --
 * confirm.
 */
static const struct file_operations kfd_debugfs_hang_hws_fops = {
	.owner   = THIS_MODULE,
	.open    = kfd_debugfs_open,
	.release = single_release,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.write   = kfd_debugfs_hang_hws_write,
};
void kfd_debugfs_init(void) void kfd_debugfs_init(void)
{ {
struct dentry *ent; struct dentry *ent;
...@@ -65,6 +108,11 @@ void kfd_debugfs_init(void) ...@@ -65,6 +108,11 @@ void kfd_debugfs_init(void)
ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
kfd_debugfs_rls_by_device, kfd_debugfs_rls_by_device,
&kfd_debugfs_fops); &kfd_debugfs_fops);
ent = debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root,
NULL,
&kfd_debugfs_hang_hws_fops);
if (!ent) if (!ent)
pr_warn("Failed to create rls in kfd debugfs\n"); pr_warn("Failed to create rls in kfd debugfs\n");
} }
......
...@@ -30,7 +30,13 @@ ...@@ -30,7 +30,13 @@
#include "kfd_iommu.h" #include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768 #define MQD_SIZE_ALIGNED 768
static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
/*
* kfd_locked is used to lock the kfd driver during suspend or reset
* once locked, kfd driver will stop any further GPU execution.
* create process (open) will return -EAGAIN.
*/
static atomic_t kfd_locked = ATOMIC_INIT(0);
#ifdef KFD_SUPPORT_IOMMU_V2 #ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = { static const struct kfd_device_info kaveri_device_info = {
...@@ -46,6 +52,7 @@ static const struct kfd_device_info kaveri_device_info = { ...@@ -46,6 +52,7 @@ static const struct kfd_device_info kaveri_device_info = {
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = true, .needs_iommu_device = true,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info carrizo_device_info = { static const struct kfd_device_info carrizo_device_info = {
...@@ -61,6 +68,22 @@ static const struct kfd_device_info carrizo_device_info = { ...@@ -61,6 +68,22 @@ static const struct kfd_device_info carrizo_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = true, .needs_iommu_device = true,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
};
static const struct kfd_device_info raven_device_info = {
.asic_family = CHIP_RAVEN,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 8,
.ih_ring_entry_size = 8 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = true,
.needs_pci_atomics = true,
.num_sdma_engines = 1,
}; };
#endif #endif
...@@ -77,6 +100,7 @@ static const struct kfd_device_info hawaii_device_info = { ...@@ -77,6 +100,7 @@ static const struct kfd_device_info hawaii_device_info = {
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info tonga_device_info = { static const struct kfd_device_info tonga_device_info = {
...@@ -91,6 +115,7 @@ static const struct kfd_device_info tonga_device_info = { ...@@ -91,6 +115,7 @@ static const struct kfd_device_info tonga_device_info = {
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info tonga_vf_device_info = { static const struct kfd_device_info tonga_vf_device_info = {
...@@ -105,6 +130,7 @@ static const struct kfd_device_info tonga_vf_device_info = { ...@@ -105,6 +130,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info fiji_device_info = { static const struct kfd_device_info fiji_device_info = {
...@@ -119,6 +145,7 @@ static const struct kfd_device_info fiji_device_info = { ...@@ -119,6 +145,7 @@ static const struct kfd_device_info fiji_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info fiji_vf_device_info = { static const struct kfd_device_info fiji_vf_device_info = {
...@@ -133,6 +160,7 @@ static const struct kfd_device_info fiji_vf_device_info = { ...@@ -133,6 +160,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
...@@ -148,6 +176,7 @@ static const struct kfd_device_info polaris10_device_info = { ...@@ -148,6 +176,7 @@ static const struct kfd_device_info polaris10_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info polaris10_vf_device_info = { static const struct kfd_device_info polaris10_vf_device_info = {
...@@ -162,6 +191,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { ...@@ -162,6 +191,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info polaris11_device_info = { static const struct kfd_device_info polaris11_device_info = {
...@@ -176,6 +206,7 @@ static const struct kfd_device_info polaris11_device_info = { ...@@ -176,6 +206,7 @@ static const struct kfd_device_info polaris11_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info vega10_device_info = { static const struct kfd_device_info vega10_device_info = {
...@@ -190,6 +221,7 @@ static const struct kfd_device_info vega10_device_info = { ...@@ -190,6 +221,7 @@ static const struct kfd_device_info vega10_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
static const struct kfd_device_info vega10_vf_device_info = { static const struct kfd_device_info vega10_vf_device_info = {
...@@ -204,6 +236,7 @@ static const struct kfd_device_info vega10_vf_device_info = { ...@@ -204,6 +236,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2,
}; };
...@@ -241,6 +274,7 @@ static const struct kfd_deviceid supported_devices[] = { ...@@ -241,6 +274,7 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9875, &carrizo_device_info }, /* Carrizo */
{ 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */
{ 0x9877, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */
{ 0x15DD, &raven_device_info }, /* Raven */
#endif #endif
{ 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A0, &hawaii_device_info }, /* Hawaii */
{ 0x67A1, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */
...@@ -514,13 +548,54 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) ...@@ -514,13 +548,54 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfree(kfd); kfree(kfd);
} }
/*
 * kgd2kfd_pre_reset - quiesce a KFD device before a GPU reset
 * @kfd: device about to be reset
 *
 * Does nothing for a device whose initialisation did not complete.
 * Otherwise suspends the device, takes the DQM lock and signals the reset
 * event. The DQM lock is still held on return.
 *
 * Always returns 0.
 */
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		kgd2kfd_suspend(kfd);

		/* hold dqm->lock to prevent further execution*/
		dqm_lock(kfd->dqm);

		kfd_signal_reset_event(kfd);
	}

	return 0;
}
/*
* Fix me. KFD won't be able to resume existing process for now.
* We will keep all existing process in a evicted state and
* wait the process to be terminated.
*/
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
int ret, count;
if (!kfd->init_complete)
return 0;
dqm_unlock(kfd->dqm);
ret = kfd_resume(kfd);
if (ret)
return ret;
count = atomic_dec_return(&kfd_locked);
WARN_ONCE(count != 0, "KFD reset ref. error");
return 0;
}
/* Returns true while the kfd_locked counter is above zero. */
bool kfd_is_locked(void)
{
	int lock_count = atomic_read(&kfd_locked);

	return lock_count > 0;
}
void kgd2kfd_suspend(struct kfd_dev *kfd) void kgd2kfd_suspend(struct kfd_dev *kfd)
{ {
if (!kfd->init_complete) if (!kfd->init_complete)
return; return;
/* For first KFD device suspend all the KFD processes */ /* For first KFD device suspend all the KFD processes */
if (atomic_inc_return(&kfd_device_suspended) == 1) if (atomic_inc_return(&kfd_locked) == 1)
kfd_suspend_all_processes(); kfd_suspend_all_processes();
kfd->dqm->ops.stop(kfd->dqm); kfd->dqm->ops.stop(kfd->dqm);
...@@ -539,7 +614,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) ...@@ -539,7 +614,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
if (ret) if (ret)
return ret; return ret;
count = atomic_dec_return(&kfd_device_suspended); count = atomic_dec_return(&kfd_locked);
WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
if (count == 0) if (count == 0)
ret = kfd_resume_all_processes(); ret = kfd_resume_all_processes();
...@@ -577,14 +652,24 @@ static int kfd_resume(struct kfd_dev *kfd) ...@@ -577,14 +652,24 @@ static int kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */ /* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{ {
uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
bool is_patched = false;
if (!kfd->init_complete) if (!kfd->init_complete)
return; return;
if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
dev_err_once(kfd_device, "Ring entry too small\n");
return;
}
spin_lock(&kfd->interrupt_lock); spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active if (kfd->interrupts_active
&& interrupt_is_wanted(kfd, ih_ring_entry) && interrupt_is_wanted(kfd, ih_ring_entry,
&& enqueue_ih_ring_entry(kfd, ih_ring_entry)) patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(kfd,
is_patched ? patched_ihre : ih_ring_entry))
queue_work(kfd->ih_wq, &kfd->interrupt_work); queue_work(kfd->ih_wq, &kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock); spin_unlock(&kfd->interrupt_lock);
...@@ -739,8 +824,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, ...@@ -739,8 +824,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
return -ENOMEM; return -ENOMEM;
*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if ((*mem_obj) == NULL) if (!(*mem_obj))
return -ENOMEM; return -ENOMEM;
pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
...@@ -857,3 +942,26 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) ...@@ -857,3 +942,26 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
kfree(mem_obj); kfree(mem_obj);
return 0; return 0;
} }
#if defined(CONFIG_DEBUG_FS)
/*
 * kfd_debugfs_hang_hws - deliberately hang the hardware scheduler
 * @dev: device whose HWS should be hung
 *
 * Sends a packet to the HIQ to hang the HWS, which will trigger a GPU
 * reset and bring the HWS back to a normal state.
 *
 * Returns 0 on success, -EINVAL if the device is not using the HWS
 * scheduling policy, or the error returned by pm_debugfs_hang_hws() or
 * dqm_debugfs_execute_queues().
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
	int r;

	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
		pr_err("HWS is not enabled\n");
		return -EINVAL;
	}

	r = pm_debugfs_hang_hws(&dev->dqm->packets);
	if (r)
		return r;

	return dqm_debugfs_execute_queues(dev->dqm);
}
#endif
...@@ -26,15 +26,14 @@ ...@@ -26,15 +26,14 @@
#include <linux/rwsem.h> #include <linux/rwsem.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_mqd_manager.h" #include "kfd_mqd_manager.h"
#define KFD_UNMAP_LATENCY_MS (4000) #define KFD_UNMAP_LATENCY_MS (4000)
#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000)
#define KFD_SDMA_QUEUES_PER_ENGINE (2)
#define CIK_SDMA_QUEUES (4)
#define CIK_SDMA_QUEUES_PER_ENGINE (2)
#define CIK_SDMA_ENGINE_NUM (2)
struct device_process_node { struct device_process_node {
struct qcm_process_device *qpd; struct qcm_process_device *qpd;
...@@ -170,11 +169,12 @@ struct device_queue_manager { ...@@ -170,11 +169,12 @@ struct device_queue_manager {
struct device_queue_manager_ops ops; struct device_queue_manager_ops ops;
struct device_queue_manager_asic_ops asic_ops; struct device_queue_manager_asic_ops asic_ops;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX];
struct packet_manager packets; struct packet_manager packets;
struct kfd_dev *dev; struct kfd_dev *dev;
struct mutex lock; struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */
struct list_head queues; struct list_head queues;
unsigned int saved_flags;
unsigned int processes_count; unsigned int processes_count;
unsigned int queue_count; unsigned int queue_count;
unsigned int sdma_queue_count; unsigned int sdma_queue_count;
...@@ -190,6 +190,10 @@ struct device_queue_manager { ...@@ -190,6 +190,10 @@ struct device_queue_manager {
struct kfd_mem_obj *fence_mem; struct kfd_mem_obj *fence_mem;
bool active_runlist; bool active_runlist;
int sched_policy; int sched_policy;
/* hw exception */
bool is_hws_hang;
struct work_struct hw_exception_work;
}; };
void device_queue_manager_init_cik( void device_queue_manager_init_cik(
...@@ -207,6 +211,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, ...@@ -207,6 +211,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{ {
...@@ -219,4 +224,19 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) ...@@ -219,4 +224,19 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
return (pdd->lds_base >> 60) & 0x0E; return (pdd->lds_base >> 60) & 0x0E;
} }
/* The DQM lock can be taken in MMU notifiers. Make sure no reclaim-FS
 * happens while holding this lock anywhere to prevent deadlocks when
 * an MMU notifier runs in reclaim-FS context.
 *
 * dqm_lock() takes dqm->lock_hidden and then stores the value returned by
 * memalloc_nofs_save() in dqm->saved_flags, for dqm_unlock() to restore.
 * saved_flags is written only after the mutex has been acquired.
 */
static inline void dqm_lock(struct device_queue_manager *dqm)
{
mutex_lock(&dqm->lock_hidden);
dqm->saved_flags = memalloc_nofs_save();
}
/* Counterpart of dqm_lock(): passes dqm->saved_flags to
 * memalloc_nofs_restore() and then releases dqm->lock_hidden. saved_flags
 * is read before the mutex is dropped.
 */
static inline void dqm_unlock(struct device_queue_manager *dqm)
{
memalloc_nofs_restore(dqm->saved_flags);
mutex_unlock(&dqm->lock_hidden);
}
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
...@@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, ...@@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config = qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (vega10_noretry && if (noretry &&
!dqm->dev->device_info->needs_iommu_device) !dqm->dev->device_info->needs_iommu_device)
qpd->sh_mem_config |= qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
......
...@@ -188,9 +188,9 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, ...@@ -188,9 +188,9 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
*doorbell_off = kfd->doorbell_id_offset + inx; *doorbell_off = kfd->doorbell_id_offset + inx;
pr_debug("Get kernel queue doorbell\n" pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n" " doorbell offset == 0x%08X\n"
" kernel address == %p\n", " doorbell index == 0x%x\n",
*doorbell_off, (kfd->doorbell_kernel_ptr + inx)); *doorbell_off, inx);
return kfd->doorbell_kernel_ptr + inx; return kfd->doorbell_kernel_ptr + inx;
} }
...@@ -199,7 +199,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) ...@@ -199,7 +199,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{ {
unsigned int inx; unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
* sizeof(u32) / kfd->device_info->doorbell_size;
mutex_lock(&kfd->doorbell_mutex); mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index); __clear_bit(inx, kfd->doorbell_available_index);
......
...@@ -850,6 +850,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ...@@ -850,6 +850,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
ev->memory_exception_data = *ev_data; ev->memory_exception_data = *ev_data;
} }
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
"Sending SIGSEGV to HSA Process with PID %d ",
p->lead_thread->pid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
/* Send SIGTERM no event of type "type" has been found*/ /* Send SIGTERM no event of type "type" has been found*/
if (send_signal) { if (send_signal) {
if (send_sigterm) { if (send_sigterm) {
...@@ -904,34 +911,41 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, ...@@ -904,34 +911,41 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
memory_exception_data.failure.NotPresent = 1; memory_exception_data.failure.NotPresent = 1;
memory_exception_data.failure.NoExecute = 0; memory_exception_data.failure.NoExecute = 0;
memory_exception_data.failure.ReadOnly = 0; memory_exception_data.failure.ReadOnly = 0;
if (vma) { if (vma && address >= vma->vm_start) {
if (vma->vm_start > address) { memory_exception_data.failure.NotPresent = 0;
memory_exception_data.failure.NotPresent = 1;
memory_exception_data.failure.NoExecute = 0; if (is_write_requested && !(vma->vm_flags & VM_WRITE))
memory_exception_data.failure.ReadOnly = 1;
else
memory_exception_data.failure.ReadOnly = 0; memory_exception_data.failure.ReadOnly = 0;
} else {
memory_exception_data.failure.NotPresent = 0; if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
if (is_write_requested && !(vma->vm_flags & VM_WRITE)) memory_exception_data.failure.NoExecute = 1;
memory_exception_data.failure.ReadOnly = 1; else
else memory_exception_data.failure.NoExecute = 0;
memory_exception_data.failure.ReadOnly = 0;
if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
memory_exception_data.failure.NoExecute = 1;
else
memory_exception_data.failure.NoExecute = 0;
}
} }
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
mmput(mm); mmput(mm);
mutex_lock(&p->event_mutex); pr_debug("notpresent %d, noexecute %d, readonly %d\n",
memory_exception_data.failure.NotPresent,
memory_exception_data.failure.NoExecute,
memory_exception_data.failure.ReadOnly);
/* Lookup events by type and signal them */ /* Workaround on Raven to not kill the process when memory is freed
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, * before IOMMU is able to finish processing all the excessive PPRs
&memory_exception_data); */
if (dev->device_info->asic_family != CHIP_RAVEN) {
mutex_lock(&p->event_mutex);
/* Lookup events by type and signal them */
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
&memory_exception_data);
mutex_unlock(&p->event_mutex);
}
mutex_unlock(&p->event_mutex);
kfd_unref_process(p); kfd_unref_process(p);
} }
#endif /* KFD_SUPPORT_IOMMU_V2 */ #endif /* KFD_SUPPORT_IOMMU_V2 */
...@@ -956,3 +970,67 @@ void kfd_signal_hw_exception_event(unsigned int pasid) ...@@ -956,3 +970,67 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
mutex_unlock(&p->event_mutex); mutex_unlock(&p->event_mutex);
kfd_unref_process(p); kfd_unref_process(p);
} }
void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
struct kfd_vm_fault_info *info)
{
struct kfd_event *ev;
uint32_t id;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
struct kfd_hsa_memory_exception_data memory_exception_data;
if (!p)
return; /* Presumably process exited. */
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = 1;
/* Set failure reason */
if (info) {
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
memory_exception_data.failure.NotPresent =
info->prot_valid ? 1 : 0;
memory_exception_data.failure.NoExecute =
info->prot_exec ? 1 : 0;
memory_exception_data.failure.ReadOnly =
info->prot_write ? 1 : 0;
memory_exception_data.failure.imprecise = 0;
}
mutex_lock(&p->event_mutex);
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
ev->memory_exception_data = memory_exception_data;
set_event(ev);
}
mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
void kfd_signal_reset_event(struct kfd_dev *dev)
{
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_process *p;
struct kfd_event *ev;
unsigned int temp;
uint32_t id, idx;
/* Whole gpu reset caused by GPU hang and memory is lost */
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
hw_exception_data.gpu_id = dev->id;
hw_exception_data.memory_lost = 1;
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->event_mutex);
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
ev->hw_exception_data = hw_exception_data;
set_event(ev);
}
mutex_unlock(&p->event_mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
...@@ -66,6 +66,7 @@ struct kfd_event { ...@@ -66,6 +66,7 @@ struct kfd_event {
/* type specific data */ /* type specific data */
union { union {
struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
}; };
}; };
......
...@@ -26,7 +26,9 @@ ...@@ -26,7 +26,9 @@
static bool event_interrupt_isr_v9(struct kfd_dev *dev, static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry) const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
bool *patched_flag)
{ {
uint16_t source_id, client_id, pasid, vmid; uint16_t source_id, client_id, pasid, vmid;
const uint32_t *data = ih_ring_entry; const uint32_t *data = ih_ring_entry;
...@@ -57,7 +59,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, ...@@ -57,7 +59,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
return source_id == SOC15_INTSRC_CP_END_OF_PIPE || return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP || source_id == SOC15_INTSRC_SDMA_TRAP ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE; source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_UTCL2;
} }
static void event_interrupt_wq_v9(struct kfd_dev *dev, static void event_interrupt_wq_v9(struct kfd_dev *dev,
...@@ -82,7 +86,19 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, ...@@ -82,7 +86,19 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
kfd_signal_hw_exception_event(pasid); kfd_signal_hw_exception_event(pasid);
else if (client_id == SOC15_IH_CLIENTID_VMC || else if (client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_UTCL2) { client_id == SOC15_IH_CLIENTID_UTCL2) {
/* TODO */ struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
info.prot_valid = ring_id & 0x08;
info.prot_read = ring_id & 0x10;
info.prot_write = ring_id & 0x20;
kfd_process_vm_fault(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info);
} }
} }
......
...@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work) ...@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
ih_ring_entry); ih_ring_entry);
} }
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) bool interrupt_is_wanted(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre, bool *flag)
{ {
/* integer and bitwise OR so there is no boolean short-circuiting */ /* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0; unsigned int wanted = 0;
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
ih_ring_entry); ih_ring_entry, patched_ihre, flag);
return wanted != 0; return wanted != 0;
} }
...@@ -190,7 +190,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, ...@@ -190,7 +190,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
{ {
struct kfd_dev *dev; struct kfd_dev *dev;
dev_warn(kfd_device, dev_warn_ratelimited(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn), PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn), PCI_SLOT(pdev->devfn),
......
...@@ -59,7 +59,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, ...@@ -59,7 +59,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
switch (type) { switch (type) {
case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_DIQ:
case KFD_QUEUE_TYPE_HIQ: case KFD_QUEUE_TYPE_HIQ:
kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
KFD_MQD_TYPE_HIQ); KFD_MQD_TYPE_HIQ);
break; break;
default: default:
...@@ -67,7 +67,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, ...@@ -67,7 +67,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
return false; return false;
} }
if (!kq->mqd) if (!kq->mqd_mgr)
return false; return false;
prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
...@@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, ...@@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
prop.eop_ring_buffer_address = kq->eop_gpu_addr; prop.eop_ring_buffer_address = kq->eop_gpu_addr;
prop.eop_ring_buffer_size = PAGE_SIZE; prop.eop_ring_buffer_size = PAGE_SIZE;
prop.cu_mask = NULL;
if (init_queue(&kq->queue, &prop) != 0) if (init_queue(&kq->queue, &prop) != 0)
goto err_init_queue; goto err_init_queue;
...@@ -130,7 +131,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, ...@@ -130,7 +131,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->queue->device = dev; kq->queue->device = dev;
kq->queue->process = kfd_get_process(current); kq->queue->process = kfd_get_process(current);
retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd, retval = kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd,
&kq->queue->mqd_mem_obj, &kq->queue->mqd_mem_obj,
&kq->queue->gart_mqd_addr, &kq->queue->gart_mqd_addr,
&kq->queue->properties); &kq->queue->properties);
...@@ -142,9 +143,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, ...@@ -142,9 +143,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
pr_debug("Assigning hiq to hqd\n"); pr_debug("Assigning hiq to hqd\n");
kq->queue->pipe = KFD_CIK_HIQ_PIPE; kq->queue->pipe = KFD_CIK_HIQ_PIPE;
kq->queue->queue = KFD_CIK_HIQ_QUEUE; kq->queue->queue = KFD_CIK_HIQ_QUEUE;
kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
kq->queue->queue, &kq->queue->properties, kq->queue->pipe, kq->queue->queue,
NULL); &kq->queue->properties, NULL);
} else { } else {
/* allocate fence for DIQ */ /* allocate fence for DIQ */
...@@ -182,7 +183,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, ...@@ -182,7 +183,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
static void uninitialize(struct kernel_queue *kq) static void uninitialize(struct kernel_queue *kq)
{ {
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
kq->mqd->destroy_mqd(kq->mqd, kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd, kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS, KFD_UNMAP_LATENCY_MS,
...@@ -191,7 +192,8 @@ static void uninitialize(struct kernel_queue *kq) ...@@ -191,7 +192,8 @@ static void uninitialize(struct kernel_queue *kq)
else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj); kq->mqd_mgr->uninit_mqd(kq->mqd_mgr, kq->queue->mqd,
kq->queue->mqd_mem_obj);
kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
kfd_gtt_sa_free(kq->dev, kq->wptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
......
...@@ -70,7 +70,7 @@ struct kernel_queue { ...@@ -70,7 +70,7 @@ struct kernel_queue {
/* data */ /* data */
struct kfd_dev *dev; struct kfd_dev *dev;
struct mqd_manager *mqd; struct mqd_manager *mqd_mgr;
struct queue *queue; struct queue *queue;
uint64_t pending_wptr64; uint64_t pending_wptr64;
uint32_t pending_wptr; uint32_t pending_wptr;
......
...@@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = { ...@@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.resume_mm = kgd2kfd_resume_mm, .resume_mm = kgd2kfd_resume_mm,
.schedule_evict_and_restore_process = .schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process, kgd2kfd_schedule_evict_and_restore_process,
.pre_reset = kgd2kfd_pre_reset,
.post_reset = kgd2kfd_post_reset,
}; };
int sched_policy = KFD_SCHED_POLICY_HWS; int sched_policy = KFD_SCHED_POLICY_HWS;
...@@ -61,7 +63,7 @@ MODULE_PARM_DESC(hws_max_conc_proc, ...@@ -61,7 +63,7 @@ MODULE_PARM_DESC(hws_max_conc_proc,
int cwsr_enable = 1; int cwsr_enable = 1;
module_param(cwsr_enable, int, 0444); module_param(cwsr_enable, int, 0444);
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))");
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444); module_param(max_num_of_queues_per_device, int, 0444);
...@@ -83,13 +85,19 @@ module_param(ignore_crat, int, 0444); ...@@ -83,13 +85,19 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat, MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
int vega10_noretry; int noretry;
module_param_named(noretry, vega10_noretry, int, 0644); module_param(noretry, int, 0644);
MODULE_PARM_DESC(noretry, MODULE_PARM_DESC(noretry,
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); "Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled (default), 1 = retry disabled)");
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
static int amdkfd_init_completed; static int amdkfd_init_completed;
int kgd2kfd_init(unsigned int interface_version, int kgd2kfd_init(unsigned int interface_version,
const struct kgd2kfd_calls **g2f) const struct kgd2kfd_calls **g2f)
{ {
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* *
*/ */
#include "kfd_priv.h" #include "kfd_mqd_manager.h"
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev) struct kfd_dev *dev)
...@@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, ...@@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
return NULL; return NULL;
} }
/*
 * Spread a flat CU mask evenly over the shader engines (SEs).
 *
 * @mm:            MQD manager; its device is queried for the CU layout
 * @cu_mask:       user CU mask, one bit per CU, packed in 32-bit words
 * @cu_mask_count: number of valid bits in @cu_mask; clamped to the number
 *                 of active CUs reported by get_cu_info()
 * @se_mask:       output, one 32-bit mask per SE; bits are OR-ed in, so the
 *                 caller is expected to pass zeroed storage
 *
 * The per-SE CU counts are kept in a 4-entry array, so this assumes at most
 * 4 shader engines -- NOTE(review): confirm against get_cu_info() providers.
 */
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
		const uint32_t *cu_mask, uint32_t cu_mask_count,
		uint32_t *se_mask)
{
	struct kfd_cu_info cu_info;
	uint32_t cu_per_sh[4] = {0};
	uint32_t i, se, cu = 0;

	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);

	if (cu_mask_count > cu_info.cu_active_number)
		cu_mask_count = cu_info.cu_active_number;

	/* Count the CUs present in each SE (summed over its 4 bitmaps) */
	for (se = 0; se < cu_info.num_shader_engines; se++)
		for (i = 0; i < 4; i++)
			cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);

	/* Symmetrically map cu_mask to all SEs:
	 * cu_mask[0] bit0 -> se_mask[0] bit0;
	 * cu_mask[0] bit1 -> se_mask[1] bit0;
	 * ... (if # SE is 4)
	 * cu_mask[0] bit4 -> se_mask[0] bit1;
	 * ...
	 */
	se = 0;
	/*
	 * Stop once cu reaches 32: no SE has a free slot left and a shift
	 * by 32 would be undefined.
	 */
	for (i = 0; i < cu_mask_count && cu < 32; i++) {
		/* Unsigned constants: shifting a signed 1 into bit 31 is UB */
		if (cu_mask[i / 32] & (1U << (i % 32)))
			se_mask[se] |= 1U << cu;

		/* Advance to the next SE that still has a CU at index cu */
		do {
			se++;
			if (se == cu_info.num_shader_engines) {
				se = 0;
				cu++;
			}
		} while (cu >= cu_per_sh[se] && cu < 32);
	}
}
...@@ -93,4 +93,8 @@ struct mqd_manager { ...@@ -93,4 +93,8 @@ struct mqd_manager {
struct kfd_dev *dev; struct kfd_dev *dev;
}; };
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
#endif /* KFD_MQD_MANAGER_H_ */ #endif /* KFD_MQD_MANAGER_H_ */
...@@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) ...@@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
return (struct cik_sdma_rlc_registers *)mqd; return (struct cik_sdma_rlc_registers *)mqd;
} }
/*
 * Write the queue's CU mask into the four per-SE static thread management
 * fields of a CIK MQD.  Does nothing when the queue has no CU mask
 * (cu_mask_count == 0).
 */
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	uint32_t per_se_mask[4] = {0}; /* 4 is the max # of SEs */
	struct cik_mqd *cik;

	if (!q->cu_mask_count)
		return;

	/* Distribute the flat user mask across the shader engines */
	mqd_symmetrically_map_cu_mask(mm, q->cu_mask, q->cu_mask_count,
				      per_se_mask);

	cik = get_mqd(mqd);
	cik->compute_static_thread_mgmt_se0 = per_se_mask[0];
	cik->compute_static_thread_mgmt_se1 = per_se_mask[1];
	cik->compute_static_thread_mgmt_se2 = per_se_mask[2];
	cik->compute_static_thread_mgmt_se3 = per_se_mask[3];

	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
		 cik->compute_static_thread_mgmt_se0,
		 cik->compute_static_thread_mgmt_se1,
		 cik->compute_static_thread_mgmt_se2,
		 cik->compute_static_thread_mgmt_se3);
}
static int init_mqd(struct mqd_manager *mm, void **mqd, static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q) struct queue_properties *q)
...@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, ...@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL) if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_pq_control |= NO_UPDATE_RPTR; m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
update_cu_mask(mm, mqd, q);
q->is_active = (q->queue_size > 0 && q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0 && q->queue_percent > 0 &&
...@@ -408,7 +435,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, ...@@ -408,7 +435,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL; return NULL;
mqd = kzalloc(sizeof(*mqd), GFP_NOIO); mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd) if (!mqd)
return NULL; return NULL;
......
...@@ -41,6 +41,31 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) ...@@ -41,6 +41,31 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v9_sdma_mqd *)mqd; return (struct v9_sdma_mqd *)mqd;
} }
/*
 * Write the queue's CU mask into the four per-SE static thread management
 * fields of a v9 MQD.  Does nothing when the queue has no CU mask
 * (cu_mask_count == 0).
 */
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	uint32_t per_se_mask[4] = {0}; /* 4 is the max # of SEs */
	struct v9_mqd *v9;

	if (!q->cu_mask_count)
		return;

	/* Distribute the flat user mask across the shader engines */
	mqd_symmetrically_map_cu_mask(mm, q->cu_mask, q->cu_mask_count,
				      per_se_mask);

	v9 = get_mqd(mqd);
	v9->compute_static_thread_mgmt_se0 = per_se_mask[0];
	v9->compute_static_thread_mgmt_se1 = per_se_mask[1];
	v9->compute_static_thread_mgmt_se2 = per_se_mask[2];
	v9->compute_static_thread_mgmt_se3 = per_se_mask[3];

	pr_debug("update cu mask to %#x %#x %#x %#x\n",
		 v9->compute_static_thread_mgmt_se0,
		 v9->compute_static_thread_mgmt_se1,
		 v9->compute_static_thread_mgmt_se2,
		 v9->compute_static_thread_mgmt_se3);
}
static int init_mqd(struct mqd_manager *mm, void **mqd, static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q) struct queue_properties *q)
...@@ -55,7 +80,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, ...@@ -55,7 +80,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
* instead of sub-allocation function. * instead of sub-allocation function.
*/ */
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!*mqd_mem_obj) if (!*mqd_mem_obj)
return -ENOMEM; return -ENOMEM;
retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
...@@ -198,6 +223,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, ...@@ -198,6 +223,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0; m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, q);
q->is_active = (q->queue_size > 0 && q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0 && q->queue_percent > 0 &&
...@@ -393,7 +420,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, ...@@ -393,7 +420,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL; return NULL;
mqd = kzalloc(sizeof(*mqd), GFP_NOIO); mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd) if (!mqd)
return NULL; return NULL;
......
...@@ -43,6 +43,31 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) ...@@ -43,6 +43,31 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct vi_sdma_mqd *)mqd; return (struct vi_sdma_mqd *)mqd;
} }
/*
 * Write the queue's CU mask into the four per-SE static thread management
 * fields of a VI MQD.  Does nothing when the queue has no CU mask
 * (cu_mask_count == 0).
 */
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	uint32_t per_se_mask[4] = {0}; /* 4 is the max # of SEs */
	struct vi_mqd *vi;

	if (!q->cu_mask_count)
		return;

	/* Distribute the flat user mask across the shader engines */
	mqd_symmetrically_map_cu_mask(mm, q->cu_mask, q->cu_mask_count,
				      per_se_mask);

	vi = get_mqd(mqd);
	vi->compute_static_thread_mgmt_se0 = per_se_mask[0];
	vi->compute_static_thread_mgmt_se1 = per_se_mask[1];
	vi->compute_static_thread_mgmt_se2 = per_se_mask[2];
	vi->compute_static_thread_mgmt_se3 = per_se_mask[3];

	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
		 vi->compute_static_thread_mgmt_se0,
		 vi->compute_static_thread_mgmt_se1,
		 vi->compute_static_thread_mgmt_se2,
		 vi->compute_static_thread_mgmt_se3);
}
static int init_mqd(struct mqd_manager *mm, void **mqd, static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q) struct queue_properties *q)
...@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, ...@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
update_cu_mask(mm, mqd, q);
q->is_active = (q->queue_size > 0 && q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0 && q->queue_percent > 0 &&
...@@ -394,7 +421,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, ...@@ -394,7 +421,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL; return NULL;
mqd = kzalloc(sizeof(*mqd), GFP_NOIO); mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd) if (!mqd)
return NULL; return NULL;
......
...@@ -418,4 +418,30 @@ int pm_debugfs_runlist(struct seq_file *m, void *data) ...@@ -418,4 +418,30 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
return 0; return 0;
} }
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
uint32_t *buffer, size;
int r = 0;
size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
r = -ENOMEM;
goto out;
}
memset(buffer, 0x55, size);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
buffer[0], buffer[1], buffer[2], buffer[3],
buffer[4], buffer[5], buffer[6]);
out:
mutex_unlock(&pm->lock);
return r;
}
#endif #endif
...@@ -73,7 +73,7 @@ ...@@ -73,7 +73,7 @@
/* /*
* When working with cp scheduler we should assign the HIQ manually or via * When working with cp scheduler we should assign the HIQ manually or via
* the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
* definitions for Kaveri. In Kaveri only the first ME queues participates * definitions for Kaveri. In Kaveri only the first ME queues participates
* in the cp scheduling taking that in mind we set the HIQ slot in the * in the cp scheduling taking that in mind we set the HIQ slot in the
* second ME. * second ME.
...@@ -142,7 +142,12 @@ extern int ignore_crat; ...@@ -142,7 +142,12 @@ extern int ignore_crat;
/* /*
* Set sh_mem_config.retry_disable on Vega10 * Set sh_mem_config.retry_disable on Vega10
*/ */
extern int vega10_noretry; extern int noretry;
/*
* Halt if HWS hang is detected
*/
extern int halt_if_hws_hang;
/** /**
* enum kfd_sched_policy * enum kfd_sched_policy
...@@ -180,9 +185,10 @@ enum cache_policy { ...@@ -180,9 +185,10 @@ enum cache_policy {
struct kfd_event_interrupt_class { struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev, bool (*interrupt_isr)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry); const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
bool *patched_flag);
void (*interrupt_wq)(struct kfd_dev *dev, void (*interrupt_wq)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry); const uint32_t *ih_ring_entry);
}; };
struct kfd_device_info { struct kfd_device_info {
...@@ -197,6 +203,7 @@ struct kfd_device_info { ...@@ -197,6 +203,7 @@ struct kfd_device_info {
bool supports_cwsr; bool supports_cwsr;
bool needs_iommu_device; bool needs_iommu_device;
bool needs_pci_atomics; bool needs_pci_atomics;
unsigned int num_sdma_engines;
}; };
struct kfd_mem_obj { struct kfd_mem_obj {
...@@ -415,6 +422,9 @@ struct queue_properties { ...@@ -415,6 +422,9 @@ struct queue_properties {
uint32_t ctl_stack_size; uint32_t ctl_stack_size;
uint64_t tba_addr; uint64_t tba_addr;
uint64_t tma_addr; uint64_t tma_addr;
/* Relevant for CU */
uint32_t cu_mask_count; /* Must be a multiple of 32 */
uint32_t *cu_mask;
}; };
/** /**
...@@ -806,12 +816,18 @@ int kfd_interrupt_init(struct kfd_dev *dev); ...@@ -806,12 +816,18 @@ int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev); void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); bool interrupt_is_wanted(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre, bool *flag);
/* Power Management */ /* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd);
/* GPU reset */
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
/* amdkfd Apertures */ /* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process); int kfd_init_apertures(struct kfd_process *process);
...@@ -838,6 +854,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm); ...@@ -838,6 +854,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type); enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq); void kernel_queue_uninit(struct kernel_queue *kq);
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
/* Process Queue Manager */ /* Process Queue Manager */
struct process_queue_node { struct process_queue_node {
...@@ -858,6 +875,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, ...@@ -858,6 +875,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p); struct queue_properties *p);
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid); unsigned int qid);
...@@ -964,10 +983,17 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -964,10 +983,17 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint64_t *event_page_offset, uint32_t *event_slot_index); uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
struct kfd_vm_fault_info *info);
void kfd_signal_reset_event(struct kfd_dev *dev);
void kfd_flush_tlb(struct kfd_process_device *pdd); void kfd_flush_tlb(struct kfd_process_device *pdd);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
bool kfd_is_locked(void);
/* Debugfs */ /* Debugfs */
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
...@@ -980,6 +1006,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data); ...@@ -980,6 +1006,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data); int pm_debugfs_runlist(struct seq_file *m, void *data);
int kfd_debugfs_hang_hws(struct kfd_dev *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);
#else #else
static inline void kfd_debugfs_init(void) {} static inline void kfd_debugfs_init(void) {}
......
...@@ -244,6 +244,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) ...@@ -244,6 +244,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
process = find_process(thread); process = find_process(thread);
if (!process)
return ERR_PTR(-EINVAL);
return process; return process;
} }
......
...@@ -186,8 +186,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, ...@@ -186,8 +186,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) { switch (type) {
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
if (dev->dqm->queue_count >= if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) {
pr_err("Over-subscription is not allowed for SDMA.\n"); pr_err("Over-subscription is not allowed for SDMA.\n");
retval = -EPERM; retval = -EPERM;
goto err_create_queue; goto err_create_queue;
...@@ -209,7 +208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, ...@@ -209,7 +208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
retval = -EPERM; retval = -EPERM;
goto err_create_queue; goto err_create_queue;
} }
...@@ -326,6 +325,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) ...@@ -326,6 +325,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != -ETIME) if (retval != -ETIME)
goto err_destroy_queue; goto err_destroy_queue;
} }
kfree(pqn->q->properties.cu_mask);
pqn->q->properties.cu_mask = NULL;
uninit_queue(pqn->q); uninit_queue(pqn->q);
} }
...@@ -366,6 +367,34 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, ...@@ -366,6 +367,34 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
return 0; return 0;
} }
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p)
{
int retval;
struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid);
if (!pqn) {
pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
/* Free the old CU mask memory if it is already allocated, then
* allocate memory for the new CU mask.
*/
kfree(pqn->q->properties.cu_mask);
pqn->q->properties.cu_mask_count = p->cu_mask_count;
pqn->q->properties.cu_mask = p->cu_mask;
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q);
if (retval != 0)
return retval;
return 0;
}
struct kernel_queue *pqm_get_kernel_queue( struct kernel_queue *pqm_get_kernel_queue(
struct process_queue_manager *pqm, struct process_queue_manager *pqm,
unsigned int qid) unsigned int qid)
...@@ -387,7 +416,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -387,7 +416,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
struct process_queue_node *pqn; struct process_queue_node *pqn;
struct queue *q; struct queue *q;
enum KFD_MQD_TYPE mqd_type; enum KFD_MQD_TYPE mqd_type;
struct mqd_manager *mqd_manager; struct mqd_manager *mqd_mgr;
int r = 0; int r = 0;
list_for_each_entry(pqn, &pqm->queues, process_queue_list) { list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
...@@ -410,11 +439,11 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -410,11 +439,11 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
q->properties.type, q->device->id); q->properties.type, q->device->id);
continue; continue;
} }
mqd_manager = q->device->dqm->ops.get_mqd_manager( mqd_mgr = q->device->dqm->ops.get_mqd_manager(
q->device->dqm, mqd_type); q->device->dqm, mqd_type);
} else if (pqn->kq) { } else if (pqn->kq) {
q = pqn->kq->queue; q = pqn->kq->queue;
mqd_manager = pqn->kq->mqd; mqd_mgr = pqn->kq->mqd_mgr;
switch (q->properties.type) { switch (q->properties.type) {
case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_DIQ:
seq_printf(m, " DIQ on device %x\n", seq_printf(m, " DIQ on device %x\n",
...@@ -434,7 +463,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -434,7 +463,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue; continue;
} }
r = mqd_manager->debugfs_show_mqd(m, q->mqd); r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
if (r != 0) if (r != 0)
break; break;
} }
......
...@@ -47,6 +47,17 @@ enum kfd_preempt_type { ...@@ -47,6 +47,17 @@ enum kfd_preempt_type {
KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
}; };
/**
 * struct kfd_vm_fault_info - VM fault description exchanged through the
 * get_vm_fault_info callback of struct kfd2kgd_calls.
 *
 * Field meanings are inferred from their names -- TODO confirm against
 * the code that fills this structure:
 * @page_addr: presumably the address of the faulting page
 * @vmid: presumably the VMID the fault was raised for
 * @mc_id: presumably the memory client id taken from the fault status
 * @status: presumably the raw fault status value
 * @prot_valid: presumably set when the prot_* flags below are meaningful
 * @prot_read: presumably a read-protection fault
 * @prot_write: presumably a write-protection fault
 * @prot_exec: presumably an execute-protection fault
 */
struct kfd_vm_fault_info {
uint64_t page_addr;
uint32_t vmid;
uint32_t mc_id;
uint32_t status;
bool prot_valid;
bool prot_read;
bool prot_write;
bool prot_exec;
};
struct kfd_cu_info { struct kfd_cu_info {
uint32_t num_shader_engines; uint32_t num_shader_engines;
uint32_t num_shader_arrays_per_engine; uint32_t num_shader_arrays_per_engine;
...@@ -259,6 +270,21 @@ struct tile_config { ...@@ -259,6 +270,21 @@ struct tile_config {
* IB to the corresponding ring (ring type). The IB is executed with the * IB to the corresponding ring (ring type). The IB is executed with the
* specified VMID in a user mode context. * specified VMID in a user mode context.
* *
* @get_vm_fault_info: Return information about a recent VM fault on
* GFXv7 and v8. If multiple VM faults occurred since the last call of
* this function, it will return information about the first of those
* faults. On GFXv9 VM fault information is fully contained in the IH
* packet and this function is not needed.
*
* @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
* IH ring entry. This function allows the KFD ISR to get the VMID
* from the fault status register as early as possible.
*
* @gpu_recover: Lets kgd reset the GPU after kfd detects a CPC hang.
*
* @set_compute_idle: Indicates that compute is idle on a device. This
* can be used to change power profiles depending on compute activity.
*
* This structure contains function pointers to services that the kgd driver * This structure contains function pointers to services that the kgd driver
* provides to amdkfd driver. * provides to amdkfd driver.
* *
...@@ -374,6 +400,14 @@ struct kfd2kgd_calls { ...@@ -374,6 +400,14 @@ struct kfd2kgd_calls {
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr, uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len); uint32_t *ib_cmd, uint32_t ib_len);
int (*get_vm_fault_info)(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
void (*gpu_recover)(struct kgd_dev *kgd);
void (*set_compute_idle)(struct kgd_dev *kgd, bool idle);
}; };
/** /**
...@@ -399,6 +433,10 @@ struct kfd2kgd_calls { ...@@ -399,6 +433,10 @@ struct kfd2kgd_calls {
* @schedule_evict_and_restore_process: Schedules work queue that will prepare * @schedule_evict_and_restore_process: Schedules work queue that will prepare
* for safe eviction of KFD BOs that belong to the specified process. * for safe eviction of KFD BOs that belong to the specified process.
* *
* @pre_reset: Notifies amdkfd that amdgpu is about to reset the gpu
*
* @post_reset: Notifies amdkfd that amdgpu successfully reset the gpu
*
* This structure contains function callback pointers so the kgd driver * This structure contains function callback pointers so the kgd driver
* will notify to the amdkfd about certain status changes. * will notify to the amdkfd about certain status changes.
* *
...@@ -417,6 +455,8 @@ struct kgd2kfd_calls { ...@@ -417,6 +455,8 @@ struct kgd2kfd_calls {
int (*resume_mm)(struct mm_struct *mm); int (*resume_mm)(struct mm_struct *mm);
int (*schedule_evict_and_restore_process)(struct mm_struct *mm, int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
struct dma_fence *fence); struct dma_fence *fence);
int (*pre_reset)(struct kfd_dev *kfd);
int (*post_reset)(struct kfd_dev *kfd);
}; };
int kgd2kfd_init(unsigned interface_version, int kgd2kfd_init(unsigned interface_version,
......
...@@ -76,6 +76,12 @@ struct kfd_ioctl_update_queue_args { ...@@ -76,6 +76,12 @@ struct kfd_ioctl_update_queue_args {
__u32 queue_priority; /* to KFD */ __u32 queue_priority; /* to KFD */
}; };
/*
 * Argument block for AMDKFD_IOC_SET_CU_MASK. Every field is an input
 * to KFD.
 * NOTE(review): presumably queue_id selects the queue, cu_mask_ptr is a
 * user-space address of the mask data and num_cu_mask gives its length;
 * the unit of num_cu_mask (bits vs. words) is not visible here -- confirm
 * against the ioctl handler.
 */
struct kfd_ioctl_set_cu_mask_args {
__u32 queue_id; /* to KFD */
__u32 num_cu_mask; /* to KFD */
__u64 cu_mask_ptr; /* to KFD */
};
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
...@@ -189,6 +195,15 @@ struct kfd_ioctl_dbg_wave_control_args { ...@@ -189,6 +195,15 @@ struct kfd_ioctl_dbg_wave_control_args {
#define KFD_SIGNAL_EVENT_LIMIT 4096 #define KFD_SIGNAL_EVENT_LIMIT 4096
/* For kfd_event_data.hw_exception_data.reset_type. */
#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0
#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1
/* For kfd_event_data.hw_exception_data.reset_cause. */
#define KFD_HW_EXCEPTION_GPU_HANG 0
#define KFD_HW_EXCEPTION_ECC 1
struct kfd_ioctl_create_event_args { struct kfd_ioctl_create_event_args {
__u64 event_page_offset; /* from KFD */ __u64 event_page_offset; /* from KFD */
__u32 event_trigger_data; /* from KFD - signal events only */ __u32 event_trigger_data; /* from KFD - signal events only */
...@@ -219,7 +234,7 @@ struct kfd_memory_exception_failure { ...@@ -219,7 +234,7 @@ struct kfd_memory_exception_failure {
__u32 NotPresent; /* Page not present or supervisor privilege */ __u32 NotPresent; /* Page not present or supervisor privilege */
__u32 ReadOnly; /* Write access to a read-only page */ __u32 ReadOnly; /* Write access to a read-only page */
__u32 NoExecute; /* Execute access to a page marked NX */ __u32 NoExecute; /* Execute access to a page marked NX */
__u32 pad; __u32 imprecise; /* Can't determine the exact fault address */
}; };
/* memory exception data*/ /* memory exception data*/
...@@ -230,10 +245,19 @@ struct kfd_hsa_memory_exception_data { ...@@ -230,10 +245,19 @@ struct kfd_hsa_memory_exception_data {
__u32 pad; __u32 pad;
}; };
/* Event data*/ /* hw exception data */
/*
 * Hardware exception payload; carried as hw_exception_data in the union
 * of struct kfd_event_data.
 *
 * The members use __u32 like every other ioctl structure in this header,
 * so user space needs no <stdint.h> to consume it. Size and layout are
 * the same as with uint32_t.
 */
struct kfd_hsa_hw_exception_data {
	__u32 reset_type;	/* one of the KFD_HW_EXCEPTION_*_RESET values */
	__u32 reset_cause;	/* KFD_HW_EXCEPTION_GPU_HANG or KFD_HW_EXCEPTION_ECC */
	__u32 memory_lost;	/* NOTE(review): presumably non-zero if memory contents were lost -- confirm */
	__u32 gpu_id;		/* NOTE(review): presumably the id of the affected GPU -- confirm */
};
/* Event data */
struct kfd_event_data { struct kfd_event_data {
union { union {
struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
}; /* From KFD */ }; /* From KFD */
__u64 kfd_event_data_ext; /* pointer to an extension structure __u64 kfd_event_data_ext; /* pointer to an extension structure
for future exception types */ for future exception types */
...@@ -448,7 +472,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { ...@@ -448,7 +472,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ #define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
#define AMDKFD_IOC_SET_CU_MASK \
AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
#define AMDKFD_COMMAND_START 0x01 #define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x1A #define AMDKFD_COMMAND_END 0x1B
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment