Commit 80c74918 authored by Asad Kamal, committed by Alex Deucher

drm/amdkfd: Replace pr_err with dev_err

Replace pr_err with dev_err so that KFD queue errors report the
bus ID of the failing device.
Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent ee78ef04
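For context, dev_err() differs from pr_err() only in that it takes a struct device * and prefixes each message with the driver name and device name, which for a PCI GPU is its bus ID. Below is a minimal sketch of the pattern this patch applies throughout the driver: the dqm->dev->adev->dev pointer chain mirrors the diff, but the surrounding helper function and the sample bus ID are illustrative assumptions, and compiling it would require the amdkfd driver headers.

#include <linux/device.h>	/* dev_err() */
#include <linux/printk.h>	/* pr_err() */

/* Hypothetical helper showing the before/after logging behavior. */
static int report_vmid_exhaustion(struct device_queue_manager *dqm)
{
	/* The patch hoists the device pointer once per function. */
	struct device *dev = dqm->dev->adev->dev;

	/* Before: logs "no more vmid to allocate" with no hint of
	 * which GPU failed on a multi-GPU system.
	 */
	pr_err("no more vmid to allocate\n");

	/* After: logs "amdgpu 0000:03:00.0: no more vmid to allocate",
	 * where "0000:03:00.0" is an example PCI bus ID.
	 */
	dev_err(dev, "no more vmid to allocate\n");

	return -ENOSPC;
}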
@@ -232,7 +232,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 	queue_type = convert_to_mes_queue_type(q->properties.type);
 	if (queue_type < 0) {
-		pr_err("Queue type not supported with MES, queue:%d\n",
+		dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
 			q->properties.type);
 		return -EINVAL;
 	}
@@ -244,9 +244,9 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
 	amdgpu_mes_unlock(&adev->mes);
 	if (r) {
-		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+		dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
 			q->properties.doorbell_off);
-		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
 		kfd_hws_hang(dqm);
 	}
@@ -272,9 +272,9 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 	amdgpu_mes_unlock(&adev->mes);
 	if (r) {
-		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+		dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
 			q->properties.doorbell_off);
-		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
 		kfd_hws_hang(dqm);
 	}
@@ -284,6 +284,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 static int remove_all_queues_mes(struct device_queue_manager *dqm)
 {
 	struct device_process_node *cur;
+	struct device *dev = dqm->dev->adev->dev;
 	struct qcm_process_device *qpd;
 	struct queue *q;
 	int retval = 0;
@@ -294,7 +295,7 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
 			if (q->properties.is_active) {
 				retval = remove_queue_mes(dqm, q, qpd);
 				if (retval) {
-					pr_err("%s: Failed to remove queue %d for dev %d",
+					dev_err(dev, "%s: Failed to remove queue %d for dev %d",
 						__func__,
 						q->properties.queue_id,
 						dqm->dev->id);
@@ -443,6 +444,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int allocated_vmid = -1, i;

 	for (i = dqm->dev->vm_info.first_vmid_kfd;
@@ -454,7 +456,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	}

 	if (allocated_vmid < 0) {
-		pr_err("no more vmid to allocate\n");
+		dev_err(dev, "no more vmid to allocate\n");
 		return -ENOSPC;
 	}
@@ -510,10 +512,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
 {
+	struct device *dev = dqm->dev->adev->dev;
+
 	/* On GFX v7, CP doesn't flush TC at dequeue */
 	if (q->device->adev->asic_type == CHIP_HAWAII)
 		if (flush_texture_cache_nocpsch(q->device, qpd))
-			pr_err("Failed to flush TC\n");
+			dev_err(dev, "Failed to flush TC\n");

 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
@@ -708,7 +712,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
 	pr_debug("Killing all process wavefronts\n");

 	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
-		pr_err("no vmid pasid mapping supported \n");
+		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
 		return -EOPNOTSUPP;
 	}
@@ -729,7 +733,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
 	}

 	if (vmid > last_vmid_to_scan) {
-		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
+		dev_err(dev->adev->dev, "Didn't find vmid for pasid 0x%x\n", p->pasid);
 		return -EFAULT;
 	}
@@ -821,6 +825,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 {
 	int retval;
 	uint64_t sdma_val = 0;
+	struct device *dev = dqm->dev->adev->dev;
 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
 	struct mqd_manager *mqd_mgr =
 		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
@@ -831,7 +836,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
 							&sdma_val);
 		if (retval)
-			pr_err("Failed to read SDMA queue counter for queue: %d\n",
+			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
 				q->properties.queue_id);
 	}
@@ -850,6 +855,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 			struct mqd_update_info *minfo)
 {
 	int retval = 0;
+	struct device *dev = dqm->dev->adev->dev;
 	struct mqd_manager *mqd_mgr;
 	struct kfd_process_device *pdd;
 	bool prev_active = false;
@@ -875,7 +881,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 			retval = remove_queue_mes(dqm, q, &pdd->qpd);
 			if (retval) {
-				pr_err("unmap queue failed\n");
+				dev_err(dev, "unmap queue failed\n");
 				goto out_unlock;
 			}
 		} else if (prev_active &&
@@ -894,7 +900,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 					KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 		if (retval) {
-			pr_err("destroy mqd failed\n");
+			dev_err(dev, "destroy mqd failed\n");
 			goto out_unlock;
 		}
 	}
@@ -1088,6 +1094,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	struct queue *q;
+	struct device *dev = dqm->dev->adev->dev;
 	struct kfd_process_device *pdd;
 	int retval = 0;
@@ -1121,7 +1128,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 		if (dqm->dev->kfd->shared_resources.enable_mes) {
 			retval = remove_queue_mes(dqm, q, qpd);
 			if (retval) {
-				pr_err("Failed to evict queue %d\n",
+				dev_err(dev, "Failed to evict queue %d\n",
 					q->properties.queue_id);
 				goto out;
 			}
@@ -1225,6 +1232,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	struct queue *q;
+	struct device *dev = dqm->dev->adev->dev;
 	struct kfd_process_device *pdd;
 	uint64_t eviction_duration;
 	int retval = 0;
@@ -1265,7 +1273,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 		if (dqm->dev->kfd->shared_resources.enable_mes) {
 			retval = add_queue_mes(dqm, q, qpd);
 			if (retval) {
-				pr_err("Failed to restore queue %d\n",
+				dev_err(dev, "Failed to restore queue %d\n",
 					q->properties.queue_id);
 				goto out;
 			}
@@ -1474,18 +1482,19 @@ static void pre_reset(struct device_queue_manager *dqm)
 static int allocate_sdma_queue(struct device_queue_manager *dqm,
 				struct queue *q, const uint32_t *restore_sdma_id)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int bit;

 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
-			pr_err("No more SDMA queue to allocate\n");
+			dev_err(dev, "No more SDMA queue to allocate\n");
 			return -ENOMEM;
 		}

 		if (restore_sdma_id) {
 			/* Re-use existing sdma_id */
 			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
-				pr_err("SDMA queue already in use\n");
+				dev_err(dev, "SDMA queue already in use\n");
 				return -EBUSY;
 			}
 			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
@@ -1504,13 +1513,13 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
 			kfd_get_num_sdma_engines(dqm->dev);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
-			pr_err("No more XGMI SDMA queue to allocate\n");
+			dev_err(dev, "No more XGMI SDMA queue to allocate\n");
 			return -ENOMEM;
 		}
 		if (restore_sdma_id) {
 			/* Re-use existing sdma_id */
 			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
-				pr_err("SDMA queue already in use\n");
+				dev_err(dev, "SDMA queue already in use\n");
 				return -EBUSY;
 			}
 			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
@@ -1562,6 +1571,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 {
 	int i, mec;
 	struct scheduling_resources res;
+	struct device *dev = dqm->dev->adev->dev;

 	res.vmid_mask = dqm->dev->compute_vmid_bitmap;
@@ -1582,7 +1592,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 			 * definition of res.queue_mask needs updating
 			 */
 			if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
-				pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+				dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
 				break;
 			}
@@ -1625,6 +1635,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 static int start_cpsch(struct device_queue_manager *dqm)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int retval;

 	retval = 0;
@@ -1671,7 +1682,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
 		retval = pm_update_grace_period(&dqm->packet_mgr,
 						grace_period);
 		if (retval)
-			pr_err("Setting grace timeout failed\n");
+			dev_err(dev, "Setting grace timeout failed\n");
 		else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
 			/* Update dqm->wait_times maintained in software */
 			dqm->dev->kfd2kgd->build_grace_period_packet_info(
@@ -1881,15 +1892,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	return retval;
 }

-int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
 				uint64_t fence_value,
 				unsigned int timeout_ms)
 {
 	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
+	struct device *dev = dqm->dev->adev->dev;
+	uint64_t *fence_addr = dqm->fence_addr;

 	while (*fence_addr != fence_value) {
 		if (time_after(jiffies, end_jiffies)) {
-			pr_err("qcm fence wait loop timeout expired\n");
+			dev_err(dev, "qcm fence wait loop timeout expired\n");
 			/* In HWS case, this is used to halt the driver thread
 			 * in order not to mess up CP states before doing
 			 * scandumps for FW debugging.
@@ -1908,6 +1921,7 @@ int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
 /* dqm->lock mutex has to be locked before calling this function */
 static int map_queues_cpsch(struct device_queue_manager *dqm)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int retval;

 	if (!dqm->sched_running)
@@ -1920,7 +1934,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
 	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
 	pr_debug("%s sent runlist\n", __func__);
 	if (retval) {
-		pr_err("failed to execute runlist\n");
+		dev_err(dev, "failed to execute runlist\n");
 		return retval;
 	}
 	dqm->active_runlist = true;
@@ -1935,8 +1949,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 				uint32_t grace_period,
 				bool reset)
 {
-	int retval = 0;
+	struct device *dev = dqm->dev->adev->dev;
 	struct mqd_manager *mqd_mgr;
+	int retval = 0;

 	if (!dqm->sched_running)
 		return 0;
@@ -1959,10 +1974,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
 				KFD_FENCE_COMPLETED);
 	/* should be timed out */
-	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+	retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
 				queue_preemption_timeout_ms);
 	if (retval) {
-		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+		dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
 		kfd_hws_hang(dqm);
 		return retval;
 	}
@@ -1977,7 +1992,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	 */
 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
 	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
-		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
+		dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
 		while (halt_if_hws_hang)
 			schedule();
 		return -ETIME;
@@ -1987,7 +2002,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
 		if (pm_update_grace_period(&dqm->packet_mgr,
 					USE_DEFAULT_GRACE_PERIOD))
-			pr_err("Failed to reset grace period\n");
+			dev_err(dev, "Failed to reset grace period\n");
 	}

 	pm_release_ib(&dqm->packet_mgr);
@@ -2061,6 +2076,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	struct mqd_manager *mqd_mgr;
 	uint64_t sdma_val = 0;
 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+	struct device *dev = dqm->dev->adev->dev;

 	/* Get the SDMA queue stats */
 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -2068,7 +2084,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
 							&sdma_val);
 		if (retval)
-			pr_err("Failed to read SDMA queue counter for queue: %d\n",
+			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
 				q->properties.queue_id);
 	}
@@ -2349,6 +2365,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 {
 	int retval;
 	struct queue *q;
+	struct device *dev = dqm->dev->adev->dev;
 	struct kernel_queue *kq, *kq_next;
 	struct mqd_manager *mqd_mgr;
 	struct device_process_node *cur, *next_dpn;
@@ -2382,7 +2399,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		if (dqm->dev->kfd->shared_resources.enable_mes) {
 			retval = remove_queue_mes(dqm, q, qpd);
 			if (retval)
-				pr_err("Failed to remove queue %d\n",
+				dev_err(dev, "Failed to remove queue %d\n",
 					q->properties.queue_id);
 		}
 	}
@@ -2437,12 +2454,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 static int init_mqd_managers(struct device_queue_manager *dqm)
 {
 	int i, j;
+	struct device *dev = dqm->dev->adev->dev;
 	struct mqd_manager *mqd_mgr;

 	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
 		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
 		if (!mqd_mgr) {
-			pr_err("mqd manager [%d] initialization failed\n", i);
+			dev_err(dev, "mqd manager [%d] initialization failed\n", i);
 			goto out_free;
 		}
 		dqm->mqd_mgrs[i] = mqd_mgr;
@@ -2552,7 +2570,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
 		break;
 	default:
-		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
+		dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
 		goto out_free;
 	}
@@ -2590,7 +2608,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
 		goto out_free;

 	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
-		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
+		dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
 		goto out_free;
 	}
@@ -2649,17 +2667,18 @@ int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd)
 {
 	int r;
+	struct device *dev = dqm->dev->adev->dev;
 	int updated_vmid_mask;

 	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
 		return -EINVAL;
 	}

 	dqm_lock(dqm);

 	if (dqm->trap_debug_vmid != 0) {
-		pr_err("Trap debug id already reserved\n");
+		dev_err(dev, "Trap debug id already reserved\n");
 		r = -EBUSY;
 		goto out_unlock;
 	}
@@ -2695,19 +2714,20 @@ int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
 int release_debug_trap_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int r;
 	int updated_vmid_mask;
 	uint32_t trap_debug_vmid;

 	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
 		return -EINVAL;
 	}

 	dqm_lock(dqm);
 	trap_debug_vmid = dqm->trap_debug_vmid;
 	if (dqm->trap_debug_vmid == 0) {
-		pr_err("Trap debug id is not reserved\n");
+		dev_err(dev, "Trap debug id is not reserved\n");
 		r = -EINVAL;
 		goto out_unlock;
 	}
@@ -2844,6 +2864,7 @@ int resume_queues(struct kfd_process *p,
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
 		struct device_queue_manager *dqm = pdd->dev->dqm;
+		struct device *dev = dqm->dev->adev->dev;
 		struct qcm_process_device *qpd = &pdd->qpd;
 		struct queue *q;
 		int r, per_device_resumed = 0;
@@ -2894,7 +2915,7 @@ int resume_queues(struct kfd_process *p,
 							0,
 							USE_DEFAULT_GRACE_PERIOD);
 		if (r) {
-			pr_err("Failed to resume process queues\n");
+			dev_err(dev, "Failed to resume process queues\n");
 			if (queue_ids) {
 				list_for_each_entry(q, &qpd->queues_list, list) {
 					int q_idx = q_array_get_index(
@@ -2946,6 +2967,7 @@ int suspend_queues(struct kfd_process *p,
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
 		struct device_queue_manager *dqm = pdd->dev->dqm;
+		struct device *dev = dqm->dev->adev->dev;
 		struct qcm_process_device *qpd = &pdd->qpd;
 		struct queue *q;
 		int r, per_device_suspended = 0;
@@ -2994,7 +3016,7 @@ int suspend_queues(struct kfd_process *p,
 							grace_period);
 		if (r)
-			pr_err("Failed to suspend process queues.\n");
+			dev_err(dev, "Failed to suspend process queues.\n");
 		else
 			total_suspended += per_device_suspended;
@@ -3081,10 +3103,11 @@ void set_queue_snapshot_entry(struct queue *q,
 int debug_lock_and_unmap(struct device_queue_manager *dqm)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int r;

 	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
 		return -EINVAL;
 	}
@@ -3102,10 +3125,11 @@ int debug_lock_and_unmap(struct device_queue_manager *dqm)
 int debug_map_and_unlock(struct device_queue_manager *dqm)
 {
+	struct device *dev = dqm->dev->adev->dev;
 	int r;

 	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
 		return -EINVAL;
 	}
...
@@ -1343,7 +1343,7 @@ int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
 				int *num_qss_entries,
 				uint32_t *entry_size);

-int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
 				uint64_t fence_value,
 				unsigned int timeout_ms);
...