Commit c811f7bc authored by Tomer Tayar, committed by Oded Gabbay

habanalabs: Add a printout with the name of a busy engine

Print the name of a busy engine when checking if a device is idle.
The change is intended mainly to help users pinpoint problems in their
topology's recipe.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent e1266004
...@@ -2783,6 +2783,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) ...@@ -2783,6 +2783,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
dma_addr_t fence_dma_addr; dma_addr_t fence_dma_addr;
struct hl_cb *cb; struct hl_cb *cb;
u32 tmp, timeout; u32 tmp, timeout;
char buf[16] = {};
int rc; int rc;
if (hdev->pldm) if (hdev->pldm)
...@@ -2790,9 +2791,10 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) ...@@ -2790,9 +2791,10 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else else
timeout = HL_DEVICE_TIMEOUT_USEC; timeout = HL_DEVICE_TIMEOUT_USEC;
if (!hdev->asic_funcs->is_device_idle(hdev)) { if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
dev_err_ratelimited(hdev->dev, dev_err_ratelimited(hdev->dev,
"Can't send KMD job on QMAN0 if device is not idle\n"); "Can't send KMD job on QMAN0 because %s is busy\n",
buf);
return -EBUSY; return -EBUSY;
} }
...@@ -4691,7 +4693,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev) ...@@ -4691,7 +4693,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
} }
static bool goya_is_device_idle(struct hl_device *hdev) static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
{ {
u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg; u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
int i; int i;
...@@ -4703,7 +4705,7 @@ static bool goya_is_device_idle(struct hl_device *hdev) ...@@ -4703,7 +4705,7 @@ static bool goya_is_device_idle(struct hl_device *hdev)
if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) != if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
DMA_QM_IDLE_MASK) DMA_QM_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
} }
offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0; offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
...@@ -4715,31 +4717,31 @@ static bool goya_is_device_idle(struct hl_device *hdev) ...@@ -4715,31 +4717,31 @@ static bool goya_is_device_idle(struct hl_device *hdev)
if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) != if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
TPC_QM_IDLE_MASK) TPC_QM_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) != if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
TPC_CMDQ_IDLE_MASK) TPC_CMDQ_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) != if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
TPC_CFG_IDLE_MASK) TPC_CFG_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
} }
if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) != if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
MME_QM_IDLE_MASK) MME_QM_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "MME_QM");
if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) != if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
MME_CMDQ_IDLE_MASK) MME_CMDQ_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "MME_CMDQ");
if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) != if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
MME_ARCH_IDLE_MASK) MME_ARCH_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "MME_ARCH");
if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK) if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
return false; return HL_ENG_BUSY(buf, size, "MME");
return true; return true;
} }
......
...@@ -555,7 +555,7 @@ struct hl_asic_funcs { ...@@ -555,7 +555,7 @@ struct hl_asic_funcs {
int (*send_heartbeat)(struct hl_device *hdev); int (*send_heartbeat)(struct hl_device *hdev);
void (*enable_clock_gating)(struct hl_device *hdev); void (*enable_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev);
bool (*is_device_idle)(struct hl_device *hdev); bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
int (*soft_reset_late_init)(struct hl_device *hdev); int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev);
...@@ -1010,6 +1010,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); ...@@ -1010,6 +1010,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
(val) << REG_FIELD_SHIFT(reg, field)) (val) << REG_FIELD_SHIFT(reg, field))
/*
 * HL_ENG_BUSY() - record the name of a busy engine and evaluate to false.
 * @buf: destination for the engine name, or NULL when the caller does not
 *       want the name.
 * @size: capacity of @buf in bytes, passed straight through to snprintf().
 * @fmt: printf-style format string, followed by its optional arguments.
 *
 * Meant to be used directly in a return statement of an idle check, e.g.
 * "return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);". When @buf is NULL nothing
 * is written. The whole expression always evaluates to false.
 *
 * @buf is latched into a local so that it is evaluated exactly once; an
 * argument with side effects therefore cannot make the NULL check and the
 * snprintf() call see different pointers.
 */
#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
		char *hl_eng_busy_buf_ = (buf); \
		if (hl_eng_busy_buf_) \
			snprintf(hl_eng_busy_buf_, (size), fmt, ##__VA_ARGS__); \
		false; \
	})
struct hwmon_chip_info; struct hwmon_chip_info;
/** /**
......
...@@ -93,7 +93,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) ...@@ -93,7 +93,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out)) if ((!max_size) || (!out))
return -EINVAL; return -EINVAL;
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev); hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
return copy_to_user(out, &hw_idle, return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment