Commit 460696ed authored by Omer Shpigelman's avatar Omer Shpigelman Committed by Oded Gabbay

habanalabs: print event name for fatal and non-RAZWI events

This patch improves the error reporting in case of fatal and non-RAZWI
events such that the event name is printed in addition to the IRQ number.
Signed-off-by: default avatarOmer Shpigelman <oshpigelman@habana.ai>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent a1e537b3
...@@ -4067,6 +4067,47 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) ...@@ -4067,6 +4067,47 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
static const char *_goya_get_event_desc(u16 event_type) static const char *_goya_get_event_desc(u16 event_type)
{ {
switch (event_type) { switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
return "PCIe_if";
case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
return "TPC%d_ecc";
case GOYA_ASYNC_EVENT_ID_MME_ECC:
return "MME_ecc";
case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
return "MME_ecc_ext";
case GOYA_ASYNC_EVENT_ID_MMU_ECC:
return "MMU_ecc";
case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
return "DMA_macro";
case GOYA_ASYNC_EVENT_ID_DMA_ECC:
return "DMA_ecc";
case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
return "CPU_if_ecc";
case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
return "PSOC_mem";
case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
return "PSOC_coresight";
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
return "SRAM%d";
case GOYA_ASYNC_EVENT_ID_GIC500:
return "GIC500";
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
return "PLL%d";
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
return "AXI_ecc";
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
return "L2_ram_ecc";
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
return "PSOC_gpio_05_sw_reset";
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
return "PSOC_gpio_10_vrhot_icrit";
case GOYA_ASYNC_EVENT_ID_PCIE_DEC: case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
return "PCIe_dec"; return "PCIe_dec";
case GOYA_ASYNC_EVENT_ID_TPC0_DEC: case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
...@@ -4109,6 +4150,17 @@ static const char *_goya_get_event_desc(u16 event_type) ...@@ -4109,6 +4150,17 @@ static const char *_goya_get_event_desc(u16 event_type)
return "DMA%d_qm"; return "DMA%d_qm";
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
return "DMA%d_ch"; return "DMA%d_ch";
case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
return "TPC%d_bmon_spmu";
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
return "DMA_bm_ch%d";
default: default:
return "N/A"; return "N/A";
} }
...@@ -4119,6 +4171,25 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) ...@@ -4119,6 +4171,25 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
u8 index; u8 index;
switch (event_type) { switch (event_type) {
case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_TPC0_DEC: case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
case GOYA_ASYNC_EVENT_ID_TPC1_DEC: case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
case GOYA_ASYNC_EVENT_ID_TPC2_DEC: case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
...@@ -4157,6 +4228,21 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) ...@@ -4157,6 +4228,21 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH; index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
snprintf(desc, size, _goya_get_event_desc(event_type), index); snprintf(desc, size, _goya_get_event_desc(event_type), index);
break; break;
case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
default: default:
snprintf(desc, size, _goya_get_event_desc(event_type)); snprintf(desc, size, _goya_get_event_desc(event_type));
break; break;
...@@ -4207,7 +4293,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev) ...@@ -4207,7 +4293,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
} }
} }
static void goya_print_irq_info(struct hl_device *hdev, u16 event_type) static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
bool razwi)
{ {
char desc[20] = ""; char desc[20] = "";
...@@ -4215,8 +4302,10 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type) ...@@ -4215,8 +4302,10 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc); event_type, desc);
if (razwi) {
goya_print_razwi_info(hdev); goya_print_razwi_info(hdev);
goya_print_mmu_error_info(hdev); goya_print_mmu_error_info(hdev);
}
} }
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
...@@ -4320,19 +4409,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) ...@@ -4320,19 +4409,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT: case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
case GOYA_ASYNC_EVENT_ID_GIC500: case GOYA_ASYNC_EVENT_ID_GIC500:
case GOYA_ASYNC_EVENT_ID_PLL0: case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_PLL1:
case GOYA_ASYNC_EVENT_ID_PLL3:
case GOYA_ASYNC_EVENT_ID_PLL4:
case GOYA_ASYNC_EVENT_ID_PLL5:
case GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC: case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT: case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
dev_err(hdev->dev, goya_print_irq_info(hdev, event_type, false);
"Received H/W interrupt %d, reset the chip\n",
event_type);
hl_device_reset(hdev, true, false); hl_device_reset(hdev, true, false);
break; break;
...@@ -4363,7 +4445,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) ...@@ -4363,7 +4445,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_MME_CMDQ: case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
goya_print_irq_info(hdev, event_type); goya_print_irq_info(hdev, event_type, true);
goya_unmask_irq(hdev, event_type); goya_unmask_irq(hdev, event_type);
break; break;
...@@ -4375,12 +4457,9 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) ...@@ -4375,12 +4457,9 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0: case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1: goya_print_irq_info(hdev, event_type, false);
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2: goya_unmask_irq(hdev, event_type);
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type);
break; break;
default: default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment