Commit 97a78e3d authored by Dani Liberman, committed by Oded Gabbay

habanalabs: rename error info structure

In preparation for adding more error types to the error info structure,
rename it (and the hl_device field that holds it) to a more suitable name.
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 04d53cd2
...@@ -826,10 +826,10 @@ static void cs_timedout(struct work_struct *work) ...@@ -826,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
} }
/* Save only the first CS timeout parameters */ /* Save only the first CS timeout parameters */
rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0); rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
if (rc) { if (rc) {
hdev->last_error.cs_timeout.timestamp = ktime_get(); hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
hdev->last_error.cs_timeout.seq = cs->sequence; hdev->captured_err_info.cs_timeout.seq = cs->sequence;
event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT | event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT; HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
......
...@@ -2981,12 +2981,12 @@ struct undefined_opcode_info { ...@@ -2981,12 +2981,12 @@ struct undefined_opcode_info {
}; };
/** /**
* struct last_error_session_info - info about last session errors occurred. * struct hl_error_info - holds information collected during an error.
* @cs_timeout: CS timeout error last information. * @cs_timeout: CS timeout error information.
* @razwi: razwi last information. * @razwi: razwi information.
* @undef_opcode: undefined opcode information * @undef_opcode: undefined opcode information
*/ */
struct last_error_session_info { struct hl_error_info {
struct cs_timeout_info cs_timeout; struct cs_timeout_info cs_timeout;
struct razwi_info razwi; struct razwi_info razwi;
struct undefined_opcode_info undef_opcode; struct undefined_opcode_info undef_opcode;
...@@ -3111,7 +3111,7 @@ struct hl_reset_info { ...@@ -3111,7 +3111,7 @@ struct hl_reset_info {
* @state_dump_specs: constants and dictionaries needed to dump system state. * @state_dump_specs: constants and dictionaries needed to dump system state.
* @multi_cs_completion: array of multi-CS completion. * @multi_cs_completion: array of multi-CS completion.
* @clk_throttling: holds information about current/previous clock throttling events * @clk_throttling: holds information about current/previous clock throttling events
* @last_error: holds information about last session in which CS timeout or razwi error occurred. * @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information. * @reset_info: holds current device reset information.
* @stream_master_qid_arr: pointer to array with QIDs of master streams. * @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_major_version: major version of current loaded preboot. * @fw_major_version: major version of current loaded preboot.
...@@ -3286,7 +3286,7 @@ struct hl_device { ...@@ -3286,7 +3286,7 @@ struct hl_device {
struct multi_cs_completion multi_cs_completion[ struct multi_cs_completion multi_cs_completion[
MULTI_CS_MAX_USER_CTX]; MULTI_CS_MAX_USER_CTX];
struct hl_clk_throttle clk_throttling; struct hl_clk_throttle clk_throttling;
struct last_error_session_info last_error; struct hl_error_info captured_err_info;
struct hl_reset_info reset_info; struct hl_reset_info reset_info;
......
...@@ -211,9 +211,9 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -211,9 +211,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv); hl_debugfs_add_file(hpriv);
atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
atomic_set(&hdev->last_error.razwi.write_enable, 1); atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
hdev->last_error.undef_opcode.write_enable = true; hdev->captured_err_info.undef_opcode.write_enable = true;
hdev->open_counter++; hdev->open_counter++;
hdev->last_successful_open_jif = jiffies; hdev->last_successful_open_jif = jiffies;
......
...@@ -593,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -593,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out)) if ((!max_size) || (!out))
return -EINVAL; return -EINVAL;
info.seq = hdev->last_error.cs_timeout.seq; info.seq = hdev->captured_err_info.cs_timeout.seq;
info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp); info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
} }
...@@ -609,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -609,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out)) if ((!max_size) || (!out))
return -EINVAL; return -EINVAL;
info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp); info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
info.addr = hdev->last_error.razwi.addr; info.addr = hdev->captured_err_info.razwi.addr;
info.engine_id_1 = hdev->last_error.razwi.engine_id_1; info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
info.engine_id_2 = hdev->last_error.razwi.engine_id_2; info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
info.no_engine_id = hdev->last_error.razwi.non_engine_initiator; info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
info.error_type = hdev->last_error.razwi.type; info.error_type = hdev->captured_err_info.razwi.type;
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
} }
...@@ -629,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar ...@@ -629,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
if ((!max_size) || (!out)) if ((!max_size) || (!out))
return -EINVAL; return -EINVAL;
info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp); info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
info.engine_id = hdev->last_error.undef_opcode.engine_id; info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
info.cq_addr = hdev->last_error.undef_opcode.cq_addr; info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
info.cq_size = hdev->last_error.undef_opcode.cq_size; info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
info.stream_id = hdev->last_error.undef_opcode.stream_id; info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len; info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams, memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
sizeof(info.cb_addr_streams)); sizeof(info.cb_addr_streams));
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
......
...@@ -6894,9 +6894,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea ...@@ -6894,9 +6894,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea
stream, cq_ptr, size); stream, cq_ptr, size);
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
hdev->last_error.undef_opcode.cq_addr = cq_ptr; hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
hdev->last_error.undef_opcode.cq_size = size; hdev->captured_err_info.undef_opcode.cq_size = size;
hdev->last_error.undef_opcode.stream_id = stream; hdev->captured_err_info.undef_opcode.stream_id = stream;
} }
} }
...@@ -6962,7 +6962,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, ...@@ -6962,7 +6962,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
} }
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode; struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
u32 arr_idx = undef_opcode->cb_addr_streams_len; u32 arr_idx = undef_opcode->cb_addr_streams_len;
if (arr_idx == 0) { if (arr_idx == 0) {
...@@ -7046,11 +7046,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, ...@@ -7046,11 +7046,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
} }
/* check for undefined opcode */ /* check for undefined opcode */
if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
hdev->last_error.undef_opcode.write_enable) { hdev->captured_err_info.undef_opcode.write_enable) {
memset(&hdev->last_error.undef_opcode, 0, memset(&hdev->captured_err_info.undef_opcode, 0,
sizeof(hdev->last_error.undef_opcode)); sizeof(hdev->captured_err_info.undef_opcode));
hdev->last_error.undef_opcode.write_enable = false; hdev->captured_err_info.undef_opcode.write_enable = false;
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
} }
...@@ -7332,18 +7332,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, ...@@ -7332,18 +7332,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
/* In case it's the first razwi, save its parameters*/ /* In case it's the first razwi, save its parameters*/
rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0); rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
if (rc) { if (rc) {
hdev->last_error.razwi.timestamp = ktime_get(); hdev->captured_err_info.razwi.timestamp = ktime_get();
hdev->last_error.razwi.addr = razwi_addr; hdev->captured_err_info.razwi.addr = razwi_addr;
hdev->last_error.razwi.engine_id_1 = engine_id_1; hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
hdev->last_error.razwi.engine_id_2 = engine_id_2; hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
/* /*
* If first engine id holds non valid value the razwi initiator * If first engine id holds non valid value the razwi initiator
* does not have engine id * does not have engine id
*/ */
hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); hdev->captured_err_info.razwi.non_engine_initiator =
hdev->last_error.razwi.type = razwi_type; (engine_id_1 == U16_MAX);
hdev->captured_err_info.razwi.type = razwi_type;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment