Commit abc3b5d2, authored by Yang Wang, committed by Alex Deucher

drm/amdgpu: add new aca_smu_type support

Add a new type to distinguish between the ACA error type and the SMU MCA type.

e.g.:
ACA_ERROR_TYPE_DEFERRED does not match any valid SMU MCA bank
channel, so add the new type 'aca_smu_type' to distinguish the ACA error
type from the SMU MCA type.
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a43dbeab
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype} #define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data); typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
struct aca_banks { struct aca_banks {
int nr_banks; int nr_banks;
...@@ -86,7 +86,7 @@ static void aca_banks_release(struct aca_banks *banks) ...@@ -86,7 +86,7 @@ static void aca_banks_release(struct aca_banks *banks)
} }
} }
static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count) static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
{ {
struct amdgpu_aca *aca = &adev->aca; struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
...@@ -127,7 +127,7 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st ...@@ -127,7 +127,7 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
} }
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type, static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
int start, int count, int start, int count,
struct aca_banks *banks) struct aca_banks *banks)
{ {
...@@ -143,13 +143,12 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro ...@@ -143,13 +143,12 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
return -EOPNOTSUPP; return -EOPNOTSUPP;
switch (type) { switch (type) {
case ACA_ERROR_TYPE_UE: case ACA_SMU_TYPE_UE:
max_count = smu_funcs->max_ue_bank_count; max_count = smu_funcs->max_ue_bank_count;
break; break;
case ACA_ERROR_TYPE_CE: case ACA_SMU_TYPE_CE:
max_count = smu_funcs->max_ce_bank_count; max_count = smu_funcs->max_ce_bank_count;
break; break;
case ACA_ERROR_TYPE_DEFERRED:
default: default:
return -EINVAL; return -EINVAL;
} }
...@@ -164,6 +163,8 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro ...@@ -164,6 +163,8 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
if (ret) if (ret)
return ret; return ret;
bank.type = type;
aca_smu_bank_dump(adev, i, count, &bank); aca_smu_bank_dump(adev, i, count, &bank);
ret = aca_banks_add_bank(banks, &bank); ret = aca_banks_add_bank(banks, &bank);
...@@ -195,7 +196,7 @@ static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type t ...@@ -195,7 +196,7 @@ static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type t
return hwip->hwid == hwid && hwip->mcatype == mcatype; return hwip->hwid == hwid && hwip->mcatype == mcatype;
} }
static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type) static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
{ {
const struct aca_bank_ops *bank_ops = handle->bank_ops; const struct aca_bank_ops *bank_ops = handle->bank_ops;
...@@ -297,7 +298,7 @@ static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type, ...@@ -297,7 +298,7 @@ static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type,
} }
static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, struct aca_bank_report *report) enum aca_smu_type type, struct aca_bank_report *report)
{ {
const struct aca_bank_ops *bank_ops = handle->bank_ops; const struct aca_bank_ops *bank_ops = handle->bank_ops;
...@@ -313,12 +314,24 @@ static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank * ...@@ -313,12 +314,24 @@ static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *
} }
static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data) enum aca_smu_type smu_type, void *data)
{ {
struct aca_bank_report report; struct aca_bank_report report;
enum aca_error_type type;
int ret; int ret;
ret = aca_generate_bank_report(handle, bank, type, &report); switch (smu_type) {
case ACA_SMU_TYPE_UE:
type = ACA_ERROR_TYPE_UE;
break;
case ACA_SMU_TYPE_CE:
type = ACA_ERROR_TYPE_CE;
break;
default:
return -EINVAL;
}
ret = aca_generate_bank_report(handle, bank, smu_type, &report);
if (ret) if (ret)
return ret; return ret;
...@@ -333,7 +346,7 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank ...@@ -333,7 +346,7 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank
} }
static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank, static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
enum aca_error_type type, bank_handler_t handler, void *data) enum aca_smu_type type, bank_handler_t handler, void *data)
{ {
struct aca_handle *handle; struct aca_handle *handle;
int ret; int ret;
...@@ -354,7 +367,7 @@ static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *ba ...@@ -354,7 +367,7 @@ static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *ba
} }
static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks, static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
enum aca_error_type type, bank_handler_t handler, void *data) enum aca_smu_type type, bank_handler_t handler, void *data)
{ {
struct aca_bank_node *node; struct aca_bank_node *node;
struct aca_bank *bank; struct aca_bank *bank;
...@@ -378,7 +391,7 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks * ...@@ -378,7 +391,7 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *
return 0; return 0;
} }
static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type, static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
bank_handler_t handler, void *data) bank_handler_t handler, void *data)
{ {
struct amdgpu_aca *aca = &adev->aca; struct amdgpu_aca *aca = &adev->aca;
...@@ -389,10 +402,6 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type ...@@ -389,10 +402,6 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type
if (list_empty(&aca->mgr.list)) if (list_empty(&aca->mgr.list))
return 0; return 0;
/* NOTE: pmfw is only support UE and CE */
if (type == ACA_ERROR_TYPE_DEFERRED)
type = ACA_ERROR_TYPE_CE;
ret = aca_smu_get_valid_aca_count(adev, type, &count); ret = aca_smu_get_valid_aca_count(adev, type, &count);
if (ret) if (ret)
return ret; return ret;
...@@ -479,10 +488,22 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type ...@@ -479,10 +488,22 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
struct ras_err_data *err_data) struct ras_err_data *err_data)
{ {
enum aca_smu_type smu_type;
int ret; int ret;
switch (type) {
case ACA_ERROR_TYPE_UE:
smu_type = ACA_SMU_TYPE_UE;
break;
case ACA_ERROR_TYPE_CE:
smu_type = ACA_SMU_TYPE_CE;
break;
default:
return -EINVAL;
}
/* udpate aca bank to aca source error_cache first */ /* udpate aca bank to aca source error_cache first */
ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL); ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL);
if (ret) if (ret)
return ret; return ret;
...@@ -784,7 +805,7 @@ static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val) ...@@ -784,7 +805,7 @@ static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
return 0; return 0;
} }
static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx) static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
{ {
struct aca_bank_info info; struct aca_bank_info info;
int i, ret; int i, ret;
...@@ -793,7 +814,7 @@ static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_e ...@@ -793,7 +814,7 @@ static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_e
if (ret) if (ret)
return; return;
seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE"); seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE");
seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
idx, info.socket_id, info.die_id, info.hwid, info.mcatype); idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
...@@ -807,7 +828,7 @@ struct aca_dump_context { ...@@ -807,7 +828,7 @@ struct aca_dump_context {
}; };
static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank, static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data) enum aca_smu_type type, void *data)
{ {
struct aca_dump_context *ctx = (struct aca_dump_context *)data; struct aca_dump_context *ctx = (struct aca_dump_context *)data;
...@@ -816,7 +837,7 @@ static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *ban ...@@ -816,7 +837,7 @@ static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *ban
return handler_aca_log_bank_error(handle, bank, type, NULL); return handler_aca_log_bank_error(handle, bank, type, NULL);
} }
static int aca_dump_show(struct seq_file *m, enum aca_error_type type) static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
struct aca_dump_context context = { struct aca_dump_context context = {
...@@ -829,7 +850,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_error_type type) ...@@ -829,7 +850,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
static int aca_dump_ce_show(struct seq_file *m, void *unused) static int aca_dump_ce_show(struct seq_file *m, void *unused)
{ {
return aca_dump_show(m, ACA_ERROR_TYPE_CE); return aca_dump_show(m, ACA_SMU_TYPE_CE);
} }
static int aca_dump_ce_open(struct inode *inode, struct file *file) static int aca_dump_ce_open(struct inode *inode, struct file *file)
...@@ -847,7 +868,7 @@ static const struct file_operations aca_ce_dump_debug_fops = { ...@@ -847,7 +868,7 @@ static const struct file_operations aca_ce_dump_debug_fops = {
static int aca_dump_ue_show(struct seq_file *m, void *unused) static int aca_dump_ue_show(struct seq_file *m, void *unused)
{ {
return aca_dump_show(m, ACA_ERROR_TYPE_UE); return aca_dump_show(m, ACA_SMU_TYPE_UE);
} }
static int aca_dump_ue_open(struct inode *inode, struct file *file) static int aca_dump_ue_open(struct inode *inode, struct file *file)
......
...@@ -99,7 +99,14 @@ enum aca_error_type { ...@@ -99,7 +99,14 @@ enum aca_error_type {
ACA_ERROR_TYPE_COUNT ACA_ERROR_TYPE_COUNT
}; };
/*
 * Bank type passed to the SMU callbacks (get_valid_aca_count /
 * get_valid_aca_bank) and to the per-IP bank ops. Kept separate from
 * enum aca_error_type because ACA_ERROR_TYPE_DEFERRED has no matching
 * SMU MCA bank channel; only UE and CE exist on the SMU side.
 */
enum aca_smu_type {
ACA_SMU_TYPE_UE = 0, /* selects the UE bank channel (max_ue_bank_count) */
ACA_SMU_TYPE_CE, /* selects the CE bank channel (max_ce_bank_count) */
ACA_SMU_TYPE_COUNT, /* number of SMU types listed above */
};
struct aca_bank { struct aca_bank {
enum aca_smu_type type;
u64 regs[ACA_MAX_REGS_COUNT]; u64 regs[ACA_MAX_REGS_COUNT];
}; };
...@@ -157,9 +164,9 @@ struct aca_handle { ...@@ -157,9 +164,9 @@ struct aca_handle {
}; };
struct aca_bank_ops { struct aca_bank_ops {
int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
struct aca_bank_report *report, void *data); struct aca_bank_report *report, void *data);
bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
void *data); void *data);
}; };
...@@ -167,8 +174,8 @@ struct aca_smu_funcs { ...@@ -167,8 +174,8 @@ struct aca_smu_funcs {
int max_ue_bank_count; int max_ue_bank_count;
int max_ce_bank_count; int max_ce_bank_count;
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count); int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank); int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
}; };
struct amdgpu_aca { struct amdgpu_aca {
......
...@@ -1035,12 +1035,12 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) ...@@ -1035,12 +1035,12 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return 0; return 0;
} }
static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
struct aca_bank_report *report, void *data) struct aca_bank_report *report, void *data)
{ {
struct amdgpu_device *adev = handle->adev; struct amdgpu_device *adev = handle->adev;
const char *error_str; const char *error_str;
u64 status; u64 status, count;
int ret, ext_error_code; int ret, ext_error_code;
ret = aca_bank_info_decode(bank, &report->info); ret = aca_bank_info_decode(bank, &report->info);
...@@ -1055,9 +1055,17 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc ...@@ -1055,9 +1055,17 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc
if (error_str) if (error_str)
dev_info(adev->dev, "%s detected\n", error_str); dev_info(adev->dev, "%s detected\n", error_str);
if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) || count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
(type == ACA_ERROR_TYPE_CE && ext_error_code == 6)) switch (type) {
report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); case ACA_SMU_TYPE_UE:
report->count[ACA_ERROR_TYPE_UE] = ext_error_code == 0 ? count : 0ULL;
break;
case ACA_SMU_TYPE_CE:
report->count[ACA_ERROR_TYPE_CE] = ext_error_code == 6 ? count : 0ULL;
break;
default:
return -EINVAL;
}
return 0; return 0;
} }
......
...@@ -681,37 +681,40 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { ...@@ -681,37 +681,40 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
}; };
static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
struct aca_bank *bank, enum aca_error_type type, struct aca_bank *bank, enum aca_smu_type type,
struct aca_bank_report *report, void *data) struct aca_bank_report *report, void *data)
{ {
u64 status, misc0; u64 misc0;
u32 instlo; u32 instlo;
int ret; int ret;
status = bank->regs[ACA_REG_IDX_STATUS]; ret = aca_bank_info_decode(bank, &report->info);
if ((type == ACA_ERROR_TYPE_UE && if (ret)
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || return ret;
(type == ACA_ERROR_TYPE_CE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
ret = aca_bank_info_decode(bank, &report->info); /* NOTE: overwrite info.die_id with xcd id for gfx */
if (ret) instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
return ret; instlo &= GENMASK(31, 1);
report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
/* NOTE: overwrite info.die_id with xcd id for gfx */ misc0 = bank->regs[ACA_REG_IDX_MISC0];
instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
instlo &= GENMASK(31, 1);
report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) {
report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); case ACA_SMU_TYPE_UE:
report->count[ACA_ERROR_TYPE_UE] = 1ULL;
break;
case ACA_SMU_TYPE_CE:
report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0);
break;
default:
return -EINVAL;
} }
return 0; return 0;
} }
static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data) enum aca_smu_type type, void *data)
{ {
u32 instlo; u32 instlo;
......
...@@ -707,24 +707,27 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { ...@@ -707,24 +707,27 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
}; };
static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle,
struct aca_bank *bank, enum aca_error_type type, struct aca_bank *bank, enum aca_smu_type type,
struct aca_bank_report *report, void *data) struct aca_bank_report *report, void *data)
{ {
u64 status, misc0; u64 misc0;
int ret; int ret;
status = bank->regs[ACA_REG_IDX_STATUS]; ret = aca_bank_info_decode(bank, &report->info);
if ((type == ACA_ERROR_TYPE_UE && if (ret)
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || return ret;
(type == ACA_ERROR_TYPE_CE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { misc0 = bank->regs[ACA_REG_IDX_MISC0];
ret = aca_bank_info_decode(bank, &report->info); switch (type) {
if (ret) case ACA_SMU_TYPE_UE:
return ret; report->count[ACA_ERROR_TYPE_UE] = 1ULL;
break;
misc0 = bank->regs[ACA_REG_IDX_MISC0]; case ACA_SMU_TYPE_CE:
report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0);
break;
default:
return -EINVAL;
} }
return 0; return 0;
...@@ -741,7 +744,7 @@ static int mmhub_v1_8_err_codes[] = { ...@@ -741,7 +744,7 @@ static int mmhub_v1_8_err_codes[] = {
}; };
static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data) enum aca_smu_type type, void *data)
{ {
u32 instlo; u32 instlo;
......
...@@ -2190,24 +2190,27 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { ...@@ -2190,24 +2190,27 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
}; };
static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle,
struct aca_bank *bank, enum aca_error_type type, struct aca_bank *bank, enum aca_smu_type type,
struct aca_bank_report *report, void *data) struct aca_bank_report *report, void *data)
{ {
u64 status, misc0; u64 misc0;
int ret; int ret;
status = bank->regs[ACA_REG_IDX_STATUS]; ret = aca_bank_info_decode(bank, &report->info);
if ((type == ACA_ERROR_TYPE_UE && if (ret)
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || return ret;
(type == ACA_ERROR_TYPE_CE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
ret = aca_bank_info_decode(bank, &report->info); misc0 = bank->regs[ACA_REG_IDX_MISC0];
if (ret)
return ret;
misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) {
report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); case ACA_SMU_TYPE_UE:
report->count[ACA_ERROR_TYPE_UE] = 1ULL;
break;
case ACA_SMU_TYPE_CE:
report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0);
break;
default:
return -EINVAL;
} }
return 0; return 0;
...@@ -2217,7 +2220,7 @@ static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, ...@@ -2217,7 +2220,7 @@ static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle,
static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 }; static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data) enum aca_smu_type type, void *data)
{ {
u32 instlo; u32 instlo;
......
...@@ -504,7 +504,7 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { ...@@ -504,7 +504,7 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = {
.query_ras_error_address = umc_v12_0_query_ras_error_address, .query_ras_error_address = umc_v12_0_query_ras_error_address,
}; };
static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
struct aca_bank_report *report, void *data) struct aca_bank_report *report, void *data)
{ {
struct amdgpu_device *adev = handle->adev; struct amdgpu_device *adev = handle->adev;
...@@ -517,14 +517,14 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct ...@@ -517,14 +517,14 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct
status = bank->regs[ACA_REG_IDX_STATUS]; status = bank->regs[ACA_REG_IDX_STATUS];
switch (type) { switch (type) {
case ACA_ERROR_TYPE_UE: case ACA_SMU_TYPE_UE:
if (umc_v12_0_is_uncorrectable_error(adev, status)) { if (umc_v12_0_is_uncorrectable_error(adev, status)) {
report->count[type] = 1; report->count[ACA_ERROR_TYPE_UE] = 1;
} }
break; break;
case ACA_ERROR_TYPE_CE: case ACA_SMU_TYPE_CE:
if (umc_v12_0_is_correctable_error(adev, status)) { if (umc_v12_0_is_correctable_error(adev, status)) {
report->count[type] = 1; report->count[ACA_ERROR_TYPE_CE] = 1;
} }
break; break;
default: default:
......
...@@ -2984,7 +2984,7 @@ static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) ...@@ -2984,7 +2984,7 @@ static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
return smu_v13_0_6_mca_set_debug_mode(smu, enable); return smu_v13_0_6_mca_set_debug_mode(smu, enable);
} }
static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_error_type type, u32 *count) static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_smu_type type, u32 *count)
{ {
uint32_t msg; uint32_t msg;
int ret; int ret;
...@@ -2993,10 +2993,10 @@ static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_err ...@@ -2993,10 +2993,10 @@ static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_err
return -EINVAL; return -EINVAL;
switch (type) { switch (type) {
case ACA_ERROR_TYPE_UE: case ACA_SMU_TYPE_UE:
msg = SMU_MSG_QueryValidMcaCount; msg = SMU_MSG_QueryValidMcaCount;
break; break;
case ACA_ERROR_TYPE_CE: case ACA_SMU_TYPE_CE:
msg = SMU_MSG_QueryValidMcaCeCount; msg = SMU_MSG_QueryValidMcaCeCount;
break; break;
default: default:
...@@ -3013,14 +3013,14 @@ static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_err ...@@ -3013,14 +3013,14 @@ static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_err
} }
static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev,
enum aca_error_type type, u32 *count) enum aca_smu_type type, u32 *count)
{ {
struct smu_context *smu = adev->powerplay.pp_handle; struct smu_context *smu = adev->powerplay.pp_handle;
int ret; int ret;
switch (type) { switch (type) {
case ACA_ERROR_TYPE_UE: case ACA_SMU_TYPE_UE:
case ACA_ERROR_TYPE_CE: case ACA_SMU_TYPE_CE:
ret = smu_v13_0_6_get_valid_aca_count(smu, type, count); ret = smu_v13_0_6_get_valid_aca_count(smu, type, count);
break; break;
default: default:
...@@ -3031,16 +3031,16 @@ static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, ...@@ -3031,16 +3031,16 @@ static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev,
return ret; return ret;
} }
static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_type type, static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type,
int idx, int offset, u32 *val) int idx, int offset, u32 *val)
{ {
uint32_t msg, param; uint32_t msg, param;
switch (type) { switch (type) {
case ACA_ERROR_TYPE_UE: case ACA_SMU_TYPE_UE:
msg = SMU_MSG_McaBankDumpDW; msg = SMU_MSG_McaBankDumpDW;
break; break;
case ACA_ERROR_TYPE_CE: case ACA_SMU_TYPE_CE:
msg = SMU_MSG_McaBankCeDumpDW; msg = SMU_MSG_McaBankCeDumpDW;
break; break;
default: default:
...@@ -3052,7 +3052,7 @@ static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_t ...@@ -3052,7 +3052,7 @@ static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_t
return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val); return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val);
} }
static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_type type, static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type,
int idx, int offset, u32 *val, int count) int idx, int offset, u32 *val, int count)
{ {
int ret, i; int ret, i;
...@@ -3069,7 +3069,7 @@ static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_typ ...@@ -3069,7 +3069,7 @@ static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_typ
return 0; return 0;
} }
static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_error_type type, static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_smu_type type,
int idx, int reg_idx, u64 *val) int idx, int reg_idx, u64 *val)
{ {
struct smu_context *smu = adev->powerplay.pp_handle; struct smu_context *smu = adev->powerplay.pp_handle;
...@@ -3086,13 +3086,13 @@ static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_error_type typ ...@@ -3086,13 +3086,13 @@ static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_error_type typ
*val = (u64)data[1] << 32 | data[0]; *val = (u64)data[1] << 32 | data[0];
dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n", dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n",
type == ACA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); type == ACA_SMU_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val);
return 0; return 0;
} }
static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev,
enum aca_error_type type, int idx, struct aca_bank *bank) enum aca_smu_type type, int idx, struct aca_bank *bank)
{ {
int i, ret, count; int i, ret, count;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment