Commit 31fd330b authored by Yang Wang, committed by Alex Deucher

drm/amdgpu: add ras event id support for ACA

add ras event id support for ACA.
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent bd15bf74
...@@ -116,20 +116,22 @@ static struct aca_regs_dump { ...@@ -116,20 +116,22 @@ static struct aca_regs_dump {
{"CONTROL_MASK", ACA_REG_IDX_CTL_MASK}, {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
}; };
static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank) static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
struct ras_query_context *qctx)
{ {
u64 event_id = qctx ? qctx->event_id : 0ULL;
int i; int i;
dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
/* plus 1 for output format, e.g: ACA[08/08]: xxxx */ /* plus 1 for output format, e.g: ACA[08/08]: xxxx */
for (i = 0; i < ARRAY_SIZE(aca_regs); i++) for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
} }
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
int start, int count, int start, int count,
struct aca_banks *banks) struct aca_banks *banks, struct ras_query_context *qctx)
{ {
struct amdgpu_aca *aca = &adev->aca; struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
...@@ -165,7 +167,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ ...@@ -165,7 +167,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
bank.type = type; bank.type = type;
aca_smu_bank_dump(adev, i, count, &bank); aca_smu_bank_dump(adev, i, count, &bank, qctx);
ret = aca_banks_add_bank(banks, &bank); ret = aca_banks_add_bank(banks, &bank);
if (ret) if (ret)
...@@ -390,7 +392,7 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type ...@@ -390,7 +392,7 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type
} }
static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
bank_handler_t handler, void *data) bank_handler_t handler, struct ras_query_context *qctx, void *data)
{ {
struct amdgpu_aca *aca = &adev->aca; struct amdgpu_aca *aca = &adev->aca;
struct aca_banks banks; struct aca_banks banks;
...@@ -412,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, ...@@ -412,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
aca_banks_init(&banks); aca_banks_init(&banks);
ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks); ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
if (ret) if (ret)
goto err_release_banks; goto err_release_banks;
...@@ -489,7 +491,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type ...@@ -489,7 +491,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
} }
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
struct ras_err_data *err_data) struct ras_err_data *err_data, struct ras_query_context *qctx)
{ {
enum aca_smu_type smu_type; enum aca_smu_type smu_type;
int ret; int ret;
...@@ -507,7 +509,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h ...@@ -507,7 +509,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
} }
/* udpate aca bank to aca source error_cache first */ /* udpate aca bank to aca source error_cache first */
ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL); ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
if (ret) if (ret)
return ret; return ret;
...@@ -523,7 +525,7 @@ static bool aca_handle_is_valid(struct aca_handle *handle) ...@@ -523,7 +525,7 @@ static bool aca_handle_is_valid(struct aca_handle *handle)
} }
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
enum aca_error_type type, void *data) enum aca_error_type type, void *data, void *qctx)
{ {
struct ras_err_data *err_data = (struct ras_err_data *)data; struct ras_err_data *err_data = (struct ras_err_data *)data;
...@@ -536,7 +538,8 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han ...@@ -536,7 +538,8 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han
if (!(BIT(type) & handle->mask)) if (!(BIT(type) & handle->mask))
return 0; return 0;
return __aca_get_error_data(adev, handle, type, err_data); return __aca_get_error_data(adev, handle, type, err_data,
(struct ras_query_context *)qctx);
} }
static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
...@@ -853,7 +856,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) ...@@ -853,7 +856,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
.idx = 0, .idx = 0,
}; };
return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context); return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
} }
static int aca_dump_ce_show(struct seq_file *m, void *unused) static int aca_dump_ce_show(struct seq_file *m, void *unused)
......
...@@ -198,7 +198,7 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, ...@@ -198,7 +198,7 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
const char *name, const struct aca_info *aca_info, void *data); const char *name, const struct aca_info *aca_info, void *data);
void amdgpu_aca_remove_handle(struct aca_handle *handle); void amdgpu_aca_remove_handle(struct aca_handle *handle);
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
enum aca_error_type type, void *data); enum aca_error_type type, void *data, void *qctx);
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
......
...@@ -1269,7 +1269,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) ...@@ -1269,7 +1269,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
} }
static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum aca_error_type type, struct ras_err_data *err_data) enum aca_error_type type, struct ras_err_data *err_data,
struct ras_query_context *qctx)
{ {
struct ras_manager *obj; struct ras_manager *obj;
...@@ -1277,7 +1278,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu ...@@ -1277,7 +1278,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu
if (!obj) if (!obj)
return -EINVAL; return -EINVAL;
return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data); return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
} }
ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
...@@ -1334,15 +1335,15 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, ...@@ -1334,15 +1335,15 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
} }
} else { } else {
if (amdgpu_aca_is_enabled(adev)) { if (amdgpu_aca_is_enabled(adev)) {
ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data); ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
if (ret) if (ret)
return ret; return ret;
ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
if (ret) if (ret)
return ret; return ret;
ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data); ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
if (ret) if (ret)
return ret; return ret;
} else { } else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment