Commit 22d4ba53 authored by yipechai, committed by Alex Deucher

drm/amdgpu: Adjust error inject function code style in amdgpu_ras.c

1. Move the xgmi-specific error inject function from amdgpu_ras.c to the xgmi block.
2. Use psp_ras_trigger_error as the default error inject function in amdgpu_ras.c: if a ras block does not define .ras_error_inject, the default error inject function takes effect.

v2: squash in warning fix (Alex)
Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b0e2062d
...@@ -903,31 +903,6 @@ static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct amdgpu_de ...@@ -903,31 +903,6 @@ static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct amdgpu_de
return NULL; return NULL;
} }
/*
 * Query the RAS error count of a single MCA sub-block.
 *
 * Dispatches on ras_block->sub_block_index (MP0, MP1 or MPIO) and, when the
 * selected sub-block has ras_funcs with a query_ras_error_count callback,
 * invokes that callback with @err_data. Sub-blocks without the callback and
 * unrecognised sub-block indexes are silently ignored.
 *
 * @adev:      device whose adev->mca.{mp0,mp1,mpio}.ras_funcs are consulted
 * @ras_block: supplies the sub_block_index used for dispatch
 * @err_data:  error data handed to the callback
 *
 * @err_data is already a pointer, so it is forwarded as-is. Passing
 * &err_data would give the callback the address of this function's own
 * parameter instead of the caller's error data.
 */
static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
					      struct ras_common_if *ras_block,
					      struct ras_err_data *err_data)
{
	switch (ras_block->sub_block_index) {
	case AMDGPU_RAS_MCA_BLOCK__MP0:
		if (adev->mca.mp0.ras_funcs &&
		    adev->mca.mp0.ras_funcs->query_ras_error_count)
			adev->mca.mp0.ras_funcs->query_ras_error_count(adev, err_data);
		break;
	case AMDGPU_RAS_MCA_BLOCK__MP1:
		if (adev->mca.mp1.ras_funcs &&
		    adev->mca.mp1.ras_funcs->query_ras_error_count)
			adev->mca.mp1.ras_funcs->query_ras_error_count(adev, err_data);
		break;
	case AMDGPU_RAS_MCA_BLOCK__MPIO:
		if (adev->mca.mpio.ras_funcs &&
		    adev->mca.mpio.ras_funcs->query_ras_error_count)
			adev->mca.mpio.ras_funcs->query_ras_error_count(adev, err_data);
		break;
	default:
		/* No query hook is wired up for other sub-blocks. */
		break;
	}
}
static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data) static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
{ {
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
...@@ -994,6 +969,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -994,6 +969,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
case AMDGPU_RAS_BLOCK__PCIE_BIF: case AMDGPU_RAS_BLOCK__PCIE_BIF:
case AMDGPU_RAS_BLOCK__XGMI_WAFL: case AMDGPU_RAS_BLOCK__XGMI_WAFL:
case AMDGPU_RAS_BLOCK__HDP: case AMDGPU_RAS_BLOCK__HDP:
case AMDGPU_RAS_BLOCK__MCA:
if (!block_obj || !block_obj->hw_ops) { if (!block_obj || !block_obj->hw_ops) {
dev_info(adev->dev, "%s doesn't config ras function \n", dev_info(adev->dev, "%s doesn't config ras function \n",
get_ras_block_str(&info->head)); get_ras_block_str(&info->head));
...@@ -1002,9 +978,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -1002,9 +978,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
if (block_obj->hw_ops->query_ras_error_count) if (block_obj->hw_ops->query_ras_error_count)
block_obj->hw_ops->query_ras_error_count(adev, &err_data); block_obj->hw_ops->query_ras_error_count(adev, &err_data);
break; break;
case AMDGPU_RAS_BLOCK__MCA:
amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data);
break;
default: default:
break; break;
} }
...@@ -1099,32 +1072,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, ...@@ -1099,32 +1072,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
return 0; return 0;
} }
/*
 * Trigger an XGMI/WAFL error.
 *
 * Before the trigger, the DF C-state is set to DF_CSTATE_DISALLOW and XGMI
 * power down is disallowed; a failure of either step is only logged and the
 * injection continues. After psp_ras_trigger_error() both settings are
 * re-allowed in reverse order, unless amdgpu_ras_intr_triggered() reports
 * true, in which case they are left as they are.
 * NOTE(review): presumably the restore is skipped because a triggered RAS
 * interrupt is followed by recovery - confirm.
 *
 * @adev:       device on which the error is injected
 * @block_info: trigger input passed unchanged to psp_ras_trigger_error()
 *
 * Returns the value returned by psp_ras_trigger_error().
 */
static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
					struct ta_ras_trigger_error_input *block_info)
{
	int status;

	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW) != 0) {
		dev_warn(adev->dev, "Failed to disallow df cstate");
	}

	if (amdgpu_dpm_allow_xgmi_power_down(adev, false) != 0) {
		dev_warn(adev->dev, "Failed to disallow XGMI power down");
	}

	status = psp_ras_trigger_error(&adev->psp, block_info);

	/* Undo the power-management changes only when no RAS interrupt fired. */
	if (!amdgpu_ras_intr_triggered()) {
		if (amdgpu_dpm_allow_xgmi_power_down(adev, true) != 0) {
			dev_warn(adev->dev, "Failed to allow XGMI power down");
		}

		if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW) != 0) {
			dev_warn(adev->dev, "Failed to allow df cstate");
		}
	}

	return status;
}
/* wrapper of psp_ras_trigger_error */ /* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev, int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info) struct ras_inject_if *info)
...@@ -1143,6 +1090,11 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ...@@ -1143,6 +1090,11 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj) if (!obj)
return -EINVAL; return -EINVAL;
if (!block_obj || !block_obj->hw_ops) {
dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
return -EINVAL;
}
/* Calculate XGMI relative offset */ /* Calculate XGMI relative offset */
if (adev->gmc.xgmi.num_physical_nodes > 1) { if (adev->gmc.xgmi.num_physical_nodes > 1) {
block_info.address = block_info.address =
...@@ -1150,30 +1102,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ...@@ -1150,30 +1102,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
block_info.address); block_info.address);
} }
switch (info->head.block) { if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
case AMDGPU_RAS_BLOCK__GFX:
if (!block_obj || !block_obj->hw_ops) {
dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
return -EINVAL;
}
if (block_obj->hw_ops->ras_error_inject) if (block_obj->hw_ops->ras_error_inject)
ret = block_obj->hw_ops->ras_error_inject(adev, info); ret = block_obj->hw_ops->ras_error_inject(adev, info);
break; } else {
case AMDGPU_RAS_BLOCK__UMC: /* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */
case AMDGPU_RAS_BLOCK__SDMA: if (block_obj->hw_ops->ras_error_inject)
case AMDGPU_RAS_BLOCK__MMHUB: ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
case AMDGPU_RAS_BLOCK__PCIE_BIF: else /*If not defined .ras_error_inject, use default ras_error_inject*/
case AMDGPU_RAS_BLOCK__MCA:
ret = psp_ras_trigger_error(&adev->psp, &block_info); ret = psp_ras_trigger_error(&adev->psp, &block_info);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
break;
default:
dev_info(adev->dev, "%s error injection is not supported yet\n",
get_ras_block_str(&info->head));
ret = -EINVAL;
} }
if (ret) if (ret)
......
...@@ -946,9 +946,36 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, ...@@ -946,9 +946,36 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
err_data->ce_count += ce_cnt; err_data->ce_count += ce_cnt;
} }
/*
 * Trigger an XGMI/WAFL error.
 *
 * Takes an untyped pointer to the inject input, which this function
 * interprets as a struct ta_ras_trigger_error_input.
 *
 * Sequence: set the DF C-state to DF_CSTATE_DISALLOW, disallow XGMI power
 * down, call psp_ras_trigger_error(), then re-allow both in reverse order.
 * Failures of the power-management calls are logged with dev_warn() and do
 * not stop the injection. When amdgpu_ras_intr_triggered() reports true
 * after the trigger, the function returns without re-allowing anything.
 *
 * @adev:      device on which the error is injected
 * @inject_if: pointer to a struct ta_ras_trigger_error_input
 *
 * Returns the value returned by psp_ras_trigger_error().
 */
static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
{
	struct ta_ras_trigger_error_input *trigger_input = inject_if;
	int trigger_rc;

	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) {
		dev_warn(adev->dev, "Failed to disallow df cstate");
	}
	if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) {
		dev_warn(adev->dev, "Failed to disallow XGMI power down");
	}

	trigger_rc = psp_ras_trigger_error(&adev->psp, trigger_input);

	/* A triggered RAS interrupt short-circuits the restore steps below. */
	if (amdgpu_ras_intr_triggered()) {
		return trigger_rc;
	}

	if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) {
		dev_warn(adev->dev, "Failed to allow XGMI power down");
	}
	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) {
		dev_warn(adev->dev, "Failed to allow df cstate");
	}

	return trigger_rc;
}
struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
.query_ras_error_count = amdgpu_xgmi_query_ras_error_count, .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count, .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
.ras_error_inject = amdgpu_ras_error_inject_xgmi,
}; };
struct amdgpu_xgmi_ras xgmi_ras = { struct amdgpu_xgmi_ras xgmi_ras = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment