Commit e4348849, authored by Tao Zhou, committed by Alex Deucher

drm/amdgpu: set poison supported flag for RAS (v2)

Add RAS poison supported flag and tell PSP RAS TA about the info.

v2: rename poison mode to poison supported, since we can also disable
poison mode even if we support it.
    print the value of poison supported if RAS feature enablement fails.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent aaca8c38
...@@ -1444,9 +1444,9 @@ static int psp_ras_initialize(struct psp_context *psp) ...@@ -1444,9 +1444,9 @@ static int psp_ras_initialize(struct psp_context *psp)
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
if (psp->adev->gmc.xgmi.connected_to_cpu) if (amdgpu_ras_is_poison_mode_supported(adev))
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
else if (!adev->gmc.xgmi.connected_to_cpu)
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
ret = psp_ras_load(psp); ret = psp_ras_load(psp);
......
...@@ -710,10 +710,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, ...@@ -710,10 +710,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!amdgpu_ras_intr_triggered()) { if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable); ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) { if (ret) {
dev_err(adev->dev, "ras %s %s failed %d\n", dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
enable ? "enable":"disable", enable ? "enable":"disable",
get_ras_block_str(head), get_ras_block_str(head),
ret); amdgpu_ras_is_poison_mode_supported(adev), ret);
goto out; goto out;
} }
} }
...@@ -2238,6 +2238,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) ...@@ -2238,6 +2238,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
{ {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int r; int r;
bool df_poison, umc_poison;
if (con) if (con)
return 0; return 0;
...@@ -2308,6 +2309,23 @@ int amdgpu_ras_init(struct amdgpu_device *adev) ...@@ -2308,6 +2309,23 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con; goto release_con;
} }
/* Init poison supported flag, the default value is false */
if (adev->df.funcs &&
adev->df.funcs->query_ras_poison_mode &&
adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_poison_mode) {
df_poison =
adev->df.funcs->query_ras_poison_mode(adev);
umc_poison =
adev->umc.ras_funcs->query_ras_poison_mode(adev);
/* Only poison is set in both DF and UMC, we can support it */
if (df_poison && umc_poison)
con->poison_supported = true;
else if (df_poison != umc_poison)
dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
df_poison, umc_poison);
}
if (amdgpu_ras_fs_init(adev)) { if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL; r = -EINVAL;
goto release_con; goto release_con;
...@@ -2351,6 +2369,16 @@ static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev, ...@@ -2351,6 +2369,16 @@ static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
 * amdgpu_ras_is_poison_mode_supported - query the RAS poison supported flag
 * @adev: amdgpu device whose RAS context is looked up
 *
 * Return: the poison_supported flag stored in the device's RAS context,
 * or false when the device has no RAS context.
 */
bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
{
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	/* Without a RAS context there is no flag to read; report unsupported. */
	return ras ? ras->poison_supported : false;
}
/* helper function to handle common stuff in ip late init phase */ /* helper function to handle common stuff in ip late init phase */
int amdgpu_ras_late_init(struct amdgpu_device *adev, int amdgpu_ras_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block, struct ras_common_if *ras_block,
......
...@@ -351,6 +351,9 @@ struct amdgpu_ras { ...@@ -351,6 +351,9 @@ struct amdgpu_ras {
/* disable ras error count harvest in recovery */ /* disable ras error count harvest in recovery */
bool disable_ras_err_cnt_harvest; bool disable_ras_err_cnt_harvest;
/* is poison mode supported */
bool poison_supported;
/* RAS count errors delayed work */ /* RAS count errors delayed work */
struct delayed_work ras_counte_delay_work; struct delayed_work ras_counte_delay_work;
atomic_t ras_ue_count; atomic_t ras_ue_count;
...@@ -646,4 +649,6 @@ int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev); ...@@ -646,4 +649,6 @@ int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
const char *get_ras_block_str(struct ras_common_if *ras_block); const char *get_ras_block_str(struct ras_common_if *ras_block);
bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment