Commit a8c77a12 authored by YiPeng Chai's avatar YiPeng Chai Committed by Alex Deucher

drm/amdgpu: Add poison mode check error condition for umc v12_0

Add poison mode check error condition for umc v12_0.
Signed-off-by: default avatarYiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 9f91e983
...@@ -88,16 +88,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) ...@@ -88,16 +88,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
umc_v12_0_reset_error_count_per_channel, NULL); umc_v12_0_reset_error_count_per_channel, NULL);
} }
bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
{ {
if (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
return true;
return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
} }
bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
{ {
if (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
return false;
return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
...@@ -105,7 +115,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) ...@@ -105,7 +115,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
/* Identify data parity error in replay mode */ /* Identify data parity error in replay mode */
((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 || ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
!(umc_v12_0_is_uncorrectable_error(mc_umc_status))))); !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
} }
static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
...@@ -124,7 +134,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, ...@@ -124,7 +134,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
mc_umc_status = mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
if (umc_v12_0_is_correctable_error(mc_umc_status)) if (umc_v12_0_is_correctable_error(adev, mc_umc_status))
*error_count += 1; *error_count += 1;
} }
...@@ -142,7 +152,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev ...@@ -142,7 +152,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
mc_umc_status = mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
if (umc_v12_0_is_uncorrectable_error(mc_umc_status)) if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))
*error_count += 1; *error_count += 1;
} }
......
...@@ -117,8 +117,8 @@ ...@@ -117,8 +117,8 @@
(pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
} while (0) } while (0)
bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
extern const uint32_t extern const uint32_t
umc_v12_0_channel_idx_tbl[] umc_v12_0_channel_idx_tbl[]
......
...@@ -2524,9 +2524,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct ...@@ -2524,9 +2524,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
return 0; return 0;
} }
if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0))
*count = 1; *count = 1;
else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0))
*count = 1; *count = 1;
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment