Commit 05eee31c authored by Stanley.Yang's avatar Stanley.Yang Committed by Alex Deucher

drm/amdgpu: add umc query error status function

In order to debug ras error, driver will print IPID/SYND/MISC0
register value if detect correctable or uncorrectable error.
Provide umc_query_error_status_helper function to reduce code
redundancy.
Signed-off-by: default avatarStanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: default avatarTao Zhou <tao.zhou1@amd.com>
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 1754cea1
...@@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, ...@@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev,
return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
} }
static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
uint64_t mc_umc_status, uint32_t umc_reg_offset)
{
uint32_t mc_umc_addr;
uint64_t reg_value;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
dev_info(adev->dev, "Deferred error, no user action is needed.\n");
if (mc_umc_status)
dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
/* print IPID registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
/* print SYND registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
/* print MISC0 registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
}
static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_inst, uint32_t ch_inst, uint32_t umc_inst, uint32_t ch_inst,
unsigned long *error_count) unsigned long *error_count)
{ {
uint64_t mc_umc_status; uint64_t mc_umc_status;
uint32_t eccinfo_table_idx; uint32_t eccinfo_table_idx;
uint32_t umc_reg_offset;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
umc_reg_offset = get_umc_v6_7_reg_offset(adev,
umc_inst, ch_inst);
eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
/* check for SRAM correctable error /* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */ MCUMC_STATUS is a 64 bit register */
mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
*error_count += 1; *error_count += 1;
umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
}
} }
static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
...@@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev ...@@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
uint64_t mc_umc_status; uint64_t mc_umc_status;
uint32_t eccinfo_table_idx; uint32_t eccinfo_table_idx;
uint32_t umc_reg_offset; uint32_t umc_reg_offset;
uint32_t mc_umc_addr;
uint64_t reg_value;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
umc_reg_offset = get_umc_v6_7_reg_offset(adev, umc_reg_offset = get_umc_v6_7_reg_offset(adev,
...@@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev ...@@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
*error_count += 1; *error_count += 1;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
dev_info(adev->dev, "Deferred error, no user action is needed.\n");
if (mc_umc_status)
dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
/* print IPID registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
/* print SYND registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
/* print MISC0 registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
} }
} }
...@@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, ...@@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
MCUMC_STATUS is a 64 bit register */ MCUMC_STATUS is a 64 bit register */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
*error_count += 1; *error_count += 1;
umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
}
} }
static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev, static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
...@@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev ...@@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
{ {
uint64_t mc_umc_status; uint64_t mc_umc_status;
uint32_t mc_umc_status_addr; uint32_t mc_umc_status_addr;
uint32_t mc_umc_addr;
uint64_t reg_value;
mc_umc_status_addr = mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
...@@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev ...@@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
*error_count += 1; *error_count += 1;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
dev_info(adev->dev, "Deferred error, no user action is needed.\n");
if (mc_umc_status)
dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
/* print IPID registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
/* print SYND registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
/* print MISC0 registers value */
mc_umc_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
if (reg_value)
dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment