Commit 1f8d3ad2 authored by Hawking Zhang's avatar Hawking Zhang Committed by Alex Deucher

drm/amdgpu: only harvest gcea/mmea error status in aldebaran

In aldebaran, driver only needs to harvest SDP
RdRspStatus, WrRspStatus and first parity error
on RdRsp data. Check error type before harvest
error information.
Signed-off-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: default avatarStanley Yang <Stanley.Yang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 53ee6609
...@@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = { ...@@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
}; };
static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs = static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
{ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
...@@ -1041,11 +1041,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev) ...@@ -1041,11 +1041,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
uint32_t i, j; uint32_t i, j;
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
j++) { j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j); gfx_v9_4_2_select_se_sh(adev, i, 0, j);
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10); WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
} }
} }
gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
...@@ -1090,17 +1090,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev) ...@@ -1090,17 +1090,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
j++) { j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j); gfx_v9_4_2_select_se_sh(adev, i, 0, j);
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
gfx_v9_4_2_rdrsp_status_regs)); gfx_v9_4_2_ea_err_status_regs));
if ((reg_value & 0xFFF) != GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK) if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n", dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
j, reg_value); j, reg_value);
}
/* clear after read */ /* clear after read */
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10); WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
} }
} }
......
...@@ -1286,7 +1286,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev) ...@@ -1286,7 +1286,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
} }
} }
static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = { static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
...@@ -1303,13 +1303,16 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) ...@@ -1303,13 +1303,16 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
return; return;
for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) { for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
reg_value = reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs[i])); RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]));
if ((reg_value & 0xFFF) != MMEA0_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK) if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n", dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
i, reg_value); i, reg_value);
} }
}
} }
const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = { const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment