Commit 2f6247da authored by Stanley.Yang, committed by Alex Deucher

drm/amdgpu/pm: support mca_ceumc_addr in ecctable

The SMU adds a new variable, mca_ceumc_addr, to record the
UMC correctable error address in the EccInfo table; the
driver side adds EccInfo_V2_t to support this feature.
Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent faf26f2b
...@@ -328,6 +328,7 @@ struct ecc_info_per_ch { ...@@ -328,6 +328,7 @@ struct ecc_info_per_ch {
uint16_t ce_count_hi_chip; uint16_t ce_count_hi_chip;
uint64_t mca_umc_status; uint64_t mca_umc_status;
uint64_t mca_umc_addr; uint64_t mca_umc_addr;
uint64_t mca_ceumc_addr;
}; };
struct umc_ecc_info { struct umc_ecc_info {
......
...@@ -519,7 +519,21 @@ typedef struct { ...@@ -519,7 +519,21 @@ typedef struct {
} EccInfo_t; } EccInfo_t;
typedef struct { typedef struct {
uint64_t mca_umc_status;
uint64_t mca_umc_addr;
uint64_t mca_ceumc_addr;
uint16_t ce_count_lo_chip;
uint16_t ce_count_hi_chip;
uint32_t eccPadding;
} EccInfo_V2_t;
typedef struct {
union {
EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM]; EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];
EccInfo_V2_t EccInfo_V2[ALDEBARAN_UMC_CHANNEL_NUM];
};
} EccInfoTable_t; } EccInfoTable_t;
// These defines are used with the following messages: // These defines are used with the following messages:
......
...@@ -82,6 +82,12 @@ ...@@ -82,6 +82,12 @@
*/ */
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00 #define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00
/*
* SMU supports mca_ceumc_addr in ECCTABLE since version 68.55.0;
* use this to check whether the mca_ceumc_addr record is supported
*/
#define SUPPORT_ECCTABLE_V2_SMU_VERSION 0x00443700
/* /*
* SMU supports BAD CHANNEL info MSG since version 68.51.00, * SMU supports BAD CHANNEL info MSG since version 68.51.00,
* use this to check whether the ECCTABLE feature is supported * use this to check whether the ECCTABLE feature is supported
...@@ -1803,7 +1809,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, ...@@ -1803,7 +1809,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
return sizeof(struct gpu_metrics_v1_3); return sizeof(struct gpu_metrics_v1_3);
} }
static int aldebaran_check_ecc_table_support(struct smu_context *smu) static int aldebaran_check_ecc_table_support(struct smu_context *smu,
int *ecctable_version)
{ {
uint32_t if_version = 0xff, smu_version = 0xff; uint32_t if_version = 0xff, smu_version = 0xff;
int ret = 0; int ret = 0;
...@@ -1816,6 +1823,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu) ...@@ -1816,6 +1823,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu)
if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION) if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION)
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
else if (smu_version >= SUPPORT_ECCTABLE_SMU_VERSION &&
smu_version < SUPPORT_ECCTABLE_V2_SMU_VERSION)
*ecctable_version = 1;
else
*ecctable_version = 2;
return ret; return ret;
} }
...@@ -1827,9 +1839,10 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu, ...@@ -1827,9 +1839,10 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
EccInfoTable_t *ecc_table = NULL; EccInfoTable_t *ecc_table = NULL;
struct ecc_info_per_ch *ecc_info_per_channel = NULL; struct ecc_info_per_ch *ecc_info_per_channel = NULL;
int i, ret = 0; int i, ret = 0;
int table_version = 0;
struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
ret = aldebaran_check_ecc_table_support(smu); ret = aldebaran_check_ecc_table_support(smu, &table_version);
if (ret) if (ret)
return ret; return ret;
...@@ -1845,6 +1858,7 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu, ...@@ -1845,6 +1858,7 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
ecc_table = (EccInfoTable_t *)smu_table->ecc_table; ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
if (table_version == 1) {
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) { for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
ecc_info_per_channel = &(eccinfo->ecc[i]); ecc_info_per_channel = &(eccinfo->ecc[i]);
ecc_info_per_channel->ce_count_lo_chip = ecc_info_per_channel->ce_count_lo_chip =
...@@ -1856,6 +1870,21 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu, ...@@ -1856,6 +1870,21 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
ecc_info_per_channel->mca_umc_addr = ecc_info_per_channel->mca_umc_addr =
ecc_table->EccInfo[i].mca_umc_addr; ecc_table->EccInfo[i].mca_umc_addr;
} }
} else if (table_version == 2) {
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
ecc_info_per_channel = &(eccinfo->ecc[i]);
ecc_info_per_channel->ce_count_lo_chip =
ecc_table->EccInfo_V2[i].ce_count_lo_chip;
ecc_info_per_channel->ce_count_hi_chip =
ecc_table->EccInfo_V2[i].ce_count_hi_chip;
ecc_info_per_channel->mca_umc_status =
ecc_table->EccInfo_V2[i].mca_umc_status;
ecc_info_per_channel->mca_umc_addr =
ecc_table->EccInfo_V2[i].mca_umc_addr;
ecc_info_per_channel->mca_ceumc_addr =
ecc_table->EccInfo_V2[i].mca_ceumc_addr;
}
}
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment