Commit 2f6247da authored by Stanley.Yang, committed by Alex Deucher

drm/amdgpu/pm: support mca_ceumc_addr in ecctable

The SMU adds a new variable, mca_ceumc_addr, to record the
UMC correctable error address in the EccInfo table; the
driver side adds EccInfo_V2_t to support this feature.
Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent faf26f2b
......@@ -328,6 +328,7 @@ struct ecc_info_per_ch {
uint16_t ce_count_hi_chip;
uint64_t mca_umc_status;
uint64_t mca_umc_addr;
uint64_t mca_ceumc_addr;
};
struct umc_ecc_info {
......
......@@ -519,7 +519,21 @@ typedef struct {
} EccInfo_t;
/*
 * Version 2 of the per-channel ECC record. Besides the MCA UMC status and
 * address words and the ce_count_* counters, it carries mca_ceumc_addr,
 * which the SMU uses to record the UMC correctable error address.
 */
typedef struct {
uint64_t mca_umc_status;
uint64_t mca_umc_addr;
/* UMC correctable error address recorded by the SMU (the V2 addition). */
uint64_t mca_ceumc_addr;
uint16_t ce_count_lo_chip;
uint16_t ce_count_hi_chip;
/* Padding word; presumably rounds the record up to a multiple of 8 bytes -- confirm against the SMU interface. */
uint32_t eccPadding;
} EccInfo_V2_t;
/*
 * ECC info table with one record per UMC channel. The version 1 and
 * version 2 record layouts overlay the same storage, so a reader must pick
 * the member that matches the ECC table version supported by the SMU.
 */
typedef struct {
union {
/* Records laid out as EccInfo_t; read when the table version is 1. */
EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];
/* Records laid out as EccInfo_V2_t (adds mca_ceumc_addr); read when the table version is 2. */
EccInfo_V2_t EccInfo_V2[ALDEBARAN_UMC_CHANNEL_NUM];
};
} EccInfoTable_t;
// These defines are used with the following messages:
......
......@@ -82,6 +82,12 @@
*/
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00
/*
 * The SMU supports mca_ceumc_addr in the ECCTABLE since version 68.55.0;
 * compare the SMU version against this value to check whether the
 * mca_ceumc_addr record is supported.
 */
#define SUPPORT_ECCTABLE_V2_SMU_VERSION 0x00443700
/*
* The SMU supports the BAD CHANNEL info MSG since version 68.51.00;
* use this to check whether the ECCTABLE feature is supported
......@@ -1803,7 +1809,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
return sizeof(struct gpu_metrics_v1_3);
}
static int aldebaran_check_ecc_table_support(struct smu_context *smu)
static int aldebaran_check_ecc_table_support(struct smu_context *smu,
int *ecctable_version)
{
uint32_t if_version = 0xff, smu_version = 0xff;
int ret = 0;
......@@ -1816,6 +1823,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu)
if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION)
ret = -EOPNOTSUPP;
else if (smu_version >= SUPPORT_ECCTABLE_SMU_VERSION &&
smu_version < SUPPORT_ECCTABLE_V2_SMU_VERSION)
*ecctable_version = 1;
else
*ecctable_version = 2;
return ret;
}
......@@ -1827,9 +1839,10 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
EccInfoTable_t *ecc_table = NULL;
struct ecc_info_per_ch *ecc_info_per_channel = NULL;
int i, ret = 0;
int table_version = 0;
struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
ret = aldebaran_check_ecc_table_support(smu);
ret = aldebaran_check_ecc_table_support(smu, &table_version);
if (ret)
return ret;
......@@ -1845,6 +1858,7 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
if (table_version == 1) {
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
ecc_info_per_channel = &(eccinfo->ecc[i]);
ecc_info_per_channel->ce_count_lo_chip =
......@@ -1856,6 +1870,21 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
ecc_info_per_channel->mca_umc_addr =
ecc_table->EccInfo[i].mca_umc_addr;
}
} else if (table_version == 2) {
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
ecc_info_per_channel = &(eccinfo->ecc[i]);
ecc_info_per_channel->ce_count_lo_chip =
ecc_table->EccInfo_V2[i].ce_count_lo_chip;
ecc_info_per_channel->ce_count_hi_chip =
ecc_table->EccInfo_V2[i].ce_count_hi_chip;
ecc_info_per_channel->mca_umc_status =
ecc_table->EccInfo_V2[i].mca_umc_status;
ecc_info_per_channel->mca_umc_addr =
ecc_table->EccInfo_V2[i].mca_umc_addr;
ecc_info_per_channel->mca_ceumc_addr =
ecc_table->EccInfo_V2[i].mca_ceumc_addr;
}
}
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment