Commit 49070c4e authored by Hawking Zhang, committed by Alex Deucher

drm/amdgpu: split umc callbacks to ras and non-ras ones

UMC RAS is not managed by the GPU driver when the GPU is
connected to the CPU through XGMI. Split the UMC callbacks
into RAS and non-RAS ones so that the GPU driver only
initializes the UMC RAS callbacks when it manages
UMC RAS.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 52137ca8
...@@ -391,8 +391,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) ...@@ -391,8 +391,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { if (adev->umc.ras_funcs &&
r = adev->umc.funcs->ras_late_init(adev); adev->umc.ras_funcs->ras_late_init) {
r = adev->umc.ras_funcs->ras_late_init(adev);
if (r) if (r)
return r; return r;
} }
...@@ -418,8 +419,12 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) ...@@ -418,8 +419,12 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{ {
amdgpu_umc_ras_fini(adev); if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->ras_fini)
adev->umc.ras_funcs->ras_fini(adev);
amdgpu_mmhub_ras_fini(adev); amdgpu_mmhub_ras_fini(adev);
if (adev->gmc.xgmi.ras_funcs && if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini) adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev); adev->gmc.xgmi.ras_funcs->ras_fini(adev);
......
...@@ -774,13 +774,15 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -774,13 +774,15 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
switch (info->head.block) { switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC: case AMDGPU_RAS_BLOCK__UMC:
if (adev->umc.funcs->query_ras_error_count) if (adev->umc.ras_funcs &&
adev->umc.funcs->query_ras_error_count(adev, &err_data); adev->umc.ras_funcs->query_ras_error_count)
adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);
/* umc query_ras_error_address is also responsible for clearing /* umc query_ras_error_address is also responsible for clearing
* error status * error status
*/ */
if (adev->umc.funcs->query_ras_error_address) if (adev->umc.ras_funcs &&
adev->umc.funcs->query_ras_error_address(adev, &err_data); adev->umc.ras_funcs->query_ras_error_address)
adev->umc.ras_funcs->query_ras_error_address(adev, &err_data);
break; break;
case AMDGPU_RAS_BLOCK__SDMA: case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->query_ras_error_count) { if (adev->sdma.funcs->query_ras_error_count) {
......
...@@ -60,8 +60,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) ...@@ -60,8 +60,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
} }
/* ras init of specific umc version */ /* ras init of specific umc version */
if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) if (adev->umc.ras_funcs &&
adev->umc.funcs->err_cnt_init(adev); adev->umc.ras_funcs->err_cnt_init)
adev->umc.ras_funcs->err_cnt_init(adev);
return 0; return 0;
...@@ -95,12 +96,12 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, ...@@ -95,12 +96,12 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->umc.funcs && if (adev->umc.ras_funcs &&
adev->umc.funcs->query_ras_error_count) adev->umc.ras_funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, ras_error_status); adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
if (adev->umc.funcs && if (adev->umc.ras_funcs &&
adev->umc.funcs->query_ras_error_address && adev->umc.ras_funcs->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) { adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr = err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query, kcalloc(adev->umc.max_ras_err_cnt_per_query,
...@@ -116,7 +117,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, ...@@ -116,7 +117,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
/* umc query_ras_error_address is also responsible for clearing /* umc query_ras_error_address is also responsible for clearing
* error status * error status
*/ */
adev->umc.funcs->query_ras_error_address(adev, ras_error_status); adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
} }
/* only uncorrectable error needs gpu reset */ /* only uncorrectable error needs gpu reset */
......
...@@ -35,13 +35,17 @@ ...@@ -35,13 +35,17 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
struct amdgpu_umc_funcs { struct amdgpu_umc_ras_funcs {
void (*err_cnt_init)(struct amdgpu_device *adev); void (*err_cnt_init)(struct amdgpu_device *adev);
int (*ras_late_init)(struct amdgpu_device *adev); int (*ras_late_init)(struct amdgpu_device *adev);
void (*ras_fini)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev, void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status); void *ras_error_status);
void (*query_ras_error_address)(struct amdgpu_device *adev, void (*query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status); void *ras_error_status);
};
struct amdgpu_umc_funcs {
void (*init_registers)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev);
}; };
...@@ -59,6 +63,7 @@ struct amdgpu_umc { ...@@ -59,6 +63,7 @@ struct amdgpu_umc {
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
const struct amdgpu_umc_funcs *funcs; const struct amdgpu_umc_funcs *funcs;
const struct amdgpu_umc_ras_funcs *ras_funcs;
}; };
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
......
...@@ -655,7 +655,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) ...@@ -655,7 +655,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA; adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0]; adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v8_7_funcs; adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
break; break;
default: default:
break; break;
......
...@@ -1155,7 +1155,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) ...@@ -1155,7 +1155,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v6_1_funcs; adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
break; break;
case CHIP_ARCTURUS: case CHIP_ARCTURUS:
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
...@@ -1163,7 +1163,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) ...@@ -1163,7 +1163,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v6_1_funcs; adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
break; break;
default: default:
break; break;
...@@ -1194,12 +1194,6 @@ static int gmc_v9_0_early_init(void *handle) ...@@ -1194,12 +1194,6 @@ static int gmc_v9_0_early_init(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
if (adev->asic_type == CHIP_VEGA20 || if (adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_ARCTURUS) adev->asic_type == CHIP_ARCTURUS)
adev->gmc.xgmi.supported = true; adev->gmc.xgmi.supported = true;
...@@ -1210,6 +1204,12 @@ static int gmc_v9_0_early_init(void *handle) ...@@ -1210,6 +1204,12 @@ static int gmc_v9_0_early_init(void *handle)
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
} }
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end = adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1; adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
*/ */
#include "umc_v6_1.h" #include "umc_v6_1.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h" #include "amdgpu.h"
#include "rsmu/rsmu_0_0_2_offset.h" #include "rsmu/rsmu_0_0_2_offset.h"
...@@ -464,9 +465,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) ...@@ -464,9 +465,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
umc_v6_1_enable_umc_index_mode(adev); umc_v6_1_enable_umc_index_mode(adev);
} }
const struct amdgpu_umc_funcs umc_v6_1_funcs = { const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
.err_cnt_init = umc_v6_1_err_cnt_init, .err_cnt_init = umc_v6_1_err_cnt_init,
.ras_late_init = amdgpu_umc_ras_late_init, .ras_late_init = amdgpu_umc_ras_late_init,
.ras_fini = amdgpu_umc_ras_fini,
.query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address, .query_ras_error_address = umc_v6_1_query_ras_error_address,
}; };
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
/* umc ce count initial value */ /* umc ce count initial value */
#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) #define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
extern const struct amdgpu_umc_funcs umc_v6_1_funcs; extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs;
extern const uint32_t extern const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
*/ */
#include "umc_v6_7.h" #include "umc_v6_7.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h" #include "amdgpu.h"
#include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_offset.h"
...@@ -272,8 +273,9 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, ...@@ -272,8 +273,9 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
} }
} }
const struct amdgpu_umc_funcs umc_v6_7_funcs = { const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
.ras_late_init = amdgpu_umc_ras_late_init, .ras_late_init = amdgpu_umc_ras_late_init,
.ras_fini = amdgpu_umc_ras_fini,
.query_ras_error_count = umc_v6_7_query_ras_error_count, .query_ras_error_count = umc_v6_7_query_ras_error_count,
.query_ras_error_address = umc_v6_7_query_ras_error_address, .query_ras_error_address = umc_v6_7_query_ras_error_address,
}; };
...@@ -32,6 +32,6 @@ ...@@ -32,6 +32,6 @@
#define UMC_V6_7_INST_DIST 0x40000 #define UMC_V6_7_INST_DIST 0x40000
extern const struct amdgpu_umc_funcs umc_v6_7_funcs; extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs;
#endif #endif
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
*/ */
#include "umc_v8_7.h" #include "umc_v8_7.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h" #include "amdgpu.h"
#include "rsmu/rsmu_0_0_2_offset.h" #include "rsmu/rsmu_0_0_2_offset.h"
...@@ -323,9 +324,10 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev) ...@@ -323,9 +324,10 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
} }
} }
const struct amdgpu_umc_funcs umc_v8_7_funcs = { const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
.err_cnt_init = umc_v8_7_err_cnt_init, .err_cnt_init = umc_v8_7_err_cnt_init,
.ras_late_init = amdgpu_umc_ras_late_init, .ras_late_init = amdgpu_umc_ras_late_init,
.ras_fini = amdgpu_umc_ras_fini,
.query_ras_error_count = umc_v8_7_query_ras_error_count, .query_ras_error_count = umc_v8_7_query_ras_error_count,
.query_ras_error_address = umc_v8_7_query_ras_error_address, .query_ras_error_address = umc_v8_7_query_ras_error_address,
}; };
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
/* umc ce count initial value */ /* umc ce count initial value */
#define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD) #define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
extern const struct amdgpu_umc_funcs umc_v8_7_funcs; extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs;
extern const uint32_t extern const uint32_t
umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM]; umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment