Commit 719a9b33 authored by Hawking Zhang, committed by Alex Deucher

drm/amdgpu: split gfx callbacks into ras and non-ras ones

gfx ras is only available in certain ip generations.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8bc7b360
...@@ -677,8 +677,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, ...@@ -677,8 +677,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
*/ */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->gfx.funcs->query_ras_error_count) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->query_ras_error_count(adev, err_data); adev->gfx.ras_funcs->query_ras_error_count)
adev->gfx.ras_funcs->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev); amdgpu_ras_reset_gpu(adev);
} }
return AMDGPU_RAS_SUCCESS; return AMDGPU_RAS_SUCCESS;
......
...@@ -205,6 +205,19 @@ struct amdgpu_cu_info { ...@@ -205,6 +205,19 @@ struct amdgpu_cu_info {
uint32_t bitmap[4][4]; uint32_t bitmap[4][4];
}; };
/*
 * RAS-specific gfx callbacks, split out of struct amdgpu_gfx_funcs.
 *
 * The table is reached through adev->gfx.ras_funcs. Every caller in this
 * change checks both that pointer and the individual callback for NULL
 * before invoking it, so the table itself and any entry may be left unset.
 */
struct amdgpu_gfx_ras_funcs {
/* Called from gfx_v9_0_ecc_late_init(); a non-zero return is propagated. */
int (*ras_late_init)(struct amdgpu_device *adev);
/* Called from gfx_v9_0_sw_fini() before the gfx rings are torn down. */
void (*ras_fini)(struct amdgpu_device *adev);
/*
 * Called by amdgpu_ras_error_inject() for AMDGPU_RAS_BLOCK__GFX; its
 * return value becomes the result of the injection. The gfx v9.4
 * implementation casts inject_if to struct ras_inject_if *.
 */
int (*ras_error_inject)(struct amdgpu_device *adev,
void *inject_if);
/*
 * Called when querying GFX error counts. The gfx v9.4 implementation
 * casts ras_error_status to struct ras_err_data *.
 */
int (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
/* Called by amdgpu_ras_reset_error_status() for the GFX block. */
void (*reset_ras_error_count)(struct amdgpu_device *adev);
/* Called right after query_ras_error_count in amdgpu_ras_query_error_status(). */
void (*query_ras_error_status)(struct amdgpu_device *adev);
/* Called right after reset_ras_error_count in amdgpu_ras_reset_error_status(). */
void (*reset_ras_error_status)(struct amdgpu_device *adev);
/* Called last in gfx_v9_0_ecc_late_init(), after ras_late_init has succeeded. */
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
};
struct amdgpu_gfx_funcs { struct amdgpu_gfx_funcs {
/* get the gpu clock counter */ /* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
...@@ -220,14 +233,8 @@ struct amdgpu_gfx_funcs { ...@@ -220,14 +233,8 @@ struct amdgpu_gfx_funcs {
uint32_t *dst); uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
u32 queue, u32 vmid); u32 queue, u32 vmid);
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
void (*reset_ras_error_count) (struct amdgpu_device *adev);
void (*init_spm_golden)(struct amdgpu_device *adev); void (*init_spm_golden)(struct amdgpu_device *adev);
void (*query_ras_error_status) (struct amdgpu_device *adev);
void (*reset_ras_error_status) (struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
}; };
struct sq_work { struct sq_work {
...@@ -331,6 +338,7 @@ struct amdgpu_gfx { ...@@ -331,6 +338,7 @@ struct amdgpu_gfx {
/*ras */ /*ras */
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
const struct amdgpu_gfx_ras_funcs *ras_funcs;
}; };
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
......
...@@ -792,11 +792,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -792,11 +792,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
} }
break; break;
case AMDGPU_RAS_BLOCK__GFX: case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->query_ras_error_count) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->query_ras_error_count(adev, &err_data); adev->gfx.ras_funcs->query_ras_error_count)
adev->gfx.ras_funcs->query_ras_error_count(adev, &err_data);
if (adev->gfx.funcs->query_ras_error_status) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->query_ras_error_status(adev); adev->gfx.ras_funcs->query_ras_error_status)
adev->gfx.ras_funcs->query_ras_error_status(adev);
break; break;
case AMDGPU_RAS_BLOCK__MMHUB: case AMDGPU_RAS_BLOCK__MMHUB:
if (adev->mmhub.ras_funcs && if (adev->mmhub.ras_funcs &&
...@@ -852,11 +854,13 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, ...@@ -852,11 +854,13 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
switch (block) { switch (block) {
case AMDGPU_RAS_BLOCK__GFX: case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->reset_ras_error_count) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->reset_ras_error_count(adev); adev->gfx.ras_funcs->reset_ras_error_count)
adev->gfx.ras_funcs->reset_ras_error_count(adev);
if (adev->gfx.funcs->reset_ras_error_status) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->reset_ras_error_status(adev); adev->gfx.ras_funcs->reset_ras_error_status)
adev->gfx.ras_funcs->reset_ras_error_status(adev);
break; break;
case AMDGPU_RAS_BLOCK__MMHUB: case AMDGPU_RAS_BLOCK__MMHUB:
if (adev->mmhub.ras_funcs && if (adev->mmhub.ras_funcs &&
...@@ -926,8 +930,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ...@@ -926,8 +930,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
switch (info->head.block) { switch (info->head.block) {
case AMDGPU_RAS_BLOCK__GFX: case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->ras_error_inject) if (adev->gfx.ras_funcs &&
ret = adev->gfx.funcs->ras_error_inject(adev, info); adev->gfx.ras_funcs->ras_error_inject)
ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
else else
ret = -EINVAL; ret = -EINVAL;
break; break;
...@@ -1514,8 +1519,9 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev, ...@@ -1514,8 +1519,9 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
*/ */
switch (info->head.block) { switch (info->head.block) {
case AMDGPU_RAS_BLOCK__GFX: case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->query_ras_error_status) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->query_ras_error_status(adev); adev->gfx.ras_funcs->query_ras_error_status)
adev->gfx.ras_funcs->query_ras_error_status(adev);
break; break;
case AMDGPU_RAS_BLOCK__MMHUB: case AMDGPU_RAS_BLOCK__MMHUB:
if (adev->mmhub.ras_funcs && if (adev->mmhub.ras_funcs &&
......
...@@ -2109,39 +2109,16 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { ...@@ -2109,39 +2109,16 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
};
static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
.ras_late_init = amdgpu_gfx_ras_late_init,
.ras_fini = amdgpu_gfx_ras_fini,
.ras_error_inject = &gfx_v9_0_ras_error_inject, .ras_error_inject = &gfx_v9_0_ras_error_inject,
.query_ras_error_count = &gfx_v9_0_query_ras_error_count, .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
}; };
static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v9_0_select_se_sh,
.read_wave_data = &gfx_v9_0_read_wave_data,
.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
.ras_error_inject = &gfx_v9_4_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
.query_ras_error_status = &gfx_v9_4_query_ras_error_status,
};
static const struct amdgpu_gfx_funcs gfx_v9_4_2_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v9_0_select_se_sh,
.read_wave_data = &gfx_v9_0_read_wave_data,
.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
.query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
.reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
};
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{ {
u32 gb_addr_config; u32 gb_addr_config;
...@@ -2168,6 +2145,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) ...@@ -2168,6 +2145,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
DRM_INFO("fix gfx.config for vega12\n"); DRM_INFO("fix gfx.config for vega12\n");
break; break;
case CHIP_VEGA20: case CHIP_VEGA20:
adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
...@@ -2193,7 +2171,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) ...@@ -2193,7 +2171,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break; break;
case CHIP_ARCTURUS: case CHIP_ARCTURUS:
adev->gfx.funcs = &gfx_v9_4_gfx_funcs; adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
...@@ -2214,7 +2192,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) ...@@ -2214,7 +2192,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config |= 0x22010042; gb_addr_config |= 0x22010042;
break; break;
case CHIP_ALDEBARAN: case CHIP_ALDEBARAN:
adev->gfx.funcs = &gfx_v9_4_2_gfx_funcs; adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
...@@ -2447,7 +2425,9 @@ static int gfx_v9_0_sw_fini(void *handle) ...@@ -2447,7 +2425,9 @@ static int gfx_v9_0_sw_fini(void *handle)
int i; int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_gfx_ras_fini(adev); if (adev->gfx.ras_funcs &&
adev->gfx.ras_funcs->ras_fini)
adev->gfx.ras_funcs->ras_fini(adev);
for (i = 0; i < adev->gfx.num_gfx_rings; i++) for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
...@@ -4766,12 +4746,16 @@ static int gfx_v9_0_ecc_late_init(void *handle) ...@@ -4766,12 +4746,16 @@ static int gfx_v9_0_ecc_late_init(void *handle)
if (r) if (r)
return r; return r;
r = amdgpu_gfx_ras_late_init(adev); if (adev->gfx.ras_funcs &&
adev->gfx.ras_funcs->ras_late_init) {
r = adev->gfx.ras_funcs->ras_late_init(adev);
if (r) if (r)
return r; return r;
}
if (adev->gfx.funcs->enable_watchdog_timer) if (adev->gfx.ras_funcs &&
adev->gfx.funcs->enable_watchdog_timer(adev); adev->gfx.ras_funcs->enable_watchdog_timer)
adev->gfx.ras_funcs->enable_watchdog_timer(adev);
return 0; return 0;
} }
......
...@@ -863,7 +863,7 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev, ...@@ -863,7 +863,7 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
return 0; return 0;
} }
int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status) void *ras_error_status)
{ {
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
...@@ -906,7 +906,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, ...@@ -906,7 +906,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
return 0; return 0;
} }
void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
{ {
int i, j, k; int i, j, k;
...@@ -971,7 +971,8 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) ...@@ -971,7 +971,8 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
} }
int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if) static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
void *inject_if)
{ {
struct ras_inject_if *info = (struct ras_inject_if *)inject_if; struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret; int ret;
...@@ -996,7 +997,7 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if) ...@@ -996,7 +997,7 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs = static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs =
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
{ {
uint32_t i, j; uint32_t i, j;
uint32_t reg_value; uint32_t reg_value;
...@@ -1021,3 +1022,12 @@ void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) ...@@ -1021,3 +1022,12 @@ void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
} }
/*
 * RAS callback table for gfx v9.4, exported through gfx_v9_4.h and assigned
 * to adev->gfx.ras_funcs for CHIP_ARCTURUS in gfx_v9_0_gpu_early_init().
 *
 * Late init and fini use the common amdgpu_gfx_ras_* helpers.
 * reset_ras_error_status and enable_watchdog_timer are left unset; callers
 * NULL-check each entry before use.
 */
const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = {
.ras_late_init = amdgpu_gfx_ras_late_init,
.ras_fini = amdgpu_gfx_ras_fini,
.ras_error_inject = &gfx_v9_4_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
.query_ras_error_status = &gfx_v9_4_query_ras_error_status,
};
...@@ -24,16 +24,6 @@ ...@@ -24,16 +24,6 @@
#ifndef __GFX_V9_4_H__ #ifndef __GFX_V9_4_H__
#define __GFX_V9_4_H__ #define __GFX_V9_4_H__
void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev); extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs;
int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
void *inject_if);
void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev);
void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev);
#endif /* __GFX_V9_4_H__ */ #endif /* __GFX_V9_4_H__ */
...@@ -1284,3 +1284,14 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) ...@@ -1284,3 +1284,14 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
} }
/*
 * RAS callback table for gfx v9.4.2, exported through gfx_v9_4_2.h and
 * assigned to adev->gfx.ras_funcs for CHIP_ALDEBARAN in
 * gfx_v9_0_gpu_early_init().
 *
 * Late init and fini use the common amdgpu_gfx_ras_* helpers; every other
 * entry points at a gfx_v9_4_2-specific implementation, and all callbacks
 * of struct amdgpu_gfx_ras_funcs are populated.
 */
const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs = {
.ras_late_init = amdgpu_gfx_ras_late_init,
.ras_fini = amdgpu_gfx_ras_fini,
.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
.query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
.reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
};
...@@ -30,11 +30,6 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, ...@@ -30,11 +30,6 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
uint32_t die_id); uint32_t die_id);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev); extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;
int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if);
void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev);
int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev);
void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev);
#endif /* __GFX_V9_4_2_H__ */ #endif /* __GFX_V9_4_2_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment