Commit 6e36f231, authored by Hawking Zhang, committed by Alex Deucher

drm/amdgpu: split nbio callbacks into ras and non-ras ones

NBIO RAS is not managed by the GPU driver when the GPU is
connected to the CPU through XGMI. Split the NBIO callbacks
into RAS and non-RAS ones so that the GPU driver only
initializes the NBIO RAS callbacks when it manages
NBIO RAS.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 87da0cc1
...@@ -199,13 +199,13 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) ...@@ -199,13 +199,13 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
* ack the interrupt if it is there * ack the interrupt if it is there
*/ */
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
if (adev->nbio.funcs && if (adev->nbio.ras_funcs &&
adev->nbio.funcs->handle_ras_controller_intr_no_bifring) adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring)
adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring(adev);
if (adev->nbio.funcs && if (adev->nbio.ras_funcs &&
adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring)
adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
} }
return ret; return ret;
......
...@@ -47,6 +47,17 @@ struct nbio_hdp_flush_reg { ...@@ -47,6 +47,17 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma7; u32 ref_and_mask_sdma7;
}; };
/*
 * RAS-specific NBIO callbacks, split out of struct amdgpu_nbio_funcs.
 *
 * The table is reached through adev->nbio.ras_funcs, which may be NULL
 * (it is only assigned when the driver manages NBIO RAS), so callers test
 * both the table pointer and the individual callback before invoking it.
 */
struct amdgpu_nbio_ras_funcs {
/* Called from amdgpu_irq_handler() when PCIE_BIF RAS is supported. */
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
/* Called from amdgpu_irq_handler() when PCIE_BIF RAS is supported. */
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
/* Called from amdgpu_ras_init(); a non-zero return aborts RAS init. */
int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
/* Called from amdgpu_ras_init(); a non-zero return aborts RAS init. */
int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
/*
 * Called from amdgpu_ras_query_error_status() for the PCIE_BIF block;
 * ras_error_status receives the address of the caller's err_data.
 */
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
/* Called from soc15_common_late_init(); its return value is propagated. */
int (*ras_late_init)(struct amdgpu_device *adev);
/* Called from soc15_common_sw_fini(). */
void (*ras_fini)(struct amdgpu_device *adev);
};
struct amdgpu_nbio_funcs { struct amdgpu_nbio_funcs {
const struct nbio_hdp_flush_reg *hdp_flush_reg; const struct nbio_hdp_flush_reg *hdp_flush_reg;
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
...@@ -79,13 +90,6 @@ struct amdgpu_nbio_funcs { ...@@ -79,13 +90,6 @@ struct amdgpu_nbio_funcs {
void (*ih_control)(struct amdgpu_device *adev); void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev);
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
int (*ras_late_init)(struct amdgpu_device *adev);
void (*enable_aspm)(struct amdgpu_device *adev, void (*enable_aspm)(struct amdgpu_device *adev,
bool enable); bool enable);
void (*program_aspm)(struct amdgpu_device *adev); void (*program_aspm)(struct amdgpu_device *adev);
...@@ -97,6 +101,7 @@ struct amdgpu_nbio { ...@@ -97,6 +101,7 @@ struct amdgpu_nbio {
struct amdgpu_irq_src ras_err_event_athub_irq; struct amdgpu_irq_src ras_err_event_athub_irq;
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
const struct amdgpu_nbio_funcs *funcs; const struct amdgpu_nbio_funcs *funcs;
const struct amdgpu_nbio_ras_funcs *ras_funcs;
}; };
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
......
...@@ -804,8 +804,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -804,8 +804,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->mmhub.funcs->query_ras_error_status(adev); adev->mmhub.funcs->query_ras_error_status(adev);
break; break;
case AMDGPU_RAS_BLOCK__PCIE_BIF: case AMDGPU_RAS_BLOCK__PCIE_BIF:
if (adev->nbio.funcs->query_ras_error_count) if (adev->nbio.ras_funcs &&
adev->nbio.funcs->query_ras_error_count(adev, &err_data); adev->nbio.ras_funcs->query_ras_error_count)
adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
break; break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL: case AMDGPU_RAS_BLOCK__XGMI_WAFL:
amdgpu_xgmi_query_ras_error_count(adev, &err_data); amdgpu_xgmi_query_ras_error_count(adev, &err_data);
...@@ -2030,14 +2031,31 @@ int amdgpu_ras_init(struct amdgpu_device *adev) ...@@ -2030,14 +2031,31 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Might need get this flag from vbios. */ /* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS; con->flags = RAS_DEFAULT_FLAGS;
if (adev->nbio.funcs->init_ras_controller_interrupt) { /* initialize nbio ras function ahead of any other
r = adev->nbio.funcs->init_ras_controller_interrupt(adev); * ras functions so hardware fatal error interrupt
* can be enabled as early as possible */
switch (adev->asic_type) {
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
if (!adev->gmc.xgmi.connected_to_cpu)
adev->nbio.ras_funcs = &nbio_v7_4_ras_funcs;
break;
default:
/* nbio ras is not available */
break;
}
if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->init_ras_controller_interrupt) {
r = adev->nbio.ras_funcs->init_ras_controller_interrupt(adev);
if (r) if (r)
goto release_con; goto release_con;
} }
if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { if (adev->nbio.ras_funcs &&
r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt) {
r = adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt(adev);
if (r) if (r)
goto release_con; goto release_con;
} }
......
...@@ -557,6 +557,16 @@ static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, ...@@ -557,6 +557,16 @@ static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
} }
/*
 * NBIO v7.4 RAS callback table.
 *
 * Exported through the nbio_v7_4 header and installed as
 * adev->nbio.ras_funcs by amdgpu_ras_init() for VEGA20, ARCTURUS and
 * ALDEBARAN, but only when the GPU is not connected to the CPU over XGMI.
 * The late-init and fini entries point at the common amdgpu_nbio helpers
 * rather than v7.4-specific functions.
 */
const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = {
.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
.init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
.init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
.query_ras_error_count = nbio_v7_4_query_ras_error_count,
.ras_late_init = amdgpu_nbio_ras_late_init,
.ras_fini = amdgpu_nbio_ras_fini,
};
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
...@@ -577,10 +587,4 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { ...@@ -577,10 +587,4 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.ih_control = nbio_v7_4_ih_control, .ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers, .init_registers = nbio_v7_4_init_registers,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
.init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
.init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
.query_ras_error_count = nbio_v7_4_query_ras_error_count,
.ras_late_init = amdgpu_nbio_ras_late_init,
}; };
...@@ -28,5 +28,6 @@ ...@@ -28,5 +28,6 @@
extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
extern const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs;
#endif #endif
...@@ -1523,8 +1523,9 @@ static int soc15_common_late_init(void *handle) ...@@ -1523,8 +1523,9 @@ static int soc15_common_late_init(void *handle)
if (adev->hdp.funcs->reset_ras_error_count) if (adev->hdp.funcs->reset_ras_error_count)
adev->hdp.funcs->reset_ras_error_count(adev); adev->hdp.funcs->reset_ras_error_count(adev);
if (adev->nbio.funcs->ras_late_init) if (adev->nbio.ras_funcs &&
r = adev->nbio.funcs->ras_late_init(adev); adev->nbio.ras_funcs->ras_late_init)
r = adev->nbio.ras_funcs->ras_late_init(adev);
return r; return r;
} }
...@@ -1545,7 +1546,9 @@ static int soc15_common_sw_fini(void *handle) ...@@ -1545,7 +1546,9 @@ static int soc15_common_sw_fini(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_nbio_ras_fini(adev); if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->ras_fini)
adev->nbio.ras_funcs->ras_fini(adev);
adev->df.funcs->sw_fini(adev); adev->df.funcs->sw_fini(adev);
return 0; return 0;
} }
...@@ -1609,9 +1612,11 @@ static int soc15_common_hw_fini(void *handle) ...@@ -1609,9 +1612,11 @@ static int soc15_common_hw_fini(void *handle)
if (adev->nbio.ras_if && if (adev->nbio.ras_if &&
amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
if (adev->nbio.funcs->init_ras_controller_interrupt) if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->init_ras_controller_interrupt)
amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt)
amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment