Commit b90484ec, authored by Sam Bobroff, committed by Michael Ellerman

powerpc/eeh: Cleanup control flow in eeh_handle_normal_event()

Rather than mixing "if (state)" blocks and gotos, convert entirely to
"if (state)" blocks to make the state machine behaviour clearer.
Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent fef7f905
...@@ -808,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -808,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr, pe->phb->global_number, pe->addr,
pe->freeze_count); pe->freeze_count);
goto hard_fail; result = PCI_ERS_RESULT_DISCONNECT;
} }
pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
pe->freeze_count, eeh_max_freezes);
/* Walk the various device drivers attached to this slot through /* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs * a reset sequence, giving each an opportunity to do what it needs
...@@ -823,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -823,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* the error. Override the result if necessary to have partially * the error. Override the result if necessary to have partially
* hotplug for this case. * hotplug for this case.
*/ */
if (result != PCI_ERS_RESULT_DISCONNECT) {
pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
pe->freeze_count, eeh_max_freezes);
pr_info("EEH: Notify device drivers to shutdown\n"); pr_info("EEH: Notify device drivers to shutdown\n");
eeh_set_channel_state(pe, pci_channel_io_frozen); eeh_set_channel_state(pe, pci_channel_io_frozen);
eeh_set_irq_state(pe, false); eeh_set_irq_state(pe, false);
eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error, eeh_pe_report("error_detected(IO frozen)", pe,
&result); eeh_report_error, &result);
if ((pe->type & EEH_PE_PHB) && if ((pe->type & EEH_PE_PHB) &&
result != PCI_ERS_RESULT_NONE && result != PCI_ERS_RESULT_NONE &&
result != PCI_ERS_RESULT_NEED_RESET) result != PCI_ERS_RESULT_NEED_RESET)
result = PCI_ERS_RESULT_NEED_RESET; result = PCI_ERS_RESULT_NEED_RESET;
}
/* Get the current PCI slot state. This can take a long time, /* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems. * sometimes over 300 seconds for certain systems.
*/ */
if (result != PCI_ERS_RESULT_DISCONNECT) {
rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
pr_warn("EEH: Permanent failure\n"); pr_warn("EEH: Permanent failure\n");
goto hard_fail; result = PCI_ERS_RESULT_DISCONNECT;
}
} }
/* Since rtas may enable MMIO when posting the error log, /* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers * don't post the error log until after all dev drivers
* have been informed. * have been informed.
*/ */
if (result != PCI_ERS_RESULT_DISCONNECT) {
pr_info("EEH: Collect temporary log\n"); pr_info("EEH: Collect temporary log\n");
eeh_slot_error_detail(pe, EEH_LOG_TEMP); eeh_slot_error_detail(pe, EEH_LOG_TEMP);
}
/* If all device drivers were EEH-unaware, then shut /* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they * down all of the device drivers, and hope they
...@@ -859,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -859,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (rc) { if (rc) {
pr_warn("%s: Unable to reset, err=%d\n", pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc); __func__, rc);
goto hard_fail; result = PCI_ERS_RESULT_DISCONNECT;
} }
} }
...@@ -868,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -868,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Enable I/O for affected devices\n"); pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
if (rc < 0) if (rc < 0) {
goto hard_fail; result = PCI_ERS_RESULT_DISCONNECT;
if (rc) { } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET; result = PCI_ERS_RESULT_NEED_RESET;
} else { } else {
pr_info("EEH: Notify device drivers to resume I/O\n"); pr_info("EEH: Notify device drivers to resume I/O\n");
...@@ -884,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -884,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Enabled DMA for affected devices\n"); pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
if (rc < 0) if (rc < 0) {
goto hard_fail; result = PCI_ERS_RESULT_DISCONNECT;
if (rc) { } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET; result = PCI_ERS_RESULT_NEED_RESET;
} else { } else {
/* /*
...@@ -899,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -899,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
} }
} }
/* If any device has a hard failure, then shut off everything. */
if (result == PCI_ERS_RESULT_DISCONNECT) {
pr_warn("EEH: Device driver gave up\n");
goto hard_fail;
}
/* If any device called out for a reset, then reset the slot */ /* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) { if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n"); pr_info("EEH: Reset without hotplug activity\n");
...@@ -912,27 +912,21 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -912,27 +912,21 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (rc) { if (rc) {
pr_warn("%s: Cannot reset, err=%d\n", pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc); __func__, rc);
goto hard_fail; result = PCI_ERS_RESULT_DISCONNECT;
} } else {
pr_info("EEH: Notify device drivers "
"the completion of reset\n");
result = PCI_ERS_RESULT_NONE; result = PCI_ERS_RESULT_NONE;
eeh_set_channel_state(pe, pci_channel_io_normal); eeh_set_channel_state(pe, pci_channel_io_normal);
eeh_set_irq_state(pe, true); eeh_set_irq_state(pe, true);
eeh_pe_report("slot_reset", pe, eeh_report_reset, &result); eeh_pe_report("slot_reset", pe, eeh_report_reset,
&result);
} }
/* All devices should claim they have recovered by now. */
if ((result != PCI_ERS_RESULT_RECOVERED) &&
(result != PCI_ERS_RESULT_NONE)) {
pr_warn("EEH: Not recovered\n");
goto hard_fail;
} }
if ((result == PCI_ERS_RESULT_RECOVERED) ||
(result == PCI_ERS_RESULT_NONE)) {
/* /*
* For those hot removed VFs, we should add back them after PF get * For those hot removed VFs, we should add back them after PF
* recovered properly. * get recovered properly.
*/ */
list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list, list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
rmv_entry) { rmv_entry) {
...@@ -953,9 +947,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -953,9 +947,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
} }
pr_info("EEH: Recovery successful.\n"); pr_info("EEH: Recovery successful.\n");
goto final; } else {
hard_fail:
/* /*
* About 90% of all real-life EEH failures in the field * About 90% of all real-life EEH failures in the field
* are due to poorly seated PCI cards. Only 10% or so are * are due to poorly seated PCI cards. Only 10% or so are
...@@ -994,7 +986,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -994,7 +986,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
/* The passed PE should no longer be used */ /* The passed PE should no longer be used */
return; return;
} }
final: }
eeh_pe_state_clear(pe, EEH_PE_RECOVERING); eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment