Commit 01e5b2c4 authored by Somnath Kotur, committed by David S. Miller

be2net: Fix crash on 2nd invocation of PCI AER/EEH error_detected hook

During a PCI EEH/AER error recovery flow, if the device did not successfully
restart, the error_detected() hook may be called a second time with a
"perm_failure" state. This patch skips over driver cleanup for the second
invocation of the callback.

Also, Lancer error recovery code is fixed-up to handle these changes.
Signed-off-by: Kalesh AP <kalesh.purayil@emulex.com>
Signed-off-by: Somnath kotur <somnath.kotur@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e38b1706
...@@ -562,7 +562,7 @@ int lancer_test_and_set_rdy_state(struct be_adapter *adapter) ...@@ -562,7 +562,7 @@ int lancer_test_and_set_rdy_state(struct be_adapter *adapter)
resource_error = lancer_provisioning_error(adapter); resource_error = lancer_provisioning_error(adapter);
if (resource_error) if (resource_error)
return -1; return -EAGAIN;
status = lancer_wait_ready(adapter); status = lancer_wait_ready(adapter);
if (!status) { if (!status) {
...@@ -590,8 +590,8 @@ int lancer_test_and_set_rdy_state(struct be_adapter *adapter) ...@@ -590,8 +590,8 @@ int lancer_test_and_set_rdy_state(struct be_adapter *adapter)
* when PF provisions resources. * when PF provisions resources.
*/ */
resource_error = lancer_provisioning_error(adapter); resource_error = lancer_provisioning_error(adapter);
if (status == -1 && !resource_error) if (resource_error)
adapter->eeh_error = true; status = -EAGAIN;
return status; return status;
} }
......
...@@ -4098,6 +4098,7 @@ static int be_get_initial_config(struct be_adapter *adapter) ...@@ -4098,6 +4098,7 @@ static int be_get_initial_config(struct be_adapter *adapter)
static int lancer_recover_func(struct be_adapter *adapter) static int lancer_recover_func(struct be_adapter *adapter)
{ {
struct device *dev = &adapter->pdev->dev;
int status; int status;
status = lancer_test_and_set_rdy_state(adapter); status = lancer_test_and_set_rdy_state(adapter);
...@@ -4109,8 +4110,7 @@ static int lancer_recover_func(struct be_adapter *adapter) ...@@ -4109,8 +4110,7 @@ static int lancer_recover_func(struct be_adapter *adapter)
be_clear(adapter); be_clear(adapter);
adapter->hw_error = false; be_clear_all_error(adapter);
adapter->fw_timeout = false;
status = be_setup(adapter); status = be_setup(adapter);
if (status) if (status)
...@@ -4122,13 +4122,13 @@ static int lancer_recover_func(struct be_adapter *adapter) ...@@ -4122,13 +4122,13 @@ static int lancer_recover_func(struct be_adapter *adapter)
goto err; goto err;
} }
dev_err(&adapter->pdev->dev, dev_err(dev, "Error recovery successful\n");
"Adapter SLIPORT recovery succeeded\n");
return 0; return 0;
err: err:
if (adapter->eeh_error) if (status == -EAGAIN)
dev_err(&adapter->pdev->dev, dev_err(dev, "Waiting for resource provisioning\n");
"Adapter SLIPORT recovery failed\n"); else
dev_err(dev, "Error recovery failed\n");
return status; return status;
} }
...@@ -4137,28 +4137,27 @@ static void be_func_recovery_task(struct work_struct *work) ...@@ -4137,28 +4137,27 @@ static void be_func_recovery_task(struct work_struct *work)
{ {
struct be_adapter *adapter = struct be_adapter *adapter =
container_of(work, struct be_adapter, func_recovery_work.work); container_of(work, struct be_adapter, func_recovery_work.work);
int status; int status = 0;
be_detect_error(adapter); be_detect_error(adapter);
if (adapter->hw_error && lancer_chip(adapter)) { if (adapter->hw_error && lancer_chip(adapter)) {
if (adapter->eeh_error)
goto out;
rtnl_lock(); rtnl_lock();
netif_device_detach(adapter->netdev); netif_device_detach(adapter->netdev);
rtnl_unlock(); rtnl_unlock();
status = lancer_recover_func(adapter); status = lancer_recover_func(adapter);
if (!status) if (!status)
netif_device_attach(adapter->netdev); netif_device_attach(adapter->netdev);
} }
out: /* In Lancer, for all errors other than provisioning error (-EAGAIN),
schedule_delayed_work(&adapter->func_recovery_work, * no need to attempt further recovery.
msecs_to_jiffies(1000)); */
if (!status || status == -EAGAIN)
schedule_delayed_work(&adapter->func_recovery_work,
msecs_to_jiffies(1000));
} }
static void be_worker(struct work_struct *work) static void be_worker(struct work_struct *work)
...@@ -4441,20 +4440,19 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, ...@@ -4441,20 +4440,19 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
dev_err(&adapter->pdev->dev, "EEH error detected\n"); dev_err(&adapter->pdev->dev, "EEH error detected\n");
adapter->eeh_error = true; if (!adapter->eeh_error) {
adapter->eeh_error = true;
cancel_delayed_work_sync(&adapter->func_recovery_work);
rtnl_lock(); cancel_delayed_work_sync(&adapter->func_recovery_work);
netif_device_detach(netdev);
rtnl_unlock();
if (netif_running(netdev)) {
rtnl_lock(); rtnl_lock();
be_close(netdev); netif_device_detach(netdev);
if (netif_running(netdev))
be_close(netdev);
rtnl_unlock(); rtnl_unlock();
be_clear(adapter);
} }
be_clear(adapter);
if (state == pci_channel_io_perm_failure) if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_DISCONNECT;
...@@ -4479,7 +4477,6 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev) ...@@ -4479,7 +4477,6 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
int status; int status;
dev_info(&adapter->pdev->dev, "EEH reset\n"); dev_info(&adapter->pdev->dev, "EEH reset\n");
be_clear_all_error(adapter);
status = pci_enable_device(pdev); status = pci_enable_device(pdev);
if (status) if (status)
...@@ -4497,6 +4494,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev) ...@@ -4497,6 +4494,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_DISCONNECT;
pci_cleanup_aer_uncorrect_error_status(pdev); pci_cleanup_aer_uncorrect_error_status(pdev);
be_clear_all_error(adapter);
return PCI_ERS_RESULT_RECOVERED; return PCI_ERS_RESULT_RECOVERED;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment