Commit 120dc496, authored by Gavin Shan, committed by Benjamin Herrenschmidt

powerpc/eeh: Make EEH handler PE sensitive

Once an EEH error is found, an EEH event is created and put on
the global linked list. Meanwhile, a kernel thread is started to
process it. The handler for that kernel thread was originally
EEH-device sensitive.

The patch reworks the handler of the kernel thread so that it's PE
sensitive.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent c270a24c
...@@ -57,7 +57,7 @@ static int eeh_event_handler(void * dummy) ...@@ -57,7 +57,7 @@ static int eeh_event_handler(void * dummy)
{ {
unsigned long flags; unsigned long flags;
struct eeh_event *event; struct eeh_event *event;
struct eeh_dev *edev; struct eeh_pe *pe;
set_task_comm(current, "eehd"); set_task_comm(current, "eehd");
...@@ -76,28 +76,23 @@ static int eeh_event_handler(void * dummy) ...@@ -76,28 +76,23 @@ static int eeh_event_handler(void * dummy)
/* Serialize processing of EEH events */ /* Serialize processing of EEH events */
mutex_lock(&eeh_event_mutex); mutex_lock(&eeh_event_mutex);
edev = event->edev; pe = event->pe;
eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", pe->phb->global_number, pe->addr);
eeh_pci_name(edev->pdev));
set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */
edev = handle_eeh_events(event); handle_eeh_events(event);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
if (edev) {
eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
pci_dev_put(edev->pdev);
}
kfree(event); kfree(event);
mutex_unlock(&eeh_event_mutex); mutex_unlock(&eeh_event_mutex);
/* If there are no new errors after an hour, clear the counter. */ /* If there are no new errors after an hour, clear the counter. */
if (edev && edev->freeze_count>0) { if (pe && pe->freeze_count > 0) {
msleep_interruptible(3600*1000); msleep_interruptible(3600*1000);
if (edev->freeze_count>0) if (pe->freeze_count > 0)
edev->freeze_count--; pe->freeze_count--;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment