[PATCH] ppc64: test for EEH error in PCI Config-Read path

This patch adds explicit checking for EEH slot isolation events into the PCI config space read path. The change itself would have been minor, except that PCI config reads don't have a pointer to a struct pci_dev. Thus, I had to restructure the EEH code to accommodate this, which seems to be a good thing anyway, making it a tad cleaner. Signed-off-by: Linas Vepstas <linas@linas.org> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>

[PATCH] ppc64: test for EEH error in PCI Config-Read path
This patch adds explicit checking for EEH slot isolation events into the PCI config space read path. The change itself would have been minor, except that PCI config reads don't have a pointer to a struct pci_dev. Thus, I had to restructure the EEH code to accommodate this, which seems to be a good thing anyway, making it a tad cleaner. Signed-off-by: Linas Vepstas <linas@linas.org> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
0e475b4b · Paul Mackerras · Linus Torvalds · 88399f12 · 0e475b4b · 0e475b4b
Commit 0e475b4b authored Sep 02, 2004 by Paul Mackerras Committed by Linus Torvalds Sep 02, 2004
Showing with 84 additions and 38 deletions

arch/ppc64/kernel/eeh.c arch/ppc64/kernel/eeh.c +54 -33

arch/ppc64/kernel/pSeries_pci.c arch/ppc64/kernel/pSeries_pci.c +20 -4

include/asm-ppc64/eeh.h include/asm-ppc64/eeh.h +10 -1

No files found.
--- a/arch/ppc64/kernel/eeh.c
+++ b/arch/ppc64/kernel/eeh.c
@@ -348,7 +348,7 @@ void __init pci_addr_cache_build(void)
 * ths routine does *not* convert I/O BAR addresses (which start
 * with 0xE...) to phys addresses!
 */
-static unsigned long eeh_token_to_phys(unsigned long token)
+static inline unsigned long eeh_token_to_phys(unsigned long token)
 {
 	pte_t *ptep;
 	unsigned long pa, vaddr;
@@ -365,24 +365,22 @@ static unsigned long eeh_token_to_phys(unsigned long token)
 }
 /**
- * eeh_check_failure - check if all 1's data is due to EEH slot freeze
+ * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
- * @token i/o token, should be address in the form 0xA....
+ * @dn device node
- * @val value, should be all 1's (XXX why do we need this arg??)
+ * @dev pci device, if known
 *
- * Check for an eeh failure at the given token address.
+ * Check for an EEH failure for the given device node.  Call this
- * The given value has been read and it should be 1's (0xff, 0xffff or
+ * routine if the result of a read was all 0xff's and you want to
- * 0xffffffff).
+ * find out if this is due to an EEH slot freeze event.  This routine
+ * will query firmware for the EEH status.
 *
- * Probe to determine if an error actually occurred.  If not return val.
+ * Returns 0 if there has not been an EEH error; otherwise returns
- * Otherwise panic.
+ * an error code.
 *
- * Note this routine might be called in an interrupt context ...
+ * It is safe to call this routine in an interrupt context.
 */
-unsigned long eeh_check_failure(void *token, unsigned long val)
+int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 {
-	unsigned long addr;
-	struct pci_dev *dev;
-	struct device_node *dn;
 	int ret;
 	int rets[2];
 	unsigned long flags;
@@ -390,30 +388,19 @@ unsigned long eeh_check_failure(void *token, unsigned long val)
 	__get_cpu_var(total_mmio_ffs)++;
 	if (!eeh_subsystem_enabled)
-		return val;
+		return 0;
-	/* Finding the phys addr + pci device; this is pretty quick. */
-	addr = eeh_token_to_phys((unsigned long)token);
-	dev = pci_get_device_by_addr(addr);
-	if (!dev)
-		return val;
-	dn = pci_device_to_OF_node(dev);
+	if (!dn)
-	if (!dn) {
+		return 0;
-		pci_dev_put(dev);
-		return val;
-	}
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
 	    dn->eeh_mode & EEH_MODE_NOCHECK) {
-		pci_dev_put(dev);
+		return 0;
-		return val;
 	}
 	if (!dn->eeh_config_addr) {
-		pci_dev_put(dev);
+		return 0;
-		return val;
 	}
 	/*
@@ -439,7 +426,7 @@ unsigned long eeh_check_failure(void *token, unsigned long val)
 		                      BUID_LO(dn->phb->buid), NULL, 0,
 		                      virt_to_phys(slot_errbuf),
 		                      eeh_error_buf_size,
-		                      2 /* Permanent Error */);
+		                      1 /* Temporary Error */);
 		if (log_event == 0)
 			log_error(slot_errbuf, ERR_TYPE_RTAS_LOG,
@@ -456,19 +443,53 @@ unsigned long eeh_check_failure(void *token, unsigned long val)
 		 */
 		if (panic_on_oops) {
 			panic("EEH: MMIO failure (%d) on device:%s %s\n",
-			      rets[0], pci_name(dev), pci_pretty_name(dev));
+			      rets[0], dn->name, dn->full_name);
 		} else {
 			__get_cpu_var(ignored_failures)++;
 			printk(KERN_INFO "EEH: MMIO failure (%d) on device:%s %s\n",
-			       rets[0], pci_name(dev), pci_pretty_name(dev));
+			       rets[0], dn->name, dn->full_name);
 		}
 	} else {
 		__get_cpu_var(false_positives)++;
 	}
+	return 0;
+}
+EXPORT_SYMBOL(eeh_dn_check_failure);
+/**
+ * eeh_check_failure - check if all 1's data is due to EEH slot freeze
+ * @token i/o token, should be address in the form 0xA....
+ * @val value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an eeh failure at the given token address.
+ * Check for an EEH failure at the given token address.  Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event.  This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(void *token, unsigned long val)
+{
+	unsigned long addr;
+	struct pci_dev *dev;
+	struct device_node *dn;
+	/* Finding the phys addr + pci device; this is pretty quick. */
+	addr = eeh_token_to_phys((unsigned long)token);
+	dev = pci_get_device_by_addr(addr);
+	if (!dev)
+		return val;
+	dn = pci_device_to_OF_node(dev);
+	eeh_dn_check_failure (dn, dev);
 	pci_dev_put(dev);
 	return val;
 }
 EXPORT_SYMBOL(eeh_check_failure);
 struct eeh_early_enable_info {

--- a/arch/ppc64/kernel/pSeries_pci.c
+++ b/arch/ppc64/kernel/pSeries_pci.c
@@ -68,7 +68,9 @@ static int rtas_read_config(struct device_node *dn, int where, int size, u32 *va
 	int ret;
 	if (!dn)
-		return -2;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (where & (size - 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
 	addr = (dn->busno << 16) | (dn->devfn << 8) | where;
 	buid = dn->phb->buid;
@@ -79,7 +81,15 @@ static int rtas_read_config(struct device_node *dn, int where, int size, u32 *va
 		ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
 	}
 	*val = returnval;
-	return ret;
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (returnval == EEH_IO_ERROR_VALUE(size)
+	    && eeh_dn_check_failure (dn, NULL))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return PCIBIOS_SUCCESSFUL;
 }
 static int rtas_pci_read_config(struct pci_bus *bus,
@@ -106,7 +116,9 @@ static int rtas_write_config(struct device_node *dn, int where, int size, u32 va
 	int ret;
 	if (!dn)
-		return -2;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (where & (size - 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
 	addr = (dn->busno << 16) | (dn->devfn << 8) | where;
 	buid = dn->phb->buid;
@@ -115,7 +127,11 @@ static int rtas_write_config(struct device_node *dn, int where, int size, u32 va
 	} else {
 		ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
 	}
-	return ret;
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return PCIBIOS_SUCCESSFUL;
 }
 static int rtas_pci_write_config(struct pci_bus *bus,

--- a/include/asm-ppc64/eeh.h
+++ b/include/asm-ppc64/eeh.h
@@ -44,6 +44,7 @@ struct device_node;
 extern void __init eeh_init(void);
 unsigned long eeh_check_failure(void *token, unsigned long val);
+int eeh_dn_check_failure (struct device_node *dn, struct pci_dev *dev);
 void *eeh_ioremap(unsigned long addr, void *vaddr);
 void __init pci_addr_cache_build(void);
@@ -89,7 +90,15 @@ int eeh_set_option(struct pci_dev *dev, int options);
 */
 #define EEH_POSSIBLE_IO_ERROR(val, type)	((val) == (type)~0)
-/* The vaddr will equal the addr if EEH checking is disabled for
+/*
+ * Reads from a device which has been isolated by EEH will return
+ * all 1s.  This macro gives an all-1s value of the given size (in
+ * bytes: 1, 2, or 4) for comparing with the result of a read.
+ */
+#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
+/*
+ * The vaddr will equal the addr if EEH checking is disabled for
 * this device.  This is because eeh_ioremap() will not have
 * remapped to 0xA0, and thus both vaddr and addr will be 0xE0...
 */