powerpc/powernv: Display diag data on p7ioc EEH errors

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent f11fe552
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
*/ */
#define DEBUG #undef DEBUG
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/pci.h> #include <linux/pci.h>
...@@ -467,14 +467,13 @@ static void __devinit pnv_ioda_update_resources(struct pci_bus *bus) ...@@ -467,14 +467,13 @@ static void __devinit pnv_ioda_update_resources(struct pci_bus *bus)
struct pci_bus *cbus; struct pci_bus *cbus;
struct pci_dev *cdev; struct pci_dev *cdev;
unsigned int i; unsigned int i;
u16 cmd;
/* Clear all device enables */ /* We used to clear all device enables here. However it looks like
list_for_each_entry(cdev, &bus->devices, bus_list) { * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers,
pci_read_config_word(cdev, PCI_COMMAND, &cmd); * and shoot fatal errors to the PHB which in turns fences itself
cmd &= ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY|PCI_COMMAND_MASTER); * and we can't recover from that ... yet. So for now, let's leave
pci_write_config_word(cdev, PCI_COMMAND, cmd); * the enables as-is and hope for the best.
} */
/* Check if bus resources fit in our IO or M32 range */ /* Check if bus resources fit in our IO or M32 range */
for (i = 0; bus->self && (i < 2); i++) { for (i = 0; bus->self && (i < 2); i++) {
...@@ -618,7 +617,7 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, ...@@ -618,7 +617,7 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
struct pci_dn *pdn = pnv_ioda_get_pdn(parent); struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
if (pdn && pdn->pe_number != IODA_INVALID_PE) { if (pdn && pdn->pe_number != IODA_INVALID_PE) {
rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
pe->pe_number, 1); pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
/* XXX What to do in case of error ? */ /* XXX What to do in case of error ? */
} }
parent = parent->bus->self; parent = parent->bus->self;
...@@ -638,7 +637,7 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, ...@@ -638,7 +637,7 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
pe->mve_number = -1; pe->mve_number = -1;
} else { } else {
rc = opal_pci_set_mve_enable(phb->opal_id, rc = opal_pci_set_mve_enable(phb->opal_id,
pe->mve_number, 1); pe->mve_number, OPAL_ENABLE_MVE);
if (rc) { if (rc) {
pe_err(pe, "OPAL error %ld enabling MVE %d\n", pe_err(pe, "OPAL error %ld enabling MVE %d\n",
rc, pe->mve_number); rc, pe->mve_number);
...@@ -1187,6 +1186,12 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) ...@@ -1187,6 +1186,12 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
phb->opal_id = phb_id; phb->opal_id = phb_id;
phb->type = PNV_PHB_IODA1; phb->type = PNV_PHB_IODA1;
/* Detect specific models for error handling */
if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
phb->model = PNV_PHB_MODEL_P7IOC;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
/* We parse "ranges" now since we need to deduce the register base /* We parse "ranges" now since we need to deduce the register base
* from the IO base * from the IO base
*/ */
......
...@@ -137,6 +137,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, ...@@ -137,6 +137,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
phb->hose->private_data = phb; phb->hose->private_data = phb;
phb->opal_id = phb_id; phb->opal_id = phb_id;
phb->type = PNV_PHB_P5IOC2; phb->type = PNV_PHB_P5IOC2;
phb->model = PNV_PHB_MODEL_P5IOC2;
phb->regs = of_iomap(np, 0); phb->regs = of_iomap(np, 0);
......
...@@ -144,6 +144,112 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) ...@@ -144,6 +144,112 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
} }
#endif /* CONFIG_PCI_MSI */ #endif /* CONFIG_PCI_MSI */
static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
{
struct OpalIoP7IOCPhbErrorData *data = &phb->diag.p7ioc;
int i;
pr_info("PHB %d diagnostic data:\n", phb->hose->global_number);
pr_info(" brdgCtl = 0x%08x\n", data->brdgCtl);
pr_info(" portStatusReg = 0x%08x\n", data->portStatusReg);
pr_info(" rootCmplxStatus = 0x%08x\n", data->rootCmplxStatus);
pr_info(" busAgentStatus = 0x%08x\n", data->busAgentStatus);
pr_info(" deviceStatus = 0x%08x\n", data->deviceStatus);
pr_info(" slotStatus = 0x%08x\n", data->slotStatus);
pr_info(" linkStatus = 0x%08x\n", data->linkStatus);
pr_info(" devCmdStatus = 0x%08x\n", data->devCmdStatus);
pr_info(" devSecStatus = 0x%08x\n", data->devSecStatus);
pr_info(" rootErrorStatus = 0x%08x\n", data->rootErrorStatus);
pr_info(" uncorrErrorStatus = 0x%08x\n", data->uncorrErrorStatus);
pr_info(" corrErrorStatus = 0x%08x\n", data->corrErrorStatus);
pr_info(" tlpHdr1 = 0x%08x\n", data->tlpHdr1);
pr_info(" tlpHdr2 = 0x%08x\n", data->tlpHdr2);
pr_info(" tlpHdr3 = 0x%08x\n", data->tlpHdr3);
pr_info(" tlpHdr4 = 0x%08x\n", data->tlpHdr4);
pr_info(" sourceId = 0x%08x\n", data->sourceId);
pr_info(" errorClass = 0x%016llx\n", data->errorClass);
pr_info(" correlator = 0x%016llx\n", data->correlator);
pr_info(" p7iocPlssr = 0x%016llx\n", data->p7iocPlssr);
pr_info(" p7iocCsr = 0x%016llx\n", data->p7iocCsr);
pr_info(" lemFir = 0x%016llx\n", data->lemFir);
pr_info(" lemErrorMask = 0x%016llx\n", data->lemErrorMask);
pr_info(" lemWOF = 0x%016llx\n", data->lemWOF);
pr_info(" phbErrorStatus = 0x%016llx\n", data->phbErrorStatus);
pr_info(" phbFirstErrorStatus = 0x%016llx\n", data->phbFirstErrorStatus);
pr_info(" phbErrorLog0 = 0x%016llx\n", data->phbErrorLog0);
pr_info(" phbErrorLog1 = 0x%016llx\n", data->phbErrorLog1);
pr_info(" mmioErrorStatus = 0x%016llx\n", data->mmioErrorStatus);
pr_info(" mmioFirstErrorStatus = 0x%016llx\n", data->mmioFirstErrorStatus);
pr_info(" mmioErrorLog0 = 0x%016llx\n", data->mmioErrorLog0);
pr_info(" mmioErrorLog1 = 0x%016llx\n", data->mmioErrorLog1);
pr_info(" dma0ErrorStatus = 0x%016llx\n", data->dma0ErrorStatus);
pr_info(" dma0FirstErrorStatus = 0x%016llx\n", data->dma0FirstErrorStatus);
pr_info(" dma0ErrorLog0 = 0x%016llx\n", data->dma0ErrorLog0);
pr_info(" dma0ErrorLog1 = 0x%016llx\n", data->dma0ErrorLog1);
pr_info(" dma1ErrorStatus = 0x%016llx\n", data->dma1ErrorStatus);
pr_info(" dma1FirstErrorStatus = 0x%016llx\n", data->dma1FirstErrorStatus);
pr_info(" dma1ErrorLog0 = 0x%016llx\n", data->dma1ErrorLog0);
pr_info(" dma1ErrorLog1 = 0x%016llx\n", data->dma1ErrorLog1);
for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
if ((data->pestA[i] >> 63) == 0 &&
(data->pestB[i] >> 63) == 0)
continue;
pr_info(" PE[%3d] PESTA = 0x%016llx\n", i, data->pestA[i]);
pr_info(" PESTB = 0x%016llx\n", data->pestB[i]);
}
}
/*
 * Decode and print the PHB diagnostic buffer according to phb->model.
 * Only the P7IOC layout is decoded; any other model produces a warning
 * saying the data cannot be decoded.
 */
static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb)
{
	if (phb->model == PNV_PHB_MODEL_P7IOC) {
		pnv_pci_dump_p7ioc_diag_data(phb);
		return;
	}

	pr_warning("PCI %d: Can't decode this PHB diag data\n",
		   phb->hose->global_number);
}
/*
 * Handle a frozen PE found while checking EEH state after a config access.
 *
 * With phb->lock held and interrupts disabled, this fetches the PHB
 * diagnostic data into phb->diag.blob and then asks OPAL to clear the
 * freeze on @pe_no. If the clear fails, a warning is logged and the
 * diagnostic data (when it was fetched successfully) is dumped.
 *
 * @phb:   PHB whose PE is frozen
 * @pe_no: number of the PE to unfreeze
 */
static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
{
	unsigned long flags;
	/* Signed, so the argument type matches the %ld conversion below */
	long rc;
	int has_diag;

	spin_lock_irqsave(&phb->lock, flags);

	/* Fetch the diag data before attempting to clear the freeze */
	rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob,
					PNV_PCI_DIAG_BUF_SIZE);
	has_diag = (rc == OPAL_SUCCESS);

	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
	if (rc) {
		pr_warning("PCI %d: Failed to clear EEH freeze state"
			   " for PE#%d, err %ld\n",
			   phb->hose->global_number, pe_no, rc);

		/* For now, let's only display the diag buffer when we fail to clear
		 * the EEH status. We'll do more sensible things later when we have
		 * proper EEH support. We need to make sure we don't pollute ourselves
		 * with the normal errors generated when probing empty slots
		 */
		if (has_diag)
			pnv_pci_dump_phb_diag_data(phb);
		else
			pr_warning("PCI %d: No diag data available\n",
				   phb->hose->global_number);
	}

	spin_unlock_irqrestore(&phb->lock, flags);
}
static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
u32 bdfn) u32 bdfn)
{ {
...@@ -165,15 +271,8 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, ...@@ -165,15 +271,8 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
} }
cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n",
bdfn, pe_no, fstate); bdfn, pe_no, fstate);
if (fstate != 0) { if (fstate != 0)
rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, pnv_pci_handle_eeh_config(phb, pe_no);
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
if (rc) {
pr_warning("PCI %d: Failed to clear EEH freeze state"
" for PE#%d, err %lld\n",
phb->hose->global_number, pe_no, rc);
}
}
} }
static int pnv_pci_read_config(struct pci_bus *bus, static int pnv_pci_read_config(struct pci_bus *bus,
......
...@@ -9,6 +9,15 @@ enum pnv_phb_type { ...@@ -9,6 +9,15 @@ enum pnv_phb_type {
PNV_PHB_IODA2, PNV_PHB_IODA2,
}; };
/*
 * Exact PHB hardware model, stored in pnv_phb::model and used by the
 * error handling code to pick a decoder for the diagnostic data.
 */
enum pnv_phb_model {
	PNV_PHB_MODEL_UNKNOWN	= 0,	/* model not recognised */
	PNV_PHB_MODEL_P5IOC2	= 1,	/* p5ioc2 PHB */
	PNV_PHB_MODEL_P7IOC	= 2,	/* p7ioc PHB */
};
#define PNV_PCI_DIAG_BUF_SIZE 4096
/* Data associated with a PE, including IOMMU tracking etc.. */ /* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_ioda_pe { struct pnv_ioda_pe {
/* A PE can be associated with a single device or an /* A PE can be associated with a single device or an
...@@ -56,6 +65,7 @@ struct pnv_ioda_pe { ...@@ -56,6 +65,7 @@ struct pnv_ioda_pe {
struct pnv_phb { struct pnv_phb {
struct pci_controller *hose; struct pci_controller *hose;
enum pnv_phb_type type; enum pnv_phb_type type;
enum pnv_phb_model model;
u64 opal_id; u64 opal_id;
void __iomem *regs; void __iomem *regs;
spinlock_t lock; spinlock_t lock;
...@@ -118,6 +128,12 @@ struct pnv_phb { ...@@ -118,6 +128,12 @@ struct pnv_phb {
struct list_head pe_list; struct list_head pe_list;
} ioda; } ioda;
}; };
/* PHB status structure */
union {
unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
struct OpalIoP7IOCPhbErrorData p7ioc;
} diag;
}; };
extern struct pci_ops pnv_pci_ops; extern struct pci_ops pnv_pci_ops;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment