Commit 17e8ce0e, authored by Russ Anderson, committed by Tony Luck

[IA64-SGI] Altix BTE error handling fixes

Altix (shub2) pushes the BTE clean-up into SAL.
This patch correctly interfaces with the now implemented SAL call.
It also fixes a bug when delaying clean-up to allow busy BTEs to
complete (or error out).
Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent 8a4b7b6f
...@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long); ...@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long);
* Wait until all BTE related CRBs are completed * Wait until all BTE related CRBs are completed
* and then reset the interfaces. * and then reset the interfaces.
*/ */
void shub1_bte_error_handler(unsigned long _nodepda) int shub1_bte_error_handler(unsigned long _nodepda)
{ {
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
...@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda) ...@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
(err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
smp_processor_id())); smp_processor_id()));
return; return 1;
} }
/* Determine information about our hub */ /* Determine information about our hub */
...@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda) ...@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
mod_timer(recovery_timer, HZ * 5); mod_timer(recovery_timer, HZ * 5);
BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
smp_processor_id())); smp_processor_id()));
return; return 1;
} }
if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
...@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda) ...@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
err_nodepda, smp_processor_id(), err_nodepda, smp_processor_id(),
i)); i));
return; return 1;
} }
} }
} }
...@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda) ...@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda)
REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
del_timer(recovery_timer); del_timer(recovery_timer);
return 0;
}
/*
* shub2_bte_error_handler - BTE error recovery for the non-shub1 (shub2) path.
*
* _nodepda is cast to the struct nodepda_s whose BTE interfaces are examined.
*
* The status of each of the node's BTES_PER_NODE interfaces is loaded.  If
* any interface is still busy and has not flagged an error, the recovery
* timer is re-armed and 1 is returned without performing any recovery.
* Otherwise the clean-up is handed to SAL through ia64_sn_bte_recovery();
* a non-zero result from that call is treated as fatal (panic).
*
* Returns 0 after recovery has been performed, 1 when it was deferred.
*/
int shub2_bte_error_handler(unsigned long _nodepda)
{
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
struct bteinfo_s *bte;
nasid_t nasid;
u64 status;
int i;
/* NASID is derived from the cnode recorded in BTE interface 0. */
nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
/*
* Verify that all the BTEs are complete
*/
for (i = 0; i < BTES_PER_NODE; i++) {
bte = &err_nodepda->bte_if[i];
status = BTE_LNSTAT_LOAD(bte);
/* Nothing to wait for if this BTE reports an error or is not busy. */
if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
continue;
/*
* Busy with no error: postpone the clean-up.
* NOTE(review): mod_timer() takes an absolute expiry in jiffies;
* HZ * 5 looks like it was meant as a relative delay
* (i.e. jiffies + HZ * 5) -- confirm.
*/
mod_timer(recovery_timer, HZ * 5);
BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
smp_processor_id()));
return 1;
}
/* No BTE is busy without an error; ask SAL to recover this node's BTEs. */
if (ia64_sn_bte_recovery(nasid))
panic("bte_error_handler(): Fatal BTE Error");
/* Recovery has been performed; deactivate the recovery timer. */
del_timer(recovery_timer);
return 0;
} }
/* /*
...@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda) ...@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda)
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
int i; int i;
nasid_t nasid;
unsigned long irq_flags; unsigned long irq_flags;
volatile u64 *notify; volatile u64 *notify;
bte_result_t bh_error; bte_result_t bh_error;
...@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda) ...@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda)
} }
if (is_shub1()) { if (is_shub1()) {
shub1_bte_error_handler(_nodepda); if (shub1_bte_error_handler(_nodepda)) {
spin_unlock_irqrestore(recovery_lock, irq_flags);
return;
}
} else { } else {
nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); if (shub2_bte_error_handler(_nodepda)) {
spin_unlock_irqrestore(recovery_lock, irq_flags);
if (ia64_sn_bte_recovery(nasid)) return;
panic("bte_error_handler(): Fatal BTE Error"); }
} }
for (i = 0; i < BTES_PER_NODE; i++) { for (i = 0; i < BTES_PER_NODE; i++) {
......
...@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep) ...@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
ret_stuff.v0 = 0; ret_stuff.v0 = 0;
hubdev_info = (struct hubdev_info *)arg; hubdev_info = (struct hubdev_info *)arg;
nasid = hubdev_info->hdi_nasid; nasid = hubdev_info->hdi_nasid;
if (is_shub1()) {
SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
(u64) nasid, 0, 0, 0, 0, 0, 0); (u64) nasid, 0, 0, 0, 0, 0, 0);
if ((int)ret_stuff.v0) if ((int)ret_stuff.v0)
panic("hubii_eint_handler(): Fatal TIO Error"); panic("hubii_eint_handler(): Fatal TIO Error");
if (is_shub1()) {
if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
(void)hubiio_crb_error_handler(hubdev_info); (void)hubiio_crb_error_handler(hubdev_info);
} else } else
......
...@@ -1100,7 +1100,7 @@ ia64_sn_bte_recovery(nasid_t nasid) ...@@ -1100,7 +1100,7 @@ ia64_sn_bte_recovery(nasid_t nasid)
struct ia64_sal_retval rv; struct ia64_sal_retval rv;
rv.status = 0; rv.status = 0;
SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, 0, 0, 0, 0, 0, 0, 0); SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, (u64)nasid, 0, 0, 0, 0, 0, 0);
if (rv.status == SALRET_NOT_IMPLEMENTED) if (rv.status == SALRET_NOT_IMPLEMENTED)
return 0; return 0;
return (int) rv.status; return (int) rv.status;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment