Commit a4b5bd9a authored by Andrew Isaacson, committed by Ralf Baechle

SB1 cache exception handling.

    
Expand SB1 cache error handling by adding SB1_CEX_ALWAYS_FATAL and
SB1_CEX_STALL, allowing configurable behavior on cache errors.
Signed-off-by: Andy Isaacson <adi@broadcom.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 9a6dcea1
...@@ -19,13 +19,19 @@ ...@@ -19,13 +19,19 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <asm/mipsregs.h> #include <asm/mipsregs.h>
#include <asm/sibyte/sb1250.h> #include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#ifndef CONFIG_SIBYTE_BUS_WATCHER #if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE)
#include <asm/io.h> #include <asm/io.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_scd.h> #include <asm/sibyte/sb1250_scd.h>
#endif #endif
/*
* We'd like to dump the L2_ECC_TAG register on errors, but errata make
* that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.)
*/
#undef DUMP_L2_ECC_TAG_ON_ERROR
/* SB1 definitions */ /* SB1 definitions */
/* XXX should come from config1 XXX */ /* XXX should come from config1 XXX */
...@@ -139,12 +145,18 @@ static inline void breakout_cerrd(unsigned int val) ...@@ -139,12 +145,18 @@ static inline void breakout_cerrd(unsigned int val)
static void check_bus_watcher(void) static void check_bus_watcher(void)
{ {
uint32_t status, l2_err, memio_err; uint32_t status, l2_err, memio_err;
#ifdef DUMP_L2_ECC_TAG_ON_ERROR
uint64_t l2_tag;
#endif
/* Destructive read, clears register and interrupt */ /* Destructive read, clears register and interrupt */
status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
/* Bit 31 is always on, but there's no #define for that */ /* Bit 31 is always on, but there's no #define for that */
if (status & ~(1UL << 31)) { if (status & ~(1UL << 31)) {
l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
#ifdef DUMP_L2_ECC_TAG_ON_ERROR
l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG);
#endif
memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
prom_printf("\nLast recorded signature:\n"); prom_printf("\nLast recorded signature:\n");
...@@ -153,6 +165,9 @@ static void check_bus_watcher(void) ...@@ -153,6 +165,9 @@ static void check_bus_watcher(void)
(int)(G_SCD_BERR_TID(status) >> 6), (int)(G_SCD_BERR_TID(status) >> 6),
(int)G_SCD_BERR_RID(status), (int)G_SCD_BERR_RID(status),
(int)G_SCD_BERR_DCODE(status)); (int)G_SCD_BERR_DCODE(status));
#ifdef DUMP_L2_ECC_TAG_ON_ERROR
prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag);
#endif
} else { } else {
prom_printf("Bus watcher indicates no error\n"); prom_printf("Bus watcher indicates no error\n");
} }
...@@ -166,6 +181,16 @@ asmlinkage void sb1_cache_error(void) ...@@ -166,6 +181,16 @@ asmlinkage void sb1_cache_error(void)
uint64_t cerr_dpa; uint64_t cerr_dpa;
uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;
#ifdef CONFIG_SIBYTE_BW_TRACE
/* Freeze the trace buffer now */
#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
#else
csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
#endif
prom_printf("Trace buffer frozen\n");
#endif
prom_printf("Cache error exception on CPU %x:\n", prom_printf("Cache error exception on CPU %x:\n",
(read_c0_prid() >> 25) & 0x7); (read_c0_prid() >> 25) & 0x7);
...@@ -229,11 +254,19 @@ asmlinkage void sb1_cache_error(void) ...@@ -229,11 +254,19 @@ asmlinkage void sb1_cache_error(void)
check_bus_watcher(); check_bus_watcher();
while (1);
/* /*
* This tends to make things get really ugly; let's just stall instead. * Calling panic() when a fatal cache error occurs scrambles the
* panic("Can't handle the cache error!"); * state of the system (and the cache), making it difficult to
* investigate after the fact. However, if you just stall the CPU,
* the other CPU may keep on running, which is typically very
* undesirable.
*/ */
#ifdef CONFIG_SB1_CERR_STALL
while (1)
;
#else
panic("unhandled cache error");
#endif
} }
...@@ -434,7 +467,8 @@ static struct dc_state dc_states[] = { ...@@ -434,7 +467,8 @@ static struct dc_state dc_states[] = {
}; };
#define DC_TAG_VALID(state) \ #define DC_TAG_VALID(state) \
(((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c)) (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \
((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c))
static char *dc_state_str(unsigned char state) static char *dc_state_str(unsigned char state)
{ {
...@@ -505,6 +539,7 @@ static uint32_t extract_dc(unsigned short addr, int data) ...@@ -505,6 +539,7 @@ static uint32_t extract_dc(unsigned short addr, int data)
uint64_t datalo; uint64_t datalo;
uint32_t datalohi, datalolo, datahi; uint32_t datalohi, datalolo, datahi;
int offset; int offset;
char bad_ecc = 0;
for (offset = 0; offset < 4; offset++) { for (offset = 0; offset < 4; offset++) {
/* Index-load-data-D */ /* Index-load-data-D */
...@@ -525,8 +560,7 @@ static uint32_t extract_dc(unsigned short addr, int data) ...@@ -525,8 +560,7 @@ static uint32_t extract_dc(unsigned short addr, int data)
ecc = dc_ecc(datalo); ecc = dc_ecc(datalo);
if (ecc != datahi) { if (ecc != datahi) {
int bits = 0; int bits = 0;
prom_printf(" ** bad ECC (%02x %02x) ->", bad_ecc |= 1 << (3-offset);
datahi, ecc);
ecc ^= datahi; ecc ^= datahi;
while (ecc) { while (ecc) {
if (ecc & 1) bits++; if (ecc & 1) bits++;
...@@ -537,6 +571,10 @@ static uint32_t extract_dc(unsigned short addr, int data) ...@@ -537,6 +571,10 @@ static uint32_t extract_dc(unsigned short addr, int data)
prom_printf(" %02X-%016llX", datahi, datalo); prom_printf(" %02X-%016llX", datahi, datalo);
} }
prom_printf("\n"); prom_printf("\n");
if (bad_ecc)
prom_printf(" dwords w/ bad ECC: %d %d %d %d\n",
!!(bad_ecc & 8), !!(bad_ecc & 4),
!!(bad_ecc & 2), !!(bad_ecc & 1));
} }
} }
return res; return res;
......
...@@ -64,6 +64,10 @@ LEAF(except_vec2_sb1) ...@@ -64,6 +64,10 @@ LEAF(except_vec2_sb1)
sd k0,0x170($0) sd k0,0x170($0)
sd k1,0x178($0) sd k1,0x178($0)
#if CONFIG_SB1_CEX_ALWAYS_FATAL
j handle_vec2_sb1
nop
#else
/* /*
* M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
* if we can fast-path out of here for a h/w-recovered error. * if we can fast-path out of here for a h/w-recovered error.
...@@ -134,6 +138,7 @@ unrecoverable: ...@@ -134,6 +138,7 @@ unrecoverable:
/* Unrecoverable Icache or Dcache error; log it and/or fail */ /* Unrecoverable Icache or Dcache error; log it and/or fail */
j handle_vec2_sb1 j handle_vec2_sb1
nop nop
#endif
END(except_vec2_sb1) END(except_vec2_sb1)
......
...@@ -102,6 +102,14 @@ config SIMULATION ...@@ -102,6 +102,14 @@ config SIMULATION
Build a kernel suitable for running under the GDB simulator. Build a kernel suitable for running under the GDB simulator.
Primarily adjusts the kernel's notion of time. Primarily adjusts the kernel's notion of time.
# Kconfig prepends "CONFIG_" to every symbol it exports, so the symbol must
# be declared without that prefix for the CONFIG_SB1_CEX_ALWAYS_FATAL test
# in the SB1 cache-error exception vector (except_vec2_sb1) to ever see it.
# When set, that vector jumps straight to handle_vec2_sb1 instead of taking
# the recoverable-error path.
config SB1_CEX_ALWAYS_FATAL
	bool "All cache exceptions considered fatal (no recovery attempted)"
	depends on SIBYTE_SB1xxx_SOC
# Kconfig prepends "CONFIG_" to every symbol it exports, so the symbol must
# be declared without that prefix for the "#ifdef CONFIG_SB1_CERR_STALL"
# test in sb1_cache_error() to ever see it.
# When set, sb1_cache_error() spins forever after dumping the error state;
# otherwise it calls panic().
config SB1_CERR_STALL
	bool "Stall (rather than panic) on fatal cache error"
	depends on SIBYTE_SB1xxx_SOC
config SIBYTE_CFE config SIBYTE_CFE
bool "Booting from CFE" bool "Booting from CFE"
depends on SIBYTE_SB1xxx_SOC depends on SIBYTE_SB1xxx_SOC
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment