Commit 13c01fe7 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] IRQs: handle bad return values from handlers

Attempt to do something intelligent with IRQ handlers which don't return
IRQ_HANDLED.

- If they return neither IRQ_HANDLED nor IRQ_NONE, complain.

- If they return IRQ_NONE more than 99900 times in 100000 interrupts, complain
  and disable the IRQ.

  I did have it at 750-in-1000, but someone had an otherwise-functioning
  system which triggered it.

  The 99.9% ratio is designed to address the problem wherein the babbling
  device shares an IRQ with a good device.  We don't want the good device's
  trickle of IRQ_HANDLED callouts to defeat the lockup detector.  (fat chance
  of this working right).

- Add a kernel boot parameter `noirqdebug' to turn the whole thing off.
parent 21af2f02
......@@ -617,6 +617,9 @@ running once the system is up.
noht [SMP,IA-32] Disables P4 Xeon(tm) HyperThreading.
noirqdebug [IA-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
noisapnp [ISAPNP] Disables ISA PnP code.
noinitrd [RAM] Tells the kernel not to load any configured
......
......@@ -66,8 +66,12 @@
/*
* Controller mappings for all interrupt sources:
*/
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
/* GNU range designator: every descriptor gets the same initial value. */
[0 ... NR_IRQS-1] = {
.handler = &no_irq_type,
.lock = SPIN_LOCK_UNLOCKED
/* Members not named above are implicitly zero-initialized. */
}
};
static void register_irq_proc (unsigned int irq);
......@@ -209,7 +213,6 @@ int handle_IRQ_event(unsigned int irq,
{
int status = 1; /* Force the "do bottom halves" bit */
int retval = 0;
struct irqaction *first_action = action;
if (!(action->flags & SA_INTERRUPT))
local_irq_enable();
......@@ -222,30 +225,88 @@ int handle_IRQ_event(unsigned int irq,
if (status & SA_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
local_irq_disable();
if (retval != 1) {
static int count = 100;
if (count) {
count--;
if (retval) {
printk("irq event %d: bogus retval mask %x\n",
irq, retval);
} else {
printk("irq %d: nobody cared!\n", irq);
}
dump_stack();
printk("handlers:\n");
action = first_action;
do {
printk("[<%p>]", action->handler);
print_symbol(" (%s)",
(unsigned long)action->handler);
printk("\n");
action = action->next;
} while (action);
}
return retval;
}
/*
 * Print a diagnostic for a misbehaving interrupt line.
 *
 * If action_ret is neither IRQ_HANDLED nor IRQ_NONE the value itself is
 * reported as bogus; otherwise the "nobody cared!" message is printed.
 * A stack dump and the list of handlers installed on desc follow.
 *
 * This function does no rate limiting of its own.
 */
static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
	struct irqaction *action;

	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
		printk(KERN_ERR "irq event %d: bogus return value %x\n",
				irq, action_ret);
	} else {
		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
	}
	dump_stack();
	printk(KERN_ERR "handlers:\n");

	/*
	 * Test the pointer before using it: an empty handler list
	 * (desc->action == NULL) must not be dereferenced.
	 */
	for (action = desc->action; action; action = action->next) {
		printk(KERN_ERR "[<%p>]", action->handler);
		print_symbol(" (%s)",
			(unsigned long)action->handler);
		printk("\n");
	}
}
/*
 * Rate-limited front end to __report_bad_irq(): only the first 100 calls
 * produce a report, every later call returns without doing anything.
 */
static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
	static int reports_left = 100;

	/* Budget exhausted: stay silent. */
	if (reports_left == 0)
		return;

	reports_left--;
	__report_bad_irq(irq, desc, action_ret);
}
static int noirqdebug;
/*
 * Boot-parameter handler registered for "noirqdebug".
 *
 * Sets the noirqdebug flag and logs that the detection has been switched
 * off.  The option takes no value, so str is ignored.  Always returns 1.
 */
static int __init noirqdebug_setup(char *str)
{
	noirqdebug = 1;
	/* Explicit log level, matching the other messages in this file. */
	printk(KERN_INFO "IRQ lockup detection disabled\n");
	return 1;
}
__setup("noirqdebug", noirqdebug_setup);
/*
 * Account for the result of one pass over an IRQ's handlers.
 *
 * If more than 99,900 of the previous 100,000 interrupts have not been
 * handled then assume that the IRQ is stuck in some manner.  Drop a
 * diagnostic and try to turn the IRQ off.
 *
 * (The other 100-of-100,000 interrupts may have been a correctly-functioning
 * device sharing an IRQ with the failing one)
 *
 * A return value that is neither IRQ_HANDLED nor IRQ_NONE is additionally
 * reported through report_bad_irq().
 *
 * Called under desc->lock
 */
static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
	if (action_ret != IRQ_HANDLED) {
		desc->irqs_unhandled++;
		if (action_ret != IRQ_NONE)
			report_bad_irq(irq, desc, action_ret);
	}

	/* Only evaluate the ratio once per window of 100,000 interrupts. */
	desc->irq_count++;
	if (desc->irq_count < 100000)
		return;

	desc->irq_count = 0;
	if (desc->irqs_unhandled > 99900) {
		/*
		 * The interrupt is stuck
		 */
		__report_bad_irq(irq, desc, action_ret);
		/*
		 * Now kill the IRQ
		 */
		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
		desc->status |= IRQ_DISABLED;
		desc->handler->disable(irq);
	}
	/* Start the next window with a clean unhandled count. */
	desc->irqs_unhandled = 0;
}
/*
......@@ -418,10 +479,13 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
* SMP environment.
*/
for (;;) {
irqreturn_t action_ret;
spin_unlock(&desc->lock);
handle_IRQ_event(irq, &regs, action);
action_ret = handle_IRQ_event(irq, &regs, action);
spin_lock(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
......
......@@ -61,6 +61,8 @@ typedef struct {
hw_irq_controller *handler;
struct irqaction *action; /* IRQ action list */
unsigned int depth; /* nested irq disables */
unsigned int irq_count; /* For detecting broken interrupts */
unsigned int irqs_unhandled;
spinlock_t lock;
} ____cacheline_aligned irq_desc_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment