Commit 2eb724ed authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] fix mod_timer() race

If two CPUs run mod_timer against the same not-pending timer then they
have no locking relationship.  They can both see the timer as
not-pending and they both add the timer to their cpu-local list.  The
CPU which gets there second corrupts the first CPU's lists.

This was causing Dave Hansen's 8-way to oops after a couple of minutes
of specweb testing.

I believe that to fix this we need locking which is associated with the
timer itself.  The easy fix is hashed spinlocking based on the timer's
address.  The hard fix is a lock inside the timer itself.

It is hard because init_timer() becomes compulsory, to initialise that
spinlock.  An unknown number of code paths in the kernel just wipe the
timer to all-zeroes and start using it.

I chose the hard way - it is cleaner and more idiomatic.  The patch
also adds a "magic number" to the timer so we can detect when a timer
was not correctly initialised.  A warning and stack backtrace is
generated and the timer is fixed up.  After 16 such warnings the
warning mechanism shuts itself up until a reboot.

It took six patches to my kernel to stop the warnings from coming out.
The uninitialised timers are extremely easy to find and fix.  But it
will take some time to weed them all out.  Maybe we should go for
the hashed locking...

Note that the new timer->lock means that we can clean up some awkward
"oh we raced, let's try again" code in timer.c.  But to do that we'd
also need to take timer->lock in the commonly-called del_timer(), so I
left it as-is.

The lock is not needed in add_timer() because concurrent
add_timer()/add_timer() and concurrent add_timer()/mod_timer() are
illegal.
parent 3cf803fb
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/spinlock.h>
struct tvec_t_base_s; struct tvec_t_base_s;
...@@ -10,12 +11,17 @@ struct timer_list { ...@@ -10,12 +11,17 @@ struct timer_list {
struct list_head entry; struct list_head entry;
unsigned long expires; unsigned long expires;
spinlock_t lock;
unsigned long magic;
void (*function)(unsigned long); void (*function)(unsigned long);
unsigned long data; unsigned long data;
struct tvec_t_base_s *base; struct tvec_t_base_s *base;
}; };
#define TIMER_MAGIC 0x4b87ad6e
/*** /***
* init_timer - initialize a timer. * init_timer - initialize a timer.
* @timer: the timer to be initialized * @timer: the timer to be initialized
...@@ -26,6 +32,8 @@ struct timer_list { ...@@ -26,6 +32,8 @@ struct timer_list {
static inline void init_timer(struct timer_list * timer) static inline void init_timer(struct timer_list * timer)
{ {
timer->base = NULL; timer->base = NULL;
timer->magic = TIMER_MAGIC;
spin_lock_init(&timer->lock);
} }
/*** /***
......
...@@ -69,6 +69,30 @@ static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED }; ...@@ -69,6 +69,30 @@ static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
/* Fake initialization needed to avoid compiler breakage */ /* Fake initialization needed to avoid compiler breakage */
static DEFINE_PER_CPU(struct tasklet_struct, timer_tasklet) = { NULL }; static DEFINE_PER_CPU(struct tasklet_struct, timer_tasklet) = { NULL };
static void check_timer_failed(timer_t *timer)
{
static int whine_count;
if (whine_count < 16) {
whine_count++;
printk("Uninitialised timer!\n");
printk("This is just a warning. Your computer is OK\n");
printk("function=0x%p, data=0x%lx\n",
timer->function, timer->data);
dump_stack();
}
/*
* Now fix it up
*/
spin_lock_init(&timer->lock);
timer->magic = TIMER_MAGIC;
}
static inline void check_timer(timer_t *timer)
{
if (timer->magic != TIMER_MAGIC)
check_timer_failed(timer);
}
static inline void internal_add_timer(tvec_base_t *base, timer_t *timer) static inline void internal_add_timer(tvec_base_t *base, timer_t *timer)
{ {
unsigned long expires = timer->expires; unsigned long expires = timer->expires;
...@@ -129,6 +153,8 @@ void add_timer(timer_t *timer) ...@@ -129,6 +153,8 @@ void add_timer(timer_t *timer)
BUG_ON(timer_pending(timer) || !timer->function); BUG_ON(timer_pending(timer) || !timer->function);
check_timer(timer);
spin_lock_irqsave(&base->lock, flags); spin_lock_irqsave(&base->lock, flags);
internal_add_timer(base, timer); internal_add_timer(base, timer);
timer->base = base; timer->base = base;
...@@ -150,6 +176,8 @@ void add_timer_on(struct timer_list *timer, int cpu) ...@@ -150,6 +176,8 @@ void add_timer_on(struct timer_list *timer, int cpu)
BUG_ON(timer_pending(timer) || !timer->function); BUG_ON(timer_pending(timer) || !timer->function);
check_timer(timer);
spin_lock_irqsave(&base->lock, flags); spin_lock_irqsave(&base->lock, flags);
internal_add_timer(base, timer); internal_add_timer(base, timer);
timer->base = base; timer->base = base;
...@@ -182,6 +210,9 @@ int mod_timer(timer_t *timer, unsigned long expires) ...@@ -182,6 +210,9 @@ int mod_timer(timer_t *timer, unsigned long expires)
int ret = 0; int ret = 0;
BUG_ON(!timer->function); BUG_ON(!timer->function);
check_timer(timer);
/* /*
* This is a common optimization triggered by the * This is a common optimization triggered by the
* networking code - if the timer is re-modified * networking code - if the timer is re-modified
...@@ -190,7 +221,7 @@ int mod_timer(timer_t *timer, unsigned long expires) ...@@ -190,7 +221,7 @@ int mod_timer(timer_t *timer, unsigned long expires)
if (timer->expires == expires && timer_pending(timer)) if (timer->expires == expires && timer_pending(timer))
return 1; return 1;
local_irq_save(flags); spin_lock_irqsave(&timer->lock, flags);
new_base = &per_cpu(tvec_bases, smp_processor_id()); new_base = &per_cpu(tvec_bases, smp_processor_id());
repeat: repeat:
old_base = timer->base; old_base = timer->base;
...@@ -207,7 +238,7 @@ int mod_timer(timer_t *timer, unsigned long expires) ...@@ -207,7 +238,7 @@ int mod_timer(timer_t *timer, unsigned long expires)
spin_lock(&new_base->lock); spin_lock(&new_base->lock);
} }
/* /*
* The timer base might have changed while we were * The timer base might have been cancelled while we were
* trying to take the lock(s): * trying to take the lock(s):
*/ */
if (timer->base != old_base) { if (timer->base != old_base) {
...@@ -232,7 +263,8 @@ int mod_timer(timer_t *timer, unsigned long expires) ...@@ -232,7 +263,8 @@ int mod_timer(timer_t *timer, unsigned long expires)
if (old_base && (new_base != old_base)) if (old_base && (new_base != old_base))
spin_unlock(&old_base->lock); spin_unlock(&old_base->lock);
spin_unlock_irqrestore(&new_base->lock, flags); spin_unlock(&new_base->lock);
spin_unlock_irqrestore(&timer->lock, flags);
return ret; return ret;
} }
...@@ -253,6 +285,8 @@ int del_timer(timer_t *timer) ...@@ -253,6 +285,8 @@ int del_timer(timer_t *timer)
unsigned long flags; unsigned long flags;
tvec_base_t *base; tvec_base_t *base;
check_timer(timer);
repeat: repeat:
base = timer->base; base = timer->base;
if (!base) if (!base)
...@@ -290,6 +324,8 @@ int del_timer_sync(timer_t *timer) ...@@ -290,6 +324,8 @@ int del_timer_sync(timer_t *timer)
tvec_base_t *base; tvec_base_t *base;
int i, ret = 0; int i, ret = 0;
check_timer(timer);
del_again: del_again:
ret += del_timer(timer); ret += del_timer(timer);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment