Commit 90e518e1 authored by Corey Minyard's avatar Corey Minyard Committed by Linus Torvalds

[PATCH] Fixes for idr code

* On a 32-bit architecture, the idr code will cease to work if you add
  more than 2^20 entries.  You will not be able to find many of the
  entries.  The problem is that the IDR code uses 5-bit chunks of the
  number and the lower portion used by IDR is 24 bits, so you have one bit
  that leaks over into the comparisons that should not be there.  The
  solution is to mask off that bit before doing IDR processing.  This
  actually causes the POSIX timer code to crash if you create that many
  timers.  I have included an idr_test.tar.gz file that demonstrates this
  with and without the fix, in case you need more evidence :).

* When the IDR fills up, it returns -1.  However, there was no way to
  check for this condition.  This patch adds the ability to check for the
  idr being full and fixes all the users.  It also fixes a problem in
  fs/super.c where the idr code wasn't checking for -1.

* There was a race condition creating POSIX timers.  The timer was added
  to a task struct for another process then the data for the timer was
  filled out.  The other task could use/destroy time timer as soon as it is
  in the task's queue and the lock is released.  This moves settup up the
  timer data to before the timer is enqueued or (for some data) into the
  lock.

* Change things so that the caller doesn't need to run idr_full() to find
  out the reason for an idr_get_new() failure.

  Just return -ENOSPC if the tree was full, or -EAGAIN if the caller needs
  to re-run idr_pre_get() and try again.
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent f1372916
...@@ -288,18 +288,20 @@ static spinlock_t proc_inum_lock = SPIN_LOCK_UNLOCKED; /* protects the above */ ...@@ -288,18 +288,20 @@ static spinlock_t proc_inum_lock = SPIN_LOCK_UNLOCKED; /* protects the above */
*/ */
static unsigned int get_inode_number(void) static unsigned int get_inode_number(void)
{ {
unsigned int i, inum = 0; int i, inum = 0;
int error;
retry: retry:
if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
return 0; return 0;
spin_lock(&proc_inum_lock); spin_lock(&proc_inum_lock);
i = idr_get_new(&proc_inum_idr, NULL); error = idr_get_new(&proc_inum_idr, NULL, &i);
spin_unlock(&proc_inum_lock); spin_unlock(&proc_inum_lock);
if (error == -EAGAIN)
if (i == -1)
goto retry; goto retry;
else if (error)
return 0;
inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
......
...@@ -569,14 +569,19 @@ static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */ ...@@ -569,14 +569,19 @@ static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
int set_anon_super(struct super_block *s, void *data) int set_anon_super(struct super_block *s, void *data)
{ {
int dev; int dev;
int error;
spin_lock(&unnamed_dev_lock); retry:
if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) { if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0)
spin_unlock(&unnamed_dev_lock);
return -ENOMEM; return -ENOMEM;
} spin_lock(&unnamed_dev_lock);
dev = idr_get_new(&unnamed_dev_idr, NULL); error = idr_get_new(&unnamed_dev_idr, NULL, &dev);
spin_unlock(&unnamed_dev_lock); spin_unlock(&unnamed_dev_lock);
if (error == -EAGAIN)
/* We raced and lost with another CPU. */
goto retry;
else if (error)
return -EAGAIN;
if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
spin_lock(&unnamed_dev_lock); spin_lock(&unnamed_dev_lock);
......
...@@ -16,9 +16,15 @@ ...@@ -16,9 +16,15 @@
#if BITS_PER_LONG == 32 #if BITS_PER_LONG == 32
# define IDR_BITS 5 # define IDR_BITS 5
# define IDR_FULL 0xfffffffful # define IDR_FULL 0xfffffffful
/* We can only use half the bits in the top level because there are
only four possible bits in the top level (5 bits * 4 levels = 25
bits, but you only use 24 bits in the id). */
# define TOP_LEVEL_FULL (IDR_FULL >> 16)
#elif BITS_PER_LONG == 64 #elif BITS_PER_LONG == 64
# define IDR_BITS 6 # define IDR_BITS 6
# define IDR_FULL 0xfffffffffffffffful # define IDR_FULL 0xfffffffffffffffful
/* We can use all the bits in a 64-bit long at the top level. */
# define TOP_LEVEL_FULL IDR_FULL
#else #else
# error "BITS_PER_LONG is not 32 or 64" # error "BITS_PER_LONG is not 32 or 64"
#endif #endif
...@@ -71,7 +77,7 @@ struct idr { ...@@ -71,7 +77,7 @@ struct idr {
void *idr_find(struct idr *idp, int id); void *idr_find(struct idr *idp, int id);
int idr_pre_get(struct idr *idp, unsigned gfp_mask); int idr_pre_get(struct idr *idp, unsigned gfp_mask);
int idr_get_new(struct idr *idp, void *ptr); int idr_get_new(struct idr *idp, void *ptr, int *id);
void idr_remove(struct idr *idp, int id); void idr_remove(struct idr *idp, int id);
void idr_init(struct idr *idp); void idr_init(struct idr *idp);
......
...@@ -397,7 +397,6 @@ static struct k_itimer * alloc_posix_timer(void) ...@@ -397,7 +397,6 @@ static struct k_itimer * alloc_posix_timer(void)
if (!tmr) if (!tmr)
return tmr; return tmr;
memset(tmr, 0, sizeof (struct k_itimer)); memset(tmr, 0, sizeof (struct k_itimer));
tmr->it_id = (timer_t)-1;
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr); kmem_cache_free(posix_timers_cache, tmr);
tmr = 0; tmr = 0;
...@@ -405,9 +404,11 @@ static struct k_itimer * alloc_posix_timer(void) ...@@ -405,9 +404,11 @@ static struct k_itimer * alloc_posix_timer(void)
return tmr; return tmr;
} }
static void release_posix_timer(struct k_itimer *tmr) #define IT_ID_SET 1
#define IT_ID_NOT_SET 0
static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
{ {
if (tmr->it_id != -1) { if (it_id_set) {
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&idr_lock, flags); spin_lock_irqsave(&idr_lock, flags);
idr_remove(&posix_timers_id, tmr->it_id); idr_remove(&posix_timers_id, tmr->it_id);
...@@ -429,10 +430,11 @@ sys_timer_create(clockid_t which_clock, ...@@ -429,10 +430,11 @@ sys_timer_create(clockid_t which_clock,
{ {
int error = 0; int error = 0;
struct k_itimer *new_timer = NULL; struct k_itimer *new_timer = NULL;
timer_t new_timer_id; int new_timer_id;
struct task_struct *process = 0; struct task_struct *process = 0;
unsigned long flags; unsigned long flags;
sigevent_t event; sigevent_t event;
int it_id_set = IT_ID_NOT_SET;
if ((unsigned) which_clock >= MAX_CLOCKS || if ((unsigned) which_clock >= MAX_CLOCKS ||
!posix_clocks[which_clock].res) !posix_clocks[which_clock].res)
...@@ -443,19 +445,38 @@ sys_timer_create(clockid_t which_clock, ...@@ -443,19 +445,38 @@ sys_timer_create(clockid_t which_clock,
return -EAGAIN; return -EAGAIN;
spin_lock_init(&new_timer->it_lock); spin_lock_init(&new_timer->it_lock);
do { retry:
if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) { if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) {
error = -EAGAIN; error = -EAGAIN;
new_timer->it_id = (timer_t)-1; goto out;
goto out; }
} spin_lock_irq(&idr_lock);
spin_lock_irq(&idr_lock); error = idr_get_new(&posix_timers_id,
new_timer_id = (timer_t) idr_get_new(&posix_timers_id, (void *) new_timer,
(void *) new_timer); &new_timer_id);
spin_unlock_irq(&idr_lock); spin_unlock_irq(&idr_lock);
} while (unlikely(new_timer_id == -1)); if (error == -EAGAIN)
goto retry;
else if (error) {
/*
* Wierd looking, but we return EAGAIN if the IDR is
* full (proper POSIX return value for this)
*/
error = -EAGAIN;
goto out;
}
it_id_set = IT_ID_SET;
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
new_timer->it_incr = 0;
new_timer->it_overrun = -1;
init_timer(&new_timer->it_timer);
new_timer->it_timer.expires = 0;
new_timer->it_timer.data = (unsigned long) new_timer;
new_timer->it_timer.function = posix_timer_fn;
set_timer_inactive(new_timer);
new_timer->it_id = new_timer_id;
/* /*
* return the timer_id now. The next step is hard to * return the timer_id now. The next step is hard to
* back out if there is an error. * back out if there is an error.
...@@ -470,6 +491,10 @@ sys_timer_create(clockid_t which_clock, ...@@ -470,6 +491,10 @@ sys_timer_create(clockid_t which_clock,
error = -EFAULT; error = -EFAULT;
goto out; goto out;
} }
new_timer->it_sigev_notify = event.sigev_notify;
new_timer->it_sigev_signo = event.sigev_signo;
new_timer->it_sigev_value = event.sigev_value;
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
if ((process = good_sigevent(&event))) { if ((process = good_sigevent(&event))) {
/* /*
...@@ -489,6 +514,7 @@ sys_timer_create(clockid_t which_clock, ...@@ -489,6 +514,7 @@ sys_timer_create(clockid_t which_clock,
*/ */
spin_lock_irqsave(&process->sighand->siglock, flags); spin_lock_irqsave(&process->sighand->siglock, flags);
if (!(process->flags & PF_EXITING)) { if (!(process->flags & PF_EXITING)) {
new_timer->it_process = process;
list_add(&new_timer->list, list_add(&new_timer->list,
&process->signal->posix_timers); &process->signal->posix_timers);
spin_unlock_irqrestore(&process->sighand->siglock, flags); spin_unlock_irqrestore(&process->sighand->siglock, flags);
...@@ -503,35 +529,27 @@ sys_timer_create(clockid_t which_clock, ...@@ -503,35 +529,27 @@ sys_timer_create(clockid_t which_clock,
error = -EINVAL; error = -EINVAL;
goto out; goto out;
} }
new_timer->it_sigev_notify = event.sigev_notify;
new_timer->it_sigev_signo = event.sigev_signo;
new_timer->it_sigev_value = event.sigev_value;
} else { } else {
new_timer->it_sigev_notify = SIGEV_SIGNAL; new_timer->it_sigev_notify = SIGEV_SIGNAL;
new_timer->it_sigev_signo = SIGALRM; new_timer->it_sigev_signo = SIGALRM;
new_timer->it_sigev_value.sival_int = new_timer->it_id; new_timer->it_sigev_value.sival_int = new_timer->it_id;
process = current->group_leader; process = current->group_leader;
spin_lock_irqsave(&process->sighand->siglock, flags); spin_lock_irqsave(&process->sighand->siglock, flags);
new_timer->it_process = process;
list_add(&new_timer->list, &process->signal->posix_timers); list_add(&new_timer->list, &process->signal->posix_timers);
spin_unlock_irqrestore(&process->sighand->siglock, flags); spin_unlock_irqrestore(&process->sighand->siglock, flags);
} }
new_timer->it_clock = which_clock; /*
new_timer->it_incr = 0; * In the case of the timer belonging to another task, after
new_timer->it_overrun = -1; * the task is unlocked, the timer is owned by the other task
init_timer(&new_timer->it_timer); * and may cease to exist at any time. Don't use or modify
new_timer->it_timer.expires = 0; * new_timer after the unlock call.
new_timer->it_timer.data = (unsigned long) new_timer;
new_timer->it_timer.function = posix_timer_fn;
set_timer_inactive(new_timer);
/*
* Once we set the process, it can be found so do it last...
*/ */
new_timer->it_process = process;
out: out:
if (error) if (error)
release_posix_timer(new_timer); release_posix_timer(new_timer, it_id_set);
return error; return error;
} }
...@@ -952,7 +970,7 @@ sys_timer_delete(timer_t timer_id) ...@@ -952,7 +970,7 @@ sys_timer_delete(timer_t timer_id)
timer->it_process = NULL; timer->it_process = NULL;
} }
unlock_timer(timer, flags); unlock_timer(timer, flags);
release_posix_timer(timer); release_posix_timer(timer, IT_ID_SET);
return 0; return 0;
} }
/* /*
...@@ -989,7 +1007,7 @@ static inline void itimer_delete(struct k_itimer *timer) ...@@ -989,7 +1007,7 @@ static inline void itimer_delete(struct k_itimer *timer)
timer->it_process = NULL; timer->it_process = NULL;
} }
unlock_timer(timer, flags); unlock_timer(timer, flags);
release_posix_timer(timer); release_posix_timer(timer, IT_ID_SET);
} }
/* /*
......
...@@ -70,14 +70,15 @@ ...@@ -70,14 +70,15 @@
* sleep, so must not be called with any spinlocks held. If the system is * sleep, so must not be called with any spinlocks held. If the system is
* REALLY out of memory this function returns 0, other wise 1. * REALLY out of memory this function returns 0, other wise 1.
* int idr_get_new(struct idr *idp, void *ptr); * int idr_get_new(struct idr *idp, void *ptr, int *id);
* This is the allocate id function. It should be called with any * This is the allocate id function. It should be called with any
* required locks. In fact, in the SMP case, you MUST lock prior to * required locks. In fact, in the SMP case, you MUST lock prior to
* calling this function to avoid possible out of memory problems. If * calling this function to avoid possible out of memory problems.
* memory is required, it will return a -1, in which case you should * If memory is required, it will return -EAGAIN, you should unlock
* unlock and go back to the idr_pre_get() call. ptr is the pointer * and go back to the idr_pre_get() call. If the idr is full, it
* you want associated with the id. In other words: * will return a -ENOSPC. ptr is the pointer you want associated
* with the id. The value is returned in the "id" field.
* void *idr_find(struct idr *idp, int id); * void *idr_find(struct idr *idp, int id);
...@@ -92,6 +93,10 @@ ...@@ -92,6 +93,10 @@
* removes the given id, freeing that slot and any memory that may * removes the given id, freeing that slot and any memory that may
* now be unused. See idr_find() for locking restrictions. * now be unused. See idr_find() for locking restrictions.
* int idr_full(struct idr *idp);
* Returns true if the idr is full and false if not.
*/ */
...@@ -276,12 +281,30 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id) ...@@ -276,12 +281,30 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id)
} }
EXPORT_SYMBOL(idr_get_new_above); EXPORT_SYMBOL(idr_get_new_above);
int idr_get_new(struct idr *idp, void *ptr) static int idr_full(struct idr *idp)
{ {
return idr_get_new_above(idp, ptr, 0); return ((idp->layers >= MAX_LEVEL)
&& (idp->top->bitmap == TOP_LEVEL_FULL));
} }
EXPORT_SYMBOL(idr_get_new);
int idr_get_new(struct idr *idp, void *ptr, int *id)
{
int rv;
rv = idr_get_new_above(idp, ptr, 0);
/*
* This is a cheap hack until the IDR code can be fixed to
* return proper error values.
*/
if (rv == -1) {
if (idr_full(idp))
return -ENOSPC;
else
return -EAGAIN;
}
*id = rv;
return 0;
}
EXPORT_SYMBOL(idr_get_new);
static void sub_remove(struct idr *idp, int shift, int id) static void sub_remove(struct idr *idp, int shift, int id)
{ {
...@@ -315,6 +338,9 @@ void idr_remove(struct idr *idp, int id) ...@@ -315,6 +338,9 @@ void idr_remove(struct idr *idp, int id)
{ {
struct idr_layer *p; struct idr_layer *p;
/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;
sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
if ( idp->top && idp->top->count == 1 && if ( idp->top && idp->top->count == 1 &&
(idp->layers > 1) && (idp->layers > 1) &&
...@@ -350,6 +376,9 @@ void *idr_find(struct idr *idp, int id) ...@@ -350,6 +376,9 @@ void *idr_find(struct idr *idp, int id)
if ( unlikely( (id & ~(~0 << MAX_ID_SHIFT)) >> (n + IDR_BITS))) if ( unlikely( (id & ~(~0 << MAX_ID_SHIFT)) >> (n + IDR_BITS)))
return NULL; return NULL;
#endif #endif
/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;
while (n > 0 && p) { while (n > 0 && p) {
n -= IDR_BITS; n -= IDR_BITS;
p = p->ary[(id >> n) & IDR_MASK]; p = p->ary[(id >> n) & IDR_MASK];
......
...@@ -1834,23 +1834,28 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, ...@@ -1834,23 +1834,28 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
/* Allocate storage for the negotiated streams if it is not a temporary * association. /* Allocate storage for the negotiated streams if it is not a temporary * association.
*/ */
if (!asoc->temp) { if (!asoc->temp) {
sctp_assoc_t assoc_id; int assoc_id;
int error;
asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams, asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams,
asoc->c.sinit_num_ostreams, gfp); asoc->c.sinit_num_ostreams, gfp);
if (!asoc->ssnmap) if (!asoc->ssnmap)
goto clean_up; goto clean_up;
do { retry:
if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
goto clean_up; goto clean_up;
spin_lock_bh(&sctp_assocs_id_lock); spin_lock_bh(&sctp_assocs_id_lock);
assoc_id = (sctp_assoc_t)idr_get_new(&sctp_assocs_id, error = idr_get_new(&sctp_assocs_id,
(void *)asoc); (void *)asoc,
spin_unlock_bh(&sctp_assocs_id_lock); &assoc_id);
} while (unlikely((int)assoc_id == -1)); spin_unlock_bh(&sctp_assocs_id_lock);
if (error == -EAGAIN)
goto retry;
else if (error)
goto clean_up;
asoc->assoc_id = assoc_id; asoc->assoc_id = (sctp_assoc_t) assoc_id;
} }
/* ADDIP Section 4.1 ASCONF Chunk Procedures /* ADDIP Section 4.1 ASCONF Chunk Procedures
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment