Commit 055e188d authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Fix dcache_lock/tasklist_lock ranking bug

__unhash_process acquires the dcache_lock while holding the
tasklist_lock for writing. This can deadlock. Additionally,
fs/proc/base.c incorrectly assumed that p->pid would be set to 0 during
release_task.

The patch fixes that by adding a new spinlock to the task structure and
fixing all references to (!p->pid).

The alternative to the new spinlock would be to hold dcache_lock around
__unhash_process.

- fs/proc/base.c assumed that p->pid is reset to 0 during exit.  This is
  not the case anymore.  I now look at the count of the pid structure for
  PIDTYPE_PID.

- de_thread now tested - as broken as it was before: open handles to
  /proc/<pid> are either stale or invalid after an exec of a nptl process,
  if the exec was call from a secondary thread.

- a few lock_kernels removed - that part of /proc doesn't need it.

- additional instances of 'if(current->pid)' replaced with pid_alive.
parent 05cdeac3
...@@ -529,30 +529,6 @@ static int exec_mmap(struct mm_struct *mm) ...@@ -529,30 +529,6 @@ static int exec_mmap(struct mm_struct *mm)
return 0; return 0;
} }
static struct dentry *clean_proc_dentry(struct task_struct *p)
{
struct dentry *proc_dentry = p->proc_dentry;
if (proc_dentry) {
spin_lock(&dcache_lock);
if (!d_unhashed(proc_dentry)) {
dget_locked(proc_dentry);
__d_drop(proc_dentry);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
static inline void put_proc_dentry(struct dentry *dentry)
{
if (dentry) {
shrink_dcache_parent(dentry);
dput(dentry);
}
}
/* /*
* This function makes sure the current process has its own signal table, * This function makes sure the current process has its own signal table,
* so that flush_signal_handlers can later reset the handlers without * so that flush_signal_handlers can later reset the handlers without
...@@ -660,9 +636,11 @@ static inline int de_thread(struct task_struct *tsk) ...@@ -660,9 +636,11 @@ static inline int de_thread(struct task_struct *tsk)
while (leader->state != TASK_ZOMBIE) while (leader->state != TASK_ZOMBIE)
yield(); yield();
spin_lock(&leader->proc_lock);
spin_lock(&current->proc_lock);
proc_dentry1 = proc_pid_unhash(current);
proc_dentry2 = proc_pid_unhash(leader);
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
proc_dentry1 = clean_proc_dentry(current);
proc_dentry2 = clean_proc_dentry(leader);
if (leader->tgid != current->tgid) if (leader->tgid != current->tgid)
BUG(); BUG();
...@@ -702,9 +680,10 @@ static inline int de_thread(struct task_struct *tsk) ...@@ -702,9 +680,10 @@ static inline int de_thread(struct task_struct *tsk)
state = leader->state; state = leader->state;
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
spin_unlock(&leader->proc_lock);
put_proc_dentry(proc_dentry1); spin_unlock(&current->proc_lock);
put_proc_dentry(proc_dentry2); proc_pid_flush(proc_dentry1);
proc_pid_flush(proc_dentry2);
if (state != TASK_ZOMBIE) if (state != TASK_ZOMBIE)
BUG(); BUG();
......
...@@ -624,6 +624,12 @@ static struct inode_operations proc_pid_link_inode_operations = { ...@@ -624,6 +624,12 @@ static struct inode_operations proc_pid_link_inode_operations = {
.follow_link = proc_pid_follow_link .follow_link = proc_pid_follow_link
}; };
static int pid_alive(struct task_struct *p)
{
BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
return atomic_read(&p->pids[PIDTYPE_PID].pid.count);
}
#define NUMBUF 10 #define NUMBUF 10
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
...@@ -635,6 +641,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ...@@ -635,6 +641,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
char buf[NUMBUF]; char buf[NUMBUF];
struct files_struct * files; struct files_struct * files;
retval = -ENOENT;
if (!pid_alive(p))
goto out;
retval = 0; retval = 0;
pid = p->pid; pid = p->pid;
...@@ -696,48 +705,46 @@ static int proc_base_readdir(struct file * filp, ...@@ -696,48 +705,46 @@ static int proc_base_readdir(struct file * filp,
int pid; int pid;
struct inode *inode = filp->f_dentry->d_inode; struct inode *inode = filp->f_dentry->d_inode;
struct pid_entry *p; struct pid_entry *p;
int ret = 0; int ret;
lock_kernel(); ret = -ENOENT;
if (!pid_alive(proc_task(inode)))
goto out;
ret = 0;
pid = proc_task(inode)->pid; pid = proc_task(inode)->pid;
if (!pid) {
ret = -ENOENT;
goto out;
}
i = filp->f_pos; i = filp->f_pos;
switch (i) { switch (i) {
case 0: case 0:
if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0) if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
goto out; goto out;
i++; i++;
filp->f_pos++; filp->f_pos++;
/* fall through */ /* fall through */
case 1: case 1:
if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0) if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
default:
i -= 2;
if (i >= ARRAY_SIZE(base_stuff)) {
ret = 1;
goto out;
}
p = base_stuff + i;
while (p->name) {
if (filldir(dirent, p->name, p->len, filp->f_pos,
fake_ino(pid, p->type), p->mode >> 12) < 0)
goto out; goto out;
i++;
filp->f_pos++; filp->f_pos++;
/* fall through */ p++;
default: }
i -= 2;
if (i>=sizeof(base_stuff)/sizeof(base_stuff[0])) {
ret = 1;
goto out;
}
p = base_stuff + i;
while (p->name) {
if (filldir(dirent, p->name, p->len, filp->f_pos,
fake_ino(pid, p->type), p->mode >> 12) < 0)
goto out;
filp->f_pos++;
p++;
}
} }
ret = 1; ret = 1;
out: out:
unlock_kernel();
return ret; return ret;
} }
...@@ -774,7 +781,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st ...@@ -774,7 +781,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = fake_ino(task->pid, ino); inode->i_ino = fake_ino(task->pid, ino);
if (!task->pid) if (!pid_alive(task))
goto out_unlock; goto out_unlock;
/* /*
...@@ -808,7 +815,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st ...@@ -808,7 +815,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
*/ */
static int pid_revalidate(struct dentry * dentry, int flags) static int pid_revalidate(struct dentry * dentry, int flags)
{ {
if (proc_task(dentry->d_inode)->pid) if (pid_alive(proc_task(dentry->d_inode)))
return 1; return 1;
d_drop(dentry); d_drop(dentry);
return 0; return 0;
...@@ -842,18 +849,23 @@ static int pid_fd_revalidate(struct dentry * dentry, int flags) ...@@ -842,18 +849,23 @@ static int pid_fd_revalidate(struct dentry * dentry, int flags)
static void pid_base_iput(struct dentry *dentry, struct inode *inode) static void pid_base_iput(struct dentry *dentry, struct inode *inode)
{ {
struct task_struct *task = proc_task(inode); struct task_struct *task = proc_task(inode);
write_lock_irq(&tasklist_lock); spin_lock(&task->proc_lock);
if (task->proc_dentry == dentry) if (task->proc_dentry == dentry)
task->proc_dentry = NULL; task->proc_dentry = NULL;
write_unlock_irq(&tasklist_lock); spin_unlock(&task->proc_lock);
iput(inode); iput(inode);
} }
static int pid_delete_dentry(struct dentry * dentry) static int pid_delete_dentry(struct dentry * dentry)
{ {
return proc_task(dentry->d_inode)->pid == 0; /* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
* kill it immediately.
*/
return !pid_alive(proc_task(dentry->d_inode));
} }
static struct dentry_operations pid_fd_dentry_operations = static struct dentry_operations pid_fd_dentry_operations =
{ {
.d_revalidate = pid_fd_revalidate, .d_revalidate = pid_fd_revalidate,
...@@ -909,6 +921,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) ...@@ -909,6 +921,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
if (fd == ~0U) if (fd == ~0U)
goto out; goto out;
if (!pid_alive(task))
goto out;
inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
if (!inode) if (!inode)
...@@ -937,8 +951,6 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) ...@@ -937,8 +951,6 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
ei->op.proc_get_link = proc_fd_link; ei->op.proc_get_link = proc_fd_link;
dentry->d_op = &pid_fd_dentry_operations; dentry->d_op = &pid_fd_dentry_operations;
d_add(dentry, inode); d_add(dentry, inode);
if (!proc_task(dentry->d_inode)->pid)
d_drop(dentry);
return NULL; return NULL;
out_unlock2: out_unlock2:
...@@ -975,6 +987,9 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) ...@@ -975,6 +987,9 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
error = -ENOENT; error = -ENOENT;
inode = NULL; inode = NULL;
if (!pid_alive(task))
goto out;
for (p = base_stuff; p->name; p++) { for (p = base_stuff; p->name; p++) {
if (p->len != dentry->d_name.len) if (p->len != dentry->d_name.len)
continue; continue;
...@@ -1056,8 +1071,6 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) ...@@ -1056,8 +1071,6 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
} }
dentry->d_op = &pid_dentry_operations; dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode); d_add(dentry, inode);
if (!proc_task(dentry->d_inode)->pid)
d_drop(dentry);
return NULL; return NULL;
out: out:
...@@ -1095,6 +1108,55 @@ static struct inode_operations proc_self_inode_operations = { ...@@ -1095,6 +1108,55 @@ static struct inode_operations proc_self_inode_operations = {
.follow_link = proc_self_follow_link, .follow_link = proc_self_follow_link,
}; };
/**
* proc_pid_unhash - Unhash /proc/<pid> entry from the dcache.
* @p: task that should be flushed.
*
* Drops the /proc/<pid> dcache entry from the hash chains.
*
* Dropping /proc/<pid> entries and detach_pid must be synchroneous,
* otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
* if the pid value is immediately reused. This is enforced by
* - caller must acquire spin_lock(p->proc_lock)
* - must be called before detach_pid()
* - proc_pid_lookup acquires proc_lock, and checks that
* the target is not dead by looking at the attach count
* of PIDTYPE_PID.
*/
struct dentry *proc_pid_unhash(struct task_struct *p)
{
struct dentry *proc_dentry;
proc_dentry = p->proc_dentry;
if (proc_dentry != NULL) {
spin_lock(&dcache_lock);
if (!d_unhashed(proc_dentry)) {
dget_locked(proc_dentry);
__d_drop(proc_dentry);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
/**
* proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
* @proc_entry: directoy to prune.
*
* Shrink the /proc directory that was used by the just killed thread.
*/
void proc_pid_flush(struct dentry *proc_dentry)
{
if(proc_dentry != NULL) {
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
}
/* SMP-safe */ /* SMP-safe */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{ {
...@@ -1143,12 +1205,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) ...@@ -1143,12 +1205,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
inode->i_flags|=S_IMMUTABLE; inode->i_flags|=S_IMMUTABLE;
dentry->d_op = &pid_base_dentry_operations; dentry->d_op = &pid_base_dentry_operations;
spin_lock(&task->proc_lock);
task->proc_dentry = dentry;
d_add(dentry, inode); d_add(dentry, inode);
read_lock(&tasklist_lock); spin_unlock(&task->proc_lock);
proc_task(dentry->d_inode)->proc_dentry = dentry;
read_unlock(&tasklist_lock);
if (!proc_task(dentry->d_inode)->pid)
d_drop(dentry);
return NULL; return NULL;
out: out:
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
...@@ -1171,7 +1233,7 @@ static int get_pid_list(int index, unsigned int *pids) ...@@ -1171,7 +1233,7 @@ static int get_pid_list(int index, unsigned int *pids)
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
for_each_process(p) { for_each_process(p) {
int pid = p->pid; int pid = p->pid;
if (!pid) if (!pid_alive(p))
continue; continue;
if (--index >= 0) if (--index >= 0)
continue; continue;
......
...@@ -110,9 +110,9 @@ static int proc_root_readdir(struct file * filp, ...@@ -110,9 +110,9 @@ static int proc_root_readdir(struct file * filp,
} }
filp->f_pos = FIRST_PROCESS_ENTRY; filp->f_pos = FIRST_PROCESS_ENTRY;
} }
unlock_kernel();
ret = proc_pid_readdir(filp, dirent, filldir); ret = proc_pid_readdir(filp, dirent, filldir);
unlock_kernel();
return ret; return ret;
} }
......
...@@ -101,6 +101,7 @@ ...@@ -101,6 +101,7 @@
.blocked = {{0}}, \ .blocked = {{0}}, \
.posix_timers = LIST_HEAD_INIT(tsk.posix_timers), \ .posix_timers = LIST_HEAD_INIT(tsk.posix_timers), \
.alloc_lock = SPIN_LOCK_UNLOCKED, \ .alloc_lock = SPIN_LOCK_UNLOCKED, \
.proc_lock = SPIN_LOCK_UNLOCKED, \
.switch_lock = SPIN_LOCK_UNLOCKED, \ .switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \ .journal_info = NULL, \
} }
......
...@@ -87,6 +87,8 @@ extern void proc_root_init(void); ...@@ -87,6 +87,8 @@ extern void proc_root_init(void);
extern void proc_misc_init(void); extern void proc_misc_init(void);
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry); struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry);
struct dentry *proc_pid_unhash(struct task_struct *p);
void proc_pid_flush(struct dentry *proc_dentry);
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
......
...@@ -429,6 +429,8 @@ struct task_struct { ...@@ -429,6 +429,8 @@ struct task_struct {
u32 self_exec_id; u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty */ /* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock; spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
/* context-switch lock */ /* context-switch lock */
spinlock_t switch_lock; spinlock_t switch_lock;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/profile.h> #include <linux/profile.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
...@@ -31,10 +32,8 @@ extern struct task_struct *child_reaper; ...@@ -31,10 +32,8 @@ extern struct task_struct *child_reaper;
int getrusage(struct task_struct *, int, struct rusage *); int getrusage(struct task_struct *, int, struct rusage *);
static struct dentry * __unhash_process(struct task_struct *p) static void __unhash_process(struct task_struct *p)
{ {
struct dentry *proc_dentry;
nr_threads--; nr_threads--;
detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID); detach_pid(p, PIDTYPE_TGID);
...@@ -46,34 +45,25 @@ static struct dentry * __unhash_process(struct task_struct *p) ...@@ -46,34 +45,25 @@ static struct dentry * __unhash_process(struct task_struct *p)
} }
REMOVE_LINKS(p); REMOVE_LINKS(p);
proc_dentry = p->proc_dentry;
if (unlikely(proc_dentry != NULL)) {
spin_lock(&dcache_lock);
if (!d_unhashed(proc_dentry)) {
dget_locked(proc_dentry);
__d_drop(proc_dentry);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
} }
void release_task(struct task_struct * p) void release_task(struct task_struct * p)
{ {
struct dentry *proc_dentry;
task_t *leader; task_t *leader;
struct dentry *proc_dentry;
BUG_ON(p->state < TASK_ZOMBIE); BUG_ON(p->state < TASK_ZOMBIE);
atomic_dec(&p->user->processes); atomic_dec(&p->user->processes);
spin_lock(&p->proc_lock);
proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
if (unlikely(p->ptrace)) if (unlikely(p->ptrace))
__ptrace_unlink(p); __ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p); __exit_signal(p);
__exit_sighand(p); __exit_sighand(p);
proc_dentry = __unhash_process(p); __unhash_process(p);
/* /*
* If we are the last non-leader member of the thread * If we are the last non-leader member of the thread
...@@ -92,11 +82,8 @@ void release_task(struct task_struct * p) ...@@ -92,11 +82,8 @@ void release_task(struct task_struct * p)
p->parent->cnswap += p->nswap + p->cnswap; p->parent->cnswap += p->nswap + p->cnswap;
sched_exit(p); sched_exit(p);
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
spin_unlock(&p->proc_lock);
if (unlikely(proc_dentry != NULL)) { proc_pid_flush(proc_dentry);
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
release_thread(p); release_thread(p);
put_task_struct(p); put_task_struct(p);
} }
...@@ -107,14 +94,13 @@ void unhash_process(struct task_struct *p) ...@@ -107,14 +94,13 @@ void unhash_process(struct task_struct *p)
{ {
struct dentry *proc_dentry; struct dentry *proc_dentry;
spin_lock(&p->proc_lock);
proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
proc_dentry = __unhash_process(p); __unhash_process(p);
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
spin_unlock(&p->proc_lock);
if (unlikely(proc_dentry != NULL)) { proc_pid_flush(proc_dentry);
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment