Commit 055e188d authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Fix dcache_lock/tasklist_lock ranking bug

__unhash_process acquires the dcache_lock while holding the
tasklist_lock for writing. This can deadlock. Additionally,
fs/proc/base.c incorrectly assumed that p->pid would be set to 0 during
release_task.

The patch fixes that by adding a new spinlock to the task structure and
fixing all references to (!p->pid).

The alternative to the new spinlock would be to hold dcache_lock around
__unhash_process.

- fs/proc/base.c assumed that p->pid is reset to 0 during exit.  This is
  not the case anymore.  I now look at the count of the pid structure for
  PIDTYPE_PID.

- de_thread now tested - as broken as it was before: open handles to
  /proc/<pid> are either stale or invalid after an exec of a nptl process,
  if the exec was call from a secondary thread.

- a few lock_kernels removed - that part of /proc doesn't need it.

- additional instances of 'if(current->pid)' replaced with pid_alive.
parent 05cdeac3
......@@ -529,30 +529,6 @@ static int exec_mmap(struct mm_struct *mm)
return 0;
}
static struct dentry *clean_proc_dentry(struct task_struct *p)
{
struct dentry *proc_dentry = p->proc_dentry;
if (proc_dentry) {
spin_lock(&dcache_lock);
if (!d_unhashed(proc_dentry)) {
dget_locked(proc_dentry);
__d_drop(proc_dentry);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
static inline void put_proc_dentry(struct dentry *dentry)
{
if (dentry) {
shrink_dcache_parent(dentry);
dput(dentry);
}
}
/*
* This function makes sure the current process has its own signal table,
* so that flush_signal_handlers can later reset the handlers without
......@@ -660,9 +636,11 @@ static inline int de_thread(struct task_struct *tsk)
while (leader->state != TASK_ZOMBIE)
yield();
spin_lock(&leader->proc_lock);
spin_lock(&current->proc_lock);
proc_dentry1 = proc_pid_unhash(current);
proc_dentry2 = proc_pid_unhash(leader);
write_lock_irq(&tasklist_lock);
proc_dentry1 = clean_proc_dentry(current);
proc_dentry2 = clean_proc_dentry(leader);
if (leader->tgid != current->tgid)
BUG();
......@@ -702,9 +680,10 @@ static inline int de_thread(struct task_struct *tsk)
state = leader->state;
write_unlock_irq(&tasklist_lock);
put_proc_dentry(proc_dentry1);
put_proc_dentry(proc_dentry2);
spin_unlock(&leader->proc_lock);
spin_unlock(&current->proc_lock);
proc_pid_flush(proc_dentry1);
proc_pid_flush(proc_dentry2);
if (state != TASK_ZOMBIE)
BUG();
......
......@@ -624,6 +624,12 @@ static struct inode_operations proc_pid_link_inode_operations = {
.follow_link = proc_pid_follow_link
};
static int pid_alive(struct task_struct *p)
{
BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
return atomic_read(&p->pids[PIDTYPE_PID].pid.count);
}
#define NUMBUF 10
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
......@@ -635,6 +641,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
char buf[NUMBUF];
struct files_struct * files;
retval = -ENOENT;
if (!pid_alive(p))
goto out;
retval = 0;
pid = p->pid;
......@@ -696,48 +705,46 @@ static int proc_base_readdir(struct file * filp,
int pid;
struct inode *inode = filp->f_dentry->d_inode;
struct pid_entry *p;
int ret = 0;
int ret;
lock_kernel();
ret = -ENOENT;
if (!pid_alive(proc_task(inode)))
goto out;
ret = 0;
pid = proc_task(inode)->pid;
if (!pid) {
ret = -ENOENT;
goto out;
}
i = filp->f_pos;
switch (i) {
case 0:
if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
case 1:
if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
case 0:
if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
case 1:
if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
default:
i -= 2;
if (i >= ARRAY_SIZE(base_stuff)) {
ret = 1;
goto out;
}
p = base_stuff + i;
while (p->name) {
if (filldir(dirent, p->name, p->len, filp->f_pos,
fake_ino(pid, p->type), p->mode >> 12) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
default:
i -= 2;
if (i>=sizeof(base_stuff)/sizeof(base_stuff[0])) {
ret = 1;
goto out;
}
p = base_stuff + i;
while (p->name) {
if (filldir(dirent, p->name, p->len, filp->f_pos,
fake_ino(pid, p->type), p->mode >> 12) < 0)
goto out;
filp->f_pos++;
p++;
}
p++;
}
}
ret = 1;
out:
unlock_kernel();
return ret;
}
......@@ -774,7 +781,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = fake_ino(task->pid, ino);
if (!task->pid)
if (!pid_alive(task))
goto out_unlock;
/*
......@@ -808,7 +815,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
*/
static int pid_revalidate(struct dentry * dentry, int flags)
{
if (proc_task(dentry->d_inode)->pid)
if (pid_alive(proc_task(dentry->d_inode)))
return 1;
d_drop(dentry);
return 0;
......@@ -842,18 +849,23 @@ static int pid_fd_revalidate(struct dentry * dentry, int flags)
static void pid_base_iput(struct dentry *dentry, struct inode *inode)
{
struct task_struct *task = proc_task(inode);
write_lock_irq(&tasklist_lock);
spin_lock(&task->proc_lock);
if (task->proc_dentry == dentry)
task->proc_dentry = NULL;
write_unlock_irq(&tasklist_lock);
spin_unlock(&task->proc_lock);
iput(inode);
}
static int pid_delete_dentry(struct dentry * dentry)
{
return proc_task(dentry->d_inode)->pid == 0;
/* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
* kill it immediately.
*/
return !pid_alive(proc_task(dentry->d_inode));
}
static struct dentry_operations pid_fd_dentry_operations =
{
.d_revalidate = pid_fd_revalidate,
......@@ -909,6 +921,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
if (fd == ~0U)
goto out;
if (!pid_alive(task))
goto out;
inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
if (!inode)
......@@ -937,8 +951,6 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
ei->op.proc_get_link = proc_fd_link;
dentry->d_op = &pid_fd_dentry_operations;
d_add(dentry, inode);
if (!proc_task(dentry->d_inode)->pid)
d_drop(dentry);
return NULL;
out_unlock2:
......@@ -975,6 +987,9 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
error = -ENOENT;
inode = NULL;
if (!pid_alive(task))
goto out;
for (p = base_stuff; p->name; p++) {
if (p->len != dentry->d_name.len)
continue;
......@@ -1056,8 +1071,6 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
}
dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode);
if (!proc_task(dentry->d_inode)->pid)
d_drop(dentry);
return NULL;
out:
......@@ -1095,6 +1108,55 @@ static struct inode_operations proc_self_inode_operations = {
.follow_link = proc_self_follow_link,
};
/**
* proc_pid_unhash - Unhash /proc/<pid> entry from the dcache.
* @p: task that should be flushed.
*
* Drops the /proc/<pid> dcache entry from the hash chains.
*
* Dropping /proc/<pid> entries and detach_pid must be synchroneous,
* otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
* if the pid value is immediately reused. This is enforced by
* - caller must acquire spin_lock(p->proc_lock)
* - must be called before detach_pid()
* - proc_pid_lookup acquires proc_lock, and checks that
* the target is not dead by looking at the attach count
* of PIDTYPE_PID.
*/
struct dentry *proc_pid_unhash(struct task_struct *p)
{
struct dentry *proc_dentry;
proc_dentry = p->proc_dentry;
if (proc_dentry != NULL) {
spin_lock(&dcache_lock);
if (!d_unhashed(proc_dentry)) {
dget_locked(proc_dentry);
__d_drop(proc_dentry);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
/**
* proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
* @proc_entry: directoy to prune.
*
* Shrink the /proc directory that was used by the just killed thread.
*/
void proc_pid_flush(struct dentry *proc_dentry)
{
if(proc_dentry != NULL) {
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
}
/* SMP-safe */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
......@@ -1143,12 +1205,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
inode->i_flags|=S_IMMUTABLE;
dentry->d_op = &pid_base_dentry_operations;
spin_lock(&task->proc_lock);
task->proc_dentry = dentry;
d_add(dentry, inode);
read_lock(&tasklist_lock);
proc_task(dentry->d_inode)->proc_dentry = dentry;
read_unlock(&tasklist_lock);
if (!proc_task(dentry->d_inode)->pid)
d_drop(dentry);
spin_unlock(&task->proc_lock);
return NULL;
out:
return ERR_PTR(-ENOENT);
......@@ -1171,7 +1233,7 @@ static int get_pid_list(int index, unsigned int *pids)
read_lock(&tasklist_lock);
for_each_process(p) {
int pid = p->pid;
if (!pid)
if (!pid_alive(p))
continue;
if (--index >= 0)
continue;
......
......@@ -110,9 +110,9 @@ static int proc_root_readdir(struct file * filp,
}
filp->f_pos = FIRST_PROCESS_ENTRY;
}
unlock_kernel();
ret = proc_pid_readdir(filp, dirent, filldir);
unlock_kernel();
return ret;
}
......
......@@ -101,6 +101,7 @@
.blocked = {{0}}, \
.posix_timers = LIST_HEAD_INIT(tsk.posix_timers), \
.alloc_lock = SPIN_LOCK_UNLOCKED, \
.proc_lock = SPIN_LOCK_UNLOCKED, \
.switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
}
......
......@@ -87,6 +87,8 @@ extern void proc_root_init(void);
extern void proc_misc_init(void);
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry);
struct dentry *proc_pid_unhash(struct task_struct *p);
void proc_pid_flush(struct dentry *proc_dentry);
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
......
......@@ -429,6 +429,8 @@ struct task_struct {
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
/* context-switch lock */
spinlock_t switch_lock;
......
......@@ -21,6 +21,7 @@
#include <linux/ptrace.h>
#include <linux/profile.h>
#include <linux/mount.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -31,10 +32,8 @@ extern struct task_struct *child_reaper;
int getrusage(struct task_struct *, int, struct rusage *);
static struct dentry * __unhash_process(struct task_struct *p)
static void __unhash_process(struct task_struct *p)
{
struct dentry *proc_dentry;
nr_threads--;
detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID);
......@@ -46,34 +45,25 @@ static struct dentry * __unhash_process(struct task_struct *p)
}
REMOVE_LINKS(p);
proc_dentry = p->proc_dentry;
if (unlikely(proc_dentry != NULL)) {
spin_lock(&dcache_lock);
if (!d_unhashed(proc_dentry)) {
dget_locked(proc_dentry);
__d_drop(proc_dentry);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
void release_task(struct task_struct * p)
{
struct dentry *proc_dentry;
task_t *leader;
struct dentry *proc_dentry;
BUG_ON(p->state < TASK_ZOMBIE);
atomic_dec(&p->user->processes);
spin_lock(&p->proc_lock);
proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock);
if (unlikely(p->ptrace))
__ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p);
__exit_sighand(p);
proc_dentry = __unhash_process(p);
__unhash_process(p);
/*
* If we are the last non-leader member of the thread
......@@ -92,11 +82,8 @@ void release_task(struct task_struct * p)
p->parent->cnswap += p->nswap + p->cnswap;
sched_exit(p);
write_unlock_irq(&tasklist_lock);
if (unlikely(proc_dentry != NULL)) {
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
spin_unlock(&p->proc_lock);
proc_pid_flush(proc_dentry);
release_thread(p);
put_task_struct(p);
}
......@@ -107,14 +94,13 @@ void unhash_process(struct task_struct *p)
{
struct dentry *proc_dentry;
spin_lock(&p->proc_lock);
proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock);
proc_dentry = __unhash_process(p);
__unhash_process(p);
write_unlock_irq(&tasklist_lock);
if (unlikely(proc_dentry != NULL)) {
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
spin_unlock(&p->proc_lock);
proc_pid_flush(proc_dentry);
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment