Commit 48e6484d, authored by Eric W. Biederman, committed by Linus Torvalds

[PATCH] proc: Rewrite the proc dentry flush on exit optimization

To keep the dcache from filling up with dead /proc entries, we flush them on
process exit.  However, over the years that code has gotten hairy, with a
dentry pointer and a lock in task_struct, and it has been misdocumented as a
correctness feature.

I have rewritten this code to look and see if we have a corresponding entry in
the dcache and if so flush it on process exit.  This removes the extra fields
in the task_struct and allows me to trivially handle the case of a
/proc/<tgid>/task/<pid> entry as well as the current /proc/<pid> entries.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 662795de
...@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk) ...@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk)
* and to assume its PID: * and to assume its PID:
*/ */
if (!thread_group_leader(current)) { if (!thread_group_leader(current)) {
struct dentry *proc_dentry1, *proc_dentry2;
/* /*
* Wait for the thread group leader to be a zombie. * Wait for the thread group leader to be a zombie.
* It should already be zombie at this point, most * It should already be zombie at this point, most
...@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk) ...@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk)
*/ */
current->start_time = leader->start_time; current->start_time = leader->start_time;
spin_lock(&leader->proc_lock);
spin_lock(&current->proc_lock);
proc_dentry1 = proc_pid_unhash(current);
proc_dentry2 = proc_pid_unhash(leader);
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
BUG_ON(leader->tgid != current->tgid); BUG_ON(leader->tgid != current->tgid);
...@@ -729,10 +723,6 @@ static int de_thread(struct task_struct *tsk) ...@@ -729,10 +723,6 @@ static int de_thread(struct task_struct *tsk)
leader->exit_state = EXIT_DEAD; leader->exit_state = EXIT_DEAD;
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
spin_unlock(&leader->proc_lock);
spin_unlock(&current->proc_lock);
proc_pid_flush(proc_dentry1);
proc_pid_flush(proc_dentry2);
} }
/* /*
......
...@@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) ...@@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0; return 0;
} }
static void pid_base_iput(struct dentry *dentry, struct inode *inode)
{
struct task_struct *task = proc_task(inode);
spin_lock(&task->proc_lock);
if (task->proc_dentry == dentry)
task->proc_dentry = NULL;
spin_unlock(&task->proc_lock);
iput(inode);
}
static int pid_delete_dentry(struct dentry * dentry) static int pid_delete_dentry(struct dentry * dentry)
{ {
/* Is the task we represent dead? /* Is the task we represent dead?
...@@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dentry_operations = ...@@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dentry_operations =
.d_delete = pid_delete_dentry, .d_delete = pid_delete_dentry,
}; };
static struct dentry_operations pid_base_dentry_operations =
{
.d_revalidate = pid_revalidate,
.d_iput = pid_base_iput,
.d_delete = pid_delete_dentry,
};
/* Lookups */ /* Lookups */
static unsigned name_to_int(struct dentry *dentry) static unsigned name_to_int(struct dentry *dentry)
...@@ -1859,57 +1842,70 @@ static struct inode_operations proc_self_inode_operations = { ...@@ -1859,57 +1842,70 @@ static struct inode_operations proc_self_inode_operations = {
}; };
/** /**
* proc_pid_unhash - Unhash /proc/@pid entry from the dcache. * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
* @p: task that should be flushed. *
* @task: task that should be flushed.
*
* Looks in the dcache for
* /proc/@pid
* /proc/@tgid/task/@pid
* if either directory is present flushes it and all of its children
* from the dcache.
* *
* Drops the /proc/@pid dcache entry from the hash chains. * It is safe and reasonable to cache /proc entries for a task until
* that task exits. After that they just clog up the dcache with
* useless entries, possibly causing useful dcache entries to be
* flushed instead. This routine is provided to flush those useless
* dcache entries at process exit time.
* *
* Dropping /proc/@pid entries and detach_pid must be synchroneous, * NOTE: This routine is just an optimization so it does not guarantee
* otherwise e.g. /proc/@pid/exe might point to the wrong executable, * that no dcache entries will exist at process exit time it
* if the pid value is immediately reused. This is enforced by * just makes it very unlikely that any will persist.
* - caller must acquire spin_lock(p->proc_lock)
* - must be called before detach_pid()
* - proc_pid_lookup acquires proc_lock, and checks that
* the target is not dead by looking at the attach count
* of PIDTYPE_PID.
*/ */
void proc_flush_task(struct task_struct *task)
struct dentry *proc_pid_unhash(struct task_struct *p)
{ {
struct dentry *proc_dentry; struct dentry *dentry, *leader, *dir;
char buf[30];
struct qstr name;
name.name = buf;
name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
if (dentry) {
shrink_dcache_parent(dentry);
d_drop(dentry);
dput(dentry);
}
proc_dentry = p->proc_dentry; if (thread_group_leader(task))
if (proc_dentry != NULL) { goto out;
spin_lock(&dcache_lock); name.name = buf;
spin_lock(&proc_dentry->d_lock); name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
if (!d_unhashed(proc_dentry)) { leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
dget_locked(proc_dentry); if (!leader)
__d_drop(proc_dentry); goto out;
spin_unlock(&proc_dentry->d_lock);
} else {
spin_unlock(&proc_dentry->d_lock);
proc_dentry = NULL;
}
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
/** name.name = "task";
* proc_pid_flush - recover memory used by stale /proc/@pid/x entries name.len = strlen(name.name);
* @proc_dentry: directoy to prune. dir = d_hash_and_lookup(leader, &name);
* if (!dir)
* Shrink the /proc directory that was used by the just killed thread. goto out_put_leader;
*/
name.name = buf;
void proc_pid_flush(struct dentry *proc_dentry) name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
{ dentry = d_hash_and_lookup(dir, &name);
might_sleep(); if (dentry) {
if(proc_dentry != NULL) { shrink_dcache_parent(dentry);
shrink_dcache_parent(proc_dentry); d_drop(dentry);
dput(proc_dentry); dput(dentry);
} }
dput(dir);
out_put_leader:
dput(leader);
out:
return;
} }
/* SMP-safe */ /* SMP-safe */
...@@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct ...@@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
struct inode *inode; struct inode *inode;
struct proc_inode *ei; struct proc_inode *ei;
unsigned tgid; unsigned tgid;
int died;
if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
inode = new_inode(dir->i_sb); inode = new_inode(dir->i_sb);
...@@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct ...@@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
inode->i_nlink = 4; inode->i_nlink = 4;
#endif #endif
dentry->d_op = &pid_base_dentry_operations; dentry->d_op = &pid_dentry_operations;
died = 0;
d_add(dentry, inode); d_add(dentry, inode);
spin_lock(&task->proc_lock);
task->proc_dentry = dentry;
if (!pid_alive(task)) { if (!pid_alive(task)) {
dentry = proc_pid_unhash(task); d_drop(dentry);
died = 1; shrink_dcache_parent(dentry);
goto out;
} }
spin_unlock(&task->proc_lock);
put_task_struct(task); put_task_struct(task);
if (died) {
proc_pid_flush(dentry);
goto out;
}
return NULL; return NULL;
out: out:
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
...@@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry ...@@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
inode->i_nlink = 3; inode->i_nlink = 3;
#endif #endif
dentry->d_op = &pid_base_dentry_operations; dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode); d_add(dentry, inode);
......
...@@ -119,7 +119,6 @@ extern struct group_info init_groups; ...@@ -119,7 +119,6 @@ extern struct group_info init_groups;
.signal = {{0}}}, \ .signal = {{0}}}, \
.blocked = {{0}}, \ .blocked = {{0}}, \
.alloc_lock = SPIN_LOCK_UNLOCKED, \ .alloc_lock = SPIN_LOCK_UNLOCKED, \
.proc_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \ .journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.fs_excl = ATOMIC_INIT(0), \ .fs_excl = ATOMIC_INIT(0), \
......
...@@ -99,9 +99,8 @@ extern void proc_misc_init(void); ...@@ -99,9 +99,8 @@ extern void proc_misc_init(void);
struct mm_struct; struct mm_struct;
void proc_flush_task(struct task_struct *task);
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
struct dentry *proc_pid_unhash(struct task_struct *p);
void proc_pid_flush(struct dentry *proc_dentry);
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
unsigned long task_vsize(struct mm_struct *); unsigned long task_vsize(struct mm_struct *);
int task_statm(struct mm_struct *, int *, int *, int *, int *); int task_statm(struct mm_struct *, int *, int *, int *, int *);
...@@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name) ...@@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name)
#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
static inline void proc_net_remove(const char *name) {} static inline void proc_net_remove(const char *name) {}
static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } static inline void proc_flush_task(struct task_struct *task) { }
static inline void proc_pid_flush(struct dentry *proc_dentry) { }
static inline struct proc_dir_entry *create_proc_entry(const char *name, static inline struct proc_dir_entry *create_proc_entry(const char *name,
mode_t mode, struct proc_dir_entry *parent) { return NULL; } mode_t mode, struct proc_dir_entry *parent) { return NULL; }
......
...@@ -842,8 +842,6 @@ struct task_struct { ...@@ -842,8 +842,6 @@ struct task_struct {
u32 self_exec_id; u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
spinlock_t alloc_lock; spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
#ifdef CONFIG_DEBUG_MUTEXES #ifdef CONFIG_DEBUG_MUTEXES
/* mutex deadlock detection */ /* mutex deadlock detection */
...@@ -856,7 +854,6 @@ struct task_struct { ...@@ -856,7 +854,6 @@ struct task_struct {
/* VM state */ /* VM state */
struct reclaim_state *reclaim_state; struct reclaim_state *reclaim_state;
struct dentry *proc_dentry;
struct backing_dev_info *backing_dev_info; struct backing_dev_info *backing_dev_info;
struct io_context *io_context; struct io_context *io_context;
......
...@@ -137,12 +137,8 @@ void release_task(struct task_struct * p) ...@@ -137,12 +137,8 @@ void release_task(struct task_struct * p)
{ {
int zap_leader; int zap_leader;
task_t *leader; task_t *leader;
struct dentry *proc_dentry;
repeat: repeat:
atomic_dec(&p->user->processes); atomic_dec(&p->user->processes);
spin_lock(&p->proc_lock);
proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock); write_lock_irq(&tasklist_lock);
ptrace_unlink(p); ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
...@@ -171,8 +167,7 @@ void release_task(struct task_struct * p) ...@@ -171,8 +167,7 @@ void release_task(struct task_struct * p)
sched_exit(p); sched_exit(p);
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
spin_unlock(&p->proc_lock); proc_flush_task(p);
proc_pid_flush(proc_dentry);
release_thread(p); release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct); call_rcu(&p->rcu, delayed_put_task_struct);
......
...@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags, ...@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags,
if (put_user(p->pid, parent_tidptr)) if (put_user(p->pid, parent_tidptr))
goto bad_fork_cleanup; goto bad_fork_cleanup;
p->proc_dentry = NULL;
INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling); INIT_LIST_HEAD(&p->sibling);
p->vfork_done = NULL; p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock); spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->proc_lock);
clear_tsk_thread_flag(p, TIF_SIGPENDING); clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending); init_sigpending(&p->pending);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment