Commit 63540cea authored by Ingo Molnar's avatar Ingo Molnar Committed by Linus Torvalds

[PATCH] thread-exec-2.5.34-B1, BK-curr

This implements one of the last missing POSIX threading details - exec()
semantics.  Previous kernels had code that tried to handle it, but that
code had a number of disadvantages:

 - it only worked if the exec()-ing thread was the thread group leader,
   creating an assymetry. This does not work if the thread group leader
   has exited already.

 - it was racy: it sent a SIGKILL to every thread in the group but did not
   wait for them to actually process the SIGKILL. It did a yield() but
   that is not enough. All 'other' threads have to finish processing
   before we can continue with the exec().

This adds the same logic, but extended with the following enhancements:

 - works from non-leader threads just as much as the thread group leader.

 - waits for all other threads to exit before continuing with the exec().

 - reuses the PID of the group.

It would perhaps be a more generic approach to add a new syscall,
sys_ungroup() - which would do largely what de_thread() does in this
patch.

But it's not really needed now - posix_spawn() is currently implemented
via starting a non-CLONE_THREAD helper thread that does a sys_exec().
There's no API currently that needs a direct exec() from a thread - but
it could be created (such as pthread_exec_np()).  It would have the
advantage of not having to go through a helper thread, but the
difference is minimal.
parent 7cd0a691
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#define __NO_VERSION__ #define __NO_VERSION__
#include <linux/module.h> #include <linux/module.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
...@@ -493,52 +494,151 @@ static int exec_mmap(struct mm_struct *mm) ...@@ -493,52 +494,151 @@ static int exec_mmap(struct mm_struct *mm)
return 0; return 0;
} }
static struct dentry *clean_proc_dentry(struct task_struct *p)
{
struct dentry *proc_dentry = p->proc_dentry;
if (proc_dentry) {
spin_lock(&dcache_lock);
if (!list_empty(&proc_dentry->d_hash)) {
dget_locked(proc_dentry);
list_del_init(&proc_dentry->d_hash);
} else
proc_dentry = NULL;
spin_unlock(&dcache_lock);
}
return proc_dentry;
}
static inline void put_proc_dentry(struct dentry *dentry)
{
if (dentry) {
shrink_dcache_parent(dentry);
dput(dentry);
}
}
/* /*
* This function makes sure the current process has its own signal table, * This function makes sure the current process has its own signal table,
* so that flush_signal_handlers can later reset the handlers without * so that flush_signal_handlers can later reset the handlers without
* disturbing other processes. (Other processes might share the signal * disturbing other processes. (Other processes might share the signal
* table via the CLONE_SIGNAL option to clone().) * table via the CLONE_SIGNAL option to clone().)
*/ */
static inline int de_thread(struct signal_struct *oldsig)
static inline int make_private_signals(void)
{ {
struct signal_struct * newsig; struct signal_struct *newsig;
int count;
remove_thread_group(current, current->sig);
if (atomic_read(&current->sig->count) <= 1) if (atomic_read(&current->sig->count) <= 1)
return 0; return 0;
newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL); newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
if (newsig == NULL) if (!newsig)
return -ENOMEM; return -ENOMEM;
if (list_empty(&current->thread_group))
goto out;
/*
* Kill all other threads in the thread group:
*/
spin_lock_irq(&oldsig->siglock);
if (oldsig->group_exit) {
/*
* Another group action in progress, just
* return so that the signal is processed.
*/
spin_unlock_irq(&oldsig->siglock);
kmem_cache_free(sigact_cachep, newsig);
return -EAGAIN;
}
oldsig->group_exit = 1;
__broadcast_thread_group(current, SIGKILL);
/*
* Account for the thread group leader hanging around:
*/
count = 2;
if (current->pid == current->tgid)
count = 1;
while (atomic_read(&oldsig->count) > count) {
oldsig->group_exit_task = current;
current->state = TASK_UNINTERRUPTIBLE;
spin_unlock_irq(&oldsig->siglock);
schedule();
spin_lock_irq(&oldsig->siglock);
if (oldsig->group_exit_task)
BUG();
}
spin_unlock_irq(&oldsig->siglock);
/*
* At this point all other threads have exited, all we have to
* do is to wait for the thread group leader to become inactive,
* and to assume its PID:
*/
if (current->pid != current->tgid) {
struct task_struct *leader = current->group_leader;
struct dentry *proc_dentry1, *proc_dentry2;
unsigned long state;
wait_task_inactive(leader);
write_lock_irq(&tasklist_lock);
proc_dentry1 = clean_proc_dentry(current);
proc_dentry2 = clean_proc_dentry(leader);
if (leader->tgid != current->tgid)
BUG();
if (current->pid == current->tgid)
BUG();
/*
* An exec() starts a new thread group with the
* TGID of the previous thread group. Rehash the
* two threads with a switched PID, and release
* the former thread group leader:
*/
unhash_pid(current);
unhash_pid(leader);
leader->pid = leader->tgid = current->pid;
current->pid = current->tgid;
hash_pid(current);
hash_pid(leader);
list_add_tail(&current->tasks, &init_task.tasks);
state = leader->state;
write_unlock_irq(&tasklist_lock);
if (state == TASK_ZOMBIE)
release_task(leader);
put_proc_dentry(proc_dentry1);
put_proc_dentry(proc_dentry2);
}
out:
spin_lock_init(&newsig->siglock); spin_lock_init(&newsig->siglock);
atomic_set(&newsig->count, 1); atomic_set(&newsig->count, 1);
newsig->group_exit = 0; newsig->group_exit = 0;
newsig->group_exit_code = 0; newsig->group_exit_code = 0;
newsig->group_exit_task = NULL;
memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
init_sigpending(&newsig->shared_pending); init_sigpending(&newsig->shared_pending);
remove_thread_group(current, current->sig);
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
current->sig = newsig; current->sig = newsig;
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
return 0;
}
/*
* If make_private_signals() made a copy of the signal table, decrement the
* refcount of the original table, and free it if necessary.
* We don't do that in make_private_signals() so that we can back off
* in flush_old_exec() if an error occurs after calling make_private_signals().
*/
static inline void release_old_signals(struct signal_struct * oldsig)
{
if (current->sig == oldsig)
return;
if (atomic_dec_and_test(&oldsig->count)) if (atomic_dec_and_test(&oldsig->count))
kmem_cache_free(sigact_cachep, oldsig); kmem_cache_free(sigact_cachep, oldsig);
}
if (!list_empty(&current->thread_group))
BUG();
if (current->tgid != current->pid)
BUG();
return 0;
}
/* /*
* These functions flushes out all traces of the currently running executable * These functions flushes out all traces of the currently running executable
* so that a new one can be started * so that a new one can be started
...@@ -572,44 +672,27 @@ static inline void flush_old_files(struct files_struct * files) ...@@ -572,44 +672,27 @@ static inline void flush_old_files(struct files_struct * files)
write_unlock(&files->file_lock); write_unlock(&files->file_lock);
} }
/*
* An execve() will automatically "de-thread" the process.
* - if a master thread (PID==TGID) is doing this, then all subsidiary threads
* will be killed (otherwise there will end up being two independent thread
* groups with the same TGID).
* - if a subsidary thread is doing this, then it just leaves the thread group
*/
static void de_thread(struct task_struct *tsk)
{
if (!list_empty(&tsk->thread_group))
BUG();
/* An exec() starts a new thread group: */
tsk->tgid = tsk->pid;
}
int flush_old_exec(struct linux_binprm * bprm) int flush_old_exec(struct linux_binprm * bprm)
{ {
char * name; char * name;
int i, ch, retval; int i, ch, retval;
struct signal_struct * oldsig; struct signal_struct * oldsig = current->sig;
/*
* Make sure we have a private signal table
*/
oldsig = current->sig;
retval = make_private_signals();
if (retval) goto flush_failed;
/* /*
* Release all of the old mmap stuff * Release all of the old mmap stuff
*/ */
retval = exec_mmap(bprm->mm); retval = exec_mmap(bprm->mm);
if (retval) goto mmap_failed; if (retval)
goto mmap_failed;
/*
* Make sure we have a private signal table and that
* we are unassociated from the previous thread group.
*/
retval = de_thread(oldsig);
if (retval)
goto flush_failed;
/* This is the point of no return */ /* This is the point of no return */
de_thread(current);
release_old_signals(oldsig);
current->sas_ss_sp = current->sas_ss_size = 0; current->sas_ss_sp = current->sas_ss_size = 0;
......
...@@ -222,6 +222,8 @@ struct signal_struct { ...@@ -222,6 +222,8 @@ struct signal_struct {
/* thread group exit support */ /* thread group exit support */
int group_exit; int group_exit;
int group_exit_code; int group_exit_code;
struct task_struct *group_exit_task;
}; };
/* /*
...@@ -552,6 +554,7 @@ extern int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t ...@@ -552,6 +554,7 @@ extern int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t
extern void block_all_signals(int (*notifier)(void *priv), void *priv, extern void block_all_signals(int (*notifier)(void *priv), void *priv,
sigset_t *mask); sigset_t *mask);
extern void unblock_all_signals(void); extern void unblock_all_signals(void);
extern void release_task(struct task_struct * p);
extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int send_sig_info(int, struct siginfo *, struct task_struct *);
extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sig_info(int, struct siginfo *, struct task_struct *);
extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
......
...@@ -49,7 +49,7 @@ static struct dentry * __unhash_process(struct task_struct *p) ...@@ -49,7 +49,7 @@ static struct dentry * __unhash_process(struct task_struct *p)
return proc_dentry; return proc_dentry;
} }
static void release_task(struct task_struct * p) void release_task(struct task_struct * p)
{ {
struct dentry *proc_dentry; struct dentry *proc_dentry;
......
...@@ -633,6 +633,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t ...@@ -633,6 +633,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
atomic_set(&sig->count, 1); atomic_set(&sig->count, 1);
sig->group_exit = 0; sig->group_exit = 0;
sig->group_exit_code = 0; sig->group_exit_code = 0;
sig->group_exit_task = NULL;
memcpy(sig->action, current->sig->action, sizeof(sig->action)); memcpy(sig->action, current->sig->action, sizeof(sig->action));
sig->curr_target = NULL; sig->curr_target = NULL;
init_sigpending(&sig->shared_pending); init_sigpending(&sig->shared_pending);
......
...@@ -273,6 +273,15 @@ void __exit_sighand(struct task_struct *tsk) ...@@ -273,6 +273,15 @@ void __exit_sighand(struct task_struct *tsk)
kmem_cache_free(sigact_cachep, sig); kmem_cache_free(sigact_cachep, sig);
} else { } else {
struct task_struct *leader = tsk->group_leader; struct task_struct *leader = tsk->group_leader;
/*
* If there is any task waiting for the group exit
* then notify it:
*/
if (sig->group_exit_task && atomic_read(&sig->count) <= 2) {
wake_up_process(sig->group_exit_task);
sig->group_exit_task = NULL;
}
/* /*
* If we are the last non-leader member of the thread * If we are the last non-leader member of the thread
* group, and the leader is zombie, then notify the * group, and the leader is zombie, then notify the
...@@ -283,6 +292,7 @@ void __exit_sighand(struct task_struct *tsk) ...@@ -283,6 +292,7 @@ void __exit_sighand(struct task_struct *tsk)
*/ */
if (atomic_read(&sig->count) == 1 && if (atomic_read(&sig->count) == 1 &&
leader->state == TASK_ZOMBIE) { leader->state == TASK_ZOMBIE) {
__remove_thread_group(tsk, sig); __remove_thread_group(tsk, sig);
spin_unlock(&sig->siglock); spin_unlock(&sig->siglock);
do_notify_parent(leader, leader->exit_signal); do_notify_parent(leader, leader->exit_signal);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment