Commit eea96732 authored by Eric W. Biederman's avatar Eric W. Biederman

exec: Add exec_update_mutex to replace cred_guard_mutex

The cred_guard_mutex is problematic as it is held over possibly
indefinite waits for userspace.  The possible indefinite waits for
userspace that I have identified are: The cred_guard_mutex is held in
PTRACE_EVENT_EXIT waiting for the tracer.  The cred_guard_mutex is
held over "put_user(0, tsk->clear_child_tid)" in exit_mm().  The
cred_guard_mutex is held over "get_user(futex_offset, ...")  in
exit_robust_list.  The cred_guard_mutex held over copy_strings.

The functions get_user and put_user can trigger a page fault which can
potentially wait indefinitely in the case of userfaultfd or if
userspace implements part of the page fault path.

In any of those cases the userspace process that the kernel is waiting
for might make a different system call that winds up taking the
cred_guard_mutex and result in deadlock.

Holding a mutex over any of those possibly indefinite waits for
userspace does not appear necessary.  Add exec_update_mutex that will
just cover updating the process during exec where the permissions and
the objects pointed to by the task struct may be out of sync.

The plan is to switch the users of cred_guard_mutex to
exec_update_mutex one by one.  This lets us move forward while still
being careful and not introducing any regressions.

Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/
Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/
Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/
Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/
Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/
Ref: 45c1a159 ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.")
Ref: 456f17cd ("[PATCH] user-vm-unlock-2.5.31-A2")
Reviewed-by: default avatarKirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: default avatar"Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: default avatarBernd Edlinger <bernd.edlinger@hotmail.de>
Signed-off-by: default avatarEric W. Biederman <ebiederm@xmission.com>
parent ccf0fa6b
...@@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) ...@@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
} }
EXPORT_SYMBOL(read_code); EXPORT_SYMBOL(read_code);
/*
* Maps the mm_struct mm into the current task struct.
* On success, this function returns with the mutex
* exec_update_mutex locked.
*/
static int exec_mmap(struct mm_struct *mm) static int exec_mmap(struct mm_struct *mm)
{ {
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *old_mm, *active_mm; struct mm_struct *old_mm, *active_mm;
int ret;
/* Notify parent that we're no longer interested in the old VM */ /* Notify parent that we're no longer interested in the old VM */
tsk = current; tsk = current;
old_mm = current->mm; old_mm = current->mm;
exec_mm_release(tsk, old_mm); exec_mm_release(tsk, old_mm);
ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
if (ret)
return ret;
if (old_mm) { if (old_mm) {
sync_mm_rss(old_mm); sync_mm_rss(old_mm);
/* /*
...@@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm) ...@@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm)
down_read(&old_mm->mmap_sem); down_read(&old_mm->mmap_sem);
if (unlikely(old_mm->core_state)) { if (unlikely(old_mm->core_state)) {
up_read(&old_mm->mmap_sem); up_read(&old_mm->mmap_sem);
mutex_unlock(&tsk->signal->exec_update_mutex);
return -EINTR; return -EINTR;
} }
} }
task_lock(tsk); task_lock(tsk);
active_mm = tsk->active_mm; active_mm = tsk->active_mm;
membarrier_exec_mmap(mm); membarrier_exec_mmap(mm);
...@@ -1288,11 +1300,12 @@ int flush_old_exec(struct linux_binprm * bprm) ...@@ -1288,11 +1300,12 @@ int flush_old_exec(struct linux_binprm * bprm)
goto out; goto out;
/* /*
* After clearing bprm->mm (to mark that current is using the * After setting bprm->called_exec_mmap (to mark that current is
* prepared mm now), we have nothing left of the original * using the prepared mm now), we have nothing left of the original
* process. If anything from here on returns an error, the check * process. If anything from here on returns an error, the check
* in search_binary_handler() will SEGV current. * in search_binary_handler() will SEGV current.
*/ */
bprm->called_exec_mmap = 1;
bprm->mm = NULL; bprm->mm = NULL;
#ifdef CONFIG_POSIX_TIMERS #ifdef CONFIG_POSIX_TIMERS
...@@ -1438,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm) ...@@ -1438,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm)
{ {
free_arg_pages(bprm); free_arg_pages(bprm);
if (bprm->cred) { if (bprm->cred) {
if (bprm->called_exec_mmap)
mutex_unlock(&current->signal->exec_update_mutex);
mutex_unlock(&current->signal->cred_guard_mutex); mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred); abort_creds(bprm->cred);
} }
...@@ -1487,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm) ...@@ -1487,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm)
* credentials; any time after this it may be unlocked. * credentials; any time after this it may be unlocked.
*/ */
security_bprm_committed_creds(bprm); security_bprm_committed_creds(bprm);
mutex_unlock(&current->signal->exec_update_mutex);
mutex_unlock(&current->signal->cred_guard_mutex); mutex_unlock(&current->signal->cred_guard_mutex);
} }
EXPORT_SYMBOL(install_exec_creds); EXPORT_SYMBOL(install_exec_creds);
...@@ -1678,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm) ...@@ -1678,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm)
read_lock(&binfmt_lock); read_lock(&binfmt_lock);
put_binfmt(fmt); put_binfmt(fmt);
if (retval < 0 && !bprm->mm) { if (retval < 0 && bprm->called_exec_mmap) {
/* we got to flush_old_exec() and failed after it */ /* we got to flush_old_exec() and failed after it */
read_unlock(&binfmt_lock); read_unlock(&binfmt_lock);
force_sigsegv(SIGSEGV); force_sigsegv(SIGSEGV);
......
...@@ -44,7 +44,13 @@ struct linux_binprm { ...@@ -44,7 +44,13 @@ struct linux_binprm {
* exec has happened. Used to sanitize execution environment * exec has happened. Used to sanitize execution environment
* and to set AT_SECURE auxv for glibc. * and to set AT_SECURE auxv for glibc.
*/ */
secureexec:1; secureexec:1,
/*
* Set by flush_old_exec, when exec_mmap has been called.
* This is past the point of no return, when the
* exec_update_mutex has been taken.
*/
called_exec_mmap:1;
#ifdef __alpha__ #ifdef __alpha__
unsigned int taso:1; unsigned int taso:1;
#endif #endif
......
...@@ -224,7 +224,14 @@ struct signal_struct { ...@@ -224,7 +224,14 @@ struct signal_struct {
struct mutex cred_guard_mutex; /* guard against foreign influences on struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations * credential calculations
* (notably. ptrace) */ * (notably. ptrace)
* Deprecated do not use in new code.
* Use exec_update_mutex instead.
*/
struct mutex exec_update_mutex; /* Held while task_struct is being
* updated during exec, and may have
* inconsistent permissions.
*/
} __randomize_layout; } __randomize_layout;
/* /*
......
...@@ -26,6 +26,7 @@ static struct signal_struct init_signals = { ...@@ -26,6 +26,7 @@ static struct signal_struct init_signals = {
.multiprocess = HLIST_HEAD_INIT, .multiprocess = HLIST_HEAD_INIT,
.rlim = INIT_RLIMITS, .rlim = INIT_RLIMITS,
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
.exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex),
#ifdef CONFIG_POSIX_TIMERS #ifdef CONFIG_POSIX_TIMERS
.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
.cputimer = { .cputimer = {
......
...@@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ...@@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->oom_score_adj_min = current->signal->oom_score_adj_min; sig->oom_score_adj_min = current->signal->oom_score_adj_min;
mutex_init(&sig->cred_guard_mutex); mutex_init(&sig->cred_guard_mutex);
mutex_init(&sig->exec_update_mutex);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment