Commit 92307383 authored by Eric W. Biederman

coredump: Don't perform any cleanups before dumping core

Rename coredump_exit_mm to coredump_task_exit and call it from do_exit
before PTRACE_EVENT_EXIT, and before any cleanup work for a task
happens.  This ensures that an accurate copy of the process can be
captured in the coredump as no cleanup for the process happens before
the coredump completes.  This also ensures that PTRACE_EVENT_EXIT
will not be visited by any thread until the coredump is complete.
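
With this change, the relevant part of do_exit() is ordered as follows
(a minimal sketch, abridged from the diff below; unrelated lines elided):

	void __noreturn do_exit(long code)
	{
		struct task_struct *tsk = current;
		...
		/* Dump core while the task is still fully intact. */
		coredump_task_exit(tsk);
		/* The tracer sees the exit only after the dump completes. */
		ptrace_event(PTRACE_EVENT_EXIT, code);
		...
	}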

Add a new flag PF_POSTCOREDUMP so that tasks that have passed through
coredump_task_exit can be recognized and ignored in zap_process.
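
In sketch form (condensed from the hunks below), the exiting thread
marks itself under mmap_lock, and zap_process() skips any thread so
marked instead of testing ->mm:

	/* In coredump_task_exit(), on the exit path: */
	mmap_read_lock(mm);
	tsk->flags |= PF_POSTCOREDUMP;	/* no longer worth dumping */
	core_state = mm->core_state;	/* dump already in progress? */
	mmap_read_unlock(mm);

	/* In zap_process(), run by the core-inducing thread: */
	for_each_thread(start, t) {
		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
		if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
			sigaddset(&t->pending.signal, SIGKILL);
			signal_wake_up(t, 1);
			nr++;
		}
	}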

Now that all of the coredumping happens before exit_mm, remove the
code that tests for a coredump in progress from mm_release.
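
With the SIGNAL_GROUP_COREDUMP test gone, the clear_child_tid handling
in mm_release() reduces to a single mm_users check (abridged sketch):

	if (tsk->clear_child_tid) {
		if (atomic_read(&mm->mm_users) > 1) {
			/* Wake any waiter on the child-tid futex;
			 * errors are deliberately ignored. */
			put_user(0, tsk->clear_child_tid);
			do_futex(tsk->clear_child_tid, FUTEX_WAKE,
				 1, NULL, NULL, 0, 0);
		}
		tsk->clear_child_tid = NULL;
	}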

Replace "may_ptrace_stop()" with a simple test of "current->ptrace".
The other tests in may_ptrace_stop all concern avoiding stopping
during a coredump.  These tests are no longer necessary as it is now
guaranteed that fatal_signal_pending will be set if the code enters
ptrace_stop during a coredump.  The code in ptrace_stop is guaranteed
not to stop if fatal_signal_pending returns true.
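
For reference, that guarantee comes from schedule(): TASK_TRACED
includes TASK_WAKEKILL, so a task entering ptrace_stop() will not
block while a fatal signal is pending.  The check lives in
signal_pending_state() (quoted from memory, so treat as illustrative):

	static inline int signal_pending_state(unsigned int state,
					       struct task_struct *p)
	{
		if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
			return 0;
		if (!signal_pending(p))
			return 0;

		/* Wakeable by any signal, or only by a fatal one. */
		return (state & TASK_INTERRUPTIBLE) ||
			__fatal_signal_pending(p);
	}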

Until this change "ptrace_event(PTRACE_EVENT_EXIT)" could call
ptrace_stop without fatal_signal_pending being true, as signals are
dequeued in get_signal before calling do_exit.  This is no longer
an issue as "ptrace_event(PTRACE_EVENT_EXIT)" is no longer reached
until after the coredump completes.
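
The dequeue happens in get_signal(), so by the time the task reaches
do_exit() the fatal signal is no longer pending (a rough sketch of the
path, not the verbatim code):

	/* get_signal(), abridged: */
	signr = dequeue_signal(current, &current->blocked, &ksig->info);
	...
	if (sig_kernel_coredump(signr))
		do_coredump(&ksig->info);	/* if the signal dumps core */
	...
	do_group_exit(ksig->info.si_signo);	/* -> do_exit() */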

Link: https://lkml.kernel.org/r/874kaax26c.fsf@disp2133
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
parent d67e03e3
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -359,7 +359,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
 
 	for_each_thread(start, t) {
 		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-		if (t != current && t->mm) {
+		if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
 			nr++;
@@ -404,8 +404,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	 *
 	 * do_exit:
 	 *	The caller holds mm->mmap_lock. This means that the task which
-	 *	uses this mm can't pass coredump_exit_mm(), so it can't exit or
-	 *	clear its ->mm.
+	 *	uses this mm can't pass coredump_task_exit(), so it can't exit
+	 *	or clear its ->mm.
 	 *
 	 * de_thread:
 	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
@@ -500,7 +500,7 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
 		next = curr->next;
 		task = curr->task;
 		/*
-		 * see coredump_exit_mm(), curr->task must not see
+		 * see coredump_task_exit(), curr->task must not see
 		 * ->task == NULL before we read ->next.
 		 */
 		smp_mb();
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1664,6 +1664,7 @@ extern struct pid *cad_pid;
 #define PF_VCPU			0x00000001	/* I'm a virtual CPU */
 #define PF_IDLE			0x00000002	/* I am an IDLE thread */
 #define PF_EXITING		0x00000004	/* Getting shut down */
+#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
 #define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -339,23 +339,29 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-static void coredump_exit_mm(struct mm_struct *mm)
+static void coredump_task_exit(struct task_struct *tsk)
 {
 	struct core_state *core_state;
+	struct mm_struct *mm;
+
+	mm = tsk->mm;
+	if (!mm)
+		return;
 
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_lock around checking core_state
-	 * and clearing tsk->mm.  The core-inducing thread
+	 * and setting PF_POSTCOREDUMP.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
-	 * group with ->mm != NULL.
+	 * group without PF_POSTCOREDUMP set.
 	 */
+	mmap_read_lock(mm);
+	tsk->flags |= PF_POSTCOREDUMP;
 	core_state = mm->core_state;
+	mmap_read_unlock(mm);
 	if (core_state) {
 		struct core_thread self;
 
-		mmap_read_unlock(mm);
-
 		self.task = current;
 		if (self.task->flags & PF_SIGNALED)
 			self.next = xchg(&core_state->dumper.next, &self);
@@ -375,7 +381,6 @@ static void coredump_exit_mm(struct mm_struct *mm)
 			freezable_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
-		mmap_read_lock(mm);
 	}
 }
 
@@ -480,7 +485,6 @@ static void exit_mm(void)
 		return;
 	sync_mm_rss(mm);
 	mmap_read_lock(mm);
-	coredump_exit_mm(mm);
 	mmgrab(mm);
 	BUG_ON(mm != current->active_mm);
 	/* more a memory barrier than a real lock */
@@ -768,6 +772,7 @@ void __noreturn do_exit(long code)
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
+	coredump_task_exit(tsk);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
 
 	validate_creds_for_do_exit(tsk);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1392,8 +1392,7 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
-		    atomic_read(&mm->mm_users) > 1) {
+		if (atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
 			 * not set up a proper pointer then tough luck.
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2158,31 +2158,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	spin_unlock_irqrestore(&sighand->siglock, flags);
 }
 
-static inline bool may_ptrace_stop(void)
-{
-	if (!likely(current->ptrace))
-		return false;
-	/*
-	 * Are we in the middle of do_coredump?
-	 * If so and our tracer is also part of the coredump stopping
-	 * is a deadlock situation, and pointless because our tracer
-	 * is dead so don't allow us to stop.
-	 * If SIGKILL was already sent before the caller unlocked
-	 * ->siglock we must see ->core_state != NULL. Otherwise it
-	 * is safe to enter schedule().
-	 *
-	 * This is almost outdated, a task with the pending SIGKILL can't
-	 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
-	 * after SIGKILL was already dequeued.
-	 */
-	if (unlikely(current->mm->core_state) &&
-	    unlikely(current->mm == current->parent->mm))
-		return false;
-
-	return true;
-}
-
 /*
  * This must be called with current->sighand->siglock held.
  *
@@ -2263,7 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
-	if (may_ptrace_stop()) {
+	if (likely(current->ptrace)) {
 		/*
 		 * Notify parents of the stop.
 		 *
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -788,7 +788,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
 
 	/*
 	 * A coredumping process may sleep for an extended period in
-	 * coredump_exit_mm(), so the oom killer cannot assume that
+	 * coredump_task_exit(), so the oom killer cannot assume that
 	 * the process will promptly exit and release memory.
 	 */
 	if (sig->flags & SIGNAL_GROUP_COREDUMP)