/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/namespace.h>
#include <linux/security.h>
#include <linux/acct.h>
#include <linux/file.h>
#include <linux/binfmts.h>
#include <linux/ptrace.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);

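/*
 * Unhash a task from the pid hash and the task lists, and detach its
 * /proc dentry from the dcache hash.  A reference to the dentry is
 * taken and returned so the caller can shrink and dput it after
 * dropping the tasklist lock; NULL is returned if there is nothing to
 * release.  Both callers hold tasklist_lock for writing.
 */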
static struct dentry * __unhash_process(struct task_struct *p)
{
	struct dentry *proc_dentry;
	nr_threads--;
	unhash_pid(p);
	REMOVE_LINKS(p);
	p->pid = 0;
	proc_dentry = p->proc_dentry;
	if (unlikely(proc_dentry != NULL)) {
		spin_lock(&dcache_lock);
		if (!list_empty(&proc_dentry->d_hash)) {
			dget_locked(proc_dentry);
			list_del_init(&proc_dentry->d_hash);
		} else
			proc_dentry = NULL;
		spin_unlock(&dcache_lock);
	}
	return proc_dentry;
}

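/*
 * Final cleanup of a task that must already be TASK_ZOMBIE: drop its
 * user accounting and security state, detach any leftover ptrace
 * linkage, release its signal handlers, unhash it, prune its /proc
 * dentry, release its architecture-specific thread state and, when we
 * are reaping somebody else, fold its fault/swap counters into ours
 * before dropping the final task_struct reference.
 */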
static void release_task(struct task_struct * p)
{
	struct dentry *proc_dentry;

	if (p->state != TASK_ZOMBIE)
		BUG();
#ifdef CONFIG_SMP
	if (p != current)
		wait_task_inactive(p);
#endif
	atomic_dec(&p->user->processes);
	security_ops->task_free_security(p);
	free_uid(p->user);
	if (unlikely(p->ptrace)) {
		write_lock_irq(&tasklist_lock);
		__ptrace_unlink(p);
		write_unlock_irq(&tasklist_lock);
	}
	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
	write_lock_irq(&tasklist_lock);
	__exit_sighand(p);
	proc_dentry = __unhash_process(p);
	write_unlock_irq(&tasklist_lock);
	if (unlikely(proc_dentry != NULL)) {
		shrink_dcache_parent(proc_dentry);
		dput(proc_dentry);
	}

	release_thread(p);
	if (p != current) {
		current->cmin_flt += p->min_flt + p->cmin_flt;
		current->cmaj_flt += p->maj_flt + p->cmaj_flt;
		current->cnswap += p->nswap + p->cnswap;
		sched_exit(p);
	}
	put_task_struct(p);
}

/* unhash_process() is used only for SMP init. */

void unhash_process(struct task_struct *p)
{
	struct dentry *proc_dentry;

	write_lock_irq(&tasklist_lock);
	proc_dentry = __unhash_process(p);
	write_unlock_irq(&tasklist_lock);

	if (unlikely(proc_dentry != NULL)) {
		shrink_dcache_parent(proc_dentry);
		dput(proc_dentry);
	}
}

/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found: gdb does not work correctly without
 * this fallback.
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int fallback;

	fallback = -1;
	read_lock(&tasklist_lock);
	for_each_task(p) {
 		if (p->session <= 0)
 			continue;
		if (p->pgrp == pgrp) {
			fallback = p->session;
			break;
		}
		if (p->pid == pgrp)
			fallback = p->session;
	}
	read_unlock(&tasklist_lock);
	return fallback;
}

/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int __will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	struct task_struct *p;

	for_each_task(p) {
		if ((p == ignored_task) || (p->pgrp != pgrp) ||
		    (p->state == TASK_ZOMBIE) ||
		    (p->parent->pid == 1))
			continue;
		if ((p->parent->pgrp != pgrp) &&
		    (p->parent->session == p->session)) {
 			return 0;
		}
	}
	return 1;	/* (sighing) "Often!" */
}

static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	int retval;

	read_lock(&tasklist_lock);
	retval = __will_become_orphaned_pgrp(pgrp, ignored_task);
	read_unlock(&tasklist_lock);

	return retval;
}

int is_orphaned_pgrp(int pgrp)
{
	return will_become_orphaned_pgrp(pgrp, 0);
}

static inline int __has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct * p;

	for_each_task(p) {
		if (p->pgrp != pgrp)
			continue;
		if (p->state != TASK_STOPPED)
			continue;
		retval = 1;
		break;
	}
	return retval;
}

static inline int has_stopped_jobs(int pgrp)
{
	int retval;

	read_lock(&tasklist_lock);
	retval = __has_stopped_jobs(pgrp);
	read_unlock(&tasklist_lock);

	return retval;
}

/**
 * reparent_to_init() - Reparent the calling kernel thread to the init task.
 *
 * If a kernel thread is launched as a result of a system call, or if
 * it ever exits, it should generally reparent itself to init so that
 * it is correctly cleaned up on exit.
 *
 * Various pieces of task state, such as the scheduling policy and
 * priority, may have been inherited from a user process, so we reset
 * them to sane values here.
 *
 * NOTE that reparent_to_init() gives the caller full capabilities.
 */
void reparent_to_init(void)
{
	write_lock_irq(&tasklist_lock);

	ptrace_unlink(current);
	/* Reparent to init */
	REMOVE_LINKS(current);
	current->parent = child_reaper;
	current->real_parent = child_reaper;
	SET_LINKS(current);

	/* Set the exit signal to SIGCHLD so we signal init on exit */
	current->exit_signal = SIGCHLD;

	current->ptrace = 0;
	if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
		set_user_nice(current, 0);
	/* cpus_allowed? */
	/* rt_priority? */
	/* signals? */
	security_ops->task_reparent_to_init(current);
	memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
	current->user = INIT_USER;

	write_unlock_irq(&tasklist_lock);
}

/*
 *	Put all the gunge required to become a kernel thread without
 *	attached user resources in one place where it belongs.
 */

void daemonize(void)
{
	struct fs_struct *fs;


	/*
	 * If we were started as a result of loading a module, close all of the
	 * user space pages.  We don't need them, and if we didn't close them
	 * they would be locked into memory.
	 */
	exit_mm(current);

	current->session = 1;
	current->pgrp = 1;
	current->tty = NULL;

	/* Become as one with the init task */

	exit_fs(current);	/* current->fs->count--; */
	fs = init_task.fs;
	current->fs = fs;
	atomic_inc(&fs->count);
 	exit_files(current);
	current->files = init_task.files;
	atomic_inc(&current->files->count);

	reparent_to_init();
}

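/*
 * Point one child at its new real parent.  p->real_parent becomes the
 * reaper (or the global child reaper if that would make the task its
 * own parent), the notification signal is forced back to SIGCHLD
 * unless the thread is detached, self_exec_id is bumped, and a pending
 * pdeath_signal is delivered.  Called with tasklist_lock held for
 * writing.
 */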
static void reparent_thread(task_t *p, task_t *reaper, task_t *child_reaper)
{
	/* We don't want people slaying init. */
	if (p->exit_signal != -1)
		p->exit_signal = SIGCHLD;
	p->self_exec_id++;

	/* Make sure we're not reparenting to ourselves */
	if (p == reaper)
		p->real_parent = child_reaper;
	else
		p->real_parent = reaper;
	if (p->parent == p->real_parent)
		BUG();

	if (p->pdeath_signal)
		send_sig(p->pdeath_signal, p, 0);
}

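/*
 * Close every file still installed in a files_struct: walk the
 * open_fds bitmap one word at a time and filp_close() each descriptor
 * that is still set, clearing the fd table slot as we go.
 */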
static inline void close_files(struct files_struct * files)
{
	int i, j;

	j = 0;
	for (;;) {
		unsigned long set;
		i = j * __NFDBITS;
		if (i >= files->max_fdset || i >= files->max_fds)
			break;
		set = files->open_fds->fds_bits[j++];
		while (set) {
			if (set & 1) {
				struct file * file = xchg(&files->fd[i], NULL);
				if (file)
					filp_close(file, files);
			}
			i++;
			set >>= 1;
		}
	}
}

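/*
 * Drop one reference to a files_struct.  On the final put, close all
 * remaining files and free the fd array and fdset bitmaps if they were
 * expanded beyond the defaults embedded in the structure.
 */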
void put_files_struct(struct files_struct *files)
{
	if (atomic_dec_and_test(&files->count)) {
		close_files(files);
		/*
		 * Free the fd and fdset arrays if we expanded them.
		 */
		if (files->fd != &files->fd_array[0])
			free_fd_array(files->fd, files->max_fds);
		if (files->max_fdset > __FD_SETSIZE) {
			free_fdset(files->open_fds, files->max_fdset);
			free_fdset(files->close_on_exec, files->max_fdset);
		}
		kmem_cache_free(files_cachep, files);
	}
}

static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (files) {
		task_lock(tsk);
		tsk->files = NULL;
		task_unlock(tsk);
		put_files_struct(files);
	}
}

void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}

static inline void __put_fs_struct(struct fs_struct *fs)
{
	/* No need to hold fs->lock if we are killing it */
	if (atomic_dec_and_test(&fs->count)) {
		dput(fs->root);
		mntput(fs->rootmnt);
		dput(fs->pwd);
		mntput(fs->pwdmnt);
		if (fs->altroot) {
			dput(fs->altroot);
			mntput(fs->altrootmnt);
		}
		kmem_cache_free(fs_cachep, fs);
	}
}

void put_fs_struct(struct fs_struct *fs)
{
	__put_fs_struct(fs);
}

static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct * fs = tsk->fs;

	if (fs) {
		task_lock(tsk);
		tsk->fs = NULL;
		task_unlock(tsk);
		__put_fs_struct(fs);
	}
}

void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}

/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
	struct mm_struct *mm = current->mm;
	current->mm = NULL;
	/* active_mm is still 'mm' */
	atomic_inc(&mm->mm_count);
	enter_lazy_tlb(mm, current, smp_processor_id());
	return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
	struct mm_struct *active_mm = current->active_mm;

	current->mm = mm;
	if (mm != active_mm) {
		current->active_mm = mm;
		activate_mm(active_mm, mm);
	}
	mmdrop(active_mm);
}

/*
 * Turn us into a lazy TLB process if we
 * aren't already.
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	mm_release();
	if (mm) {
		atomic_inc(&mm->mm_count);
		if (mm != tsk->active_mm) BUG();
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		enter_lazy_tlb(mm, current, smp_processor_id());
		task_unlock(tsk);
		mmput(mm);
	}
}

void exit_mm(struct task_struct *tsk)
{
	__exit_mm(tsk);
}

/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our thread
 * group, and if no such member exists, give them to
 * the global child reaper process (i.e. "init").
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct *p, *reaper = father;
	struct list_head *_p;

	if (father->exit_signal != -1)
		reaper = prev_thread(reaper);
	else
		reaper = child_reaper;

	if (reaper == father)
		reaper = child_reaper;

	/*
	 * There are only two places where our children can be:
	 *
	 * - in our child list
	 * - in our ptraced child list
	 *
	 * Search them and reparent children.
	 */
	list_for_each(_p, &father->children) {
		p = list_entry(_p,struct task_struct,sibling);
		reparent_thread(p, reaper, child_reaper);
	}
	list_for_each(_p, &father->ptrace_children) {
		p = list_entry(_p,struct task_struct,ptrace_list);
		reparent_thread(p, reaper, child_reaper);
	}
}

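/*
 * Detach one child from the exiting task and hang it off its new
 * parent.  If the child is being traced by somebody else, the ptrace
 * linkage is rebuilt so that tracer keeps it; otherwise any tracing by
 * us is dropped and the child is moved onto its real parent's list of
 * children.  A child that is already a zombie (and not detached) is
 * re-notified, and the "case ii" orphaned process group check is
 * performed.  Called with tasklist_lock held for writing.
 */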
static inline void zap_thread(task_t *p, task_t *father, int traced)
{
	/* If someone else is tracing this thread, preserve the ptrace links.  */
	if (unlikely(traced)) {
		task_t *trace_task = p->parent;
		int ptrace_flag = p->ptrace;
		BUG_ON (ptrace_flag == 0);

		__ptrace_unlink(p);
		p->ptrace = ptrace_flag;
		__ptrace_link(p, trace_task);
	} else {
		/* Otherwise, if we were tracing this thread, untrace it.  */
		ptrace_unlink (p);

		list_del_init(&p->sibling);
		p->parent = p->real_parent;
		list_add_tail(&p->sibling, &p->parent->children);
	}

	if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
		do_notify_parent(p, p->exit_signal);
	/*
	 * process group orphan check
	 * Case ii: Our child is in a different pgrp
	 * than we are, and it was the only connection
	 * outside, so the child pgrp is now orphaned.
	 */
	if ((p->pgrp != current->pgrp) &&
	    (p->session == current->session)) {
		int pgrp = p->pgrp;

		if (__will_become_orphaned_pgrp(pgrp, 0) && __has_stopped_jobs(pgrp)) {
			__kill_pg_info(SIGHUP, (void *)1, pgrp);
			__kill_pg_info(SIGCONT, (void *)1, pgrp);
		}
	}
}

/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us.
 */
static void exit_notify(void)
{
	struct task_struct *t;

	write_lock_irq(&tasklist_lock);

	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */
	 
	t = current->parent;
	
	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    __will_become_orphaned_pgrp(current->pgrp, current) &&
	    __has_stopped_jobs(current->pgrp)) {
		__kill_pg_info(SIGHUP, (void *)1, current->pgrp);
		__kill_pg_info(SIGCONT, (void *)1, current->pgrp);
	}

	/* Let father know we died.
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes; that stops right
	 * now.
	 *
	 * If the parent's exec id doesn't match the exec id we saved
	 * when we started, then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec_id doesn't match our parent_exec_id, then
	 * we have changed execution domain, as these two values started
	 * out the same after a fork.
	 */
	
	if (current->exit_signal != SIGCHLD && current->exit_signal != -1 &&
	    ( current->parent_exec_id != t->self_exec_id  ||
	      current->self_exec_id != current->parent_exec_id) 
	    && !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;


	/*
	 * The zap_thread() loops below do two things:
	 *
	 * A.  Move every remaining child over to the new parent chosen
	 *     by forget_original_parent() above.
	 * B.  Check to see if any process groups have become orphaned
	 *     as a result of our exiting, and if they have any stopped
	 *     jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	current->state = TASK_ZOMBIE;
	if (current->exit_signal != -1)
		do_notify_parent(current, current->exit_signal);

	while (!list_empty(&current->children))
		zap_thread(list_entry(current->children.next,struct task_struct,sibling), current, 0);
	while (!list_empty(&current->ptrace_children))
		zap_thread(list_entry(current->ptrace_children.next,struct task_struct,ptrace_list), current, 1);
	BUG_ON(!list_empty(&current->children));
	/*
	 * No need to unlock IRQs, we'll schedule() immediately
	 * anyway. In the preemption case this also makes it
	 * impossible for the task to get runnable again (thus
	 * the "_raw_" unlock - to make sure we don't try to
	 * preempt here).
	 */
	_raw_write_unlock(&tasklist_lock);
}

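/*
 * do_exit() is the heart of process termination: it releases the
 * address space, semaphore undo state, open files, fs and namespace
 * information and the thread's architecture state, detaches from the
 * controlling tty if we are the session leader, tells our relatives
 * about our demise via exit_notify(), reaps ourselves if nobody is
 * going to wait for us, and finally schedules away for good.
 */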
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())
		panic("Aiee, killing interrupt handler!");
	if (!tsk->pid)
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

	if (unlikely(preempt_count()))
		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
				current->comm, current->pid,
				preempt_count());

fake_volatile:
	acct_process(code);
	__exit_mm(tsk);

	sem_exit();
	__exit_files(tsk);
	__exit_fs(tsk);
	exit_namespace(tsk);
	exit_thread();

	if (current->leader)
		disassociate_ctty(1);

	put_exec_domain(tsk->thread_info->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	exit_notify();
	preempt_disable();
	if (current->exit_signal == -1)
		release_task(current);
	schedule();
	BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc into thinking do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state == TASK_ZOMBIE, schedule() never
 * returns.
 *
 * The natural way to do all this is to have the label and the goto
 * right after each other, but I put the fake_volatile label earlier in
 * the function just in case something /really/ bad happens and
 * schedule() returns. This way we can try again. I'm not paranoid:
 * it's just that everybody is out to get me.
 */
	goto fake_volatile;
}

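/*
 * Complete @comp (if any) and then exit.  Typically used by kernel
 * threads whose code lives in a module: because both the completion
 * and the exit happen from core kernel code, the module can safely be
 * unloaded once the completion fires.
 *
 * Illustrative use from such a thread (the names below are examples
 * only, not code elsewhere in this file):
 *
 *	static DECLARE_COMPLETION(my_thread_done);
 *	...
 *	complete_and_exit(&my_thread_done, 0);
 */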
NORET_TYPE void complete_and_exit(struct completion *comp, long code)
{
	if (comp)
		complete(comp);
	
	do_exit(code);
}

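/*
 * exit() system call: only the low 8 bits of error_code are kept, and
 * they are shifted into the position wait() status decoding expects.
 */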
asmlinkage long sys_exit(int error_code)
{
	do_exit((error_code&0xff)<<8);
}

/*
 * This kills every thread in the thread group. Note that any externally
 * wait4()-ing process will get the correct exit code - even if this
 * thread is not the thread group leader.
 */
asmlinkage long sys_exit_group(int error_code)
{
	struct signal_struct *sig = current->sig;

	spin_lock_irq(&sig->siglock);
	if (sig->group_exit) {
		spin_unlock_irq(&sig->siglock);

		/* another thread was faster: */
		do_exit(sig->group_exit_code);
	}
	sig->group_exit = 1;
	sig->group_exit_code = (error_code & 0xff) << 8;
	__broadcast_thread_group(current, SIGKILL);
	spin_unlock_irq(&sig->siglock);

	do_exit(sig->group_exit_code);
}

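/*
 * Decide whether a child is of interest to this wait4() call.  The pid
 * argument follows the usual waitpid() convention: > 0 selects exactly
 * that pid, 0 selects the caller's process group, -1 selects any
 * child, and < -1 selects the process group -pid.  Detached threads
 * that are not being ptraced are never eligible, __WCLONE/__WALL
 * select between children that report with SIGCHLD and those that do
 * not, and finally the security hook gets a veto.
 */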
static inline int eligible_child(pid_t pid, int options, task_t *p)
{
	if (pid>0) {
		if (p->pid != pid)
			return 0;
	} else if (!pid) {
		if (p->pgrp != current->pgrp)
			return 0;
	} else if (pid != -1) {
		if (p->pgrp != -pid)
			return 0;
	}

	/*
	 * Do not consider detached threads that are
	 * not ptraced:
	 */
	if (p->exit_signal == -1 && !p->ptrace)
		return 0;

	/* Wait for all children (clone and not) if __WALL is set;
	 * otherwise, wait for clone children *only* if __WCLONE is
	 * set; otherwise, wait for non-clone children *only*.  (Note:
	 * A "clone" child here is one that reports to its parent
	 * using a signal other than SIGCHLD.) */
	if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
	    && !(options & __WALL))
		return 0;

	if (security_ops->task_wait(p))
		return 0;

	return 1;
}

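/*
 * The wait4() system call.  Scan the children (and ptraced children)
 * of every thread in our thread group (only the caller's if
 * __WNOTHREAD is set) for one that matches pid and options: report
 * stopped children, reap zombies, and otherwise sleep on wait_chldexit
 * until a child changes state, unless WNOHANG was given.
 */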
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit,&wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		struct list_head *_p;
		list_for_each(_p,&tsk->children) {
			p = list_entry(_p,struct task_struct,sibling);
			if (!eligible_child(pid, options, p))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);

				/* move to end of parent's list to avoid starvation */
				write_lock_irq(&tasklist_lock);
				remove_parent(p);
				add_parent(p, p->parent);
				write_unlock_irq(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 
				if (!retval && stat_addr) 
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->cutime += p->utime + p->cutime;
				current->cstime += p->stime + p->cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4; 
				retval = p->pid;
				if (p->real_parent != p->parent) {
					write_lock_irq(&tasklist_lock);
					__ptrace_unlink(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		if (!flag) {
			list_for_each (_p,&tsk->ptrace_children) {
				p = list_entry(_p,struct task_struct,ptrace_list);
				if (!eligible_child(pid, options, p))
					continue;
				flag = 1;
				break;
			}
		}
		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
		if (tsk->sig != current->sig)
			BUG();
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit,&wait);
	return retval;
}

#if !defined(__alpha__) && !defined(__ia64__) && !defined(__arm__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}

#endif