[PATCH] make RLIMIT_CPU/SIGXCPU per-process

POSIX requires that the RLIMIT_CPU resource limit that generates SIGXCPU be counted on a per-process basis. Currently, Linux implements this for individual threads. This patch fixes the semantics to conform with POSIX. The essential machinery for the process CPU limit is tied into the new posix-timers code for process CPU clocks and timers. Signed-off-by: Roland McGrath <roland@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>

[PATCH] make RLIMIT_CPU/SIGXCPU per-process
POSIX requires that the RLIMIT_CPU resource limit that generates SIGXCPU be counted on a per-process basis. Currently, Linux implements this for individual threads. This patch fixes the semantics to conform with POSIX. The essential machinery for the process CPU limit is tied into the new posix-timers code for process CPU clocks and timers. Signed-off-by: Roland McGrath <roland@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
9046dd43 · Roland McGrath · Linus Torvalds · d80d30ff · 9046dd43 · 9046dd43
Commit 9046dd43 authored Mar 07, 2005 by Roland McGrath Committed by Linus Torvalds Mar 07, 2005
5 changed files
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -768,6 +768,15 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);

+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		/*
+		 * New sole thread in the process gets an expiry time
+		 * of the whole CPU time limit.
+		 */
+		tsk->it_prof_expires =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+	}
+
 	return 0;
 }

@@ -1032,6 +1041,7 @@ static task_t *copy_process(unsigned long clone_flags,
 				cputime_zero) ||
 		    !cputime_eq(current->signal->it_prof_expires,
 				cputime_zero) ||
+		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
 		    !list_empty(&current->signal->cpu_timers[0]) ||
 		    !list_empty(&current->signal->cpu_timers[1]) ||
 		    !list_empty(&current->signal->cpu_timers[2])) {

--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -549,6 +549,7 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 	struct list_head *head, *listpos;
 	struct cpu_timer_list *const nt = &timer->it.cpu;
 	struct cpu_timer_list *next;
+	unsigned long i;

 	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 		p->cpu_timers : p->signal->cpu_timers);
@@ -619,6 +620,10 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				    cputime_lt(p->signal->it_prof_expires,
 					       timer->it.cpu.expires.cpu))
 					break;
+				i = p->signal->rlim[RLIMIT_CPU].rlim_cur;
+				if (i != RLIM_INFINITY &&
+				    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
+					break;
 				goto rebalance;
 			case CPUCLOCK_SCHED:
 			rebalance:
@@ -990,6 +995,7 @@ static void check_process_timers(struct task_struct *tsk,
 	 */
 	if (list_empty(&timers[CPUCLOCK_PROF]) &&
 	    cputime_eq(sig->it_prof_expires, cputime_zero) &&
+	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
 	    list_empty(&timers[CPUCLOCK_VIRT]) &&
 	    cputime_eq(sig->it_virt_expires, cputime_zero) &&
 	    list_empty(&timers[CPUCLOCK_SCHED]))
@@ -1086,6 +1092,33 @@ static void check_process_timers(struct task_struct *tsk,
 			virt_expires = sig->it_virt_expires;
 		}
 	}
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		unsigned long psecs = cputime_to_secs(ptime);
+		cputime_t x;
+		if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
+			/*
+			 * At the hard limit, we just die.
+			 * No need to calculate anything else now.
+			 */
+			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+			return;
+		}
+		if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
+			/*
+			 * At the soft limit, send a SIGXCPU every second.
+			 */
+			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+			if (sig->rlim[RLIMIT_CPU].rlim_cur
+			    < sig->rlim[RLIMIT_CPU].rlim_max) {
+				sig->rlim[RLIMIT_CPU].rlim_cur++;
+			}
+		}
+		x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		if (cputime_eq(prof_expires, cputime_zero) ||
+		    cputime_lt(x, prof_expires)) {
+			prof_expires = x;
+		}
+	}

 	if (!cputime_eq(prof_expires, cputime_zero) ||
 	    !cputime_eq(virt_expires, cputime_zero) ||
@@ -1275,6 +1308,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 /*
 * Set one of the process-wide special case CPU timers.
 * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
+ * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
+ * absolute; non-null for ITIMER_*, where *newval is relative and we update
+ * it to be absolute, *oldval is absolute and we update it to be relative.
 */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
@@ -1285,17 +1321,28 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
 	cpu_clock_sample_group_locked(clock_idx, tsk, &now);

-	if (oldval && !cputime_eq(*oldval, cputime_zero)) {
-		if (cputime_le(*oldval, now.cpu)) { /* Just about to fire. */
-			*oldval = jiffies_to_cputime(1);
-		} else {
-			*oldval = cputime_sub(*oldval, now.cpu);
+	if (oldval) {
+		if (!cputime_eq(*oldval, cputime_zero)) {
+			if (cputime_le(*oldval, now.cpu)) {
+				/* Just about to fire. */
+				*oldval = jiffies_to_cputime(1);
+			} else {
+				*oldval = cputime_sub(*oldval, now.cpu);
+			}
 		}
-	}

-	if (cputime_eq(*newval, cputime_zero))
-		return;
-	*newval = cputime_add(*newval, now.cpu);
+		if (cputime_eq(*newval, cputime_zero))
+			return;
+		*newval = cputime_add(*newval, now.cpu);
+
+		/*
+		 * If the RLIMIT_CPU timer will expire before the
+		 * ITIMER_PROF timer, we have nothing else to do.
+		 */
+		if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
+		    < cputime_to_secs(*newval))
+			return;
+	}

 	/*
 	 * Check whether there are any process timers already set to fire

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2283,30 +2283,6 @@ unsigned long long current_sched_time(const task_t *tsk)
 			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
 			((rq)->curr->static_prio > (rq)->best_expired_prio))

-/*
- * Check if the process went over its cputime resource limit after
- * some cpu time got added to utime/stime.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user and kernel space since the last update
- */
-static void check_rlimit(struct task_struct *p, cputime_t cputime)
-{
-	cputime_t total, tmp;
-	unsigned long secs;
-
-	total = cputime_add(p->utime, p->stime);
-	secs = cputime_to_secs(total);
-	if (unlikely(secs >= p->signal->rlim[RLIMIT_CPU].rlim_cur)) {
-		/* Send SIGXCPU every second. */
-		tmp = cputime_sub(total, cputime);
-		if (cputime_to_secs(tmp) < secs)
-			send_sig(SIGXCPU, p, 1);
-		/* and SIGKILL when we go over max.. */
-		if (secs >= p->signal->rlim[RLIMIT_CPU].rlim_max)
-			send_sig(SIGKILL, p, 1);
-	}
-}
-
 /*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
@@ -2320,9 +2296,6 @@ void account_user_time(struct task_struct *p, cputime_t cputime)

 	p->utime = cputime_add(p->utime, cputime);

-	/* Check for signals (SIGXCPU & SIGKILL). */
-	check_rlimit(p, cputime);
-
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
 	if (TASK_NICE(p) > 0)
@@ -2346,11 +2319,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,

 	p->stime = cputime_add(p->stime, cputime);

-	/* Check for signals (SIGXCPU & SIGKILL). */
-	if (likely(p->signal && p->exit_state < EXIT_ZOMBIE)) {
-		check_rlimit(p, cputime);
-	}
-
 	/* Add system time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
 	if (hardirq_count() - hardirq_offset)

--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <linux/key.h>
 #include <linux/times.h>
+#include <linux/posix-timers.h>
 #include <linux/security.h>
 #include <linux/dcookies.h>
 #include <linux/suspend.h>
@@ -1502,6 +1503,20 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 	task_lock(current->group_leader);
 	*old_rlim = new_rlim;
 	task_unlock(current->group_leader);
+
+	if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY &&
+	    (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
+	     new_rlim.rlim_cur <= cputime_to_secs(
+		     current->signal->it_prof_expires))) {
+		cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
+		read_lock(&tasklist_lock);
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF,
+				      &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+		read_unlock(&tasklist_lock);
+	}
+
 	return 0;
 }


--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1856,6 +1856,13 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 			initrlim = init_task.signal->rlim+i;
 			rlim->rlim_cur = min(rlim->rlim_max,initrlim->rlim_cur);
 		}
+		if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+			/*
+			 * This will cause RLIMIT_CPU calculations
+			 * to be refigured.
+			 */
+			current->it_prof_expires = jiffies_to_cputime(1);
+		}
 	}

 	/* Wake up the parent if it is waiting so that it can