Commit 490dea45 authored by Peter Zijlstra, committed by Ingo Molnar

itimers: remove the per-cpu-ish-ness

Either we bounce one cacheline per cpu per tick, yielding n^2 bounces
(each of the n CPUs that sums the group totals on its tick has to pull
in all n per-cpu cachelines), or we just bounce a single one.

Also, using per-cpu allocations for the thread-groups complicates the
per-cpu allocator in that it is currently aimed to be a fixed-size
allocator and the only possible extension to that would be vmap-based,
which is seriously constrained on 32-bit archs.

So making the per-cpu memory requirement depend on the number of
processes is an issue.

Lastly, it didn't deal with cpu-hotplug, although admittedly that might
be fixable.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent ede6f5ae
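
The core of the change: the group totals move from a per-cpu allocation
that readers had to sum with for_each_possible_cpu() into a single
task_cputime protected by one spinlock. Here is a minimal userspace
sketch of the new scheme, using pthread spinlocks as a stand-in for the
kernel's spinlock_t; all names in it are illustrative, not kernel API:

	#include <pthread.h>
	#include <stdio.h>

	/* Userspace analogue of the patched struct task_cputime: one
	 * shared totals structure for the whole "thread group", guarded
	 * by a single spinlock instead of per-CPU copies. */
	struct cputime_totals {
		unsigned long long utime;
		unsigned long long stime;
		unsigned long long sum_exec_runtime;
		pthread_spinlock_t lock;
	};

	static struct cputime_totals totals;

	/* Tick path (writer): one lock round-trip on one shared
	 * cacheline, no matter how many CPUs are ticking. */
	static void account_user_time(unsigned long long delta)
	{
		pthread_spin_lock(&totals.lock);
		totals.utime += delta;
		pthread_spin_unlock(&totals.lock);
	}

	/* Reader: a consistent snapshot of all three fields at once;
	 * no O(nr_cpus) walk as in the old per-CPU version. */
	static void snapshot_totals(struct cputime_totals *times)
	{
		pthread_spin_lock(&totals.lock);
		times->utime = totals.utime;
		times->stime = totals.stime;
		times->sum_exec_runtime = totals.sum_exec_runtime;
		pthread_spin_unlock(&totals.lock);
	}

	int main(void)
	{
		struct cputime_totals snap;

		pthread_spin_init(&totals.lock, PTHREAD_PROCESS_PRIVATE);
		account_user_time(1);
		snapshot_totals(&snap);
		printf("utime=%llu stime=%llu runtime=%llu\n",
		       snap.utime, snap.stime, snap.sum_exec_runtime);
		return 0;
	}

The tick-side writers now touch one shared cacheline per tick, and a
reader obtains all three fields consistently in O(1) instead of
O(nr_cpus). The actual diff follows.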
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -48,6 +48,12 @@ extern struct fs_struct init_fs;
 	.posix_timers	= LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
+	.cputime	= { .totals = {					\
+		.utime = cputime_zero,					\
+		.stime = cputime_zero,					\
+		.sum_exec_runtime = 0,					\
+		.lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock),	\
+	}, },								\
 }
 
 extern struct nsproxy init_nsproxy;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -450,6 +450,7 @@ struct task_cputime {
 	cputime_t utime;
 	cputime_t stime;
 	unsigned long long sum_exec_runtime;
+	spinlock_t lock;
 };
 /* Alternate field names when used to cache expirations. */
 #define prof_exp	stime
@@ -465,7 +466,7 @@ struct task_cputime {
  * used for thread group CPU clock calculations.
  */
 struct thread_group_cputime {
-	struct task_cputime *totals;
+	struct task_cputime totals;
 };
 
 /*
@@ -2180,24 +2181,30 @@ static inline int spin_needbreak(spinlock_t *lock)
  * Thread group CPU time accounting.
  */
 
-extern int thread_group_cputime_alloc(struct task_struct *);
-extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
+static inline
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
-	sig->cputime.totals = NULL;
+	struct task_cputime *totals = &tsk->signal->cputime.totals;
+	unsigned long flags;
+
+	spin_lock_irqsave(&totals->lock, flags);
+	*times = *totals;
+	spin_unlock_irqrestore(&totals->lock, flags);
 }
 
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-	if (curr->signal->cputime.totals)
-		return 0;
-	return thread_group_cputime_alloc(curr);
+	sig->cputime.totals = (struct task_cputime){
+		.utime = cputime_zero,
+		.stime = cputime_zero,
+		.sum_exec_runtime = 0,
+	};
+
+	spin_lock_init(&sig->cputime.totals.lock);
 }
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
-	free_percpu(sig->cputime.totals);
 }
 
 /*
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -820,14 +820,15 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current);
-		if (likely(!ret)) {
-			atomic_inc(&current->signal->count);
-			atomic_inc(&current->signal->live);
-		}
-		return ret;
+		atomic_inc(&current->signal->count);
+		atomic_inc(&current->signal->live);
+		return 0;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+	if (sig)
+		posix_cpu_timers_init_group(sig);
+
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -864,8 +865,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	posix_cpu_timers_init_group(sig);
-
 	acct_init_pacct(&sig->pacct);
 	tty_audit_fork(sig);
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -9,76 +9,6 @@
 #include <asm/uaccess.h>
 #include <linux/kernel_stat.h>
 
-/*
- * Allocate the thread_group_cputime structure appropriately and fill in the
- * current values of the fields. Called from copy_signal() via
- * thread_group_cputime_clone_thread() when adding a second or subsequent
- * thread to a thread group. Assumes interrupts are enabled when called.
- */
-int thread_group_cputime_alloc(struct task_struct *tsk)
-{
-	struct signal_struct *sig = tsk->signal;
-	struct task_cputime *cputime;
-
-	/*
-	 * If we have multiple threads and we don't already have a
-	 * per-CPU task_cputime struct (checked in the caller), allocate
-	 * one and fill it in with the times accumulated so far. We may
-	 * race with another thread so recheck after we pick up the sighand
-	 * lock.
-	 */
-	cputime = alloc_percpu(struct task_cputime);
-	if (cputime == NULL)
-		return -ENOMEM;
-	spin_lock_irq(&tsk->sighand->siglock);
-	if (sig->cputime.totals) {
-		spin_unlock_irq(&tsk->sighand->siglock);
-		free_percpu(cputime);
-		return 0;
-	}
-	sig->cputime.totals = cputime;
-	cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
-	cputime->utime = tsk->utime;
-	cputime->stime = tsk->stime;
-	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-	spin_unlock_irq(&tsk->sighand->siglock);
-	return 0;
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * @tsk:	The task we use to identify the thread group.
- * @times:	task_cputime structure in which we return the summed fields.
- *
- * Walk the list of CPUs to sum the per-CPU time fields in the thread group
- * time structure.
- */
-void thread_group_cputime(
-	struct task_struct *tsk,
-	struct task_cputime *times)
-{
-	struct task_cputime *totals, *tot;
-	int i;
-
-	totals = tsk->signal->cputime.totals;
-	if (!totals) {
-		times->utime = tsk->utime;
-		times->stime = tsk->stime;
-		times->sum_exec_runtime = tsk->se.sum_exec_runtime;
-		return;
-	}
-
-	times->stime = times->utime = cputime_zero;
-	times->sum_exec_runtime = 0;
-	for_each_possible_cpu(i) {
-		tot = per_cpu_ptr(totals, i);
-		times->utime = cputime_add(times->utime, tot->utime);
-		times->stime = cputime_add(times->stime, tot->stime);
-		times->sum_exec_runtime += tot->sum_exec_runtime;
-	}
-}
-
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
  */
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,6 +296,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 static inline void account_group_user_time(struct task_struct *tsk,
 					   cputime_t cputime)
 {
+	struct task_cputime *times;
 	struct signal_struct *sig;
 
 	/* tsk == current, ensure it is safe to use ->signal */
@@ -303,13 +304,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
 		return;
 
 	sig = tsk->signal;
-	if (sig->cputime.totals) {
-		struct task_cputime *times;
+	times = &sig->cputime.totals;
 
-		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
-		times->utime = cputime_add(times->utime, cputime);
-		put_cpu_no_resched();
-	}
+	spin_lock(&times->lock);
+	times->utime = cputime_add(times->utime, cputime);
+	spin_unlock(&times->lock);
 }
 
 /**
@@ -325,6 +324,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
 static inline void account_group_system_time(struct task_struct *tsk,
 					     cputime_t cputime)
 {
+	struct task_cputime *times;
 	struct signal_struct *sig;
 
 	/* tsk == current, ensure it is safe to use ->signal */
@@ -332,13 +332,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
 		return;
 
 	sig = tsk->signal;
-	if (sig->cputime.totals) {
-		struct task_cputime *times;
+	times = &sig->cputime.totals;
 
-		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
-		times->stime = cputime_add(times->stime, cputime);
-		put_cpu_no_resched();
-	}
+	spin_lock(&times->lock);
+	times->stime = cputime_add(times->stime, cputime);
+	spin_unlock(&times->lock);
 }
 
 /**
@@ -354,6 +352,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 static inline void account_group_exec_runtime(struct task_struct *tsk,
 					      unsigned long long ns)
 {
+	struct task_cputime *times;
 	struct signal_struct *sig;
 
 	sig = tsk->signal;
@@ -362,11 +361,9 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
 	if (unlikely(!sig))
 		return;
 
-	if (sig->cputime.totals) {
-		struct task_cputime *times;
+	times = &sig->cputime.totals;
 
-		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
-		times->sum_exec_runtime += ns;
-		put_cpu_no_resched();
-	}
+	spin_lock(&times->lock);
+	times->sum_exec_runtime += ns;
+	spin_unlock(&times->lock);
 }
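
One detail the commit message does not spell out, as I read the patch:
the account_group_*() helpers above take the lock with plain
spin_lock() because they run from the scheduler tick with interrupts
already disabled, while thread_group_cputime() can be called from
process context with interrupts enabled and therefore uses
spin_lock_irqsave(), so the tick cannot interrupt the lock holder on
the same CPU and deadlock. A hypothetical consumer, sketched in kernel
style; group_cputime_expired() and its expiry test are illustrative,
not part of this patch:

	static int group_cputime_expired(struct task_struct *tsk, cputime_t expires)
	{
		struct task_cputime times;

		/* One lock round-trip yields a consistent
		 * utime/stime/sum_exec_runtime set. */
		thread_group_cputime(tsk, &times);

		return !cputime_eq(expires, cputime_zero) &&
		       cputime_ge(cputime_add(times.utime, times.stime), expires);
	}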