Commit 53795ced authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar.

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix migration thread runtime bogosity
  sched,rt: fix isolated CPUs leaving root_task_group indefinitely throttled
  sched,cgroup: Fix up task_groups list
  sched: fix divide by zero at {thread_group,task}_times
  sched, cgroup: Reduce rq->lock hold times for large cgroup hierarchies
parents f78602ab 8f618968
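
Note on the "sched: fix divide by zero at {thread_group,task}_times" change below: it replaces an open-coded scaling with a new scale_utime() helper. The old code always used do_div(), which truncates its divisor to 32 bits; with a 64-bit cputime_t, a total whose low 32 bits happen to be zero becomes a zero divisor. The helper picks div_u64() or div64_u64() based on sizeof(cputime_t) so the full-width total is used. A minimal userspace sketch of the same arithmetic (plain u64 instead of cputime_t; names and demo values are illustrative only, not from the kernel tree):

#include <stdio.h>
#include <stdint.h>

/* Scale utime by rtime/total using a full 64-bit divide, like div64_u64(). */
static uint64_t scale_utime_sketch(uint64_t utime, uint64_t rtime, uint64_t total)
{
	uint64_t temp = rtime;

	temp *= utime;		/* may overflow for very large inputs, as in the kernel */
	return temp / total;
}

int main(void)
{
	uint64_t total = 0x100000000ULL;	/* low 32 bits are zero */

	/* The old do_div() path truncated the divisor: (uint32_t)total == 0 here. */
	printf("truncated 32-bit divisor: %u\n", (unsigned)(uint32_t)total);
	printf("scaled utime: %llu\n",
	       (unsigned long long)scale_utime_sketch(1000, 500, total));
	return 0;
}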
kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
@@ -7246,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
kernel/sched/fair.c
@@ -3387,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -4293,11 +4301,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	env.src_rq    = busiest;
 	env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	if (!env.loop)
-		update_h_load(env.src_cpu);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
kernel/sched/rt.c
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * FIXME: isolated CPUs should really leave the root task group,
+	 * whether they are isolcpus or were isolated via cpusets, lest
+	 * the timer run on a CPU which does not service all runqueues,
+	 * potentially leaving other CPUs indefinitely throttled.  If
+	 * isolation is really required, the user will turn the throttle
+	 * off to kill the perturbations it causes anyway.  Meanwhile,
+	 * this maintains functionality for boot and/or troubleshooting.
+	 */
+	if (rt_b == &root_task_group.rt_bandwidth)
+		span = cpu_online_mask;
+#endif
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
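
As the FIXME above says, a user who really needs isolated CPUs is expected to turn RT throttling off to avoid the perturbation entirely. For reference, that is done by writing -1 to the kernel.sched_rt_runtime_us sysctl; a minimal userspace sketch (illustrative only, not part of this commit):

#include <stdio.h>

int main(void)
{
	/* -1 means "no RT runtime limit", i.e. the RT throttle is disabled. */
	FILE *f = fopen("/proc/sys/kernel/sched_rt_runtime_us", "w");

	if (!f) {
		perror("sched_rt_runtime_us");
		return 1;
	}
	fputs("-1\n", f);
	fclose(f);
	return 0;
}

Equivalently, sysctl -w kernel.sched_rt_runtime_us=-1 from the command line.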
kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
 struct cfs_rq;
 struct rt_rq;
 
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+	unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		stop->se.exec_start = rq->clock_task;
 		return stop;
+	}
 
 	return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+	struct task_struct *curr = rq->curr;
+	u64 delta_exec;
+
+	delta_exec = rq->clock_task - curr->se.exec_start;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
+
+	schedstat_set(curr->se.statistics.exec_max,
+			max(curr->se.statistics.exec_max, delta_exec));
+
+	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
+	curr->se.exec_start = rq->clock_task;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 static void set_curr_task_stop(struct rq *rq)
 {
+	struct task_struct *stop = rq->stop;
+
+	stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)