Commit d33fdee4 authored by Linus Torvalds

Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix cross-sched-class wakeup preemption
  sched: Fix runnable condition for stoptask
  sched: Use group weight, idle cpu metrics to fix imbalances during idle
parents 1e8703b2 1e5a7405

@@ -862,6 +862,7 @@ struct sched_group {
          * single CPU.
          */
         unsigned int cpu_power, cpu_power_orig;
+        unsigned int group_weight;
 
         /*
          * The CPUs this group covers.

@@ -560,18 +560,8 @@ struct rq {
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
-        rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-
-        /*
-         * A queue event has occurred, and we're going to schedule. In
-         * this case, we can save a useless back to back clock update.
-         */
-        if (test_tsk_need_resched(p))
-                rq->skip_clock_update = 1;
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 
 static inline int cpu_of(struct rq *rq)
 {

@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                 p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+        const struct sched_class *class;
+
+        if (p->sched_class == rq->curr->sched_class) {
+                rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+        } else {
+                for_each_class(class) {
+                        if (class == rq->curr->sched_class)
+                                break;
+                        if (class == p->sched_class) {
+                                resched_task(rq->curr);
+                                break;
+                        }
+                }
+        }
+
+        /*
+         * A queue event has occurred, and we're going to schedule. In
+         * this case, we can save a useless back to back clock update.
+         */
+        if (test_tsk_need_resched(rq->curr))
+                rq->skip_clock_update = 1;
+}
+
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
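
The rewritten check_preempt_curr() above resolves wakeup preemption across scheduling classes by class position: same-class wakeups are still delegated to the class's own check_preempt_curr hook, while for cross-class wakeups the classes are walked in priority order (stop class first, idle class last in this tree) and the current task is rescheduled only if the waking task's class is reached before the running task's class. A minimal userspace sketch of that ordering rule (simplified stand-in types and a hard-coded class order, not the kernel's for_each_class() machinery) could look like this:

/*
 * Illustrative model of the cross-class preemption walk above.
 * The class list, task type and return value are simplified stand-ins;
 * only the ordering logic mirrors the patch.
 */
#include <stdbool.h>
#include <stdio.h>

enum sched_class_id { CLASS_STOP, CLASS_RT, CLASS_FAIR, CLASS_IDLE, NR_CLASSES };

struct task { const char *name; enum sched_class_id class; };

static bool wakeup_preempts(const struct task *curr, const struct task *p)
{
        if (p->class == curr->class)
                return false;   /* same class: that class decides for itself */

        /* Walk classes from highest to lowest priority, as for_each_class() does. */
        for (int class = CLASS_STOP; class < NR_CLASSES; class++) {
                if (class == (int)curr->class)
                        return false;   /* hit curr's class first: no preemption */
                if (class == (int)p->class)
                        return true;    /* waking task is in a higher class */
        }
        return false;
}

int main(void)
{
        struct task fair = { "fair task", CLASS_FAIR };
        struct task rt   = { "rt task",   CLASS_RT };

        printf("rt wakes while fair runs -> preempt: %d\n", wakeup_preempts(&fair, &rt));
        printf("fair wakes while rt runs -> preempt: %d\n", wakeup_preempts(&rt, &fair));
        return 0;
}

Because the generic path now handles foreign classes, check_preempt_wakeup() in the fair class no longer needs its rt_prio() and fair_sched_class special cases, which is why they are removed in a later hunk.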

@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
         if (cpu != group_first_cpu(sd->groups))
                 return;
 
+        sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
         child = sd->child;
 
         sd->groups->cpu_power = 0;

@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
         struct cfs_rq *cfs_rq = task_cfs_rq(curr);
         int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-        if (unlikely(rt_prio(p->prio)))
-                goto preempt;
-
-        if (unlikely(p->sched_class != &fair_sched_class))
-                return;
-
         if (unlikely(se == pse))
                 return;

@@ -2035,13 +2029,16 @@ struct sd_lb_stats {
         unsigned long this_load_per_task;
         unsigned long this_nr_running;
         unsigned long this_has_capacity;
+        unsigned int this_idle_cpus;
 
         /* Statistics of the busiest group */
+        unsigned int busiest_idle_cpus;
         unsigned long max_load;
         unsigned long busiest_load_per_task;
         unsigned long busiest_nr_running;
         unsigned long busiest_group_capacity;
         unsigned long busiest_has_capacity;
+        unsigned int busiest_group_weight;
 
         int group_imb; /* Is there imbalance in this sd */
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)

@@ -2063,6 +2060,8 @@ struct sg_lb_stats {
         unsigned long sum_nr_running; /* Nr tasks running in the group */
         unsigned long sum_weighted_load; /* Weighted load of group's tasks */
         unsigned long group_capacity;
+        unsigned long idle_cpus;
+        unsigned long group_weight;
         int group_imb; /* Is there an imbalance in the group ? */
         int group_has_capacity; /* Is there extra capacity in the group? */
 };

@@ -2431,7 +2430,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
                 sgs->group_load += load;
                 sgs->sum_nr_running += rq->nr_running;
                 sgs->sum_weighted_load += weighted_cpuload(i);
-
+                if (idle_cpu(i))
+                        sgs->idle_cpus++;
         }
 
         /*

@@ -2469,6 +2469,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
         sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
         if (!sgs->group_capacity)
                 sgs->group_capacity = fix_small_capacity(sd, group);
+        sgs->group_weight = group->group_weight;
 
         if (sgs->group_capacity > sgs->sum_nr_running)
                 sgs->group_has_capacity = 1;

@@ -2576,13 +2577,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                         sds->this_nr_running = sgs.sum_nr_running;
                         sds->this_load_per_task = sgs.sum_weighted_load;
                         sds->this_has_capacity = sgs.group_has_capacity;
+                        sds->this_idle_cpus = sgs.idle_cpus;
                 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
                         sds->max_load = sgs.avg_load;
                         sds->busiest = sg;
                         sds->busiest_nr_running = sgs.sum_nr_running;
+                        sds->busiest_idle_cpus = sgs.idle_cpus;
                         sds->busiest_group_capacity = sgs.group_capacity;
                         sds->busiest_load_per_task = sgs.sum_weighted_load;
                         sds->busiest_has_capacity = sgs.group_has_capacity;
+                        sds->busiest_group_weight = sgs.group_weight;
                         sds->group_imb = sgs.group_imb;
                 }
 

@@ -2860,8 +2864,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
         if (sds.this_load >= sds.avg_load)
                 goto out_balanced;
 
-        if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
-                goto out_balanced;
+        /*
+         * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
+         * And to check for busy balance use !idle_cpu instead of
+         * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
+         * even when they are idle.
+         */
+        if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
+                if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+                        goto out_balanced;
+        } else {
+                /*
+                 * This cpu is idle. If the busiest group load doesn't
+                 * have more tasks than the number of available cpu's and
+                 * there is no imbalance between this and busiest group
+                 * wrt to idle cpu's, it is balanced.
+                 */
+                if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+                    sds.busiest_nr_running <= sds.busiest_group_weight)
+                        goto out_balanced;
+        }
 
 force_balance:
         /* Looks like there is an imbalance. Compute it */
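
The new branch in find_busiest_group() separates the truly-idle case from the busy and newly-idle cases: busy CPUs keep the conservative imbalance_pct test, while an idle CPU now treats the domain as balanced when the busiest group is not actually overloaded (no more running tasks than it has CPUs, i.e. its group_weight) and its idle-CPU count is not clearly worse than this group's. A rough stand-alone model of that decision, using simplified stand-ins for the sd_lb_stats fields rather than kernel code:

/*
 * Illustrative-only model of the balance decision added above.
 * struct lb_snapshot and considered_balanced() are made-up names;
 * only the comparisons mirror the patch.
 */
#include <stdbool.h>
#include <stdio.h>

struct lb_snapshot {
        unsigned long this_load, max_load;
        unsigned int this_idle_cpus, busiest_idle_cpus;
        unsigned long busiest_nr_running, busiest_group_weight;
};

static bool considered_balanced(const struct lb_snapshot *s,
                                unsigned int imbalance_pct,
                                bool this_cpu_idle, bool newly_idle)
{
        if (newly_idle || !this_cpu_idle) {
                /* Busy (or just-went-idle) CPU: conservative percentage test. */
                return 100 * s->max_load <= imbalance_pct * s->this_load;
        }
        /*
         * Truly idle CPU: balanced if the busiest group is merely full
         * rather than overloaded and is not short of idle CPUs compared
         * to this group.
         */
        return s->this_idle_cpus <= s->busiest_idle_cpus + 1 &&
               s->busiest_nr_running <= s->busiest_group_weight;
}

int main(void)
{
        struct lb_snapshot s = {
                .this_load = 0, .max_load = 1024,
                .this_idle_cpus = 2, .busiest_idle_cpus = 1,
                .busiest_nr_running = 2, .busiest_group_weight = 2,
        };

        /* Idle CPU, busiest group full but not overloaded: do not pull. */
        printf("balanced: %d\n", considered_balanced(&s, 125, true, false));
        return 0;
}

As the in-diff comments note, the intent is that an idle CPU (including an idle HT sibling whose balance invocation still passes CPU_NOT_IDLE) should not raid a group that is fully but not over-committed.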

@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,
 static void
 check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
 {
-        resched_task(rq->curr); /* we preempt everything */
+        /* we're never preempted */
 }
 
 static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
         struct task_struct *stop = rq->stop;
 
-        if (stop && stop->state == TASK_RUNNING)
+        if (stop && stop->se.on_rq)
                 return stop;
 
         return NULL;
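
Two things changed in the stop-task class: with the generic cross-class preemption above in place, and the stop class sitting at the top of the class order, its check_preempt_curr hook no longer has anything to do; and pick_next_task_stop() now asks whether the stop task is actually enqueued on this runqueue (se.on_rq) instead of inferring runnability from its state field, since a task's state alone does not say whether it is currently queued on this CPU's runqueue. A tiny illustrative model of that distinction, with simplified stand-in types rather than the kernel's:

/*
 * Sketch of the "runnable means queued" idea behind the on_rq check above.
 * struct task, struct rq and their fields are simplified stand-ins.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define TASK_RUNNING 0

struct task { const char *name; int state; bool on_rq; };
struct rq { struct task *stop; };

/* Mirrors pick_next_task_stop(): hand back the stop task only if queued. */
static struct task *pick_next_task_stop(struct rq *rq)
{
        struct task *stop = rq->stop;

        if (stop && stop->on_rq)        /* not: stop->state == TASK_RUNNING */
                return stop;
        return NULL;
}

int main(void)
{
        /* State already reads TASK_RUNNING, but the task is not queued here. */
        struct task stop = { "migration/0", TASK_RUNNING, false };
        struct rq rq = { .stop = &stop };

        printf("picked: %s\n", pick_next_task_stop(&rq) ? stop.name : "none");

        stop.on_rq = true;              /* now actually on the runqueue */
        printf("picked: %s\n", pick_next_task_stop(&rq) ? stop.name : "none");
        return 0;
}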