Commit f16c759b authored by Andrew Theurer's avatar Andrew Theurer Committed by Linus Torvalds

[PATCH] sched: more aggressive wake_idle()

This patch addresses some problems with wake_idle().  Currently wake_idle()
will wake a task on an alternate cpu if:

1) task->cpu is not idle
2) an idle cpu can be found

However the span of cpus to look for is very limited (only the task->cpu's
sibling).  The scheduler should find the closest idle cpu, starting with
the lowest level domain, then going to higher level domains if allowed
(domain has flag SD_WAKE_IDLE).  This patch does this.

This and the other two patches (also to be submitted) combined have
provided as much as 5% improvement on that "online transaction DB workload"
and 2% on the industry standard J2EE workload.

I asked Martin Bligh to test these for regression, and he did not find any.
I would like to submit for inclusion to -mm and, barring any problems,
eventually to mainline.

Signed-off-by: Andrew Theurer <habanero@us.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 39a488d1
......@@ -80,6 +80,7 @@ static inline cpumask_t pcibus_to_cpumask(int bus)
.per_cpu_gain = 100, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_WAKE_IDLE \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
.balance_interval = 1, \
......
......@@ -56,6 +56,7 @@ void build_cpu_to_node_map(void);
.per_cpu_gain = 100, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_WAKE_IDLE \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
.balance_interval = 1, \
......
......@@ -51,6 +51,7 @@ static inline int node_to_first_cpu(int node)
.per_cpu_gain = 100, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_WAKE_IDLE \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
.balance_interval = 1, \
......
......@@ -53,6 +53,7 @@ static inline cpumask_t __pcibus_to_cpumask(int bus)
.per_cpu_gain = 100, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_WAKE_IDLE \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
.balance_interval = 1, \
......
......@@ -123,6 +123,7 @@ static inline int __next_node_with_cpus(int node)
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_WAKE_AFFINE \
| SD_WAKE_IDLE \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
.balance_interval = 1, \
......
......@@ -935,9 +935,10 @@ static inline unsigned long target_load(int cpu)
#endif
/*
* wake_idle() is useful especially on SMT architectures to wake a
* task onto an idle sibling if we would otherwise wake it onto a
* busy sibling.
* wake_idle() will wake a task on an idle cpu if task->cpu is
* not idle and an idle cpu is available. The span of cpus to
* search starts with cpus closest then further out as needed,
* so we always favor a closer, idle cpu.
*
* Returns the CPU we should wake onto.
*/
......@@ -945,24 +946,23 @@ static inline unsigned long target_load(int cpu)
/*
 * NOTE(review): this span is a rendered unified diff with the +/- markers
 * stripped, so lines of the PRE-patch wake_idle() (the rq lookup, the
 * "sd = rq->sd" early-exit path, and the first for_each_cpu_mask loop)
 * appear interleaved with the POST-patch implementation (the
 * for_each_domain loop). The text below is NOT a single coherent
 * function; comments describe the new algorithm where identifiable.
 *
 * wake_idle - pick an idle CPU near @cpu for waking task @p onto.
 * Returns @cpu itself when it is already idle or no better CPU is found.
 */
static int wake_idle(int cpu, task_t *p)
{
cpumask_t tmp;
runqueue_t *rq = cpu_rq(cpu);  /* pre-patch line retained by the diff render */
struct sched_domain *sd;
int i;
/* Target CPU already idle: nothing to improve, wake there. */
if (idle_cpu(cpu))
return cpu;
/* Pre-patch body: only checked the base domain's SD_WAKE_IDLE flag
 * and scanned that single domain's span. */
sd = rq->sd;
if (!(sd->flags & SD_WAKE_IDLE))
return cpu;
cpus_and(tmp, sd->span, p->cpus_allowed);
for_each_cpu_mask(i, tmp) {
if (idle_cpu(i))
return i;
/* Post-patch body: walk domains from the lowest (closest CPUs) level
 * upward, so a nearer idle CPU is always preferred over a farther one. */
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_IDLE) {
/* Candidates: this domain's span, restricted to online CPUs
 * and to the CPUs the task is allowed to run on. */
cpus_and(tmp, sd->span, cpu_online_map);
cpus_and(tmp, tmp, p->cpus_allowed);
for_each_cpu_mask(i, tmp) {
if (idle_cpu(i))
return i;
}
}
/* First domain without SD_WAKE_IDLE ends the search upward. */
else break;
}
/* No idle CPU found in any allowed domain; fall back to @cpu. */
return cpu;
}
#else
......@@ -1074,7 +1074,7 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
out_set_cpu:
schedstat_inc(rq, ttwu_attempts);
new_cpu = wake_idle(new_cpu, p);
if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) {
if (new_cpu != cpu) {
schedstat_inc(rq, ttwu_moved);
set_task_cpu(p, new_cpu);
task_rq_unlock(rq, &flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment