Commit 9d5efe05 authored by Srivatsa Vaddagiri, committed by Ingo Molnar

sched: Fix capacity calculations for SMT4

Handle cpu capacity being reported as 0 on cores with a larger number
of hardware threads. For example, on a Power7 core with 4 hardware
threads, core power is 1177 and thus the power of each hardware thread
is 1177/4 = 294. This low power can lead to the capacity of each
hardware thread being calculated as 0, which leads to tasks bouncing
madly within the core!

Fix this by reporting the capacity of a hardware thread as 1, provided
its power has not been scaled down significantly by frequency scaling
or by real-time tasks' use of the cpu.
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <20100608045702.21D03CC895@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 83cd4fe2
...@@ -857,7 +857,7 @@ struct sched_group { ...@@ -857,7 +857,7 @@ struct sched_group {
* CPU power of this group, SCHED_LOAD_SCALE being max power for a * CPU power of this group, SCHED_LOAD_SCALE being max power for a
* single CPU. * single CPU.
*/ */
unsigned int cpu_power; unsigned int cpu_power, cpu_power_orig;
/* /*
* The CPUs this group covers. * The CPUs this group covers.
......
...@@ -2285,13 +2285,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) ...@@ -2285,13 +2285,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
unsigned long power = SCHED_LOAD_SCALE; unsigned long power = SCHED_LOAD_SCALE;
struct sched_group *sdg = sd->groups; struct sched_group *sdg = sd->groups;
if (sched_feat(ARCH_POWER))
power *= arch_scale_freq_power(sd, cpu);
else
power *= default_scale_freq_power(sd, cpu);
power >>= SCHED_LOAD_SHIFT;
if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
if (sched_feat(ARCH_POWER)) if (sched_feat(ARCH_POWER))
power *= arch_scale_smt_power(sd, cpu); power *= arch_scale_smt_power(sd, cpu);
...@@ -2301,6 +2294,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) ...@@ -2301,6 +2294,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
power >>= SCHED_LOAD_SHIFT; power >>= SCHED_LOAD_SHIFT;
} }
sdg->cpu_power_orig = power;
if (sched_feat(ARCH_POWER))
power *= arch_scale_freq_power(sd, cpu);
else
power *= default_scale_freq_power(sd, cpu);
power >>= SCHED_LOAD_SHIFT;
power *= scale_rt_power(cpu); power *= scale_rt_power(cpu);
power >>= SCHED_LOAD_SHIFT; power >>= SCHED_LOAD_SHIFT;
...@@ -2333,6 +2335,31 @@ static void update_group_power(struct sched_domain *sd, int cpu) ...@@ -2333,6 +2335,31 @@ static void update_group_power(struct sched_domain *sd, int cpu)
sdg->cpu_power = power; sdg->cpu_power = power;
} }
/*
 * Try and fix up capacity for tiny siblings, this is needed when
 * things like SD_ASYM_PACKING need f_b_g to select another sibling
 * which on its own isn't powerful enough.
 *
 * Callers use this only after the rounded capacity of @group came out
 * as 0; the return value is the capacity to report instead:
 *
 *   1 - @sd is the sibling level and @group still has roughly 90% or
 *       more of its unscaled power (cpu_power_orig), so the zero is
 *       just a rounding artifact of splitting one core's power between
 *       its hardware threads.
 *   0 - @sd is not the sibling level, or the group's power has been
 *       scaled down significantly, so it really has no capacity.
 *
 * See update_sd_pick_busiest() and check_asym_packing().
 */
static inline int
fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
{
	/*
	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
	 */
	if (sd->level != SD_LV_SIBLING)
		return 0;

	/*
	 * If ~90% of the cpu_power is still there, we're good.
	 *
	 * 29/32 ~= 0.906. The comparison must be '>': capacity is granted
	 * when the current power is ABOVE that fraction of the original
	 * power, not below it.
	 */
	if (group->cpu_power * 32 > group->cpu_power_orig * 29)
		return 1;

	return 0;
}
/** /**
* update_sg_lb_stats - Update sched_group's statistics for load balancing. * update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @sd: The sched_domain whose statistics are to be updated. * @sd: The sched_domain whose statistics are to be updated.
...@@ -2426,6 +2453,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, ...@@ -2426,6 +2453,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
sgs->group_capacity = sgs->group_capacity =
DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
if (!sgs->group_capacity)
sgs->group_capacity = fix_small_capacity(sd, group);
} }
/** /**
...@@ -2724,8 +2753,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, ...@@ -2724,8 +2753,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
* find_busiest_queue - find the busiest runqueue among the cpus in group. * find_busiest_queue - find the busiest runqueue among the cpus in group.
*/ */
static struct rq * static struct rq *
find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
unsigned long imbalance, const struct cpumask *cpus) enum cpu_idle_type idle, unsigned long imbalance,
const struct cpumask *cpus)
{ {
struct rq *busiest = NULL, *rq; struct rq *busiest = NULL, *rq;
unsigned long max_load = 0; unsigned long max_load = 0;
...@@ -2736,6 +2766,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, ...@@ -2736,6 +2766,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
unsigned long wl; unsigned long wl;
if (!capacity)
capacity = fix_small_capacity(sd, group);
if (!cpumask_test_cpu(i, cpus)) if (!cpumask_test_cpu(i, cpus))
continue; continue;
...@@ -2852,7 +2885,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -2852,7 +2885,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
goto out_balanced; goto out_balanced;
} }
busiest = find_busiest_queue(group, idle, imbalance, cpus); busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
if (!busiest) { if (!busiest) {
schedstat_inc(sd, lb_nobusyq[idle]); schedstat_inc(sd, lb_nobusyq[idle]);
goto out_balanced; goto out_balanced;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment