Commit 7d9ffa89, authored by Peter Zijlstra, committed by Ingo Molnar

sched: Micro-optimize the smart wake-affine logic

Smart wake-affine currently uses the node size as its factor, but the cpumask
operation needed to compute that size has a high overhead.

Thus, this patch introduces the 'sd_llc_size' per-CPU variable, which records
the size of the highest cache-sharing domain, and uses it as the new factor, in
order to reduce the overhead and make the factor more reasonable.
Tested-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Tested-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Link: http://lkml.kernel.org/r/51D5008E.6030102@linux.vnet.ibm.com
[ Tidied up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 62470419
...@@ -5083,18 +5083,23 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) ...@@ -5083,18 +5083,23 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
* two cpus are in the same cache domain, see cpus_share_cache(). * two cpus are in the same cache domain, see cpus_share_cache().
*/ */
DEFINE_PER_CPU(struct sched_domain *, sd_llc); DEFINE_PER_CPU(struct sched_domain *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id); DEFINE_PER_CPU(int, sd_llc_id);
static void update_top_cache_domain(int cpu) static void update_top_cache_domain(int cpu)
{ {
struct sched_domain *sd; struct sched_domain *sd;
int id = cpu; int id = cpu;
int size = 1;
sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
if (sd) if (sd) {
id = cpumask_first(sched_domain_span(sd)); id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));
}
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
per_cpu(sd_llc_size, cpu) = size;
per_cpu(sd_llc_id, cpu) = id; per_cpu(sd_llc_id, cpu) = id;
} }
......
...@@ -3175,7 +3175,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, ...@@ -3175,7 +3175,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
static int wake_wide(struct task_struct *p) static int wake_wide(struct task_struct *p)
{ {
int factor = nr_cpus_node(cpu_to_node(smp_processor_id())); int factor = this_cpu_read(sd_llc_size);
/* /*
* Yeah, it's the switching-frequency, could means many wakee or * Yeah, it's the switching-frequency, could means many wakee or
......
...@@ -594,6 +594,7 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) ...@@ -594,6 +594,7 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
} }
DECLARE_PER_CPU(struct sched_domain *, sd_llc); DECLARE_PER_CPU(struct sched_domain *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id); DECLARE_PER_CPU(int, sd_llc_id);
struct sched_group_power { struct sched_group_power {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment