Commit 8f37961c authored by Tim Chen, committed by Ingo Molnar

sched/core, x86/topology: Fix NUMA in package topology bug

Current code can call set_cpu_sibling_map() and invoke set_sched_topology()
more than once (e.g. on CPU hot plug).  When this happens after
sched_init_smp() has been called, we lose the NUMA topology extension to
sched_domain_topology in sched_init_numa().  This results in incorrect
topology when the sched domain is rebuilt.

This patch fixes the bug and issues a warning if we call set_sched_topology()
after sched_init_smp().

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@suse.de
Cc: jolsa@redhat.com
Cc: rjw@rjwysocki.net
Link: http://lkml.kernel.org/r/1474485552-141429-2-git-send-email-srinivas.pandruvada@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 536e0e81
...@@ -471,7 +471,7 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ...@@ -471,7 +471,7 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false; return false;
} }
static struct sched_domain_topology_level numa_inside_package_topology[] = { static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT #ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif #endif
...@@ -480,22 +480,23 @@ static struct sched_domain_topology_level numa_inside_package_topology[] = { ...@@ -480,22 +480,23 @@ static struct sched_domain_topology_level numa_inside_package_topology[] = {
#endif #endif
{ NULL, }, { NULL, },
}; };
/*
 * Default x86 scheduler-domain topology table: an SMT level (when
 * CONFIG_SCHED_SMT is set), an MC level (when CONFIG_SCHED_MC is set)
 * and a DIE level, terminated by a NULL entry. It is passed to
 * set_sched_topology() from native_smp_prepare_cpus().
 */
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
/* /*
* set_sched_topology() sets the topology internal to a CPU. The * Set if a package/die has multiple NUMA nodes inside.
* NUMA topologies are layered on top of it to build the full * AMD Magny-Cours and Intel Cluster-on-Die have this.
* system topology. */
* static bool x86_has_numa_in_package;
* If NUMA nodes are observed to occur within a CPU package, this
* function should be called. It forces the sched domain code to
* only use the SMT level for the CPU portion of the topology.
* This essentially falls back to relying on NUMA information
* from the SRAT table to describe the entire system topology
* (except for hyperthreads).
*/
static void primarily_use_numa_for_topology(void)
{
set_sched_topology(numa_inside_package_topology);
}
void set_cpu_sibling_map(int cpu) void set_cpu_sibling_map(int cpu)
{ {
...@@ -558,7 +559,7 @@ void set_cpu_sibling_map(int cpu) ...@@ -558,7 +559,7 @@ void set_cpu_sibling_map(int cpu)
c->booted_cores = cpu_data(i).booted_cores; c->booted_cores = cpu_data(i).booted_cores;
} }
if (match_die(c, o) && !topology_same_node(c, o)) if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology(); x86_has_numa_in_package = true;
} }
threads = cpumask_weight(topology_sibling_cpumask(cpu)); threads = cpumask_weight(topology_sibling_cpumask(cpu));
...@@ -1304,6 +1305,16 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) ...@@ -1304,6 +1305,16 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
} }
/*
* Set 'default' x86 topology, this matches default_topology() in that
* it has NUMA nodes as a topology level. See also
* native_smp_cpus_done().
*
* Must be done before set_cpu_sibling_map() is run.
*/
set_sched_topology(x86_topology);
set_cpu_sibling_map(0); set_cpu_sibling_map(0);
switch (smp_sanity_check(max_cpus)) { switch (smp_sanity_check(max_cpus)) {
...@@ -1370,6 +1381,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus) ...@@ -1370,6 +1381,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
{ {
pr_debug("Boot done\n"); pr_debug("Boot done\n");
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
nmi_selftest(); nmi_selftest();
impress_friends(); impress_friends();
setup_ioapic_dest(); setup_ioapic_dest();
......
...@@ -6552,6 +6552,9 @@ static struct sched_domain_topology_level *sched_domain_topology = ...@@ -6552,6 +6552,9 @@ static struct sched_domain_topology_level *sched_domain_topology =
void set_sched_topology(struct sched_domain_topology_level *tl) void set_sched_topology(struct sched_domain_topology_level *tl)
{ {
if (WARN_ON_ONCE(sched_smp_initialized))
return;
sched_domain_topology = tl; sched_domain_topology = tl;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment