Commit 3f098c26, authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64: Support dualcore and 8 socket systems in k8 fallback node parsing

In particular, this helps on systems where the local APIC ID space and
the node ID space are very different from the Linux CPU number space.

Previously the older NUMA setup code directly parsing the K8
northbridge registers had some issues on 8 socket or dual core
systems. This patch fixes them.

This is mainly done by fixing some confusion between Linux
CPU numbers and local APIC ids. We now pass the local APIC IDs
to later code, which avoids mismatches.

Also add some heuristics to detect cases where the Hypertransport
nodeids and the local APIC IDs don't match, but are shifted
by a constant offset.

This is still all quite hackish; hopefully BIOS writers will fill
in correct SRATs instead.
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent b9169116
...@@ -755,6 +755,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) ...@@ -755,6 +755,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
} }
} }
#ifdef CONFIG_NUMA
/*
 * Pick an online node by scanning the apicid_to_node[] entries that
 * surround @apicid.
 *
 * Entries below @apicid are examined first, walking down to index 0;
 * after that the entries above @apicid are examined, walking up to
 * MAX_LOCAL_APIC - 1.  The first entry that is not NUMA_NO_NODE and
 * refers to an online node is returned.  The entry for @apicid itself
 * is never consulted.
 *
 * If no entry qualifies, the first node in node_online_map is returned.
 */
static int nearby_node(int apicid)
{
	int id;
	int candidate;

	/* Downward pass: apicid - 1, apicid - 2, ..., 0. */
	id = apicid;
	while (--id >= 0) {
		candidate = apicid_to_node[id];
		if (candidate == NUMA_NO_NODE)
			continue;
		if (node_online(candidate))
			return candidate;
	}

	/* Upward pass: apicid + 1, apicid + 2, ..., MAX_LOCAL_APIC - 1. */
	id = apicid;
	while (++id < MAX_LOCAL_APIC) {
		candidate = apicid_to_node[id];
		if (candidate == NUMA_NO_NODE)
			continue;
		if (node_online(candidate))
			return candidate;
	}

	/* Shouldn't happen */
	return first_node(node_online_map);
}
#endif
/* /*
* On an AMD dual core setup the lower bits of the APIC id distinguish the cores. * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
* Assumes number of cores is a power of two. * Assumes number of cores is a power of two.
...@@ -763,9 +781,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) ...@@ -763,9 +781,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
{ {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
int cpu = smp_processor_id(); int cpu = smp_processor_id();
int node = 0;
unsigned bits; unsigned bits;
#ifdef CONFIG_NUMA
int node = 0;
unsigned apicid = phys_proc_id[cpu]; unsigned apicid = phys_proc_id[cpu];
#endif
bits = 0; bits = 0;
while ((1 << bits) < c->x86_num_cores) while ((1 << bits) < c->x86_num_cores)
...@@ -777,25 +797,33 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) ...@@ -777,25 +797,33 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
phys_proc_id[cpu] >>= bits; phys_proc_id[cpu] >>= bits;
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* When an ACPI SRAT table is available use the mappings from SRAT
instead. */
node = phys_proc_id[cpu]; node = phys_proc_id[cpu];
if (acpi_numa > 0) {
if (apicid_to_node[apicid] != NUMA_NO_NODE) if (apicid_to_node[apicid] != NUMA_NO_NODE)
node = apicid_to_node[apicid]; node = apicid_to_node[apicid];
else if (!node_online(node)) {
printk(KERN_ERR /* Two possibilities here:
"SRAT: Didn't specify node for CPU %d(%d)\n", - The CPU is missing memory and no node was created.
cpu, apicid); In that case try picking one from a nearby CPU
} - The APIC IDs differ from the HyperTransport node IDs
which the K8 northbridge parsing fills in.
Assume they are all increased by a constant offset,
but in the same order as the HT nodeids.
If that doesn't result in a usable node fall back to the
path for the previous case. */
int ht_nodeid = apicid - (phys_proc_id[0] << bits);
if (ht_nodeid >= 0 &&
apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
node = apicid_to_node[ht_nodeid];
/* Pick a nearby node */
if (!node_online(node)) if (!node_online(node))
node = first_node(node_online_map); node = nearby_node(apicid);
}
cpu_to_node[cpu] = node; cpu_to_node[cpu] = node;
#endif
printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
cpu, c->x86_num_cores, node, cpu_core_id[cpu]); cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif #endif
#endif
} }
static int __init init_amd(struct cpuinfo_x86 *c) static int __init init_amd(struct cpuinfo_x86 *c)
......
...@@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
unsigned long prevbase; unsigned long prevbase;
struct node nodes[8]; struct node nodes[8];
int nodeid, i, nb; int nodeid, i, nb;
unsigned char nodeids[8];
int found = 0; int found = 0;
u32 reg; u32 reg;
unsigned numnodes; unsigned numnodes;
nodemask_t nodes_parsed; nodemask_t nodes_parsed;
unsigned dualcore = 0;
nodes_clear(nodes_parsed); nodes_clear(nodes_parsed);
...@@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = 0; prevbase = 0;
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
unsigned long base,limit; unsigned long base,limit;
u32 nodeid;
/* Undefined before E stepping, but hopefully 0 */
dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1;
base = read_pci_config(0, nb, 1, 0x40 + i*8); base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 7; nodeid = limit & 7;
nodeids[i] = nodeid;
if ((base & 3) == 0) { if ((base & 3) == 0) {
if (i < numnodes) if (i < numnodes)
printk("Skipping disabled node %d\n", i); printk("Skipping disabled node %d\n", i);
...@@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
if (nodes[i].start != nodes[i].end) { if (nodes[i].start != nodes[i].end) {
/* assume 1:1 NODE:CPU */ nodeid = nodeids[i];
cpu_to_node[i] = i; apicid_to_node[nodeid << dualcore] = i;
apicid_to_node[(nodeid << dualcore) + dualcore] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end); setup_node_bootmem(i, nodes[i].start, nodes[i].end);
} }
} }
......
...@@ -28,10 +28,12 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; ...@@ -28,10 +28,12 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
int memnode_shift; int memnode_shift;
u8 memnodemap[NODEMAPSIZE]; u8 memnodemap[NODEMAPSIZE];
unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
unsigned char apicid_to_node[256] __cpuinitdata = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE [0 ... NR_CPUS-1] = NUMA_NO_NODE
}; };
unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
int numa_off __initdata; int numa_off __initdata;
......
...@@ -113,6 +113,7 @@ ...@@ -113,6 +113,7 @@
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define MAX_IO_APICS 128 #define MAX_IO_APICS 128
#define MAX_LOCAL_APIC 256
/* /*
* All x86-64 systems are xAPIC compatible. * All x86-64 systems are xAPIC compatible.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment