Commit 91556237 authored by Tejun Heo

x86-64, NUMA: Kill numa_nodes[]

numa_nodes[] doesn't carry any information which isn't present in
numa_meminfo.  Each entry is simply min/max range of all the memblks
for the node.  This is not only redundant but also inaccurate when
memblks for different nodes interleave - for example,
find_node_by_addr() can return the wrong nodeid.

Kill numa_nodes[] and always use numa_meminfo instead.

* nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and
  now operates on numa_meminfo and returns bool.

* setup_node_bootmem() needs min/max range.  Compute the range on the
  fly.  setup_node_bootmem() invocation is restructured to use outer
  loop instead of hardcoding the double invocations.

* find_node_by_addr() now operates on numa_meminfo.

* setup_physnodes() builds physnodes[] from memblks.  This will go
  away when emulation code is updated to use struct numa_meminfo.

This patch also makes the following misc changes.

* Clearing of nodes_add[] is converted to memset().

* numa_add_memblk() in amd_numa_init() is moved down a bit for
  consistency.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
parent a844ef46
...@@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, ...@@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
extern nodemask_t cpu_nodes_parsed __initdata; extern nodemask_t cpu_nodes_parsed __initdata;
extern nodemask_t mem_nodes_parsed __initdata; extern nodemask_t mem_nodes_parsed __initdata;
extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
extern int __cpuinit numa_cpu_node(int cpu); extern int __cpuinit numa_cpu_node(int cpu);
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
......
...@@ -165,12 +165,8 @@ int __init amd_numa_init(void) ...@@ -165,12 +165,8 @@ int __init amd_numa_init(void)
pr_info("Node %d MemBase %016lx Limit %016lx\n", pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit); nodeid, base, limit);
numa_nodes[nodeid].start = base;
numa_nodes[nodeid].end = limit;
numa_add_memblk(nodeid, base, limit);
prevbase = base; prevbase = base;
numa_add_memblk(nodeid, base, limit);
node_set(nodeid, mem_nodes_parsed); node_set(nodeid, mem_nodes_parsed);
node_set(nodeid, cpu_nodes_parsed); node_set(nodeid, cpu_nodes_parsed);
} }
......
...@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size; ...@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
static struct numa_meminfo numa_meminfo __initdata; static struct numa_meminfo numa_meminfo __initdata;
struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
/* /*
* Given a shift value, try to populate memnodemap[] * Given a shift value, try to populate memnodemap[]
* Returns : * Returns :
...@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi) ...@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
* Sanity check to catch more bad NUMA configurations (they are amazingly * Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory. * common). Make sure the nodes cover all memory.
*/ */
static int __init nodes_cover_memory(const struct bootnode *nodes) static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
{ {
unsigned long numaram, e820ram; unsigned long numaram, e820ram;
int i; int i;
numaram = 0; numaram = 0;
for_each_node_mask(i, mem_nodes_parsed) { for (i = 0; i < mi->nr_blks; i++) {
unsigned long s = nodes[i].start >> PAGE_SHIFT; unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
unsigned long e = nodes[i].end >> PAGE_SHIFT; unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
numaram += e - s; numaram += e - s;
numaram -= __absent_pages_in_range(i, s, e); numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
if ((long)numaram < 0) if ((long)numaram < 0)
numaram = 0; numaram = 0;
} }
...@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) ...@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
(numaram << PAGE_SHIFT) >> 20, (numaram << PAGE_SHIFT) >> 20,
(e820ram << PAGE_SHIFT) >> 20); (e820ram << PAGE_SHIFT) >> 20);
return 0; return false;
} }
return 1; return true;
} }
static int __init numa_register_memblks(struct numa_meminfo *mi) static int __init numa_register_memblks(struct numa_meminfo *mi)
{ {
int i; int i, j, nid;
/* Account for nodes with cpus and no memory */ /* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed); nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
...@@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) ...@@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
/* for out of order entries */ /* for out of order entries */
sort_node_map(); sort_node_map();
if (!nodes_cover_memory(numa_nodes)) if (!numa_meminfo_cover_memory(mi))
return -EINVAL; return -EINVAL;
init_memory_mapping_high(); init_memory_mapping_high();
/* Finally register nodes. */
for_each_node_mask(i, node_possible_map)
setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
/* /*
* Try again in case setup_node_bootmem missed one due to missing * Finally register nodes. Do it twice in case setup_node_bootmem
* bootmem. * missed one due to missing bootmem.
*/ */
for_each_node_mask(i, node_possible_map) for (i = 0; i < 2; i++) {
if (!node_online(i)) for_each_node_mask(nid, node_possible_map) {
setup_node_bootmem(i, numa_nodes[i].start, u64 start = (u64)max_pfn << PAGE_SHIFT;
numa_nodes[i].end); u64 end = 0;
if (node_online(nid))
continue;
for (j = 0; j < mi->nr_blks; j++) {
if (nid != mi->blk[j].nid)
continue;
start = min(mi->blk[j].start, start);
end = max(mi->blk[j].end, end);
}
if (start < end)
setup_node_bootmem(nid, start, end);
}
}
return 0; return 0;
} }
...@@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str) ...@@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)
int __init find_node_by_addr(unsigned long addr) int __init find_node_by_addr(unsigned long addr)
{ {
int ret = NUMA_NO_NODE; const struct numa_meminfo *mi = &numa_meminfo;
int i; int i;
for_each_node_mask(i, mem_nodes_parsed) { for (i = 0; i < mi->nr_blks; i++) {
/* /*
* Find the real node that this emulated node appears on. For * Find the real node that this emulated node appears on. For
* the sake of simplicity, we only use a real node's starting * the sake of simplicity, we only use a real node's starting
* address to determine which emulated node it appears on. * address to determine which emulated node it appears on.
*/ */
if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
ret = i; return mi->blk[i].nid;
break;
}
} }
return ret; return NUMA_NO_NODE;
} }
static int __init setup_physnodes(unsigned long start, unsigned long end) static int __init setup_physnodes(unsigned long start, unsigned long end)
{ {
const struct numa_meminfo *mi = &numa_meminfo;
int ret = 0; int ret = 0;
int i; int i;
memset(physnodes, 0, sizeof(physnodes)); memset(physnodes, 0, sizeof(physnodes));
for_each_node_mask(i, mem_nodes_parsed) { for (i = 0; i < mi->nr_blks; i++) {
physnodes[i].start = numa_nodes[i].start; int nid = mi->blk[i].nid;
physnodes[i].end = numa_nodes[i].end;
if (physnodes[nid].start == physnodes[nid].end) {
physnodes[nid].start = mi->blk[i].start;
physnodes[nid].end = mi->blk[i].end;
} else {
physnodes[nid].start = min(physnodes[nid].start,
mi->blk[i].start);
physnodes[nid].end = max(physnodes[nid].end,
mi->blk[i].end);
}
} }
/* /*
...@@ -809,8 +826,6 @@ static int dummy_numa_init(void) ...@@ -809,8 +826,6 @@ static int dummy_numa_init(void)
node_set(0, cpu_nodes_parsed); node_set(0, cpu_nodes_parsed);
node_set(0, mem_nodes_parsed); node_set(0, mem_nodes_parsed);
numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
numa_nodes[0].start = 0;
numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
return 0; return 0;
} }
...@@ -841,7 +856,6 @@ void __init initmem_init(void) ...@@ -841,7 +856,6 @@ void __init initmem_init(void)
nodes_clear(node_possible_map); nodes_clear(node_possible_map);
nodes_clear(node_online_map); nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo)); memset(&numa_meminfo, 0, sizeof(numa_meminfo));
memset(numa_nodes, 0, sizeof(numa_nodes));
remove_all_active_ranges(); remove_all_active_ranges();
if (numa_init[i]() < 0) if (numa_init[i]() < 0)
......
...@@ -37,13 +37,9 @@ static __init int setup_node(int pxm) ...@@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
static __init void bad_srat(void) static __init void bad_srat(void)
{ {
int i;
printk(KERN_ERR "SRAT: SRAT not used.\n"); printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1; acpi_numa = -1;
for (i = 0; i < MAX_NUMNODES; i++) { memset(nodes_add, 0, sizeof(nodes_add));
numa_nodes[i].start = numa_nodes[i].end = 0;
nodes_add[i].start = nodes_add[i].end = 0;
}
} }
static __init inline int srat_disabled(void) static __init inline int srat_disabled(void)
...@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end) ...@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
void __init void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{ {
struct bootnode *nd;
unsigned long start, end; unsigned long start, end;
int node, pxm; int node, pxm;
...@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) ...@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end); start, end);
if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) { if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
nd = &numa_nodes[node]; node_set(node, mem_nodes_parsed);
if (!node_test_and_set(node, mem_nodes_parsed)) { else
nd->start = start;
nd->end = end;
} else {
if (start < nd->start)
nd->start = start;
if (nd->end < end)
nd->end = end;
}
} else
update_nodes_add(node, start, end); update_nodes_add(node, start, end);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment