Commit 1a27fc0a authored by Yinghai Lu's avatar Yinghai Lu Committed by Ingo Molnar

x86_64: fix setup_node_bootmem to support big mem excluding with memmap

typical case: four sockets system, every node has 4g ram, and we are using:

	memmap=10g$4g

to mask out memory on node1 and node2

when numa is enabled, early_node_mem is used to get node_data and node_bootmap.

if it can not get memory from the same node with find_e820_area(), it will
use alloc_bootmem to get buff from previous nodes.

so check it and print out some info about it.

need to move early_res_to_bootmem into every setup_node_bootmem.
and it takes range that node has. otherwise alloc_bootmem could return addr
that reserved early.

depends on "mm: make reserve_bootmem can crossed the nodes".
Signed-off-by: default avatarYinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 8b3cd09e
...@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end) ...@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end)
early_res[j - 1].end = 0; early_res[j - 1].end = 0;
} }
void __init early_res_to_bootmem(void) void __init early_res_to_bootmem(unsigned long start, unsigned long end)
{ {
int i; int i;
unsigned long final_start, final_end;
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
struct early_res *r = &early_res[i]; struct early_res *r = &early_res[i];
printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, final_start = max(start, r->start);
r->start, r->end - 1, r->name); final_end = min(end, r->end);
reserve_bootmem_generic(r->start, r->end - r->start); if (final_start >= final_end)
continue;
printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
final_start, final_end - 1, r->name);
reserve_bootmem_generic(final_start, final_end - final_start);
} }
} }
......
...@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn); free_bootmem_with_active_regions(0, end_pfn);
early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
} }
#endif #endif
...@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p)
contig_initmem_init(0, end_pfn); contig_initmem_init(0, end_pfn);
#endif #endif
early_res_to_bootmem();
dma32_reserve_bootmem(); dma32_reserve_bootmem();
#ifdef CONFIG_ACPI_SLEEP #ifdef CONFIG_ACPI_SLEEP
......
...@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
unsigned long bootmap_start, nodedata_phys; unsigned long bootmap_start, nodedata_phys;
void *bootmap; void *bootmap;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
int nid;
start = round_up(start, ZONE_ALIGN); start = round_up(start, ZONE_ALIGN);
...@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
/* Find a place for the bootmem map */ /*
* Find a place for the bootmem map
* nodedata_phys could be on other nodes by alloc_bootmem,
* so need to sure bootmap_start not to be small, otherwise
* early_node_mem will get that with find_e820_area instead
* of alloc_bootmem, that could clash with reserved range
*/
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
nid = phys_to_nid(nodedata_phys);
if (nid == nodeid)
bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
else
bootmap_start = round_up(start, PAGE_SIZE);
/* /*
* SMP_CAHCE_BYTES could be enough, but init_bootmem_node like * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE * to use that to align to PAGE_SIZE
...@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
free_bootmem_with_active_regions(nodeid, end); free_bootmem_with_active_regions(nodeid, end);
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, /*
BOOTMEM_DEFAULT); * convert early reserve to bootmem reserve earlier
* otherwise early_node_mem could use early reserved mem
* on previous node
*/
early_res_to_bootmem(start, end);
/*
* in some case early_node_mem could use alloc_bootmem
* to get range on other node, don't reserve that again
*/
if (nid != nodeid)
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
else
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
pgdat_size, BOOTMEM_DEFAULT);
nid = phys_to_nid(bootmap_start);
if (nid != nodeid)
printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
else
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
#ifdef CONFIG_ACPI_NUMA #ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid); srat_reserve_add_area(nodeid);
#endif #endif
......
...@@ -49,7 +49,7 @@ extern void update_e820(void); ...@@ -49,7 +49,7 @@ extern void update_e820(void);
extern void reserve_early(unsigned long start, unsigned long end, char *name); extern void reserve_early(unsigned long start, unsigned long end, char *name);
extern void free_early(unsigned long start, unsigned long end); extern void free_early(unsigned long start, unsigned long end);
extern void early_res_to_bootmem(void); extern void early_res_to_bootmem(unsigned long start, unsigned long end);
#endif/*!__ASSEMBLY__*/ #endif/*!__ASSEMBLY__*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment