Commit 122e093c authored by Mike Rapoport, committed by Linus Torvalds

mm/page_alloc: fix memory map initialization for descending nodes

On systems with memory nodes sorted in descending order, for instance Dell
Precision WorkStation T5500, the struct pages for higher PFNs and,
respectively, lower nodes could be overwritten by the initialization of
struct pages corresponding to the holes in the memory sections.

For example for the below memory layout

[    0.245624] Early memory node ranges
[    0.248496]   node   1: [mem 0x0000000000001000-0x0000000000090fff]
[    0.251376]   node   1: [mem 0x0000000000100000-0x00000000dbdf8fff]
[    0.254256]   node   1: [mem 0x0000000100000000-0x0000001423ffffff]
[    0.257144]   node   0: [mem 0x0000001424000000-0x0000002023ffffff]

the range 0x1424000000 - 0x1428000000 at the beginning of node 0 starts in
the middle of a section and will be treated as a hole during the
initialization of the last section in node 1.

The wrong initialization of the memory map causes panic on boot when
CONFIG_DEBUG_VM is enabled.

Reorder the loops of the memory map initialization so that the outer loop
always iterates over populated memory regions in ascending order
and the inner loop selects the zone corresponding to the PFN range.

This way, initialization of the struct pages for the memory holes will
always be done only for the ranges that are actually not populated.

[akpm@linux-foundation.org: coding style fixes]

Link: https://lkml.kernel.org/r/YNXlMqBbL+tBG7yq@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213073
Link: https://lkml.kernel.org/r/20210624062305.10940-1-rppt@kernel.org
Fixes: 0740a50b ("mm/page_alloc.c: refactor initialization of struct page for holes in memory layout")
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Boris Petkov <bp@alien8.de>
Cc: Robert Shteynfeld <robert.shteynfeld@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c24d3732
...@@ -2474,7 +2474,6 @@ extern void set_dma_reserve(unsigned long new_dma_reserve); ...@@ -2474,7 +2474,6 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_range(unsigned long, int, unsigned long, extern void memmap_init_range(unsigned long, int, unsigned long,
unsigned long, unsigned long, enum meminit_context, unsigned long, unsigned long, enum meminit_context,
struct vmem_altmap *, int migratetype); struct vmem_altmap *, int migratetype);
extern void memmap_init_zone(struct zone *zone);
extern void setup_per_zone_wmarks(void); extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void); extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void); extern void mem_init(void);
......
...@@ -6400,7 +6400,7 @@ void __ref memmap_init_zone_device(struct zone *zone, ...@@ -6400,7 +6400,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
return; return;
/* /*
* The call to memmap_init_zone should have already taken care * The call to memmap_init should have already taken care
* of the pages reserved for the memmap, so we can just jump to * of the pages reserved for the memmap, so we can just jump to
* the end of that region and start processing the device pages. * the end of that region and start processing the device pages.
*/ */
...@@ -6465,7 +6465,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) ...@@ -6465,7 +6465,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
/* /*
* Only struct pages that correspond to ranges defined by memblock.memory * Only struct pages that correspond to ranges defined by memblock.memory
* are zeroed and initialized by going through __init_single_page() during * are zeroed and initialized by going through __init_single_page() during
* memmap_init_zone(). * memmap_init_zone_range().
* *
* But, there could be struct pages that correspond to holes in * But, there could be struct pages that correspond to holes in
* memblock.memory. This can happen because of the following reasons: * memblock.memory. This can happen because of the following reasons:
...@@ -6484,7 +6484,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) ...@@ -6484,7 +6484,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
* zone/node above the hole except for the trailing pages in the last * zone/node above the hole except for the trailing pages in the last
* section that will be appended to the zone/node below. * section that will be appended to the zone/node below.
*/ */
static u64 __meminit init_unavailable_range(unsigned long spfn, static void __init init_unavailable_range(unsigned long spfn,
unsigned long epfn, unsigned long epfn,
int zone, int node) int zone, int node)
{ {
...@@ -6502,56 +6502,77 @@ static u64 __meminit init_unavailable_range(unsigned long spfn, ...@@ -6502,56 +6502,77 @@ static u64 __meminit init_unavailable_range(unsigned long spfn,
pgcnt++; pgcnt++;
} }
return pgcnt; if (pgcnt)
pr_info("On node %d, zone %s: %lld pages in unavailable ranges",
node, zone_names[zone], pgcnt);
} }
#else #else
static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn, static inline void init_unavailable_range(unsigned long spfn,
unsigned long epfn,
int zone, int node) int zone, int node)
{ {
return 0;
} }
#endif #endif
void __meminit __weak memmap_init_zone(struct zone *zone) static void __init memmap_init_zone_range(struct zone *zone,
unsigned long start_pfn,
unsigned long end_pfn,
unsigned long *hole_pfn)
{ {
unsigned long zone_start_pfn = zone->zone_start_pfn; unsigned long zone_start_pfn = zone->zone_start_pfn;
unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages; unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone); int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
static unsigned long hole_pfn;
unsigned long start_pfn, end_pfn;
u64 pgcnt = 0;
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn); start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn); end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
if (end_pfn > start_pfn) if (start_pfn >= end_pfn)
memmap_init_range(end_pfn - start_pfn, nid, return;
zone_id, start_pfn, zone_end_pfn,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
if (*hole_pfn < start_pfn)
init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
*hole_pfn = end_pfn;
}
static void __init memmap_init(void)
{
unsigned long start_pfn, end_pfn;
unsigned long hole_pfn = 0;
int i, j, zone_id, nid;
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
struct pglist_data *node = NODE_DATA(nid);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = node->node_zones + j;
if (hole_pfn < start_pfn) if (!populated_zone(zone))
pgcnt += init_unavailable_range(hole_pfn, start_pfn, continue;
zone_id, nid);
hole_pfn = end_pfn; memmap_init_zone_range(zone, start_pfn, end_pfn,
&hole_pfn);
zone_id = j;
}
} }
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
/* /*
* Initialize the hole in the range [zone_end_pfn, section_end]. * Initialize the memory map for hole in the range [memory_end,
* If zone boundary falls in the middle of a section, this hole * section_end].
* will be re-initialized during the call to this function for the * Append the pages in this hole to the highest zone in the last
* higher zone. * node.
* The call to init_unavailable_range() is outside the ifdef to
* silence the compiler warning about zone_id set but not used;
* for FLATMEM it is a nop anyway
*/ */
end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION); end_pfn = round_up(end_pfn, PAGES_PER_SECTION);
if (hole_pfn < end_pfn) if (hole_pfn < end_pfn)
pgcnt += init_unavailable_range(hole_pfn, end_pfn,
zone_id, nid);
#endif #endif
init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
if (pgcnt)
pr_info(" %s zone: %llu pages in unavailable ranges\n",
zone->name, pgcnt);
} }
static int zone_batchsize(struct zone *zone) static int zone_batchsize(struct zone *zone)
...@@ -7254,7 +7275,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat) ...@@ -7254,7 +7275,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
set_pageblock_order(); set_pageblock_order();
setup_usemap(zone); setup_usemap(zone);
init_currently_empty_zone(zone, zone->zone_start_pfn, size); init_currently_empty_zone(zone, zone->zone_start_pfn, size);
memmap_init_zone(zone);
} }
} }
...@@ -7780,6 +7800,8 @@ void __init free_area_init(unsigned long *max_zone_pfn) ...@@ -7780,6 +7800,8 @@ void __init free_area_init(unsigned long *max_zone_pfn)
node_set_state(nid, N_MEMORY); node_set_state(nid, N_MEMORY);
check_for_memory(pgdat, nid); check_for_memory(pgdat, nid);
} }
memmap_init();
} }
static int __init cmdline_parse_core(char *p, unsigned long *core, static int __init cmdline_parse_core(char *p, unsigned long *core,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment