Commit c246a213 authored by Michal Hocko, committed by Linus Torvalds

mm, memory_hotplug: do not assume ZONE_NORMAL is default kernel zone

Heiko Carstens has noticed that he can generate overlapping zones for
ZONE_DMA and ZONE_NORMAL:

  DMA      [mem 0x0000000000000000-0x000000007fffffff]
  Normal   [mem 0x0000000080000000-0x000000017fffffff]

  $ cat /sys/devices/system/memory/block_size_bytes
  10000000
  $ cat /sys/devices/system/memory/memory5/valid_zones
  DMA
  $ echo 0 > /sys/devices/system/memory/memory5/online
  $ cat /sys/devices/system/memory/memory5/valid_zones
  Normal
  $ echo 1 > /sys/devices/system/memory/memory5/online
  Normal

  $ cat /proc/zoneinfo
  Node 0, zone      DMA
  spanned  524288        <-----
  present  458752
  managed  455078
  start_pfn:           0 <-----

  Node 0, zone   Normal
  spanned  720896
  present  589824
  managed  571648
  start_pfn:           327680 <-----

The reason is that we assume that the default zone for kernel onlining
is ZONE_NORMAL.  This was a simplification introduced by the memory
hotplug rework and it is easily fixable by checking the range overlap in
the zone order and considering the first matching zone as the default
one.  If there is no such zone then assume ZONE_NORMAL as we have been
doing so far.

Fixes: "mm, memory_hotplug: do not associate hotadded memory to zones until online"
Link: http://lkml.kernel.org/r/20170601083746.4924-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a69578a1
@@ -419,7 +419,7 @@ static ssize_t show_valid_zones(struct device *dev,
 	nid = pfn_to_nid(start_pfn);
 	if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) {
-		strcat(buf, NODE_DATA(nid)->node_zones[ZONE_NORMAL].name);
+		strcat(buf, default_zone_for_pfn(nid, start_pfn, nr_pages)->name);
 		append = true;
 	}
...
@@ -311,4 +311,6 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
 extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
 		int online_type);
+extern struct zone *default_zone_for_pfn(int nid, unsigned long pfn,
+		unsigned long nr_pages);
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
@@ -1028,7 +1028,7 @@ bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
-	struct zone *normal_zone = &pgdat->node_zones[ZONE_NORMAL];
+	struct zone *default_zone = default_zone_for_pfn(nid, pfn, nr_pages);

 	/*
 	 * TODO there shouldn't be any inherent reason to have ZONE_NORMAL
@@ -1042,7 +1042,7 @@ bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
 			return true;
 		return movable_zone->zone_start_pfn >= pfn + nr_pages;
 	} else if (online_type == MMOP_ONLINE_MOVABLE) {
-		return zone_end_pfn(normal_zone) <= pfn;
+		return zone_end_pfn(default_zone) <= pfn;
 	}

 	/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
@@ -1102,6 +1102,27 @@ void move_pfn_range_to_zone(struct zone *zone,
 	set_zone_contiguous(zone);
 }
/*
 * Pick the default kernel zone for onlining the pfn range
 * [start_pfn, start_pfn + nr_pages). Kernel zones are scanned in
 * zone order up to and including ZONE_NORMAL and the first one
 * intersecting the range wins; when none of them overlaps the
 * range, fall back to ZONE_NORMAL.
 */
struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
		unsigned long nr_pages)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	int i;

	for (i = 0; i <= ZONE_NORMAL; i++) {
		struct zone *candidate = &pgdat->node_zones[i];

		if (zone_intersects(candidate, start_pfn, nr_pages))
			return candidate;
	}

	return &pgdat->node_zones[ZONE_NORMAL];
}
 /*
  * Associates the given pfn range with the given node and the zone appropriate
  * for the given online type.
@@ -1110,7 +1131,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
 		unsigned long start_pfn, unsigned long nr_pages)
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
-	struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+	struct zone *zone = default_zone_for_pfn(nid, start_pfn, nr_pages);

 	if (online_type == MMOP_ONLINE_KEEP) {
 		struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
...
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment