Commit 31bc3858 authored by Vitaly Kuznetsov, committed by Linus Torvalds

memory-hotplug: add automatic onlining policy for the newly added memory

Currently, all newly added memory blocks remain in 'offline' state
unless someone onlines them. Some Linux distributions carry special udev
rules like:

  SUBSYSTEM=="memory", ACTION=="add", ATTR{state}=="offline", ATTR{state}="online"

to make this happen automatically.  This is not a great solution for
virtual machines where memory hotplug is being used to address high
memory pressure situations as such onlining is slow and a userspace
process doing this (udev) has a chance of being killed by the OOM killer,
as it will probably need to allocate some memory.

Introduce default policy for the newly added memory blocks in
/sys/devices/system/memory/auto_online_blocks file with two possible
values: "offline" which preserves the current behavior and "online"
which causes all newly added memory blocks to go online as soon as
they're added.  The default is "offline".
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Kay Sievers <kay@vrfy.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9cb65bc3
...@@ -256,10 +256,27 @@ If the memory block is offline, you'll read "offline". ...@@ -256,10 +256,27 @@ If the memory block is offline, you'll read "offline".
5.2. How to online memory 5.2. How to online memory
------------ ------------
Even if the memory is hot-added, it is not at ready-to-use state. When the memory is hot-added, the kernel decides whether or not to "online"
For using newly added memory, you have to "online" the memory block. it according to the policy which can be read from "auto_online_blocks" file:
For onlining, you have to write "online" to the memory block's state file as: % cat /sys/devices/system/memory/auto_online_blocks
The default is "offline" which means the newly added memory is not in a
ready-to-use state and you have to "online" the newly added memory blocks
manually. Automatic onlining can be requested by writing "online" to
"auto_online_blocks" file:
% echo online > /sys/devices/system/memory/auto_online_blocks
This sets a global policy and impacts all memory blocks that will subsequently
be hotplugged. Currently offline blocks keep their state. It is possible, under
certain circumstances, that some memory blocks will be added but will fail to
online. User space tools can check their "state" files
(/sys/devices/system/memory/memoryXXX/state) and try to online them manually.
If the automatic onlining wasn't requested, failed, or some memory block was
offlined, it is possible to change the individual block's state by writing to the
"state" file:
% echo online > /sys/devices/system/memory/memoryXXX/state % echo online > /sys/devices/system/memory/memoryXXX/state
......
...@@ -251,7 +251,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t ...@@ -251,7 +251,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t
return ret; return ret;
} }
static int memory_block_change_state(struct memory_block *mem, int memory_block_change_state(struct memory_block *mem,
unsigned long to_state, unsigned long from_state_req) unsigned long to_state, unsigned long from_state_req)
{ {
int ret = 0; int ret = 0;
...@@ -438,6 +438,37 @@ print_block_size(struct device *dev, struct device_attribute *attr, ...@@ -438,6 +438,37 @@ print_block_size(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL); static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL);
/*
* Memory auto online policy.
*/
/*
 * sysfs read handler for the auto_online_blocks attribute.
 *
 * Writes "online\n" into @buf when memhp_auto_online is set and
 * "offline\n" otherwise. @dev and @attr are not used. Returns the value
 * reported by sprintf(), i.e. the number of characters written.
 */
static ssize_t
show_auto_online_blocks(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	const char *policy = memhp_auto_online ? "online\n" : "offline\n";

	return sprintf(buf, "%s", policy);
}
/*
 * sysfs write handler for the auto_online_blocks attribute.
 *
 * Accepts exactly "online" or "offline" (compared with sysfs_streq())
 * and updates memhp_auto_online accordingly. Any other input leaves the
 * policy untouched and yields -EINVAL. On success the full @count is
 * returned. @dev and @attr are not used.
 */
static ssize_t
store_auto_online_blocks(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
{
	bool want_online = sysfs_streq(buf, "online");

	/* Reject anything that is neither of the two known policies. */
	if (!want_online && !sysfs_streq(buf, "offline"))
		return -EINVAL;

	memhp_auto_online = want_online;
	return count;
}
/*
 * Declare the "auto_online_blocks" device attribute with mode 0644,
 * read through show_auto_online_blocks() and written through
 * store_auto_online_blocks().
 */
static DEVICE_ATTR(auto_online_blocks, 0644, show_auto_online_blocks,
store_auto_online_blocks);
/* /*
* Some architectures will have custom drivers to do this, and * Some architectures will have custom drivers to do this, and
* will not need to do it from userspace. The fake hot-add code * will not need to do it from userspace. The fake hot-add code
...@@ -746,6 +777,7 @@ static struct attribute *memory_root_attrs[] = { ...@@ -746,6 +777,7 @@ static struct attribute *memory_root_attrs[] = {
#endif #endif
&dev_attr_block_size_bytes.attr, &dev_attr_block_size_bytes.attr,
&dev_attr_auto_online_blocks.attr,
NULL NULL
}; };
......
...@@ -338,7 +338,7 @@ static enum bp_state reserve_additional_memory(void) ...@@ -338,7 +338,7 @@ static enum bp_state reserve_additional_memory(void)
} }
#endif #endif
rc = add_memory_resource(nid, resource); rc = add_memory_resource(nid, resource, false);
if (rc) { if (rc) {
pr_warn("Cannot add additional memory (%i)\n", rc); pr_warn("Cannot add additional memory (%i)\n", rc);
goto err; goto err;
......
...@@ -109,6 +109,9 @@ extern void unregister_memory_notifier(struct notifier_block *nb); ...@@ -109,6 +109,9 @@ extern void unregister_memory_notifier(struct notifier_block *nb);
extern int register_memory_isolate_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb);
extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
extern int register_new_memory(int, struct mem_section *); extern int register_new_memory(int, struct mem_section *);
extern int memory_block_change_state(struct memory_block *mem,
unsigned long to_state,
unsigned long from_state_req);
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
extern int unregister_memory_section(struct mem_section *); extern int unregister_memory_section(struct mem_section *);
#endif #endif
......
...@@ -99,6 +99,8 @@ extern void __online_page_free(struct page *page); ...@@ -99,6 +99,8 @@ extern void __online_page_free(struct page *page);
extern int try_online_node(int nid); extern int try_online_node(int nid);
extern bool memhp_auto_online;
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
extern bool is_pageblock_removable_nolock(struct page *page); extern bool is_pageblock_removable_nolock(struct page *page);
extern int arch_remove_memory(u64 start, u64 size); extern int arch_remove_memory(u64 start, u64 size);
...@@ -267,7 +269,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} ...@@ -267,7 +269,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *)); void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource); extern int add_memory_resource(int nid, struct resource *resource, bool online);
extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
bool for_device); bool for_device);
extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
......
...@@ -77,6 +77,9 @@ static struct { ...@@ -77,6 +77,9 @@ static struct {
#define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map) #define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map)
#define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map) #define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map)
bool memhp_auto_online;
EXPORT_SYMBOL_GPL(memhp_auto_online);
void get_online_mems(void) void get_online_mems(void)
{ {
might_sleep(); might_sleep();
...@@ -1261,8 +1264,13 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default, ...@@ -1261,8 +1264,13 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
return zone_default; return zone_default;
} }
/*
 * Per-block callback handed to walk_memory_range(): asks for @mem to be
 * moved to MEM_ONLINE, requiring that it is currently MEM_OFFLINE.
 * @arg is not used. Returns whatever memory_block_change_state() returns.
 */
static int online_memory_block(struct memory_block *mem, void *arg)
{
	int rc;

	rc = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
	return rc;
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
int __ref add_memory_resource(int nid, struct resource *res) int __ref add_memory_resource(int nid, struct resource *res, bool online)
{ {
u64 start, size; u64 start, size;
pg_data_t *pgdat = NULL; pg_data_t *pgdat = NULL;
...@@ -1322,6 +1330,11 @@ int __ref add_memory_resource(int nid, struct resource *res) ...@@ -1322,6 +1330,11 @@ int __ref add_memory_resource(int nid, struct resource *res)
/* create new memmap entry */ /* create new memmap entry */
firmware_map_add_hotplug(start, start + size, "System RAM"); firmware_map_add_hotplug(start, start + size, "System RAM");
/* online pages if requested */
if (online)
walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
NULL, online_memory_block);
goto out; goto out;
error: error:
...@@ -1345,7 +1358,7 @@ int __ref add_memory(int nid, u64 start, u64 size) ...@@ -1345,7 +1358,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
if (IS_ERR(res)) if (IS_ERR(res))
return PTR_ERR(res); return PTR_ERR(res);
ret = add_memory_resource(nid, res); ret = add_memory_resource(nid, res, memhp_auto_online);
if (ret < 0) if (ret < 0)
release_memory_resource(res); release_memory_resource(res);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment