Commit 1a8c64e1 authored by Aneesh Kumar K.V, committed by Andrew Morton

mm/memory_hotplug: embed vmem_altmap details in memory block

With memmap on memory, some architectures need more details w.r.t. the
altmap, such as base_pfn and end_pfn, to unmap the vmemmap memory.
Instead of computing them again when we remove a memory block, embed the
vmem_altmap details in struct memory_block when the memmap-on-memory
feature is in use.
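
As an illustration of the shape of the change, here is a minimal
standalone C sketch with simplified stand-in types (block_add() and
block_remove() are invented names, not kernel functions): hot-add
allocates the altmap once and embeds the pointer in the memory block,
and hot-remove consumes that same descriptor instead of rebuilding it.

#include <stdlib.h>

/* Simplified stand-ins for the kernel types; illustration only. */
struct vmem_altmap {
	unsigned long base_pfn;	/* first pfn of the hot-added range */
	unsigned long free;	/* pages reserved for the vmemmap */
	unsigned long alloc;	/* pages actually handed out */
};

struct memory_block {
	/* before: unsigned long nr_vmemmap_pages; */
	struct vmem_altmap *altmap;	/* after: keep the whole descriptor */
};

/* Hot-add: allocate the altmap once and embed it in the block. */
static int block_add(struct memory_block *mem, unsigned long base_pfn,
		     unsigned long vmemmap_pages)
{
	struct vmem_altmap *altmap = calloc(1, sizeof(*altmap));

	if (!altmap)
		return -1;	/* mirrors the -ENOMEM path in the diff below */
	altmap->base_pfn = base_pfn;
	altmap->free = vmemmap_pages;
	mem->altmap = altmap;
	return 0;
}

/* Hot-remove: reuse the stored details instead of recomputing them. */
static void block_remove(struct memory_block *mem)
{
	struct vmem_altmap *altmap = mem->altmap;

	mem->altmap = NULL;	/* enables a "was it freed?" debug check */
	/* ...unmap vmemmap using altmap->base_pfn and altmap->free... */
	free(altmap);
}

int main(void)
{
	struct memory_block mem = { .altmap = NULL };

	if (!block_add(&mem, 0x100000, 512))
		block_remove(&mem);
	return 0;
}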

[yangyingliang@huawei.com: fix error return code in add_memory_resource()]
  Link: https://lkml.kernel.org/r/20230809081552.1351184-1-yangyingliang@huawei.com
Link: https://lkml.kernel.org/r/20230808091501.287660-7-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 603fd64d
drivers/base/memory.c

@@ -105,7 +105,8 @@ EXPORT_SYMBOL(unregister_memory_notifier);
 static void memory_block_release(struct device *dev)
 {
 	struct memory_block *mem = to_memory_block(dev);
-
+	/* Verify that the altmap is freed */
+	WARN_ON(mem->altmap);
 	kfree(mem);
 }
@@ -183,7 +184,7 @@ static int memory_block_online(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+	unsigned long nr_vmemmap_pages = 0;
 	struct zone *zone;
 	int ret;
@@ -200,6 +201,9 @@ static int memory_block_online(struct memory_block *mem)
 	 * stage helps to keep accounting easier to follow - e.g vmemmaps
 	 * belong to the same zone as the memory they backed.
 	 */
+	if (mem->altmap)
+		nr_vmemmap_pages = mem->altmap->free;
+
 	if (nr_vmemmap_pages) {
 		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
 		if (ret)
@@ -230,7 +234,7 @@ static int memory_block_offline(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+	unsigned long nr_vmemmap_pages = 0;
 	int ret;

 	if (!mem->zone)
@@ -240,6 +244,9 @@ static int memory_block_offline(struct memory_block *mem)
 	 * Unaccount before offlining, such that unpopulated zone and kthreads
 	 * can properly be torn down in offline_pages().
 	 */
+	if (mem->altmap)
+		nr_vmemmap_pages = mem->altmap->free;
+
 	if (nr_vmemmap_pages)
 		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
 					  -nr_vmemmap_pages);
@@ -726,7 +733,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
 #endif

 static int add_memory_block(unsigned long block_id, unsigned long state,
-			    unsigned long nr_vmemmap_pages,
+			    struct vmem_altmap *altmap,
 			    struct memory_group *group)
 {
 	struct memory_block *mem;
@@ -744,7 +751,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
 	mem->start_section_nr = block_id * sections_per_block;
 	mem->state = state;
 	mem->nid = NUMA_NO_NODE;
-	mem->nr_vmemmap_pages = nr_vmemmap_pages;
+	mem->altmap = altmap;
 	INIT_LIST_HEAD(&mem->group_next);

 #ifndef CONFIG_NUMA
@@ -783,14 +790,14 @@ static int __init add_boot_memory_block(unsigned long base_section_nr)
 	if (section_count == 0)
 		return 0;
 	return add_memory_block(memory_block_id(base_section_nr),
-				MEM_ONLINE, 0, NULL);
+				MEM_ONLINE, NULL, NULL);
 }

 static int add_hotplug_memory_block(unsigned long block_id,
-				    unsigned long nr_vmemmap_pages,
+				    struct vmem_altmap *altmap,
 				    struct memory_group *group)
 {
-	return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
+	return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
 }

 static void remove_memory_block(struct memory_block *memory)
@@ -818,7 +825,7 @@ static void remove_memory_block(struct memory_block *memory)
  * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				unsigned long vmemmap_pages,
+				struct vmem_altmap *altmap,
 				struct memory_group *group)
 {
 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));

@@ -832,7 +839,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
 		return -EINVAL;

 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-		ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
+		ret = add_hotplug_memory_block(block_id, altmap, group);
 		if (ret)
 			break;
 	}
include/linux/memory.h

@@ -77,11 +77,7 @@ struct memory_block {
 	 */
 	struct zone *zone;
 	struct device dev;
-	/*
-	 * Number of vmemmap pages. These pages
-	 * lay at the beginning of the memory block.
-	 */
-	unsigned long nr_vmemmap_pages;
+	struct vmem_altmap *altmap;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
@@ -147,7 +143,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				unsigned long vmemmap_pages,
+				struct vmem_altmap *altmap,
 				struct memory_group *group);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
 extern void memory_dev_init(void);
mm/memory_hotplug.c

@@ -1439,7 +1439,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
 		if (mhp_supports_memmap_on_memory(size)) {
 			mhp_altmap.free = memory_block_memmap_on_memory_pages();
-			params.altmap = &mhp_altmap;
+			params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
+			if (!params.altmap) {
+				ret = -ENOMEM;
+				goto error;
+			}
+
+			memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap));
 		}
 		/* fallback to not using altmap */
 	}
@@ -1447,13 +1453,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	/* call arch's memory hotadd */
 	ret = arch_add_memory(nid, start, size, &params);
 	if (ret < 0)
-		goto error;
+		goto error_free;

 	/* create memory block devices after memory was added */
-	ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
+	ret = create_memory_block_devices(start, size, params.altmap, group);
 	if (ret) {
 		arch_remove_memory(start, size, NULL);
-		goto error;
+		goto error_free;
 	}

 	if (new_node) {
@@ -1490,6 +1496,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 		walk_memory_blocks(start, size, NULL, online_memory_block);

 	return ret;
+error_free:
+	kfree(params.altmap);
 error:
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
 		memblock_remove(start, size);
@@ -2056,12 +2064,18 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
 	return 0;
 }

-static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+static int test_has_altmap_cb(struct memory_block *mem, void *arg)
 {
+	struct memory_block **mem_ptr = (struct memory_block **)arg;
 	/*
-	 * If not set, continue with the next block.
+	 * return the memblock if we have altmap
+	 * and break callback.
 	 */
-	return mem->nr_vmemmap_pages;
+	if (mem->altmap) {
+		*mem_ptr = mem;
+		return 1;
+	}
+	return 0;
 }

 static int check_cpu_on_node(int nid)
@@ -2136,10 +2150,9 @@ EXPORT_SYMBOL(try_offline_node);

 static int __ref try_remove_memory(u64 start, u64 size)
 {
-	struct vmem_altmap mhp_altmap = {};
-	struct vmem_altmap *altmap = NULL;
-	unsigned long nr_vmemmap_pages;
+	struct memory_block *mem;
 	int rc = 0, nid = NUMA_NO_NODE;
+	struct vmem_altmap *altmap = NULL;

 	BUG_ON(check_hotplug_memory_range(start, size));
@@ -2161,25 +2174,20 @@ static int __ref try_remove_memory(u64 start, u64 size)
 	 * the same granularity it was added - a single memory block.
 	 */
 	if (mhp_memmap_on_memory()) {
-		nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
-						      get_nr_vmemmap_pages_cb);
-		if (nr_vmemmap_pages) {
+		rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
+		if (rc) {
 			if (size != memory_block_size_bytes()) {
 				pr_warn("Refuse to remove %#llx - %#llx,"
 					"wrong granularity\n",
 					start, start + size);
 				return -EINVAL;
 			}
+			altmap = mem->altmap;
 			/*
-			 * Let remove_pmd_table->free_hugepage_table do the
-			 * right thing if we used vmem_altmap when hot-adding
-			 * the range.
+			 * Mark altmap NULL so that we can add a debug
+			 * check on memblock free.
 			 */
-			mhp_altmap.base_pfn = PHYS_PFN(start);
-			mhp_altmap.free = nr_vmemmap_pages;
-			mhp_altmap.alloc = nr_vmemmap_pages;
-			altmap = &mhp_altmap;
+			mem->altmap = NULL;
 		}
 	}
@@ -2196,6 +2204,12 @@ static int __ref try_remove_memory(u64 start, u64 size)

 	arch_remove_memory(start, size, altmap);

+	/* Verify that all vmemmap pages have actually been freed. */
+	if (altmap) {
+		WARN(altmap->alloc, "Altmap not fully unmapped");
+		kfree(altmap);
+	}
+
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
 		memblock_phys_free(start, size);
 		memblock_remove(start, size);
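
The remove path above locates the memory block that carries the altmap by
walking blocks with a callback. As a small userspace model of that
convention (walk_blocks() and the sample data are invented for
illustration; only test_has_altmap_cb() mirrors the function in the diff),
note how a nonzero callback return stops the walk and the result travels
back through the opaque arg pointer:

#include <stdio.h>

/* Userspace model of the walk_memory_blocks() callback convention;
 * not kernel code. */
struct memory_block {
	int id;
	void *altmap;	/* non-NULL when the block carries an altmap */
};

typedef int (*walk_cb)(struct memory_block *mem, void *arg);

/* A nonzero callback return stops the walk and is propagated to the
 * caller, just as walk_memory_blocks() propagates it. */
static int walk_blocks(struct memory_block *blocks, int n, void *arg,
		       walk_cb cb)
{
	for (int i = 0; i < n; i++) {
		int ret = cb(&blocks[i], arg);

		if (ret)
			return ret;
	}
	return 0;
}

/* Same shape as test_has_altmap_cb() above: stash the matching block
 * in *arg and return 1 to break out of the walk. */
static int test_has_altmap_cb(struct memory_block *mem, void *arg)
{
	struct memory_block **mem_ptr = (struct memory_block **)arg;

	if (mem->altmap) {
		*mem_ptr = mem;
		return 1;
	}
	return 0;
}

int main(void)
{
	int dummy;
	struct memory_block blocks[] = {
		{ .id = 0, .altmap = NULL },
		{ .id = 1, .altmap = &dummy },
	};
	struct memory_block *mem = NULL;

	if (walk_blocks(blocks, 2, &mem, test_has_altmap_cb))
		printf("block %d carries the altmap\n", mem->id);
	return 0;
}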