Commit d3b88655 authored by Dan Williams

Merge branch 'for-5.7/numa' into libnvdimm-for-next

- Promote numa_map_to_online_node() to a cross-kernel generic facility.

- Save x86 numa information to allow for node-id lookups for reserved
  memory ranges, deploy that capability for the e820-pmem driver.

- Introduce phys_to_target_node() to facilitate drivers that want to
  know the NUMA node that would result if a given reserved address
  range were onlined.
parents 91bf79bc 7b27a862
...@@ -285,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, ...@@ -285,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
return 0; return 0;
} }
/*
 * Map @node to a node that is online.
 *
 * NUMA_NO_NODE and already-online nodes are returned unchanged.  For an
 * offline node, the online node with the smallest node_distance() from
 * @node is returned; the first online node is the starting candidate.
 */
static inline int papr_scm_node(int node)
{
	int shortest = INT_MAX;
	int nearest;
	int candidate;

	if (node == NUMA_NO_NODE)
		return node;
	if (node_online(node))
		return node;

	nearest = first_online_node;
	for_each_online_node(candidate) {
		int dist = node_distance(node, candidate);

		/* Strictly-less comparison: the first node at a given distance wins. */
		if (dist >= shortest)
			continue;
		shortest = dist;
		nearest = candidate;
	}

	return nearest;
}
static int papr_scm_nvdimm_init(struct papr_scm_priv *p) static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
{ {
struct device *dev = &p->pdev->dev; struct device *dev = &p->pdev->dev;
...@@ -349,7 +330,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ...@@ -349,7 +330,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
memset(&ndr_desc, 0, sizeof(ndr_desc)); memset(&ndr_desc, 0, sizeof(ndr_desc));
target_nid = dev_to_node(&p->pdev->dev); target_nid = dev_to_node(&p->pdev->dev);
online_nid = papr_scm_node(target_nid); online_nid = numa_map_to_online_node(target_nid);
ndr_desc.numa_node = online_nid; ndr_desc.numa_node = online_nid;
ndr_desc.target_node = target_nid; ndr_desc.target_node = target_nid;
ndr_desc.res = &p->res; ndr_desc.res = &p->res;
......
...@@ -1664,6 +1664,7 @@ config X86_PMEM_LEGACY ...@@ -1664,6 +1664,7 @@ config X86_PMEM_LEGACY
depends on PHYS_ADDR_T_64BIT depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV depends on BLK_DEV
select X86_PMEM_LEGACY_DEVICE select X86_PMEM_LEGACY_DEVICE
select NUMA_KEEP_MEMINFO if NUMA
select LIBNVDIMM select LIBNVDIMM
help help
Treat memory marked using the non-standard e820 type of 12 as used Treat memory marked using the non-standard e820 type of 12 as used
......
...@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata; ...@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data); EXPORT_SYMBOL(node_data);
static struct numa_meminfo numa_meminfo static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
#ifndef CONFIG_MEMORY_HOTPLUG static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
__initdata
#endif
;
static int numa_distance_cnt; static int numa_distance_cnt;
static u8 *numa_distance; static u8 *numa_distance;
...@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) ...@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
(mi->nr_blks - idx) * sizeof(mi->blk[0])); (mi->nr_blks - idx) * sizeof(mi->blk[0]));
} }
/**
 * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
 * @dst: numa_meminfo that receives the block at its tail
 * @idx: Index, within @src, of the memblk being moved
 * @src: numa_meminfo the memblk is taken out of
 *
 * The block is copied into the first unused slot of @dst, @dst's block
 * count is bumped, and the block is then dropped from @src via
 * numa_remove_memblk_from().
 */
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
					 struct numa_meminfo *src)
{
	struct numa_memblk *slot = &dst->blk[dst->nr_blks];

	*slot = src->blk[idx];
	dst->nr_blks++;
	numa_remove_memblk_from(idx, src);
}
/** /**
* numa_add_memblk - Add one numa_memblk to numa_meminfo * numa_add_memblk - Add one numa_memblk to numa_meminfo
* @nid: NUMA node ID of the new memblk * @nid: NUMA node ID of the new memblk
...@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) ...@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
for (i = 0; i < mi->nr_blks; i++) { for (i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i]; struct numa_memblk *bi = &mi->blk[i];
/* make sure all blocks are inside the limits */ /* move / save reserved memory ranges */
if (!memblock_overlaps_region(&memblock.memory,
bi->start, bi->end - bi->start)) {
numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
continue;
}
/* make sure all non-reserved blocks are inside the limits */
bi->start = max(bi->start, low); bi->start = max(bi->start, low);
bi->end = min(bi->end, high); bi->end = min(bi->end, high);
/* and there's no empty or non-exist block */ /* and there's no empty block */
if (bi->start >= bi->end || if (bi->start >= bi->end)
!memblock_overlaps_region(&memblock.memory,
bi->start, bi->end - bi->start))
numa_remove_memblk_from(i--, mi); numa_remove_memblk_from(i--, mi);
} }
...@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node); ...@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_NUMA_KEEP_MEMINFO
int memory_add_physaddr_to_nid(u64 start) static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
{ {
struct numa_meminfo *mi = &numa_meminfo;
int nid = mi->blk[0].nid;
int i; int i;
for (i = 0; i < mi->nr_blks; i++) for (i = 0; i < mi->nr_blks; i++)
if (mi->blk[i].start <= start && mi->blk[i].end > start) if (mi->blk[i].start <= start && mi->blk[i].end > start)
nid = mi->blk[i].nid; return mi->blk[i].nid;
return NUMA_NO_NODE;
}
/*
 * Look up the node id recorded for physical address @start.
 *
 * numa_meminfo is consulted first; only when it has no match
 * (NUMA_NO_NODE) does the search continue in numa_reserved_meminfo, so
 * that reserved ranges which might be hot-added later still resolve to
 * a node.  Returns NUMA_NO_NODE when neither table covers @start.
 */
int phys_to_target_node(phys_addr_t start)
{
	int nid = meminfo_to_nid(&numa_meminfo, start);

	if (nid == NUMA_NO_NODE)
		nid = meminfo_to_nid(&numa_reserved_meminfo, start);

	return nid;
}
EXPORT_SYMBOL_GPL(phys_to_target_node);
int memory_add_physaddr_to_nid(u64 start)
{
int nid = meminfo_to_nid(&numa_meminfo, start);
if (nid == NUMA_NO_NODE)
nid = numa_meminfo.blk[0].nid;
return nid; return nid;
} }
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
......
...@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm) ...@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
} }
EXPORT_SYMBOL(acpi_map_pxm_to_node); EXPORT_SYMBOL(acpi_map_pxm_to_node);
/**
 * acpi_map_pxm_to_online_node - Map proximity ID to online node
 * @pxm: ACPI proximity ID
 *
 * Like acpi_map_pxm_to_node(), except that the result is steered towards
 * an online node.  A NUMA_NO_NODE mapping is treated as node 0.  If the
 * mapped node is offline, the node distance table is consulted and the
 * nearest online node is returned instead.
 *
 * ACPI device drivers that run after the kernel's NUMA initialization has
 * completed can use this to obtain their device NUMA topology from the
 * ACPI tables without having to handle offline nodes themselves.  A node
 * may be offline when a device proximity ID is unique, when no SRAT memory
 * entry exists, or when NUMA is disabled (e.g. "numa=off" on x86).
 */
int acpi_map_pxm_to_online_node(int pxm)
{
	int node = acpi_map_pxm_to_node(pxm);
	int nearest, shortest, candidate;

	if (node == NUMA_NO_NODE)
		node = 0;

	if (node_online(node))
		return node;

	/* Offline: fall back to the closest online node, defaulting to @node. */
	nearest = node;
	shortest = INT_MAX;
	for_each_online_node(candidate) {
		int dist = node_distance(node, candidate);

		if (dist >= shortest)
			continue;
		shortest = dist;
		nearest = candidate;
	}

	return nearest;
}
EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
static void __init static void __init
acpi_table_print_srat_entry(struct acpi_subtable_header *header) acpi_table_print_srat_entry(struct acpi_subtable_header *header)
{ {
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/memory_hotplug.h> #include <linux/memory_hotplug.h>
#include <linux/libnvdimm.h> #include <linux/libnvdimm.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/numa.h>
static int e820_pmem_remove(struct platform_device *pdev) static int e820_pmem_remove(struct platform_device *pdev)
{ {
...@@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev) ...@@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev)
return 0; return 0;
} }
/*
 * Resolve the NUMA node for the range starting at @addr.
 *
 * With CONFIG_MEMORY_HOTPLUG the answer comes from
 * memory_add_physaddr_to_nid(); without it no lookup is attempted and
 * NUMA_NO_NODE is reported.
 */
static int e820_range_to_nid(resource_size_t addr)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	return memory_add_physaddr_to_nid(addr);
#else
	return NUMA_NO_NODE;
#endif
}
static int e820_register_one(struct resource *res, void *data) static int e820_register_one(struct resource *res, void *data)
{ {
struct nd_region_desc ndr_desc; struct nd_region_desc ndr_desc;
struct nvdimm_bus *nvdimm_bus = data; struct nvdimm_bus *nvdimm_bus = data;
int nid = phys_to_target_node(res->start);
memset(&ndr_desc, 0, sizeof(ndr_desc)); memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.res = res; ndr_desc.res = res;
ndr_desc.numa_node = e820_range_to_nid(res->start); ndr_desc.numa_node = numa_map_to_online_node(nid);
ndr_desc.target_node = ndr_desc.numa_node; ndr_desc.target_node = nid;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
return -ENXIO; return -ENXIO;
......
...@@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str); ...@@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str);
extern bool acpi_osi_is_win8(void); extern bool acpi_osi_is_win8(void);
#ifdef CONFIG_ACPI_NUMA #ifdef CONFIG_ACPI_NUMA
int acpi_map_pxm_to_online_node(int pxm);
int acpi_map_pxm_to_node(int pxm); int acpi_map_pxm_to_node(int pxm);
int acpi_get_node(acpi_handle handle); int acpi_get_node(acpi_handle handle);
/**
 * acpi_map_pxm_to_online_node - Map proximity ID to online node
 * @pxm: ACPI proximity ID
 *
 * Translates @pxm with acpi_map_pxm_to_node() and passes the result to
 * numa_map_to_online_node(), which substitutes the nearest online node
 * (by node distance) when the mapped node is offline.
 *
 * ACPI device drivers that run after the kernel's NUMA initialization has
 * completed can use this to obtain their device NUMA topology from the
 * ACPI tables without having to handle offline nodes themselves.  A node
 * may be offline when a device proximity ID is unique, when no SRAT memory
 * entry exists, or when NUMA is disabled (e.g. "numa=off" on x86).
 *
 * NOTE(review): numa_map_to_online_node() returns NUMA_NO_NODE unchanged,
 * so if acpi_map_pxm_to_node() can yield NUMA_NO_NODE for @pxm the result
 * here is NUMA_NO_NODE rather than an online node - confirm callers cope.
 */
static inline int acpi_map_pxm_to_online_node(int pxm)
{
	return numa_map_to_online_node(acpi_map_pxm_to_node(pxm));
}
#else #else
static inline int acpi_map_pxm_to_online_node(int pxm) static inline int acpi_map_pxm_to_online_node(int pxm)
{ {
......
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NUMA_H #ifndef _LINUX_NUMA_H
#define _LINUX_NUMA_H #define _LINUX_NUMA_H
#include <linux/types.h>
#ifdef CONFIG_NODES_SHIFT #ifdef CONFIG_NODES_SHIFT
#define NODES_SHIFT CONFIG_NODES_SHIFT #define NODES_SHIFT CONFIG_NODES_SHIFT
...@@ -13,4 +13,32 @@ ...@@ -13,4 +13,32 @@
#define NUMA_NO_NODE (-1) #define NUMA_NO_NODE (-1)
/*
 * Optionally keep NUMA memory info available post init.
 *
 * With CONFIG_NUMA_KEEP_MEMINFO set, __initdata_or_meminfo expands to
 * nothing; otherwise it expands to __initdata.
 */
#ifdef CONFIG_NUMA_KEEP_MEMINFO
#define __initdata_or_meminfo
#else
#define __initdata_or_meminfo __initdata
#endif
#ifdef CONFIG_NUMA
/* Generic implementation available */
int numa_map_to_online_node(int node);
/*
 * Optional architecture specific implementation, users need a "depends
 * on $ARCH"
 */
int phys_to_target_node(phys_addr_t addr);
#else
/* !CONFIG_NUMA stub: every input maps to NUMA_NO_NODE. */
static inline int numa_map_to_online_node(int node)
{
return NUMA_NO_NODE;
}
/* !CONFIG_NUMA stub: no address-to-node lookup, always NUMA_NO_NODE. */
static inline int phys_to_target_node(phys_addr_t addr)
{
return NUMA_NO_NODE;
}
#endif
#endif /* _LINUX_NUMA_H */ #endif /* _LINUX_NUMA_H */
...@@ -139,6 +139,10 @@ config HAVE_FAST_GUP ...@@ -139,6 +139,10 @@ config HAVE_FAST_GUP
config ARCH_KEEP_MEMBLOCK config ARCH_KEEP_MEMBLOCK
bool bool
# Keep arch NUMA mapping infrastructure post-init.
config NUMA_KEEP_MEMINFO
bool
config MEMORY_ISOLATION config MEMORY_ISOLATION
bool bool
...@@ -154,6 +158,7 @@ config MEMORY_HOTPLUG ...@@ -154,6 +158,7 @@ config MEMORY_HOTPLUG
bool "Allow for memory hot-add" bool "Allow for memory hot-add"
depends on SPARSEMEM || X86_64_ACPI_NUMA depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on ARCH_ENABLE_MEMORY_HOTPLUG depends on ARCH_ENABLE_MEMORY_HOTPLUG
select NUMA_KEEP_MEMINFO if NUMA
config MEMORY_HOTPLUG_SPARSE config MEMORY_HOTPLUG_SPARSE
def_bool y def_bool y
......
...@@ -127,6 +127,32 @@ static struct mempolicy default_policy = { ...@@ -127,6 +127,32 @@ static struct mempolicy default_policy = {
static struct mempolicy preferred_node_policy[MAX_NUMNODES]; static struct mempolicy preferred_node_policy[MAX_NUMNODES];
/**
 * numa_map_to_online_node - Find closest online node
 * @node: Node id to start the search
 *
 * Returns @node unchanged when it is NUMA_NO_NODE or already online.
 * Otherwise looks up the online node with the smallest node_distance()
 * from @node and returns that.
 */
int numa_map_to_online_node(int node)
{
	int nearest = node;
	int shortest = INT_MAX;
	int candidate;

	if (node == NUMA_NO_NODE || node_online(node))
		return node;

	for_each_online_node(candidate) {
		int dist = node_distance(node, candidate);

		/* Strictly-less comparison: the first node at a given distance wins. */
		if (dist >= shortest)
			continue;
		shortest = dist;
		nearest = candidate;
	}

	return nearest;
}
EXPORT_SYMBOL_GPL(numa_map_to_online_node);
struct mempolicy *get_task_policy(struct task_struct *p) struct mempolicy *get_task_policy(struct task_struct *p)
{ {
struct mempolicy *pol = p->mempolicy; struct mempolicy *pol = p->mempolicy;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment