Commit e4174ff7 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki

Merge branch 'acpi-numa'

* acpi-numa:
  docs: mm: numaperf.rst Add brief description for access class 1.
  node: Add access1 class to represent CPU to memory characteristics
  ACPI: HMAT: Fix handling of changes from ACPI 6.2 to ACPI 6.3
  ACPI: Let ACPI know we support Generic Initiator Affinity Structures
  x86: Support Generic Initiator only proximity domains
  ACPI: Support Generic Initiator only domains
  ACPI / NUMA: Add stub function for pxm_to_node()
  irq-chip/gic-v3-its: Fix crash if ITS is in a proximity domain without processor or memory
  ACPI: Remove side effect of partly creating a node in acpi_get_node()
  ACPI: Rename acpi_map_pxm_to_online_node() to pxm_to_online_node()
  ACPI: Remove side effect of partly creating a node in acpi_map_pxm_to_online_node()
  ACPI: Do not create new NUMA domains from ACPI static tables that are not SRAT
  ACPI: Add out of bounds and numa_off protections to pxm_to_node()
parents 20eeeafb dc9e7860
......@@ -56,6 +56,11 @@ nodes' access characteristics share the same performance relative to other
linked initiator nodes. Each target within an initiator's access class,
though, do not necessarily perform the same as each other.
The access class "1" is used to allow differentiation between initiators
that are CPUs and hence suitable for generic task scheduling, and
IO initiators such as GPUs and NICs. Unlike access class 0, only
nodes containing CPUs are considered.
================
NUMA Performance
================
......@@ -88,6 +93,9 @@ The latency attributes are provided in nanoseconds.
The values reported here correspond to the rated latency and bandwidth
for the platform.
Access class 1 takes the same form but only includes values for CPU to
memory activity.
==========
NUMA Cache
==========
......
......@@ -62,12 +62,14 @@ extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
extern void numa_add_cpu(int cpu);
extern void numa_remove_cpu(int cpu);
extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
static inline void numa_add_cpu(int cpu) { }
static inline void numa_remove_cpu(int cpu) { }
static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
......
......@@ -1218,6 +1218,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
init_cpu_to_node();
init_gi_nodes();
io_apic_init_mappings();
......
......@@ -747,6 +747,27 @@ static void __init init_memory_less_node(int nid)
*/
}
/*
* A node may exist which has one or more Generic Initiators but no CPUs and no
* memory.
*
* This function must be called after init_cpu_to_node(), to ensure that any
* memoryless CPU nodes have already been brought online, and before the
* node_data[nid] is needed for zone list setup in build_all_zonelists().
*
* When this function is called, any nodes containing either memory and/or CPUs
* will already be online and there is no need to do anything extra, even if
* they also contain one or more Generic Initiators.
*/
void __init init_gi_nodes(void)
{
int nid;
for_each_node_state(nid, N_GENERIC_INITIATOR)
if (!node_online(nid))
init_memory_less_node(nid);
}
/*
* Setup early cpu_to_node.
*
......
......@@ -1335,7 +1335,7 @@ static int __init arm_smmu_v3_set_proximity(struct device *dev,
smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) {
int dev_node = acpi_map_pxm_to_node(smmu->pxm);
int dev_node = pxm_to_node(smmu->pxm);
if (dev_node != NUMA_NO_NODE && !node_online(dev_node))
return -EINVAL;
......
......@@ -303,7 +303,11 @@ static void acpi_bus_osc_support(void)
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT;
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PCLPI_SUPPORT;
#ifdef CONFIG_ARM64
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
#endif
#ifdef CONFIG_X86
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
if (boot_cpu_has(X86_FEATURE_HWP)) {
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
......
......@@ -3006,10 +3006,8 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
ndr_desc->provider_data = nfit_spa;
ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
ndr_desc->numa_node = acpi_map_pxm_to_online_node(
spa->proximity_domain);
ndr_desc->target_node = acpi_map_pxm_to_node(
spa->proximity_domain);
ndr_desc->numa_node = pxm_to_online_node(spa->proximity_domain);
ndr_desc->target_node = pxm_to_node(spa->proximity_domain);
} else {
ndr_desc->numa_node = NUMA_NO_NODE;
ndr_desc->target_node = NUMA_NO_NODE;
......
......@@ -56,7 +56,7 @@ struct memory_target {
unsigned int memory_pxm;
unsigned int processor_pxm;
struct resource memregions;
struct node_hmem_attrs hmem_attrs;
struct node_hmem_attrs hmem_attrs[2];
struct list_head caches;
struct node_cache_attrs cache_attrs;
bool registered;
......@@ -65,6 +65,7 @@ struct memory_target {
struct memory_initiator {
struct list_head node;
unsigned int processor_pxm;
bool has_cpu;
};
struct memory_locality {
......@@ -108,6 +109,7 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm)
return;
initiator->processor_pxm = cpu_pxm;
initiator->has_cpu = node_state(pxm_to_node(cpu_pxm), N_CPU);
list_add_tail(&initiator->node, &initiators);
}
......@@ -215,28 +217,28 @@ static u32 hmat_normalize(u16 entry, u64 base, u8 type)
}
static void hmat_update_target_access(struct memory_target *target,
u8 type, u32 value)
u8 type, u32 value, int access)
{
switch (type) {
case ACPI_HMAT_ACCESS_LATENCY:
target->hmem_attrs.read_latency = value;
target->hmem_attrs.write_latency = value;
target->hmem_attrs[access].read_latency = value;
target->hmem_attrs[access].write_latency = value;
break;
case ACPI_HMAT_READ_LATENCY:
target->hmem_attrs.read_latency = value;
target->hmem_attrs[access].read_latency = value;
break;
case ACPI_HMAT_WRITE_LATENCY:
target->hmem_attrs.write_latency = value;
target->hmem_attrs[access].write_latency = value;
break;
case ACPI_HMAT_ACCESS_BANDWIDTH:
target->hmem_attrs.read_bandwidth = value;
target->hmem_attrs.write_bandwidth = value;
target->hmem_attrs[access].read_bandwidth = value;
target->hmem_attrs[access].write_bandwidth = value;
break;
case ACPI_HMAT_READ_BANDWIDTH:
target->hmem_attrs.read_bandwidth = value;
target->hmem_attrs[access].read_bandwidth = value;
break;
case ACPI_HMAT_WRITE_BANDWIDTH:
target->hmem_attrs.write_bandwidth = value;
target->hmem_attrs[access].write_bandwidth = value;
break;
default:
break;
......@@ -329,8 +331,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
if (mem_hier == ACPI_HMAT_MEMORY) {
target = find_mem_target(targs[targ]);
if (target && target->processor_pxm == inits[init])
hmat_update_target_access(target, type, value);
if (target && target->processor_pxm == inits[init]) {
hmat_update_target_access(target, type, value, 0);
/* If the node has a CPU, update access 1 */
if (node_state(pxm_to_node(inits[init]), N_CPU))
hmat_update_target_access(target, type, value, 1);
}
}
}
}
......@@ -424,7 +430,8 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->flags, p->processor_PD, p->memory_PD);
if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
if ((hmat_revision == 1 && p->flags & ACPI_HMAT_MEMORY_PD_VALID) ||
hmat_revision > 1) {
target = find_mem_target(p->memory_PD);
if (!target) {
pr_debug("HMAT: Memory Domain missing from SRAT\n");
......@@ -566,6 +573,7 @@ static void hmat_register_target_initiators(struct memory_target *target)
unsigned int mem_nid, cpu_nid;
struct memory_locality *loc = NULL;
u32 best = 0;
bool access0done = false;
int i;
mem_nid = pxm_to_node(target->memory_pxm);
......@@ -577,7 +585,11 @@ static void hmat_register_target_initiators(struct memory_target *target)
if (target->processor_pxm != PXM_INVAL) {
cpu_nid = pxm_to_node(target->processor_pxm);
register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
return;
access0done = true;
if (node_state(cpu_nid, N_CPU)) {
register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
return;
}
}
if (list_empty(&localities))
......@@ -591,6 +603,41 @@ static void hmat_register_target_initiators(struct memory_target *target)
*/
bitmap_zero(p_nodes, MAX_NUMNODES);
list_sort(p_nodes, &initiators, initiator_cmp);
if (!access0done) {
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
if (!loc)
continue;
best = 0;
list_for_each_entry(initiator, &initiators, node) {
u32 value;
if (!test_bit(initiator->processor_pxm, p_nodes))
continue;
value = hmat_initiator_perf(target, initiator,
loc->hmat_loc);
if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
bitmap_clear(p_nodes, 0, initiator->processor_pxm);
if (value != best)
clear_bit(initiator->processor_pxm, p_nodes);
}
if (best)
hmat_update_target_access(target, loc->hmat_loc->data_type,
best, 0);
}
for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
cpu_nid = pxm_to_node(i);
register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
}
}
/* Access 1 ignores Generic Initiators */
bitmap_zero(p_nodes, MAX_NUMNODES);
list_sort(p_nodes, &initiators, initiator_cmp);
best = 0;
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
if (!loc)
......@@ -600,6 +647,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
list_for_each_entry(initiator, &initiators, node) {
u32 value;
if (!initiator->has_cpu) {
clear_bit(initiator->processor_pxm, p_nodes);
continue;
}
if (!test_bit(initiator->processor_pxm, p_nodes))
continue;
......@@ -610,12 +661,11 @@ static void hmat_register_target_initiators(struct memory_target *target)
clear_bit(initiator->processor_pxm, p_nodes);
}
if (best)
hmat_update_target_access(target, loc->hmat_loc->data_type, best);
hmat_update_target_access(target, loc->hmat_loc->data_type, best, 1);
}
for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
cpu_nid = pxm_to_node(i);
register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
}
}
......@@ -628,10 +678,10 @@ static void hmat_register_target_cache(struct memory_target *target)
node_add_cache(mem_nid, &tcache->cache_attrs);
}
static void hmat_register_target_perf(struct memory_target *target)
static void hmat_register_target_perf(struct memory_target *target, int access)
{
unsigned mem_nid = pxm_to_node(target->memory_pxm);
node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
node_set_perf_attrs(mem_nid, &target->hmem_attrs[access], access);
}
static void hmat_register_target_device(struct memory_target *target,
......@@ -664,9 +714,9 @@ static void hmat_register_target_device(struct memory_target *target,
goto out_pdev;
}
pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
pdev->dev.numa_node = pxm_to_online_node(target->memory_pxm);
info = (struct memregion_info) {
.target_node = acpi_map_pxm_to_node(target->memory_pxm),
.target_node = pxm_to_node(target->memory_pxm),
};
rc = platform_device_add_data(pdev, &info, sizeof(info));
if (rc < 0) {
......@@ -733,7 +783,8 @@ static void hmat_register_target(struct memory_target *target)
if (!target->registered) {
hmat_register_target_initiators(target);
hmat_register_target_cache(target);
hmat_register_target_perf(target);
hmat_register_target_perf(target, 0);
hmat_register_target_perf(target, 1);
target->registered = true;
}
mutex_unlock(&target_lock);
......
......@@ -31,7 +31,7 @@ int acpi_numa __initdata;
int pxm_to_node(int pxm)
{
if (pxm < 0)
if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off)
return NUMA_NO_NODE;
return pxm_to_node_map[pxm];
}
......@@ -130,6 +130,36 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header)
}
break;
case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
{
struct acpi_srat_generic_affinity *p =
(struct acpi_srat_generic_affinity *)header;
if (p->device_handle_type == 0) {
/*
* For pci devices this may be the only place they
* are assigned a proximity domain
*/
pr_debug("SRAT Generic Initiator(Seg:%u BDF:%u) in proximity domain %d %s\n",
*(u16 *)(&p->device_handle[0]),
*(u16 *)(&p->device_handle[2]),
p->proximity_domain,
(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
"enabled" : "disabled");
} else {
/*
* In this case we can rely on the device having a
* proximity domain reference
*/
pr_debug("SRAT Generic Initiator(HID=%.8s UID=%.4s) in proximity domain %d %s\n",
(char *)(&p->device_handle[0]),
(char *)(&p->device_handle[8]),
p->proximity_domain,
(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ?
"enabled" : "disabled");
}
}
break;
default:
pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
header->type);
......@@ -332,6 +362,41 @@ acpi_parse_gicc_affinity(union acpi_subtable_headers *header,
return 0;
}
#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
static int __init
acpi_parse_gi_affinity(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_srat_generic_affinity *gi_affinity;
int node;
gi_affinity = (struct acpi_srat_generic_affinity *)header;
if (!gi_affinity)
return -EINVAL;
acpi_table_print_srat_entry(&header->common);
if (!(gi_affinity->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED))
return -EINVAL;
node = acpi_map_pxm_to_node(gi_affinity->proximity_domain);
if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
pr_err("SRAT: Too many proximity domains.\n");
return -EINVAL;
}
node_set(node, numa_nodes_parsed);
node_set_state(node, N_GENERIC_INITIATOR);
return 0;
}
#else
static int __init
acpi_parse_gi_affinity(union acpi_subtable_headers *header,
const unsigned long end)
{
return 0;
}
#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
static int __initdata parsed_numa_memblks;
static int __init
......@@ -385,7 +450,7 @@ int __init acpi_numa_init(void)
/* SRAT: System Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
struct acpi_subtable_proc srat_proc[3];
struct acpi_subtable_proc srat_proc[4];
memset(srat_proc, 0, sizeof(srat_proc));
srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
......@@ -394,6 +459,8 @@ int __init acpi_numa_init(void)
srat_proc[1].handler = acpi_parse_x2apic_affinity;
srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
srat_proc[2].handler = acpi_parse_gicc_affinity;
srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY;
srat_proc[3].handler = acpi_parse_gi_affinity;
acpi_table_parse_entries_array(ACPI_SIG_SRAT,
sizeof(struct acpi_table_srat),
......@@ -436,6 +503,6 @@ int acpi_get_node(acpi_handle handle)
pxm = acpi_get_pxm(handle);
return acpi_map_pxm_to_node(pxm);
return pxm_to_node(pxm);
}
EXPORT_SYMBOL(acpi_get_node);
......@@ -1005,6 +1005,8 @@ static struct node_attr node_state_attr[] = {
#endif
[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
[N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
N_GENERIC_INITIATOR),
};
static struct attribute *node_state_attrs[] = {
......@@ -1016,6 +1018,7 @@ static struct attribute *node_state_attrs[] = {
#endif
&node_state_attr[N_MEMORY].attr.attr,
&node_state_attr[N_CPU].attr.attr,
&node_state_attr[N_GENERIC_INITIATOR].attr.attr,
NULL
};
......
......@@ -473,7 +473,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
rhsa = (struct acpi_dmar_rhsa *)header;
for_each_drhd_unit(drhd) {
if (drhd->reg_base_addr == rhsa->base_address) {
int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
int node = pxm_to_node(rhsa->proximity_domain);
if (!node_online(node))
node = NUMA_NO_NODE;
......
......@@ -5263,7 +5263,12 @@ static int __init gic_acpi_parse_srat_its(union acpi_subtable_headers *header,
return -EINVAL;
}
node = acpi_map_pxm_to_node(its_affinity->proximity_domain);
/*
* Note that in theory a new proximity node could be created by this
* entry as it is an SRAT resource allocation structure.
* We do not currently support doing so.
*/
node = pxm_to_node(its_affinity->proximity_domain);
if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
pr_err("SRAT: Invalid NUMA node %d in ITS affinity\n", node);
......
......@@ -22,5 +22,10 @@ extern int acpi_numa __initdata;
extern void bad_srat(void);
extern int srat_disabled(void);
#else /* CONFIG_ACPI_NUMA */
static inline int pxm_to_node(int pxm)
{
return 0;
}
#endif /* CONFIG_ACPI_NUMA */
#endif /* __ACP_NUMA_H */
......@@ -420,28 +420,27 @@ int acpi_map_pxm_to_node(int pxm);
int acpi_get_node(acpi_handle handle);
/**
* acpi_map_pxm_to_online_node - Map proximity ID to online node
* pxm_to_online_node - Map proximity ID to online node
* @pxm: ACPI proximity ID
*
* This is similar to acpi_map_pxm_to_node(), but always returns an online
* This is similar to pxm_to_node(), but always returns an online
* node. When the mapped node from a given proximity ID is offline, it
* looks up the node distance table and returns the nearest online node.
*
* ACPI device drivers, which are called after the NUMA initialization has
* completed in the kernel, can call this interface to obtain their device
* NUMA topology from ACPI tables. Such drivers do not have to deal with
* offline nodes. A node may be offline when a device proximity ID is
* unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
* "numa=off" on x86.
* offline nodes. A node may be offline when SRAT memory entry does not exist,
* or NUMA is disabled, ex. "numa=off" on x86.
*/
static inline int acpi_map_pxm_to_online_node(int pxm)
static inline int pxm_to_online_node(int pxm)
{
int node = acpi_map_pxm_to_node(pxm);
int node = pxm_to_node(pxm);
return numa_map_to_online_node(node);
}
#else
static inline int acpi_map_pxm_to_online_node(int pxm)
static inline int pxm_to_online_node(int pxm)
{
return 0;
}
......@@ -546,6 +545,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
#define OSC_SB_PCLPI_SUPPORT 0x00000080
#define OSC_SB_OSLPI_SUPPORT 0x00000100
#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000
#define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000
extern bool osc_sb_apei_support_acked;
extern bool osc_pc_lpi_support_confirmed;
......
......@@ -399,6 +399,7 @@ enum node_states {
#endif
N_MEMORY, /* The node has memory(regular, high, movable) */
N_CPU, /* The node has one or more cpus */
N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */
NR_NODE_STATES
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment