Commit 25e01e8c authored by Tony Luck's avatar Tony Luck

Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/alex

into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-release-2.6.9
parents 8178ad77 b9987ada
......@@ -203,6 +203,9 @@ struct ioc {
/* clearing pdir to prevent races with allocations. */
unsigned int res_bitshift; /* from the RIGHT! */
unsigned int res_size; /* size of resource map in bytes */
#ifdef CONFIG_NUMA
unsigned int node; /* node where this IOC lives */
#endif
#if DELAYED_RESOURCE_CNT > 0
spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */
/* than res_lock for bigger systems. */
......@@ -1057,7 +1060,24 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int
struct ioc *ioc;
void *addr;
ioc = GET_IOC(dev);
ASSERT(ioc);
#ifdef CONFIG_NUMA
{
struct page *page;
page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
numa_node_id() : ioc->node, flags,
get_order(size));
if (unlikely(!page))
return NULL;
addr = page_address(page);
}
#else
addr = (void *) __get_free_pages(flags, get_order(size));
#endif
if (unlikely(!addr))
return NULL;
......@@ -1081,8 +1101,6 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int
* If device can't bypass or bypass is disabled, pass the 32bit fake
* device to map single to get an iova mapping.
*/
ioc = GET_IOC(dev);
ASSERT(ioc);
*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
return addr;
......@@ -1799,6 +1817,10 @@ ioc_show(struct seq_file *s, void *v)
seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
#ifdef CONFIG_NUMA
if (ioc->node != MAX_NUMNODES)
seq_printf(s, "NUMA node : %d\n", ioc->node);
#endif
seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024);
......@@ -1899,6 +1921,58 @@ sba_connect_bus(struct pci_bus *bus)
printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
}
#ifdef CONFIG_NUMA
static void __init
sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
{
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *obj;
acpi_handle phandle;
unsigned int node;
ioc->node = MAX_NUMNODES;
/*
* Check for a _PXM on this node first. We don't typically see
* one here, so we'll end up getting it from the parent.
*/
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) {
if (ACPI_FAILURE(acpi_get_parent(handle, &phandle)))
return;
/* Reset the acpi buffer */
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL,
&buffer)))
return;
}
if (!buffer.length || !buffer.pointer)
return;
obj = buffer.pointer;
if (obj->type != ACPI_TYPE_INTEGER ||
obj->integer.value >= MAX_PXM_DOMAINS) {
acpi_os_free(buffer.pointer);
return;
}
node = pxm_to_nid_map[obj->integer.value];
acpi_os_free(buffer.pointer);
if (node >= MAX_NUMNODES || !node_online(node))
return;
ioc->node = node;
return;
}
#else
#define sba_map_ioc_to_node(ioc, handle)
#endif
static int __init
acpi_sba_ioc_add(struct acpi_device *device)
{
......@@ -1941,6 +2015,8 @@ acpi_sba_ioc_add(struct acpi_device *device)
if (!ioc)
return 1;
/* setup NUMA node association */
sba_map_ioc_to_node(ioc, device->handle);
return 0;
}
......
......@@ -650,4 +650,71 @@ acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
return 0;
}
#ifdef CONFIG_NUMA
acpi_status __init
acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
{
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *obj;
struct acpi_table_iosapic *iosapic;
unsigned int gsi_base;
int node;
/* Only care about objects w/ a method that returns the MADT */
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
return AE_OK;
if (!buffer.length || !buffer.pointer)
return AE_OK;
obj = buffer.pointer;
if (obj->type != ACPI_TYPE_BUFFER ||
obj->buffer.length < sizeof(*iosapic)) {
acpi_os_free(buffer.pointer);
return AE_OK;
}
iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
acpi_os_free(buffer.pointer);
return AE_OK;
}
gsi_base = iosapic->global_irq_base;
acpi_os_free(buffer.pointer);
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
/*
* OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell
* us which node to associate this with.
*/
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer)))
return AE_OK;
if (!buffer.length || !buffer.pointer)
return AE_OK;
obj = buffer.pointer;
if (obj->type != ACPI_TYPE_INTEGER ||
obj->integer.value >= MAX_PXM_DOMAINS) {
acpi_os_free(buffer.pointer);
return AE_OK;
}
node = pxm_to_nid_map[obj->integer.value];
acpi_os_free(buffer.pointer);
if (node >= MAX_NUMNODES || !node_online(node) ||
cpus_empty(node_to_cpumask(node)))
return AE_OK;
/* We know a gsi to node mapping! */
map_iosapic_to_node(gsi_base, node);
return AE_OK;
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_ACPI_BOOT */
......@@ -117,6 +117,9 @@ static struct iosapic {
char *addr; /* base address of IOSAPIC */
unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
unsigned short num_rte; /* number of RTE in this IOSAPIC */
#ifdef CONFIG_NUMA
unsigned short node; /* numa node association via pxm */
#endif
} iosapic_lists[NR_IOSAPICS];
static int num_iosapic;
......@@ -488,7 +491,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
}
static unsigned int
get_target_cpu (void)
get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
......@@ -507,6 +510,34 @@ get_target_cpu (void)
if (!cpu_online(smp_processor_id()))
return hard_smp_processor_id();
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
cpumask_t cpu_mask;
iosapic_index = find_iosapic(gsi);
if (iosapic_index < 0 ||
iosapic_lists[iosapic_index].node == MAX_NUMNODES)
goto skip_numa_setup;
cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
num_cpus = cpus_weight(cpu_mask);
if (!num_cpus)
goto skip_numa_setup;
/* Use vector assigment to distribute across cpus in node */
cpu_index = vector % num_cpus;
for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
numa_cpu = next_cpu(numa_cpu, cpu_mask);
if (numa_cpu != NR_CPUS)
return cpu_physical_id(numa_cpu);
}
skip_numa_setup:
#endif
/*
* Otherwise, round-robin interrupt vectors across all the
* processors. (It'd be nice if we could be smarter in the
......@@ -550,7 +581,7 @@ iosapic_register_intr (unsigned int gsi,
}
vector = assign_irq_vector(AUTO_ASSIGN);
dest = get_target_cpu();
dest = get_target_cpu(gsi, vector);
register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
polarity, trigger);
}
......@@ -680,6 +711,9 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
iosapic_lists[num_iosapic].addr = addr;
iosapic_lists[num_iosapic].gsi_base = gsi_base;
iosapic_lists[num_iosapic].num_rte = num_rte;
#ifdef CONFIG_NUMA
iosapic_lists[num_iosapic].node = MAX_NUMNODES;
#endif
num_iosapic++;
if ((gsi_base == 0) && pcat_compat) {
......@@ -692,3 +726,20 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
}
}
#ifdef CONFIG_NUMA
void __init
map_iosapic_to_node(unsigned int gsi_base, int node)
{
int index;
index = find_iosapic(gsi_base);
if (index < 0) {
printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
__FUNCTION__, gsi_base);
return;
}
iosapic_lists[index].node = node;
return;
}
#endif
......@@ -53,11 +53,12 @@ static struct early_node_data mem_data[NR_NODES] __initdata;
static void __init reassign_cpu_only_nodes(void)
{
struct node_memblk_s *p;
int i, j, k, nnode, nid, cpu, cpunid;
int i, j, k, nnode, nid, cpu, cpunid, pxm;
u8 cslit, slit;
static DECLARE_BITMAP(nodes_with_mem, NR_NODES) __initdata;
static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
static int node_flip[NR_NODES] __initdata;
static int old_nid_map[NR_CPUS] __initdata;
for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
if (!test_bit(p->nid, (void *) nodes_with_mem)) {
......@@ -104,8 +105,13 @@ static void __init reassign_cpu_only_nodes(void)
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (node_cpuid[cpu].nid == i) {
/* For nodes not being reassigned just fix the cpu's nid. */
/*
* For nodes not being reassigned just
* fix the cpu's nid and reverse pxm map
*/
if (cpunid < numnodes) {
pxm = nid_to_pxm_map[i];
pxm_to_nid_map[pxm] =
node_cpuid[cpu].nid = cpunid;
continue;
}
......@@ -126,6 +132,8 @@ static void __init reassign_cpu_only_nodes(void)
}
}
/* save old nid map so we can update the pxm */
old_nid_map[cpu] = node_cpuid[cpu].nid;
node_cpuid[cpu].nid = k;
}
}
......@@ -134,14 +142,19 @@ static void __init reassign_cpu_only_nodes(void)
* Fixup temporary nid values for CPU-only nodes.
*/
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (node_cpuid[cpu].nid == (numnodes + numnodes))
node_cpuid[cpu].nid = nnode - 1;
else
for (i = 0; i < nnode; i++)
if (node_flip[i] == (node_cpuid[cpu].nid - numnodes)) {
node_cpuid[cpu].nid = i;
if (node_cpuid[cpu].nid == (numnodes + numnodes)) {
pxm = nid_to_pxm_map[old_nid_map[cpu]];
pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
} else {
for (i = 0; i < nnode; i++) {
if (node_flip[i] != (node_cpuid[cpu].nid - numnodes))
continue;
pxm = nid_to_pxm_map[old_nid_map[cpu]];
pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
break;
}
}
/*
* Fix numa_slit by compressing from larger
......
......@@ -136,6 +136,11 @@ pci_acpi_init (void)
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
#ifdef CONFIG_NUMA
extern acpi_status acpi_map_iosapic (acpi_handle, u32, void*, void**);
acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
#endif
/*
* PCI IRQ routing is set up by pci_enable_device(), but we
* also do it here in case there are still broken drivers that
......
......@@ -90,6 +90,9 @@ extern int __init iosapic_register_platform_intr (u32 int_type,
extern unsigned int iosapic_version (char *addr);
extern void iosapic_pci_fixup (int);
#ifdef CONFIG_NUMA
extern void __init map_iosapic_to_node (unsigned int, int);
#endif
#else
#define iosapic_system_init(pcat_compat) do { } while (0)
#define iosapic_init(address,gsi_base) do { } while (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment