Commit 7bcfc318 authored by Linus Torvalds

Revert recent NUMA and pgd_index() fixes, since they show regressions.

People are investigating.

Cset exclude: davem@sunset.davemloft.net|ChangeSet|20050315051617|44508
Cset exclude: kravetz@us.ibm.com[torvalds]|ChangeSet|20050314002422|18516
parent 581bdf78
...@@ -40,6 +40,7 @@ int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; ...@@ -40,6 +40,7 @@ int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
struct pglist_data *node_data[MAX_NUMNODES]; struct pglist_data *node_data[MAX_NUMNODES];
bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static unsigned long node0_io_hole_size;
static int min_common_depth; static int min_common_depth;
/* /*
...@@ -48,8 +49,7 @@ static int min_common_depth; ...@@ -48,8 +49,7 @@ static int min_common_depth;
*/ */
static struct { static struct {
unsigned long node_start_pfn; unsigned long node_start_pfn;
unsigned long node_end_pfn; unsigned long node_spanned_pages;
unsigned long node_present_pages;
} init_node_data[MAX_NUMNODES] __initdata; } init_node_data[MAX_NUMNODES] __initdata;
EXPORT_SYMBOL(node_data); EXPORT_SYMBOL(node_data);
...@@ -186,31 +186,14 @@ static int __init find_min_common_depth(void) ...@@ -186,31 +186,14 @@ static int __init find_min_common_depth(void)
return depth; return depth;
} }
static int __init get_mem_addr_cells(void) static unsigned long read_cell_ul(struct device_node *device, unsigned int **buf)
{
struct device_node *memory = NULL;
memory = of_find_node_by_type(memory, "memory");
if (!memory)
return 0; /* it won't matter */
return(prom_n_addr_cells(memory));
}
static int __init get_mem_size_cells(void)
{
struct device_node *memory = NULL;
memory = of_find_node_by_type(memory, "memory");
if (!memory)
return 0; /* it won't matter */
return(prom_n_size_cells(memory));
}
static unsigned long read_n_cells(int n, unsigned int **buf)
{ {
int i;
unsigned long result = 0; unsigned long result = 0;
while (n--) { i = prom_n_size_cells(device);
/* bug on i>2 ?? */
while (i--) {
result = (result << 32) | **buf; result = (result << 32) | **buf;
(*buf)++; (*buf)++;
} }
...@@ -284,7 +267,6 @@ static int __init parse_numa_properties(void) ...@@ -284,7 +267,6 @@ static int __init parse_numa_properties(void)
{ {
struct device_node *cpu = NULL; struct device_node *cpu = NULL;
struct device_node *memory = NULL; struct device_node *memory = NULL;
int addr_cells, size_cells;
int max_domain = 0; int max_domain = 0;
long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
unsigned long i; unsigned long i;
...@@ -331,8 +313,6 @@ static int __init parse_numa_properties(void) ...@@ -331,8 +313,6 @@ static int __init parse_numa_properties(void)
} }
} }
addr_cells = get_mem_addr_cells();
size_cells = get_mem_size_cells();
memory = NULL; memory = NULL;
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long start; unsigned long start;
...@@ -349,8 +329,8 @@ static int __init parse_numa_properties(void) ...@@ -349,8 +329,8 @@ static int __init parse_numa_properties(void)
ranges = memory->n_addrs; ranges = memory->n_addrs;
new_range: new_range:
/* these are order-sensitive, and modify the buffer pointer */ /* these are order-sensitive, and modify the buffer pointer */
start = read_n_cells(addr_cells, &memcell_buf); start = read_cell_ul(memory, &memcell_buf);
size = read_n_cells(size_cells, &memcell_buf); size = read_cell_ul(memory, &memcell_buf);
start = _ALIGN_DOWN(start, MEMORY_INCREMENT); start = _ALIGN_DOWN(start, MEMORY_INCREMENT);
size = _ALIGN_UP(size, MEMORY_INCREMENT); size = _ALIGN_UP(size, MEMORY_INCREMENT);
...@@ -369,27 +349,32 @@ static int __init parse_numa_properties(void) ...@@ -369,27 +349,32 @@ static int __init parse_numa_properties(void)
max_domain = numa_domain; max_domain = numa_domain;
/* /*
* Initialize new node struct, or add to an existing one. * For backwards compatibility, OF splits the first node
* into two regions (the first being 0-4GB). Check for
* this simple case and complain if there is a gap in
* memory
*/ */
if (init_node_data[numa_domain].node_end_pfn) { if (init_node_data[numa_domain].node_spanned_pages) {
if ((start / PAGE_SIZE) < unsigned long shouldstart =
init_node_data[numa_domain].node_start_pfn) init_node_data[numa_domain].node_start_pfn +
init_node_data[numa_domain].node_start_pfn = init_node_data[numa_domain].node_spanned_pages;
start / PAGE_SIZE; if (shouldstart != (start / PAGE_SIZE)) {
else /* Revert to non-numa for now */
init_node_data[numa_domain].node_end_pfn = printk(KERN_ERR
(start / PAGE_SIZE) + "WARNING: Unexpected node layout: "
(size / PAGE_SIZE); "region start %lx length %lx\n",
start, size);
init_node_data[numa_domain].node_present_pages += printk(KERN_ERR "NUMA is disabled\n");
goto err;
}
init_node_data[numa_domain].node_spanned_pages +=
size / PAGE_SIZE; size / PAGE_SIZE;
} else { } else {
node_set_online(numa_domain); node_set_online(numa_domain);
init_node_data[numa_domain].node_start_pfn = init_node_data[numa_domain].node_start_pfn =
start / PAGE_SIZE; start / PAGE_SIZE;
init_node_data[numa_domain].node_end_pfn = init_node_data[numa_domain].node_spanned_pages =
init_node_data[numa_domain].node_start_pfn +
size / PAGE_SIZE; size / PAGE_SIZE;
} }
...@@ -406,6 +391,14 @@ static int __init parse_numa_properties(void) ...@@ -406,6 +391,14 @@ static int __init parse_numa_properties(void)
node_set_online(i); node_set_online(i);
return 0; return 0;
err:
/* Something has gone wrong; revert any setup we've done */
for_each_node(i) {
node_set_offline(i);
init_node_data[i].node_start_pfn = 0;
init_node_data[i].node_spanned_pages = 0;
}
return -1;
} }
static void __init setup_nonnuma(void) static void __init setup_nonnuma(void)
...@@ -433,11 +426,12 @@ static void __init setup_nonnuma(void) ...@@ -433,11 +426,12 @@ static void __init setup_nonnuma(void)
node_set_online(0); node_set_online(0);
init_node_data[0].node_start_pfn = 0; init_node_data[0].node_start_pfn = 0;
init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; init_node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
init_node_data[0].node_present_pages = total_ram / PAGE_SIZE;
for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
node0_io_hole_size = top_of_ram - total_ram;
} }
static void __init dump_numa_topology(void) static void __init dump_numa_topology(void)
...@@ -518,8 +512,6 @@ static unsigned long careful_allocation(int nid, unsigned long size, ...@@ -518,8 +512,6 @@ static unsigned long careful_allocation(int nid, unsigned long size,
void __init do_init_bootmem(void) void __init do_init_bootmem(void)
{ {
int nid; int nid;
int addr_cells, size_cells;
struct device_node *memory = NULL;
static struct notifier_block ppc64_numa_nb = { static struct notifier_block ppc64_numa_nb = {
.notifier_call = cpu_numa_callback, .notifier_call = cpu_numa_callback,
.priority = 1 /* Must run before sched domains notifier. */ .priority = 1 /* Must run before sched domains notifier. */
...@@ -543,7 +535,7 @@ void __init do_init_bootmem(void) ...@@ -543,7 +535,7 @@ void __init do_init_bootmem(void)
unsigned long bootmap_pages; unsigned long bootmap_pages;
start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE;
end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; end_paddr = start_paddr + (init_node_data[nid].node_spanned_pages * PAGE_SIZE);
/* Allocate the node structure node local if possible */ /* Allocate the node structure node local if possible */
NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid,
...@@ -559,9 +551,9 @@ void __init do_init_bootmem(void) ...@@ -559,9 +551,9 @@ void __init do_init_bootmem(void)
NODE_DATA(nid)->node_start_pfn = NODE_DATA(nid)->node_start_pfn =
init_node_data[nid].node_start_pfn; init_node_data[nid].node_start_pfn;
NODE_DATA(nid)->node_spanned_pages = NODE_DATA(nid)->node_spanned_pages =
end_paddr - start_paddr; init_node_data[nid].node_spanned_pages;
if (NODE_DATA(nid)->node_spanned_pages == 0) if (init_node_data[nid].node_spanned_pages == 0)
continue; continue;
dbg("start_paddr = %lx\n", start_paddr); dbg("start_paddr = %lx\n", start_paddr);
...@@ -580,50 +572,33 @@ void __init do_init_bootmem(void) ...@@ -580,50 +572,33 @@ void __init do_init_bootmem(void)
start_paddr >> PAGE_SHIFT, start_paddr >> PAGE_SHIFT,
end_paddr >> PAGE_SHIFT); end_paddr >> PAGE_SHIFT);
/* for (i = 0; i < lmb.memory.cnt; i++) {
* We need to do another scan of all memory sections to unsigned long physbase, size;
* associate memory with the correct node.
*/
addr_cells = get_mem_addr_cells();
size_cells = get_mem_size_cells();
memory = NULL;
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long mem_start, mem_size;
int numa_domain;
unsigned int *memcell_buf;
unsigned int len;
memcell_buf = (unsigned int *)get_property(memory, "reg", &len); physbase = lmb.memory.region[i].physbase;
if (!memcell_buf || len <= 0) size = lmb.memory.region[i].size;
continue;
mem_start = read_n_cells(addr_cells, &memcell_buf); if (physbase < end_paddr &&
mem_size = read_n_cells(size_cells, &memcell_buf); (physbase+size) > start_paddr) {
numa_domain = of_node_numa_domain(memory); /* overlaps */
if (physbase < start_paddr) {
size -= start_paddr - physbase;
physbase = start_paddr;
}
if (numa_domain != nid) if (size > end_paddr - physbase)
continue; size = end_paddr - physbase;
if (mem_start < end_paddr && dbg("free_bootmem %lx %lx\n", physbase, size);
(mem_start+mem_size) > start_paddr) { free_bootmem_node(NODE_DATA(nid), physbase,
/* should be no overlaps ! */ size);
dbg("free_bootmem %lx %lx\n", mem_start, mem_size);
free_bootmem_node(NODE_DATA(nid), mem_start,
mem_size);
} }
} }
/*
* Mark reserved regions on this node
*/
for (i = 0; i < lmb.reserved.cnt; i++) { for (i = 0; i < lmb.reserved.cnt; i++) {
unsigned long physbase = lmb.reserved.region[i].physbase; unsigned long physbase = lmb.reserved.region[i].physbase;
unsigned long size = lmb.reserved.region[i].size; unsigned long size = lmb.reserved.region[i].size;
if (pa_to_nid(physbase) != nid &&
pa_to_nid(physbase+size-1) != nid)
continue;
if (physbase < end_paddr && if (physbase < end_paddr &&
(physbase+size) > start_paddr) { (physbase+size) > start_paddr) {
/* overlaps */ /* overlaps */
...@@ -657,12 +632,13 @@ void __init paging_init(void) ...@@ -657,12 +632,13 @@ void __init paging_init(void)
unsigned long start_pfn; unsigned long start_pfn;
unsigned long end_pfn; unsigned long end_pfn;
start_pfn = init_node_data[nid].node_start_pfn; start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT;
end_pfn = init_node_data[nid].node_end_pfn; end_pfn = plat_node_bdata[nid].node_low_pfn;
zones_size[ZONE_DMA] = end_pfn - start_pfn; zones_size[ZONE_DMA] = end_pfn - start_pfn;
zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - zholes_size[ZONE_DMA] = 0;
init_node_data[nid].node_present_pages; if (nid == 0)
zholes_size[ZONE_DMA] = node0_io_hole_size >> PAGE_SHIFT;
dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
......
...@@ -182,19 +182,15 @@ void clear_page_range(struct mmu_gather *tlb, ...@@ -182,19 +182,15 @@ void clear_page_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end) unsigned long addr, unsigned long end)
{ {
pgd_t *pgd; pgd_t *pgd;
unsigned long i, next; unsigned long next;
pgd = pgd_offset(tlb->mm, addr); pgd = pgd_offset(tlb->mm, addr);
for (i = pgd_index(addr); i <= pgd_index(end-1); i++) { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
clear_pud_range(tlb, pgd, addr, next); clear_pud_range(tlb, pgd, addr, next);
pgd++; } while (pgd++, addr = next, addr != end);
addr = next;
if (addr == end)
break;
}
} }
pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment