/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
#include <asm/cacheflush.h>

/*
 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 * The direct mapping extends to max_pfn_mapped, so that we can directly access
 * apertures, ACPI and other tables without having to play with fixmaps.
 */
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

int direct_gbpages __meminitdata
#ifdef CONFIG_DIRECT_GBPAGES
				= 1
#endif
;

static int __init parse_direct_gbpages_off(char *arg)
{
	direct_gbpages = 0;
	return 0;
}
early_param("nogbpages", parse_direct_gbpages_off);

static int __init parse_direct_gbpages_on(char *arg)
{
	direct_gbpages = 1;
	return 0;
}
early_param("gbpages", parse_direct_gbpages_on);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	struct page *page;
	pg_data_t *pgdat;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/*
			 * This loop can take a while with 256 GB and
			 * 4k pages so defer the NMI watchdog:
			 */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
				touch_nmi_watchdog();

			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;

			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%lu pages of RAM\n",		total);
	printk(KERN_INFO "%lu reserved pages\n",	reserved);
	printk(KERN_INFO "%lu pages shared\n",		shared);
	printk(KERN_INFO "%lu pages swap cached\n",	cached);
}

int after_bootmem;

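/*
 * Allocate a zeroed page for building page tables: taken from bootmem
 * before mem_init() has run, from the page allocator afterwards.
 */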
static __init void *spp_getpage(void)
{
	void *ptr;

	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);

	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
		panic("set_pte_phys: cannot allocate page data %s\n",
			after_bootmem ? "after bootmem" : "");
	}

	pr_debug("spp_getpage %p\n", ptr);

	return ptr;
}

void
set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pud = pud_page + pud_index(vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		pud_populate(&init_mm, pud, pmd);
		if (pmd != pmd_offset(pud, 0)) {
			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
				pmd, pmd_offset(pud, 0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		pmd_populate_kernel(&init_mm, pmd, pte);
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk(KERN_ERR "PAGETABLE BUG #02!\n");
			return;
		}
	}

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) && pte_val(new_pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

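/*
 * Install a pte for a kernel virtual address, allocating any missing
 * pmd/pte levels below the pgd that head.S already set up.
 */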
void
set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
	pgd_t *pgd;
	pud_t *pud_page;

	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_ERR
			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud_page = (pud_t*)pgd_page_vaddr(*pgd);
	set_pte_vaddr_pud(pud_page, vaddr, pteval);
}

/*
 * Create large page table mappings for a range of physical addresses.
 */
static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
						pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
		pgd = pgd_offset_k((unsigned long)__va(phys));
		if (pgd_none(*pgd)) {
			pud = (pud_t *) spp_getpage();
			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pud = pud_offset(pgd, (unsigned long)__va(phys));
		if (pud_none(*pud)) {
			pmd = (pmd_t *) spp_getpage();
			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pmd = pmd_offset(pud, phys);
		BUG_ON(!pmd_none(*pmd));
		set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
	}
}

void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
}

void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
}

/*
 * The head.S code sets up the kernel high mapping:
 *
 *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
 *
 * phys_addr holds the negative offset to the kernel, which is added
 * to the compile time generated pmds. This results in invalid pmds up
 * to the point where we hit the physaddr 0 mapping.
 *
 * We limit the mappings to the region from _text to _end.  _end is
 * rounded up to the 2MB boundary. This catches the invalid pmds as
 * well, as they are located before _text:
 */
void __init cleanup_highmap(void)
{
	unsigned long vaddr = __START_KERNEL_map;
	unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt;
	pmd_t *last_pmd = pmd + PTRS_PER_PMD;

	for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > end)
			set_pmd(pmd, __pmd(0));
	}
}

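/*
 * Pages for the early direct-mapping page tables come from a contiguous
 * physical window reserved by find_early_table_space(): table_end is the
 * next free pfn in that window, table_top its limit.
 */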
static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;
static unsigned long __meminitdata table_top;

static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);

		return adr;
	}

	if (pfn >= table_top)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys  = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

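/*
 * Fill a pte page with 4k mappings for [addr, end) and return the last
 * address that was mapped.
 */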
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
{
	unsigned pages = 0;
	unsigned long last_map_addr = end;
	int i;

	pte_t *pte = pte_page + pte_index(addr);

	for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {

		if (addr >= end) {
			if (!after_bootmem) {
				for(; i < PTRS_PER_PTE; i++, pte++)
					set_pte(pte, __pte(0));
			}
			break;
		}

		if (pte_val(*pte))
			continue;

		if (0)
			printk("   pte=%p addr=%lx pte=%016lx\n",
			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
		pages++;
	}
	update_page_count(PG_LEVEL_4K, pages);

	return last_map_addr;
}

static unsigned long __meminit
phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
{
	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);

	return phys_pte_init(pte, address, end);
}

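/*
 * Set up the pmd entries covering [address, end): use 2M pages when
 * page_size_mask allows it, otherwise fill a pte page per pmd.
 */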
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
			 unsigned long page_size_mask)
{
	unsigned long pages = 0;
	unsigned long last_map_addr = end;

	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long pte_phys;
		pmd_t *pmd = pmd_page + pmd_index(address);
		pte_t *pte;

		if (address >= end) {
			if (!after_bootmem) {
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			}
			break;
		}

		if (pmd_val(*pmd)) {
			if (!pmd_large(*pmd))
				last_map_addr = phys_pte_update(pmd, address,
								 end);
			continue;
		}

		if (page_size_mask & (1<<PG_LEVEL_2M)) {
			pages++;
			set_pte((pte_t *)pmd,
				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
			last_map_addr = (address & PMD_MASK) + PMD_SIZE;
			continue;
		}

		pte = alloc_low_page(&pte_phys);
		last_map_addr = phys_pte_init(pte, address, end);
		unmap_low_page(pte);

		pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
	}
	update_page_count(PG_LEVEL_2M, pages);
	return last_map_addr;
}

static unsigned long __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
			 unsigned long page_size_mask)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	unsigned long last_map_addr;

	spin_lock(&init_mm.page_table_lock);
	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
	return last_map_addr;
}

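/*
 * Set up the pud entries covering [addr, end): use 1G pages when
 * page_size_mask allows it, otherwise build pmd tables via phys_pmd_init().
 */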
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
			 unsigned long page_size_mask)
{
	unsigned long pages = 0;
	unsigned long last_map_addr = end;
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem &&
				!e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			if (!pud_large(*pud))
				last_map_addr = phys_pmd_update(pud, addr, end,
							 page_size_mask);
			continue;
		}

		if (page_size_mask & (1<<PG_LEVEL_1G)) {
			pages++;
			set_pte((pte_t *)pud,
				pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
			last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);

		spin_lock(&init_mm.page_table_lock);
		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
		unmap_low_page(pmd);
		pud_populate(&init_mm, pud, __va(pmd_phys));
		spin_unlock(&init_mm.page_table_lock);

	}
	__flush_tlb_all();
	update_page_count(PG_LEVEL_1G, pages);

	return last_map_addr;
}

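/* Continue the mapping walk through the pud page already referenced by *pgd. */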
static unsigned long __meminit
phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
		 unsigned long page_size_mask)
{
	pud_t *pud;

	pud = (pud_t *)pgd_page_vaddr(*pgd);

	return phys_pud_init(pud, addr, end, page_size_mask);
}

static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, ptes, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
	if (direct_gbpages) {
		unsigned long extra;
		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
	} else
		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	if (cpu_has_pse) {
		unsigned long extra;
		extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
	} else
		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);

	/*
	 * RED-PEN putting page tables only on node 0 could
	 * cause a hotspot and fill up ZONE_DMA. The page tables
	 * need roughly 0.5KB per GB.
	 */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables, PAGE_SIZE);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;
	table_top = table_start + (tables >> PAGE_SHIFT);

	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
}

static void __init init_gbpages(void)
{
	if (direct_gbpages && cpu_has_gbpages)
		printk(KERN_INFO "Using GB pages for direct mapping\n");
	else
		direct_gbpages = 0;
}

#ifdef CONFIG_MEMTEST
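/*
 * Boot-time memory test: fill free RAM with a pattern, read it back, and
 * reserve any region that fails verification as "BAD RAM".
 */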

static void __init memtest(unsigned long start_phys, unsigned long size,
				 unsigned pattern)
{
	unsigned long i;
	unsigned long *start;
	unsigned long start_bad;
	unsigned long last_bad;
	unsigned long val;
	unsigned long start_phys_aligned;
	unsigned long count;
	unsigned long incr;

	switch (pattern) {
	case 0:
		val = 0UL;
		break;
	case 1:
		val = -1UL;
		break;
	case 2:
		val = 0x5555555555555555UL;
		break;
	case 3:
		val = 0xaaaaaaaaaaaaaaaaUL;
		break;
	default:
		return;
	}

	incr = sizeof(unsigned long);
	start_phys_aligned = ALIGN(start_phys, incr);
	count = (size - (start_phys_aligned - start_phys))/incr;
	start = __va(start_phys_aligned);
	start_bad = 0;
	last_bad = 0;

	for (i = 0; i < count; i++)
		start[i] = val;
	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
		if (*start != val) {
			if (start_phys_aligned == last_bad + incr) {
				last_bad += incr;
			} else {
				if (start_bad) {
					printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
						val, start_bad, last_bad + incr);
					reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
				}
				start_bad = last_bad = start_phys_aligned;
			}
		}
	}
	if (start_bad) {
		printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
			val, start_bad, last_bad + incr);
		reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
	}

}

/* default is disabled */
static int memtest_pattern __initdata;

static int __init parse_memtest(char *arg)
{
	if (arg)
		memtest_pattern = simple_strtoul(arg, NULL, 0);
	return 0;
}

early_param("memtest", parse_memtest);

static void __init early_memtest(unsigned long start, unsigned long end)
{
	u64 t_start, t_size;
	unsigned pattern;

	if (!memtest_pattern)
		return;

	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
	for (pattern = 0; pattern < memtest_pattern; pattern++) {
		t_start = start;
		t_size = 0;
		while (t_start < end) {
			t_start = find_e820_area_size(t_start, &t_size, 1);

			/* done ? */
			if (t_start >= end)
				break;
			if (t_start + t_size > end)
				t_size = end - t_start;

			printk(KERN_CONT "\n  %016llx - %016llx pattern %d",
				(unsigned long long)t_start,
				(unsigned long long)t_start + t_size, pattern);

			memtest(t_start, t_size, pattern);

			t_start += t_size;
		}
	}
	printk(KERN_CONT "\n");
}
#else
static void __init early_memtest(unsigned long start, unsigned long end)
{
}
#endif

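/*
 * Create the kernel direct mapping for the physical range [start, end),
 * walking the pgd/pud/pmd/pte levels and honouring page_size_mask.
 */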
static unsigned long __init kernel_physical_mapping_init(unsigned long start,
						unsigned long end,
						unsigned long page_size_mask)
{

	unsigned long next, last_map_addr = end;

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		pgd_t *pgd = pgd_offset_k(start);
		unsigned long pud_phys;
		pud_t *pud;

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;

		if (pgd_val(*pgd)) {
			last_map_addr = phys_pud_update(pgd, __pa(start),
						 __pa(end), page_size_mask);
			continue;
		}

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
						 page_size_mask);
		unmap_low_page(pud);
		pgd_populate(&init_mm, pgd_offset_k(start),
			     __va(pud_phys));
	}

	return last_map_addr;
}

struct map_range {
	unsigned long start;
	unsigned long end;
	unsigned page_size_mask;
};

#define NR_RANGE_MR 5

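/* Record one [start_pfn, end_pfn) range and its page size mask in mr[]. */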
static int save_mr(struct map_range *mr, int nr_range,
		   unsigned long start_pfn, unsigned long end_pfn,
		   unsigned long page_size_mask)
{

	if (start_pfn < end_pfn) {
		if (nr_range >= NR_RANGE_MR)
			panic("run out of range for init_memory_mapping\n");
		mr[nr_range].start = start_pfn<<PAGE_SHIFT;
		mr[nr_range].end   = end_pfn<<PAGE_SHIFT;
		mr[nr_range].page_size_mask = page_size_mask;
		nr_range++;
	}

	return nr_range;
}

/*
 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them they are temporarily mapped.
 */
unsigned long __init_refok init_memory_mapping(unsigned long start,
					       unsigned long end)
{
	unsigned long last_map_addr = 0;
	unsigned long page_size_mask = 0;
	unsigned long start_pfn, end_pfn;

	struct map_range mr[NR_RANGE_MR];
	int nr_range, i;

	printk(KERN_INFO "init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 *
	 * Later we should allocate these tables in the local node of the
	 * memory mapped. Unfortunately this is done currently before the
	 * nodes are discovered.
	 */
	if (!after_bootmem)
		init_gbpages();

	if (direct_gbpages)
		page_size_mask |= 1 << PG_LEVEL_1G;
	if (cpu_has_pse)
		page_size_mask |= 1 << PG_LEVEL_2M;

	memset(mr, 0, sizeof(mr));
	nr_range = 0;

	/* head if not big page alignment ?*/
	start_pfn = start >> PAGE_SHIFT;
	end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
			<< (PMD_SHIFT - PAGE_SHIFT);
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);

	/* big page (2M) range*/
	start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
			 << (PMD_SHIFT - PAGE_SHIFT);
	end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
			 << (PUD_SHIFT - PAGE_SHIFT);
	if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
		end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
			page_size_mask & (1<<PG_LEVEL_2M));

	/* big page (1G) range */
	start_pfn = end_pfn;
	end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask &
				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));

	/* tail is not big page (1G) alignment */
	start_pfn = end_pfn;
	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
			page_size_mask & (1<<PG_LEVEL_2M));

	/* tail is not big page (2M) alignment */
	start_pfn = end_pfn;
	end_pfn = end>>PAGE_SHIFT;
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);

	for (i = 0; i < nr_range; i++)
		printk(KERN_DEBUG " %010lx - %010lx page %s\n",
				mr[i].start, mr[i].end,
			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));

	if (!after_bootmem)
		find_early_table_space(end);

	for (i = 0; i < nr_range; i++)
		last_map_addr = kernel_physical_mapping_init(
					mr[i].start, mr[i].end,
					mr[i].page_size_mask);

	if (!after_bootmem)
		mmu_cr4_features = read_cr4();
	__flush_tlb_all();

	if (!after_bootmem && table_end > table_start)
		reserve_early(table_start << PAGE_SHIFT,
				 table_end << PAGE_SHIFT, "PGTABLE");

	printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
			 last_map_addr, end);

	if (!after_bootmem)
		early_memtest(start, end);

	return last_map_addr >> PAGE_SHIFT;
}

#ifndef CONFIG_NUMA
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long bootmap_size, bootmap;

	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
				 PAGE_SIZE);
	if (bootmap == -1L)
		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
	/* don't touch min_low_pfn */
	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
					 0, end_pfn);
	e820_register_active_regions(0, start_pfn, end_pfn);
	free_bootmem_with_active_regions(0, end_pfn);
	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}

void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = max_pfn;

	memory_present(0, 0, max_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/*
 * Memory hotplug specific functions
 */
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	last_mapped_pfn = init_memory_mapping(start, start + size-1);
	if (last_mapped_pfn > max_pfn_mapped)
		max_pfn_mapped = last_mapped_pfn;

	ret = __add_pages(zone, start_pfn, nr_pages);
	WARN_ON(1);

	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

/*
 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
 * is valid. The argument is a physical page number.
 *
 *
 * On x86, access has to be given to the first megabyte of ram because that area
 * contains bios code and data regions used by X and dosemu and similar apps.
 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
 * mmio resources as well as potential bios/acpi data regions.
 */
int devmem_is_allowed(unsigned long pagenr)
{
	if (pagenr <= 256)
		return 1;
	if (!page_is_ram(pagenr))
		return 1;
	return 0;
}


static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
			 kcore_modules, kcore_vsyscall;

void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear_bss() has already cleared the empty_zero_page */

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = max_pfn - totalram_pages -
					absent_pages_in_range(0, max_pfn);
	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
				"%ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

	cpa_init();
}

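/*
 * Free the pages in [begin, end) back to the page allocator after
 * poisoning them; with CONFIG_DEBUG_PAGEALLOC they are unmapped instead.
 */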
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr = begin;

	if (addr >= end)
		return;

	/*
	 * If debugging page accesses then do not free this memory but
	 * mark them not present - any buggy init-section access will
	 * create a kernel page fault:
	 */
#ifdef CONFIG_DEBUG_PAGEALLOC
	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
		begin, PAGE_ALIGN(end));
	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
#else
	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);

	for (; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
#endif
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

void mark_rodata_ro(void)
{
	unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);
	set_memory_ro(start, (end - start) >> PAGE_SHIFT);

	/*
	 * The rodata section (but not the kernel text!) should also be
	 * not-executable.
	 */
	start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
	set_memory_nx(start, (end - start) >> PAGE_SHIFT);

	rodata_test();

#ifdef CONFIG_CPA_DEBUG
	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
	set_memory_rw(start, (end-start) >> PAGE_SHIFT);

	printk(KERN_INFO "Testing CPA: again\n");
	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
}

#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

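/*
 * Reserve a physical range with the bootmem allocator (per node when the
 * range stays within one node) and account reservations below the DMA limit.
 */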
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
				   int flags)
{
#ifdef CONFIG_NUMA
	int nid, next_nid;
	int ret;
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;

	if (pfn >= max_pfn) {
		/*
		 * This can happen with kdump kernels when accessing
		 * firmware tables:
		 */
		if (pfn < max_pfn_mapped)
			return -EFAULT;

		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
				phys, len);
		return -EFAULT;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	nid = phys_to_nid(phys);
	next_nid = phys_to_nid(phys + len - 1);
	if (nid == next_nid)
		ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
	else
		ret = reserve_bootmem(phys, len, flags);

	if (ret != 0)
		return ret;

#else
	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif

	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}

	return 0;
}

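/*
 * Walk the kernel page tables to check whether a kernel virtual address is
 * backed by a present mapping.
 */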
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;

	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;

	return pfn_valid(pte_pfn(*pte));
}

/*
 * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
 * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
 * not need special handling anymore:
 */
static struct vm_area_struct gate_vma = {
	.vm_start	= VSYSCALL_START,
	.vm_end		= VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
	.vm_page_prot	= PAGE_READONLY_EXEC,
	.vm_flags	= VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);

	if (!vma)
		return 0;

	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/*
 * Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives:
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	if (vma == &gate_vma)
		return "[vsyscall]";
	return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;

int __meminit
vmemmap_populate(struct page *start_page, unsigned long size, int node)
{
	unsigned long addr = (unsigned long)start_page;
	unsigned long end = (unsigned long)(start_page + size);
	unsigned long next;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	for (; addr < end; addr = next) {
		void *p = NULL;

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;

		if (!cpu_has_pse) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			pmd = vmemmap_pmd_populate(pud, addr, node);

			if (!pmd)
				return -ENOMEM;

			p = vmemmap_pte_populate(pmd, addr, node);

			if (!p)
				return -ENOMEM;

			addr_end = addr + PAGE_SIZE;
			p_end = p + PAGE_SIZE;
		} else {
			next = pmd_addr_end(addr, end);

			pmd = pmd_offset(pud, addr);
			if (pmd_none(*pmd)) {
				pte_t entry;

				p = vmemmap_alloc_block(PMD_SIZE, node);
				if (!p)
					return -ENOMEM;

				entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
						PAGE_KERNEL_LARGE);
				set_pmd(pmd, __pmd(pte_val(entry)));

				/* check to see if we have contiguous blocks */
				if (p_end != p || node_start != node) {
					if (p_start)
						printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
						       addr_start, addr_end-1, p_start, p_end-1, node_start);
					addr_start = addr;
					node_start = node;
					p_start = p;
				}

				addr_end = addr + PMD_SIZE;
				p_end = p + PMD_SIZE;
			} else
				vmemmap_verify((pte_t *)pmd, node, addr, next);
		}

	}
	return 0;
}

void __meminit vmemmap_populate_print_last(void)
{
	if (p_start) {
		printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
			addr_start, addr_end-1, p_start, p_end-1, node_start);
		p_start = NULL;
		p_end = NULL;
		node_start = 0;
	}
}
#endif