Commit a6ee6a31 authored by Anton Blanchard

Merge samba.org:/scratch/anton/export into samba.org:/scratch/anton/tmp3

parents 5e7bd347 32737e43
@@ -69,6 +69,17 @@ config PPC64
bool
default y
config HUGETLB_PAGE
bool "Huge TLB Page Support"
help
This enables support for huge pages. User space applications
can make use of this support with the sys_alloc_hugepages and
sys_free_hugepages system calls. If your applications are
huge page aware and your processor supports this (currently only
POWER4), then say Y here.
Otherwise, say N.
config SMP
bool "Symmetric multi-processing support"
---help---
@@ -197,7 +197,7 @@ pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
if (!pgd_none(*pg)) {
pm = pmd_offset(pg, ea);
if (!pmd_none(*pm)) {
if (pmd_present(*pm)) {
pt = pte_offset_kernel(pm, ea);
pte = *pt;
if (!pte_present(pte))
@@ -436,8 +436,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
ret = hash_huge_page(mm, access, ea, vsid, local);
if (ret < 0) {
ptep = find_linux_pte(pgdir, ea);
ret = __hash_page(ea, access, vsid, ptep, trap, local);
}
spin_unlock(&mm->page_table_lock);
return ret;
@@ -36,18 +36,6 @@
#include <asm/tlb.h>
#include <asm/hvcall.h>
long plpar_pte_enter(unsigned long flags,
unsigned long ptex,
unsigned long new_pteh, unsigned long new_ptel,
unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
{
unsigned long dummy, ret;
ret = plpar_hcall(H_ENTER, flags, ptex, new_pteh, new_ptel,
old_pteh_ret, old_ptel_ret, &dummy);
return(ret);
}
long plpar_pte_remove(unsigned long flags,
unsigned long ptex,
unsigned long avpn,
@@ -83,7 +71,6 @@ long plpar_tce_get(unsigned long liobn,
tce_ret, &dummy, &dummy);
}
long plpar_tce_put(unsigned long liobn,
unsigned long ioba,
unsigned long tceval)
@@ -104,10 +91,9 @@ long plpar_put_term_char(unsigned long termno,
unsigned long len,
const char *buffer)
{
unsigned long dummy;
unsigned long *lbuf = (unsigned long *)buffer; /* ToDo: alignment? */
return plpar_hcall(H_PUT_TERM_CHAR, termno, len,
lbuf[0], lbuf[1], &dummy, &dummy, &dummy);
return plpar_hcall_norets(H_PUT_TERM_CHAR, termno, len, lbuf[0],
lbuf[1]);
}
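/* Note (added for illustration, not in the original source):
 * H_PUT_TERM_CHAR returns no output values beyond the status in r3,
 * so plpar_hcall_norets() suffices here and the dummy output
 * arguments required by the full plpar_hcall() interface go away. */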
static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum,
@@ -287,12 +273,11 @@ int hvc_get_chars(int index, char *buf, int count)
int hvc_put_chars(int index, const char *buf, int count)
{
unsigned long dummy;
unsigned long *lbuf = (unsigned long *) buf;
long ret;
ret = plpar_hcall(H_PUT_TERM_CHAR, index, count, lbuf[0], lbuf[1],
&dummy, &dummy, &dummy);
ret = plpar_hcall_norets(H_PUT_TERM_CHAR, index, count, lbuf[0],
lbuf[1]);
if (ret == H_Success)
return count;
if (ret == H_Busy)
@@ -318,7 +303,6 @@ int hvc_count(int *start_termno)
long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long va, unsigned long prpn,
int secondary, unsigned long hpteflags,
@@ -329,6 +313,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long flags;
unsigned long slot;
HPTE lhpte;
unsigned long dummy0, dummy1;
/* Fill in the local HPTE with absolute rpn, avpn and flags */
lhpte.dw1.dword1 = 0;
@@ -348,7 +333,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Now fill in the actual HPTE */
/* Set CEC cookie to 0 */
/* Large page = 0 */
/* Zero page = 0 */
/* I-cache Invalidate = 0 */
/* I-cache synchronize = 0 */
@@ -359,19 +343,8 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;
__asm__ __volatile__ (
H_ENTER_r3
"mr 4, %2\n"
"mr 5, %3\n"
"mr 6, %4\n"
"mr 7, %5\n"
HSC
"mr %0, 3\n"
"mr %1, 4\n"
: "=r" (lpar_rc), "=r" (slot)
: "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
"r" (lhpte.dw1.dword1)
: "r3", "r4", "r5", "r6", "r7", "cc");
lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, lhpte.dw0.dword0,
lhpte.dw1.dword1, &slot, &dummy0, &dummy1);
if (lpar_rc == H_PTEG_Full)
return -1;
@@ -221,15 +221,18 @@ void make_slbe(unsigned long esid, unsigned long vsid, int large,
}
static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
int kernel_segment)
int kernel_segment, mm_context_t context)
{
if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
int large = 0;
#ifndef CONFIG_PPC_ISERIES
if (REGION_ID(esid << SID_SHIFT) == KERNEL_REGION_ID)
make_slbe(esid, vsid, 1, kernel_segment);
else
large = 1;
else if (REGION_ID(esid << SID_SHIFT) == USER_REGION_ID)
large = in_hugepage_area(context, esid << SID_SHIFT);
#endif
make_slbe(esid, vsid, 0, kernel_segment);
make_slbe(esid, vsid, large, kernel_segment);
} else {
unsigned char top_entry, stab_entry, *segments;
@@ -255,6 +258,7 @@ int ste_allocate(unsigned long ea)
{
unsigned long vsid, esid;
int kernel_segment = 0;
mm_context_t context;
PMC_SW_PROCESSOR(stab_faults);
@@ -266,16 +270,18 @@ int ste_allocate(unsigned long ea)
if (REGION_ID(ea) >= KERNEL_REGION_ID) {
kernel_segment = 1;
vsid = get_kernel_vsid(ea);
context = REGION_ID(ea);
} else {
struct mm_struct *mm = current->mm;
if (mm)
vsid = get_vsid(mm->context, ea);
else
if (! current->mm)
return 1;
context = current->mm->context;
vsid = get_vsid(context, ea);
}
esid = GET_ESID(ea);
__ste_allocate(esid, vsid, kernel_segment);
__ste_allocate(esid, vsid, kernel_segment, context);
if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
/* Order update */
asm volatile("sync":::"memory");
@@ -302,7 +308,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
for (esid = 0; esid < 16; esid++) {
unsigned long ea = esid << SID_SHIFT;
vsid = get_vsid(mm->context, ea);
__ste_allocate(esid, vsid, 0);
__ste_allocate(esid, vsid, 0, mm->context);
}
} else {
unsigned long pc = KSTK_EIP(tsk);
@@ -316,7 +322,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
(REGION_ID(pc) >= KERNEL_REGION_ID))
return;
vsid = get_vsid(mm->context, pc);
__ste_allocate(GET_ESID(pc), vsid, 0);
__ste_allocate(GET_ESID(pc), vsid, 0, mm->context);
}
if (stack && (pc_segment != stack_segment)) {
@@ -324,7 +330,7 @@ static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
(REGION_ID(stack) >= KERNEL_REGION_ID))
return;
vsid = get_vsid(mm->context, stack);
__ste_allocate(GET_ESID(stack), vsid, 0);
__ste_allocate(GET_ESID(stack), vsid, 0, mm->context);
}
}
@@ -384,12 +384,12 @@ void xics_init_IRQ(void)
int i;
unsigned long intr_size = 0;
struct device_node *np;
uint *ireg, ilen, indx=0;
uint *ireg, ilen, indx = 0;
unsigned long intr_base = 0;
struct xics_interrupt_node {
unsigned long long addr;
unsigned long long size;
} inodes[NR_CPUS*2];
unsigned long addr;
unsigned long size;
} inodes[NR_CPUS];
ppc64_boot_msg(0x20, "XICS Init");
@@ -6,3 +6,4 @@ EXTRA_CFLAGS += -mno-minimal-toc
obj-y := fault.o init.o extable.o imalloc.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
/*
* PPC64 (POWER4) Huge TLB Page Support for Kernel.
*
* Copyright (C) 2003 David Gibson, IBM Corporation.
*
* Based on the IA-32 version:
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/tlb.h>
#include <asm/rmap.h>
#include <linux/sysctl.h>
int htlbpage_max;
/* This lock protects the two counters and list below */
static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
static int htlbpage_free; /* = 0 */
static int htlbpage_total; /* = 0 */
static struct list_head hugepage_freelists[MAX_NUMNODES];
static void enqueue_huge_page(struct page *page)
{
list_add(&page->list,
&hugepage_freelists[page_zone(page)->zone_pgdat->node_id]);
}
/* XXX make this a sysctl */
unsigned long largepage_roundrobin = 1;
static struct page *dequeue_huge_page(void)
{
static int nid = 0;
struct page *page = NULL;
int i;
if (!largepage_roundrobin)
nid = numa_node_id();
for (i = 0; i < numnodes; i++) {
if (!list_empty(&hugepage_freelists[nid]))
break;
nid = (nid + 1) % numnodes;
}
if (!list_empty(&hugepage_freelists[nid])) {
page = list_entry(hugepage_freelists[nid].next, struct page, list);
list_del(&page->list);
}
if (largepage_roundrobin)
nid = (nid + 1) % numnodes;
return page;
}
static struct page *alloc_fresh_huge_page(void)
{
static int nid = 0;
struct page *page;
page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
if (!page)
return NULL;
nid = page_zone(page)->zone_pgdat->node_id;
nid = (nid + 1) % numnodes;
return page;
}
/* HugePTE layout:
 *
 * 31 ........ 15 | 14 ...... 8 | 7 6 5   | 4   | 3    | 2  | 1 | 0
 * PFN>>12        | (must be 0) | HASH_IX | 2ND | HASH | RW | - | HG=1
 */
#define HUGEPTE_SHIFT 15
#define _HUGEPAGE_PFN 0xffff8000
#define _HUGEPAGE_BAD 0x00007f00
#define _HUGEPAGE_HASHPTE 0x00000008
#define _HUGEPAGE_SECONDARY 0x00000010
#define _HUGEPAGE_GROUP_IX 0x000000e0
#define _HUGEPAGE_HPTEFLAGS (_HUGEPAGE_HASHPTE | _HUGEPAGE_SECONDARY | \
_HUGEPAGE_GROUP_IX)
#define _HUGEPAGE_RW 0x00000004
typedef struct {unsigned int val;} hugepte_t;
#define hugepte_val(hugepte) ((hugepte).val)
#define __hugepte(x) ((hugepte_t) { (x) } )
#define hugepte_pfn(x) \
((unsigned long)(hugepte_val(x)>>HUGEPTE_SHIFT) << HUGETLB_PAGE_ORDER)
#define mk_hugepte(page,wr) __hugepte( \
((page_to_pfn(page)>>HUGETLB_PAGE_ORDER) << HUGEPTE_SHIFT ) \
| (!!(wr) * _HUGEPAGE_RW) | _PMD_HUGEPAGE )
#define hugepte_bad(x) ( !(hugepte_val(x) & _PMD_HUGEPAGE) || \
(hugepte_val(x) & _HUGEPAGE_BAD) )
#define hugepte_page(x) pfn_to_page(hugepte_pfn(x))
#define hugepte_none(x) (!(hugepte_val(x) & _HUGEPAGE_PFN))
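/* Worked example (illustrative, not part of the original file): with
 * PAGE_SHIFT = 12 and HPAGE_SHIFT = 24, HUGETLB_PAGE_ORDER = 12. A
 * writable 16MB page at pfn 0x5000 (physical 0x5000000) encodes as
 *
 *   hugepte_val = ((0x5000 >> 12) << 15) | _HUGEPAGE_RW | _PMD_HUGEPAGE
 *               = 0x00028005
 *
 * and hugepte_pfn() recovers (0x28005 >> 15) << 12 = 0x5000. */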
static void free_huge_page(struct page *page);
static void flush_hash_hugepage(mm_context_t context, unsigned long ea,
hugepte_t pte, int local);
static inline unsigned int hugepte_update(hugepte_t *p, unsigned int clr,
unsigned int set)
{
unsigned int old, tmp;
__asm__ __volatile__(
"1: lwarx %0,0,%3 # pte_update\n\
andc %1,%0,%4 \n\
or %1,%1,%5 \n\
stwcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p)
: "cc" );
return old;
}
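/* For reference, a portable sketch of the primitive above (an
 * illustration only, assuming a compiler with GCC's __sync builtins;
 * the kernel itself must use the lwarx/stwcx. loop): */
static inline unsigned int hugepte_update_sketch(hugepte_t *p,
						 unsigned int clr,
						 unsigned int set)
{
	unsigned int old, new;

	do {
		old = hugepte_val(*p);		/* snapshot current value */
		new = (old & ~clr) | set;	/* clear, then set, bits */
	} while (!__sync_bool_compare_and_swap(&p->val, old, new));

	return old;				/* prior value, as above */
}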
static inline void set_hugepte(hugepte_t *ptep, hugepte_t pte)
{
hugepte_update(ptep, ~_HUGEPAGE_HPTEFLAGS,
hugepte_val(pte) & ~_HUGEPAGE_HPTEFLAGS);
}
static struct page *alloc_hugetlb_page(void)
{
int i;
struct page *page;
spin_lock(&htlbpage_lock);
page = dequeue_huge_page();
if (!page) {
spin_unlock(&htlbpage_lock);
return NULL;
}
htlbpage_free--;
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
page->lru.prev = (void *)free_huge_page;
for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
clear_highpage(&page[i]);
return page;
}
static hugepte_t *hugepte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pmd_t *pmd = NULL;
BUG_ON(!in_hugepage_area(mm->context, addr));
pgd = pgd_offset(mm, addr);
pmd = pmd_alloc(mm, pgd, addr);
/* We shouldn't find a (normal) PTE page pointer here */
BUG_ON(!pmd_none(*pmd) && !pmd_hugepage(*pmd));
return (hugepte_t *)pmd;
}
static hugepte_t *hugepte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pmd_t *pmd = NULL;
BUG_ON(!in_hugepage_area(mm->context, addr));
pgd = pgd_offset(mm, addr);
pmd = pmd_offset(pgd, addr);
/* We shouldn't find a (normal) PTE page pointer here */
BUG_ON(!pmd_none(*pmd) && !pmd_hugepage(*pmd));
return (hugepte_t *)pmd;
}
static void setup_huge_pte(struct mm_struct *mm, struct page *page,
hugepte_t *ptep, int write_access)
{
hugepte_t entry;
int i;
mm->rss += (HPAGE_SIZE / PAGE_SIZE);
entry = mk_hugepte(page, write_access);
for (i = 0; i < HUGEPTE_BATCH_SIZE; i++)
set_hugepte(ptep+i, entry);
}
static void teardown_huge_pte(hugepte_t *ptep)
{
int i;
for (i = 0; i < HUGEPTE_BATCH_SIZE; i++)
pmd_clear((pmd_t *)(ptep+i));
}
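/* Note (illustrative): with HPAGE_SHIFT = 24 and PMD_SHIFT = 21
 * (assumed for this tree), HUGEPTE_BATCH_SIZE = 1 << (24 - 21) = 8,
 * so each 16MB huge page is represented by 8 identical hugepte
 * entries and the helpers above walk the whole batch. */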
/*
* This function checks for proper alignment of input addr and len parameters.
*/
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
if (addr & ~HPAGE_MASK)
return -EINVAL;
if (! is_hugepage_only_range(addr, len))
return -EINVAL;
return 0;
}
static void do_slbia(void *unused)
{
asm volatile ("isync; slbia; isync":::"memory");
}
/* Activate the low hpage region for 32-bit processes.  mmap_sem must
 * be held */
static int open_32bit_htlbpage_range(struct mm_struct *mm)
{
struct vm_area_struct *vma;
unsigned long addr;
if (mm->context & CONTEXT_LOW_HPAGES)
return 0; /* The window is already open */
/* Check no VMAs are in the region */
vma = find_vma(mm, TASK_HPAGE_BASE_32);
if (vma && (vma->vm_start < TASK_HPAGE_END_32))
return -EBUSY;
/* Clean up any leftover PTE pages in the region */
spin_lock(&mm->page_table_lock);
for (addr = TASK_HPAGE_BASE_32; addr < TASK_HPAGE_END_32;
addr += PMD_SIZE) {
pgd_t *pgd = pgd_offset(mm, addr);
pmd_t *pmd = pmd_offset(pgd, addr);
if (! pmd_none(*pmd)) {
struct page *page = pmd_page(*pmd);
pte_t *pte = (pte_t *)pmd_page_kernel(*pmd);
int i;
/* No VMAs, so there should be no PTEs, check
* just in case. */
for (i = 0; i < PTRS_PER_PTE; i++) {
BUG_ON(! pte_none(*pte));
pte++;
}
pmd_clear(pmd);
pgtable_remove_rmap(page);
pte_free(page);
}
}
spin_unlock(&mm->page_table_lock);
/* FIXME: do we need to scan for PTEs too? */
mm->context |= CONTEXT_LOW_HPAGES;
/* the context change must make it to memory before the slbia,
* so that further SLB misses do the right thing. */
mb();
on_each_cpu(do_slbia, NULL, 0, 1);
return 0;
}
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
{
hugepte_t *src_pte, *dst_pte, entry;
struct page *ptepage;
unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
while (addr < end) {
BUG_ON(! in_hugepage_area(src->context, addr));
BUG_ON(! in_hugepage_area(dst->context, addr));
dst_pte = hugepte_alloc(dst, addr);
if (!dst_pte)
return -ENOMEM;
src_pte = hugepte_offset(src, addr);
entry = *src_pte;
if ((addr % HPAGE_SIZE) == 0) {
/* This is the first hugepte in a batch */
ptepage = hugepte_page(entry);
get_page(ptepage);
dst->rss += (HPAGE_SIZE / PAGE_SIZE);
}
set_hugepte(dst_pte, entry);
addr += PMD_SIZE;
}
return 0;
}
int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i)
{
unsigned long vpfn, vaddr = *position;
int remainder = *length;
WARN_ON(!is_vm_hugetlb_page(vma));
vpfn = vaddr/PAGE_SIZE;
while (vaddr < vma->vm_end && remainder) {
BUG_ON(!in_hugepage_area(mm->context, vaddr));
if (pages) {
hugepte_t *pte;
struct page *page;
pte = hugepte_offset(mm, vaddr);
/* hugetlb should be locked, and hence, prefaulted */
WARN_ON(!pte || hugepte_none(*pte));
page = &hugepte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
WARN_ON(!PageCompound(page));
get_page(page);
pages[i] = page;
}
if (vmas)
vmas[i] = vma;
vaddr += PAGE_SIZE;
++vpfn;
--remainder;
++i;
}
*length = remainder;
*position = vaddr;
return i;
}
struct page *
follow_huge_addr(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, int write)
{
return NULL;
}
struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr)
{
return NULL;
}
int pmd_huge(pmd_t pmd)
{
return pmd_hugepage(pmd);
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
struct page *page;
BUG_ON(! pmd_hugepage(*pmd));
page = hugepte_page(*(hugepte_t *)pmd);
if (page) {
page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
get_page(page);
}
return page;
}
static void free_huge_page(struct page *page)
{
BUG_ON(page_count(page));
BUG_ON(page->mapping);
INIT_LIST_HEAD(&page->list);
spin_lock(&htlbpage_lock);
enqueue_huge_page(page);
htlbpage_free++;
spin_unlock(&htlbpage_lock);
}
void huge_page_release(struct page *page)
{
if (!put_page_testzero(page))
return;
free_huge_page(page);
}
void unmap_hugepage_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long addr;
hugepte_t *ptep;
struct page *page;
int local = 0;
cpumask_t tmp;
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON((start % HPAGE_SIZE) != 0);
BUG_ON((end % HPAGE_SIZE) != 0);
/* XXX are there races with checking cpu_vm_mask? - Anton */
tmp = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
local = 1;
for (addr = start; addr < end; addr += HPAGE_SIZE) {
hugepte_t pte;
BUG_ON(!in_hugepage_area(mm->context, addr));
ptep = hugepte_offset(mm, addr);
if (!ptep || hugepte_none(*ptep))
continue;
pte = *ptep;
page = hugepte_page(pte);
teardown_huge_pte(ptep);
if (hugepte_val(pte) & _HUGEPAGE_HASHPTE)
flush_hash_hugepage(mm->context, addr,
pte, local);
huge_page_release(page);
}
mm->rss -= (end - start) >> PAGE_SHIFT;
}
void zap_hugepage_range(struct vm_area_struct *vma,
unsigned long start, unsigned long length)
{
struct mm_struct *mm = vma->vm_mm;
spin_lock(&mm->page_table_lock);
unmap_hugepage_range(vma, start, start + length);
spin_unlock(&mm->page_table_lock);
}
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret = 0;
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON((vma->vm_start % HPAGE_SIZE) != 0);
BUG_ON((vma->vm_end % HPAGE_SIZE) != 0);
spin_lock(&mm->page_table_lock);
for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
unsigned long idx;
hugepte_t *pte = hugepte_alloc(mm, addr);
struct page *page;
BUG_ON(!in_hugepage_area(mm->context, addr));
if (!pte) {
ret = -ENOMEM;
goto out;
}
if (!hugepte_none(*pte))
continue;
idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
page = find_get_page(mapping, idx);
if (!page) {
page = alloc_hugetlb_page();
if (!page) {
ret = -ENOMEM;
goto out;
}
ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
unlock_page(page);
if (ret) {
free_huge_page(page);
goto out;
}
}
setup_huge_pte(mm, page, pte, vma->vm_flags & VM_WRITE);
}
out:
spin_unlock(&mm->page_table_lock);
return ret;
}
/* Because we have an exclusive hugepage region which lies within the
* normal user address space, we have to take special measures to make
* non-huge mmap()s evade the hugepage reserved region. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
if (len > TASK_SIZE)
return -ENOMEM;
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start) &&
!is_hugepage_only_range(addr,len))
return addr;
}
start_addr = addr = mm->free_area_cache;
full_search:
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (TASK_SIZE - len < addr) {
/*
* Start a new search - just in case we missed
* some holes.
*/
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
goto full_search;
}
return -ENOMEM;
}
if (!vma || addr + len <= vma->vm_start) {
if (is_hugepage_only_range(addr, len)) {
if (addr < TASK_HPAGE_END_32)
addr = TASK_HPAGE_END_32;
else
addr = TASK_HPAGE_END;
continue;
}
/*
* Remember the place where we stopped the search:
*/
mm->free_area_cache = addr + len;
return addr;
}
addr = vma->vm_end;
}
}
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct vm_area_struct *vma;
unsigned long base, end;
if (len & ~HPAGE_MASK)
return -EINVAL;
if (!(cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE))
return -EINVAL;
if (test_thread_flag(TIF_32BIT)) {
int err;
err = open_32bit_htlbpage_range(current->mm);
if (err)
return err; /* Should this just be EINVAL? */
base = TASK_HPAGE_BASE_32;
end = TASK_HPAGE_END_32;
} else {
base = TASK_HPAGE_BASE;
end = TASK_HPAGE_END;
}
if (!in_hugepage_area(current->mm->context, addr)
|| (addr & (HPAGE_SIZE - 1)))
addr = base;
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (addr + len > end)
return -ENOMEM;
if (!vma || (addr + len) <= vma->vm_start)
return addr;
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
/* Because we're in an exclusively hugepage region,
* this alignment shouldn't have skipped over any
* other vmas */
}
}
static inline unsigned long computeHugeHptePP(unsigned int hugepte)
{
unsigned long flags = 0x2;
if (! (hugepte & _HUGEPAGE_RW))
flags |= 0x1;
return flags;
}
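/* Worked example (illustrative): a writable hugepte yields pp = 0x2
 * (read/write under the usual kernel Ks/Kp settings); a read-only one
 * yields pp = 0x2 | 0x1 = 0x3 (read-only), following the PowerPC
 * hashed-page-table protection encoding. */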
int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
hugepte_t *ptep;
unsigned long va, vpn;
int is_write;
hugepte_t old_pte, new_pte;
unsigned long hpteflags, prpn;
long slot;
/* Is this for us? */
if (!in_hugepage_area(mm->context, ea))
return -1;
ea &= ~(HPAGE_SIZE-1);
/* We have to find the first hugepte in the batch, since
* that's the one that will store the HPTE flags */
ptep = hugepte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
* do_page_fault
*/
if (unlikely(!ptep || hugepte_none(*ptep)))
return 1;
BUG_ON(hugepte_bad(*ptep));
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
is_write = access & _PAGE_RW;
if (unlikely(is_write && !(hugepte_val(*ptep) & _HUGEPAGE_RW)))
return 1;
/*
* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
* the most common case)
* 2. There is a valid (present) pte with an associated HPTE. The
* current values of the pp bits in the HPTE prevent access
* because we are doing software DIRTY bit management and the
* page is currently not DIRTY.
*/
old_pte = *ptep;
new_pte = old_pte;
hpteflags = computeHugeHptePP(hugepte_val(new_pte));
/* Check if pte already has an hpte (case 2) */
if (unlikely(hugepte_val(old_pte) & _HUGEPAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
hash = hpt_hash(vpn, 1);
if (hugepte_val(old_pte) & _HUGEPAGE_SECONDARY)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (hugepte_val(old_pte) & _HUGEPAGE_GROUP_IX) >> 5;
if (ppc_md.hpte_updatepp(slot, hpteflags, va, 1, local) == -1)
hugepte_val(old_pte) &= ~_HUGEPAGE_HPTEFLAGS;
}
if (likely(!(hugepte_val(old_pte) & _HUGEPAGE_HASHPTE))) {
unsigned long hash = hpt_hash(vpn, 1);
unsigned long hpte_group;
prpn = hugepte_pfn(old_pte);
repeat:
hpte_group = ((hash & htab_data.htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
/* Update the linux pte with the HPTE slot */
hugepte_val(new_pte) &= ~_HUGEPAGE_HPTEFLAGS;
hugepte_val(new_pte) |= _HUGEPAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
hpteflags |= _PAGE_COHERENT;
slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0,
hpteflags, 0, 1);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
hugepte_val(new_pte) |= _HUGEPAGE_SECONDARY;
hpte_group = ((~hash & htab_data.htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
slot = ppc_md.hpte_insert(hpte_group, va, prpn,
1, hpteflags, 0, 1);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
ppc_md.hpte_remove(hpte_group);
goto repeat;
}
}
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
hugepte_val(new_pte) |= (slot<<5) & _HUGEPAGE_GROUP_IX;
/*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock or the hash_table_lock
* (we hold both)
*/
*ptep = new_pte;
}
return 0;
}
static void flush_hash_hugepage(mm_context_t context, unsigned long ea,
hugepte_t pte, int local)
{
unsigned long vsid, vpn, va, hash, secondary, slot;
BUG_ON(hugepte_bad(pte));
BUG_ON(!in_hugepage_area(context, ea));
vsid = get_vsid(context, ea);
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> LARGE_PAGE_SHIFT;
hash = hpt_hash(vpn, 1);
secondary = !!(hugepte_val(pte) & _HUGEPAGE_SECONDARY);
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (hugepte_val(pte) & _HUGEPAGE_GROUP_IX) >> 5;
ppc_md.hpte_invalidate(slot, va, 1, local);
}
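/* Illustrative recap: the pte never stores the whole HPTE slot; it
 * keeps only the secondary-hash bit and the 3-bit group index, and
 * flush_hash_hugepage() rebuilds the global slot as
 * (hash & htab_hash_mask) * HPTES_PER_GROUP + group index, exactly
 * as hash_huge_page() computed it at insertion time. */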
static void split_and_free_hugepage(struct page *page)
{
int j;
struct page *map;
map = page;
htlbpage_total--;
for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
1 << PG_private | 1<< PG_writeback);
set_page_count(map, 0);
map++;
}
set_page_count(page, 1);
__free_pages(page, HUGETLB_PAGE_ORDER);
}
int set_hugetlb_mem_size(int count)
{
int lcount;
struct page *page;
if (!(cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE))
return 0;
if (count < 0)
lcount = count;
else
lcount = count - htlbpage_total;
if (lcount == 0)
return htlbpage_total;
if (lcount > 0) { /* Increase the mem size. */
while (lcount--) {
page = alloc_fresh_huge_page();
if (page == NULL)
break;
spin_lock(&htlbpage_lock);
enqueue_huge_page(page);
htlbpage_free++;
htlbpage_total++;
spin_unlock(&htlbpage_lock);
}
return htlbpage_total;
}
/* Shrink the memory size. */
while (lcount++) {
page = alloc_hugetlb_page();
if (page == NULL)
break;
spin_lock(&htlbpage_lock);
split_and_free_hugepage(page);
spin_unlock(&htlbpage_lock);
}
return htlbpage_total;
}
int hugetlb_sysctl_handler(ctl_table *table, int write,
struct file *file, void *buffer, size_t *length)
{
proc_dointvec(table, write, file, buffer, length);
htlbpage_max = set_hugetlb_mem_size(htlbpage_max);
return 0;
}
static int __init hugetlb_setup(char *s)
{
if (sscanf(s, "%d", &htlbpage_max) <= 0)
htlbpage_max = 0;
return 1;
}
__setup("hugepages=", hugetlb_setup);
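/* Usage sketch (illustrative): booting with "hugepages=16" makes
 * hugetlb_init() below preallocate 16 huge pages (16 x 16MB = 256MB);
 * the pool can be resized afterwards through the sysctl wired to
 * hugetlb_sysctl_handler() above. */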
static int __init hugetlb_init(void)
{
int i;
struct page *page;
if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) {
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&hugepage_freelists[i]);
for (i = 0; i < htlbpage_max; ++i) {
page = alloc_fresh_huge_page();
if (!page)
break;
spin_lock(&htlbpage_lock);
enqueue_huge_page(page);
spin_unlock(&htlbpage_lock);
}
htlbpage_max = htlbpage_free = htlbpage_total = i;
printk("Total HugeTLB memory allocated, %d\n", htlbpage_free);
} else {
htlbpage_max = 0;
printk("CPU does not support HugeTLB\n");
}
return 0;
}
module_init(hugetlb_init);
int hugetlb_report_meminfo(char *buf)
{
return sprintf(buf,
"HugePages_Total: %5d\n"
"HugePages_Free: %5d\n"
"Hugepagesize: %5lu kB\n",
htlbpage_total,
htlbpage_free,
HPAGE_SIZE/1024);
}
/* This is advisory only, so we can get away with accessing
* htlbpage_free without taking the lock. */
int is_hugepage_mem_enough(size_t size)
{
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpage_free;
}
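/* Since ~HPAGE_MASK == HPAGE_SIZE - 1, the expression above rounds
 * size up to whole huge pages: e.g. a 17MB request needs
 * (17MB + 16MB - 1) / 16MB = 2 huge pages. */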
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the
* hugepage VMA. do_page_fault() is supposed to trap this, so we BUG
* if we get this far.
*/
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
unsigned long address, int unused)
{
BUG();
return NULL;
}
struct vm_operations_struct hugetlb_vm_ops = {
.nopage = hugetlb_nopage,
};
@@ -290,7 +290,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, vmaddr);
if (!pmd_none(*pmd)) {
if (pmd_present(*pmd)) {
ptep = pte_offset_kernel(pmd, vmaddr);
/* Check if HPTE might exist and flush it if so */
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
@@ -298,6 +298,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
flush_hash_page(context, vmaddr, pte, local);
}
}
WARN_ON(pmd_hugepage(*pmd));
}
}
@@ -348,7 +349,7 @@ __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
pmd_end = (start + PMD_SIZE) & PMD_MASK;
if (pmd_end > end)
pmd_end = end;
if (!pmd_none(*pmd)) {
if (pmd_present(*pmd)) {
ptep = pte_offset_kernel(pmd, start);
do {
if (pte_val(*ptep) & _PAGE_HASHPTE) {
@@ -367,6 +368,7 @@ __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
++ptep;
} while (start < pmd_end);
} else {
WARN_ON(pmd_hugepage(*pmd));
start = pmd_end;
}
++pmd;
@@ -59,9 +59,6 @@
#define H_XIRR 0x74
#define H_PERFMON 0x7c
#define HSC ".long 0x44000022\n"
#define H_ENTER_r3 "li 3, 0x08\n"
/* plpar_hcall() -- Generic call interface using above opcodes
*
* The actual call interface is a hypervisor call instruction with
@@ -18,6 +18,12 @@
/* Default "unsigned long" context */
typedef unsigned long mm_context_t;
#ifdef CONFIG_HUGETLB_PAGE
#define CONTEXT_LOW_HPAGES (1UL<<63)
#else
#define CONTEXT_LOW_HPAGES 0
#endif
/*
* Define the size of the cache used for segment table entries. The first
* entry is used as a cache pointer, therefore the actual number of entries
@@ -127,7 +127,8 @@ destroy_context(struct mm_struct *mm)
#endif
mmu_context_queue.size++;
mmu_context_queue.elements[index] = mm->context;
mmu_context_queue.elements[index] =
mm->context & ~CONTEXT_LOW_HPAGES;
spin_unlock_irqrestore(&mmu_context_queue.lock, flags);
}
@@ -189,6 +190,8 @@ get_vsid( unsigned long context, unsigned long ea )
{
unsigned long ordinal, vsid;
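	/* CONTEXT_LOW_HPAGES is bit 63, far above any real context
	 * number, so stripping it here leaves the VSID computation
	 * unchanged for hugepage-enabled address spaces. */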
context &= ~CONTEXT_LOW_HPAGES;
ordinal = (((ea >> 28) & 0x1fffff) * LAST_USER_CONTEXT) | context;
vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
@@ -22,6 +22,39 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PAGE_OFFSET_MASK (PAGE_SIZE-1)
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT 24
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
/* For 64-bit processes the hugepage range is 1T-1.5T */
#define TASK_HPAGE_BASE (0x0000010000000000UL)
#define TASK_HPAGE_END (0x0000018000000000UL)
/* For 32-bit processes the hugepage range is 2-3G */
#define TASK_HPAGE_BASE_32 (0x80000000UL)
#define TASK_HPAGE_END_32 (0xc0000000UL)
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define is_hugepage_only_range(addr, len) \
( ((addr > (TASK_HPAGE_BASE-len)) && (addr < TASK_HPAGE_END)) || \
((current->mm->context & CONTEXT_LOW_HPAGES) && \
(addr > (TASK_HPAGE_BASE_32-len)) && (addr < TASK_HPAGE_END_32)) )
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define in_hugepage_area(context, addr) \
((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) && \
((((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
(((context) & CONTEXT_LOW_HPAGES) && \
(((addr) >= TASK_HPAGE_BASE_32) && ((addr) < TASK_HPAGE_END_32)))))
#else /* !CONFIG_HUGETLB_PAGE */
#define in_hugepage_area(mm, addr) 0
#endif /* !CONFIG_HUGETLB_PAGE */
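/* Example (illustrative): for a 32-bit task whose context has
 * CONTEXT_LOW_HPAGES set, an address such as 0x90000000 lies in
 * [TASK_HPAGE_BASE_32, TASK_HPAGE_END_32) = [0x80000000, 0xc0000000),
 * so in_hugepage_area() is true whenever the CPU advertises
 * CPU_FTR_16M_PAGE. */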
#define SID_SHIFT 28
#define SID_MASK 0xfffffffff
#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
@@ -149,6 +149,25 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]
/* shift to put page number into pte */
#define PTE_SHIFT (16)
/* We allow 2^41 bytes of real memory, so we need 29 bits in the PMD
* to give the PTE page number. The bottom two bits are for flags. */
#define PMD_TO_PTEPAGE_SHIFT (2)
#ifdef CONFIG_HUGETLB_PAGE
#define _PMD_HUGEPAGE 0x00000001U
#define HUGEPTE_BATCH_SIZE (1<<(HPAGE_SHIFT-PMD_SHIFT))
int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local);
#define HAVE_ARCH_UNMAPPED_AREA
#else
#define hash_huge_page(mm,a,ea,vsid,local) -1
#define _PMD_HUGEPAGE 0
#endif
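/* Note (illustrative): shifting the PTE-page block number left by
 * PMD_TO_PTEPAGE_SHIFT keeps the low two PMD bits clear, so bit 0 can
 * carry _PMD_HUGEPAGE and pmd_hugepage() can tell a hugepte (HG=1)
 * apart from a normal PTE-page pointer. */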
#ifndef __ASSEMBLY__
/*
@@ -178,12 +197,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]
#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pmd_set(pmdp, ptep) (pmd_val(*(pmdp)) = (__ba_to_bpn(ptep)))
#define pmd_set(pmdp, ptep) \
(pmd_val(*(pmdp)) = (__ba_to_bpn(ptep) << PMD_TO_PTEPAGE_SHIFT))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) ((pmd_val(pmd)) == 0)
#define pmd_present(pmd) ((pmd_val(pmd)) != 0)
#define pmd_hugepage(pmd) (!!(pmd_val(pmd) & _PMD_HUGEPAGE))
#define pmd_bad(pmd) (((pmd_val(pmd)) == 0) || pmd_hugepage(pmd))
#define pmd_present(pmd) ((!pmd_hugepage(pmd)) \
&& (pmd_val(pmd) & ~_PMD_HUGEPAGE) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd)))
#define pmd_page_kernel(pmd) \
(__bpn_to_ba(pmd_val(pmd) >> PMD_TO_PTEPAGE_SHIFT))
#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
#define pgd_set(pgdp, pmdp) (pgd_val(*(pgdp)) = (__ba_to_bpn(pmdp)))
#define pgd_none(pgd) (!pgd_val(pgd))