Commit 17ff6820 authored by David Gibson, committed by Linus Torvalds

[PATCH] ppc64: rework hugepage code

Rework the ppc64 hugepage code.  Instead of using specially marked pmd
entries in the normal pagetables to represent hugepages, use normal pte_t
entries in a separate set of pagetables used only for hugepages.

Using pte_t instead of a special hugepte_t makes the code more similar to
that for other architectures, opening up more possibilities for
consolidating the hugepage code.
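
To make the new representation concrete, here is a minimal sketch (not code
from this patch; the helper name make_huge_entry is invented for
illustration) of building a hugepage mapping purely with the generic pte
accessors, using the _PAGE_HUGE bit and pte_mkhuge() helper the patch adds:

    /* Sketch only: assemble a hugepage pte with generic accessors.
     * The patch's own version of this logic is set_huge_pte(). */
    static pte_t make_huge_entry(struct vm_area_struct *vma,
                                 struct page *page, int writable)
    {
            pte_t entry = mk_pte(page, vma->vm_page_prot);

            if (writable)
                    entry = pte_mkwrite(pte_mkdirty(entry));
            else
                    entry = pte_wrprotect(entry);

            return pte_mkhuge(pte_mkyoung(entry));
    }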

Using independent pagetables for the hugepages is also a prerequisite for
moving the hugepages into their own region well outside the normal user
address space.  The restrictions imposed by the powerpc mmu's segment
design mean we probably want to do that in the fairly near future.
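
To illustrate what the independent pagetables look like, the lookup is a
simple two-level walk rooted at mm->context.huge_pgdir.  The sketch below
(hypothetical helper name find_huge_pte; no locking or allocation shown)
condenses the hugepgd_offset()/hugepte_offset() pair added by this patch:

    /* Sketch: walk the hugepage-only pagetables for one address.
     * Mirrors hugepgd_offset() + hugepte_offset() in the diff below. */
    static pte_t *find_huge_pte(struct mm_struct *mm, unsigned long addr)
    {
            pgd_t *dir;

            if (!mm->context.huge_pgdir)
                    return NULL;

            dir = mm->context.huge_pgdir +
                    ((addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT);
            if (pgd_none(*dir))
                    return NULL;

            return (pte_t *)pgd_page(*dir) +
                    ((addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE);
    }
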
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent bdef750d
@@ -341,9 +341,7 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
 		     int local)
 {
 	unsigned long vsid, vpn, va, hash, secondary, slot;
-	/* XXX fix for large ptes */
-	unsigned long large = 0;
+	unsigned long huge = pte_huge(pte);
 
 	if ((ea >= USER_START) && (ea <= USER_END))
 		vsid = get_vsid(context, ea);
@@ -351,18 +349,18 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
 		vsid = get_kernel_vsid(ea);
 
 	va = (vsid << 28) | (ea & 0x0fffffff);
-	if (large)
+	if (huge)
 		vpn = va >> HPAGE_SHIFT;
 	else
 		vpn = va >> PAGE_SHIFT;
-	hash = hpt_hash(vpn, large);
+	hash = hpt_hash(vpn, huge);
 	secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
 	if (secondary)
 		hash = ~hash;
 	slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
 	slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-	ppc_md.hpte_invalidate(slot, va, large, local);
+	ppc_md.hpte_invalidate(slot, va, huge, local);
 }
 
 void flush_hash_range(unsigned long context, unsigned long number, int local)
...
@@ -27,116 +27,143 @@
 #include <linux/sysctl.h>
 
-/* HugePTE layout:
- *
- * 31 30 ... 15 14 13 12 10 9  8  7  6  5  4  3  2  1  0
- * PFN>>12..... - - - - - -  HASH_IX....  2ND  HASH  RW  -  HG=1
- */
-
-#define HUGEPTE_SHIFT	15
-#define _HUGEPAGE_PFN		0xffff8000
-#define _HUGEPAGE_BAD		0x00007f00
-#define _HUGEPAGE_HASHPTE	0x00000008
-#define _HUGEPAGE_SECONDARY	0x00000010
-#define _HUGEPAGE_GROUP_IX	0x000000e0
-#define _HUGEPAGE_HPTEFLAGS	(_HUGEPAGE_HASHPTE | _HUGEPAGE_SECONDARY | \
-				 _HUGEPAGE_GROUP_IX)
-#define _HUGEPAGE_RW		0x00000004
-
-typedef struct {unsigned int val;} hugepte_t;
-#define hugepte_val(hugepte)	((hugepte).val)
-#define __hugepte(x)		((hugepte_t) { (x) } )
-#define hugepte_pfn(x)	\
-	((unsigned long)(hugepte_val(x)>>HUGEPTE_SHIFT) << HUGETLB_PAGE_ORDER)
-#define mk_hugepte(page,wr)	__hugepte( \
-	((page_to_pfn(page)>>HUGETLB_PAGE_ORDER) << HUGEPTE_SHIFT ) \
-	| (!!(wr) * _HUGEPAGE_RW) | _PMD_HUGEPAGE )
-
-#define hugepte_bad(x)	( !(hugepte_val(x) & _PMD_HUGEPAGE) || \
-			  (hugepte_val(x) & _HUGEPAGE_BAD) )
-#define hugepte_page(x)	pfn_to_page(hugepte_pfn(x))
-#define hugepte_none(x)	(!(hugepte_val(x) & _HUGEPAGE_PFN))
-
-static void flush_hash_hugepage(mm_context_t context, unsigned long ea,
-				hugepte_t pte, int local);
-
-static inline unsigned int hugepte_update(hugepte_t *p, unsigned int clr,
-					  unsigned int set)
+#define HUGEPGDIR_SHIFT		(HPAGE_SHIFT + PAGE_SHIFT - 3)
+#define HUGEPGDIR_SIZE		(1UL << HUGEPGDIR_SHIFT)
+#define HUGEPGDIR_MASK		(~(HUGEPGDIR_SIZE-1))
+
+#define HUGEPTE_INDEX_SIZE	9
+#define HUGEPGD_INDEX_SIZE	10
+
+#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
+#define PTRS_PER_HUGEPGD	(1 << HUGEPGD_INDEX_SIZE)
+
+static inline int hugepgd_index(unsigned long addr)
 {
-	unsigned int old, tmp;
-
-	__asm__ __volatile__(
-	"1:	lwarx	%0,0,%3		# pte_update\n\
-	andc	%1,%0,%4 \n\
-	or	%1,%1,%5 \n\
-	stwcx.	%1,0,%3 \n\
-	bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*p)
-	: "r" (p), "r" (clr), "r" (set), "m" (*p)
-	: "cc" );
-
-	return old;
+	return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
 }
 
-static inline void set_hugepte(hugepte_t *ptep, hugepte_t pte)
+static pgd_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
 {
-	hugepte_update(ptep, ~_HUGEPAGE_HPTEFLAGS,
-		       hugepte_val(pte) & ~_HUGEPAGE_HPTEFLAGS);
+	int index;
+
+	if (! mm->context.huge_pgdir)
+		return NULL;
+
+	index = hugepgd_index(addr);
+	BUG_ON(index >= PTRS_PER_HUGEPGD);
+	return mm->context.huge_pgdir + index;
 }
 
-static hugepte_t *hugepte_alloc(struct mm_struct *mm, unsigned long addr)
+static inline pte_t *hugepte_offset(pgd_t *dir, unsigned long addr)
 {
-	pgd_t *pgd;
-	pmd_t *pmd = NULL;
-
-	BUG_ON(!in_hugepage_area(mm->context, addr));
-
-	pgd = pgd_offset(mm, addr);
-	pmd = pmd_alloc(mm, pgd, addr);
-
-	/* We shouldn't find a (normal) PTE page pointer here */
-	BUG_ON(!pmd_none(*pmd) && !pmd_hugepage(*pmd));
-
-	return (hugepte_t *)pmd;
+	int index;
+
+	if (pgd_none(*dir))
+		return NULL;
+
+	index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
+	return (pte_t *)pgd_page(*dir) + index;
+}
+
+static pgd_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
+{
+	BUG_ON(! in_hugepage_area(mm->context, addr));
+
+	if (! mm->context.huge_pgdir) {
+		pgd_t *new;
+		spin_unlock(&mm->page_table_lock);
+
+		/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
+		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
+		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
+		spin_lock(&mm->page_table_lock);
+
+		/*
+		 * Because we dropped the lock, we should re-check the
+		 * entry, as somebody else could have populated it..
+		 */
+		if (mm->context.huge_pgdir)
+			pgd_free(new);
+		else
+			mm->context.huge_pgdir = new;
+	}
+
+	return hugepgd_offset(mm, addr);
 }
 
-static hugepte_t *hugepte_offset(struct mm_struct *mm, unsigned long addr)
+static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir,
+			    unsigned long addr)
 {
-	pgd_t *pgd;
-	pmd_t *pmd = NULL;
-
-	BUG_ON(!in_hugepage_area(mm->context, addr));
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_none(*pgd))
-		return NULL;
-
-	pmd = pmd_offset(pgd, addr);
-
-	/* We shouldn't find a (normal) PTE page pointer here */
-	BUG_ON(!pmd_none(*pmd) && !pmd_hugepage(*pmd));
-
-	return (hugepte_t *)pmd;
+	if (! pgd_present(*dir)) {
+		pte_t *new;
+
+		spin_unlock(&mm->page_table_lock);
+		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
+		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
+		spin_lock(&mm->page_table_lock);
+
+		/*
+		 * Because we dropped the lock, we should re-check the
+		 * entry, as somebody else could have populated it..
+		 */
+		if (pgd_present(*dir)) {
+			if (new)
+				kmem_cache_free(zero_cache, new);
+		} else {
+			struct page *ptepage;
+
+			if (! new)
+				return NULL;
+
+			ptepage = virt_to_page(new);
+			ptepage->mapping = (void *) mm;
+			ptepage->index = addr & HUGEPGDIR_MASK;
+			pgd_populate(mm, dir, new);
+		}
+	}
+
+	return hugepte_offset(dir, addr);
+}
+
+static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+
+	BUG_ON(! in_hugepage_area(mm->context, addr));
+
+	pgd = hugepgd_offset(mm, addr);
+	if (! pgd)
+		return NULL;
+
+	return hugepte_offset(pgd, addr);
 }
 
-static void setup_huge_pte(struct mm_struct *mm, struct page *page,
-			   hugepte_t *ptep, int write_access)
+static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
-	hugepte_t entry;
-	int i;
-
-	mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-	entry = mk_hugepte(page, write_access);
-	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++)
-		set_hugepte(ptep+i, entry);
+	pgd_t *pgd;
+
+	BUG_ON(! in_hugepage_area(mm->context, addr));
+
+	pgd = hugepgd_alloc(mm, addr);
+	if (! pgd)
+		return NULL;
+
+	return hugepte_alloc(mm, pgd, addr);
 }
 
-static void teardown_huge_pte(hugepte_t *ptep)
+static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+			 struct page *page, pte_t *ptep, int write_access)
 {
-	int i;
-
-	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++)
-		pmd_clear((pmd_t *)(ptep+i));
+	pte_t entry;
+
+	mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+	if (write_access) {
+		entry =
+		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+	} else {
+		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+	}
+	entry = pte_mkyoung(entry);
+	entry = pte_mkhuge(entry);
+
+	set_pte(ptep, entry);
 }
 
 /*
@@ -268,34 +295,31 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len)
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
-	hugepte_t *src_pte, *dst_pte, entry;
+	pte_t *src_pte, *dst_pte, entry;
 	struct page *ptepage;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	int err = -ENOMEM;
 
 	while (addr < end) {
-		BUG_ON(! in_hugepage_area(src->context, addr));
-		BUG_ON(! in_hugepage_area(dst->context, addr));
-
-		dst_pte = hugepte_alloc(dst, addr);
+		dst_pte = huge_pte_alloc(dst, addr);
 		if (!dst_pte)
-			return -ENOMEM;
+			goto out;
 
-		src_pte = hugepte_offset(src, addr);
+		src_pte = huge_pte_offset(src, addr);
 		entry = *src_pte;
 
-		if ((addr % HPAGE_SIZE) == 0) {
-			/* This is the first hugepte in a batch */
-			ptepage = hugepte_page(entry);
-			get_page(ptepage);
-			dst->rss += (HPAGE_SIZE / PAGE_SIZE);
-		}
-		set_hugepte(dst_pte, entry);
+		ptepage = pte_page(entry);
+		get_page(ptepage);
+		dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+		set_pte(dst_pte, entry);
 
-		addr += PMD_SIZE;
+		addr += HPAGE_SIZE;
 	}
-	return 0;
+
+	err = 0;
+ out:
+	return err;
 }
 
 int
@@ -310,18 +334,16 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	vpfn = vaddr/PAGE_SIZE;
 
 	while (vaddr < vma->vm_end && remainder) {
-		BUG_ON(!in_hugepage_area(mm->context, vaddr));
-
 		if (pages) {
-			hugepte_t *pte;
+			pte_t *pte;
 			struct page *page;
 
-			pte = hugepte_offset(mm, vaddr);
+			pte = huge_pte_offset(mm, vaddr);
 
 			/* hugetlb should be locked, and hence, prefaulted */
-			WARN_ON(!pte || hugepte_none(*pte));
+			WARN_ON(!pte || pte_none(*pte));
 
-			page = &hugepte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
 
 			WARN_ON(!PageCompound(page));
@@ -347,26 +369,31 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
-	return ERR_PTR(-EINVAL);
+	pte_t *ptep;
+	struct page *page;
+
+	if (! in_hugepage_area(mm->context, address))
+		return ERR_PTR(-EINVAL);
+
+	ptep = huge_pte_offset(mm, address);
+	page = pte_page(*ptep);
+	if (page)
+		page += (address % HPAGE_SIZE) / PAGE_SIZE;
+
+	return page;
 }
 
 int pmd_huge(pmd_t pmd)
 {
-	return pmd_hugepage(pmd);
+	return 0;
 }
 
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
 {
-	struct page *page;
-
-	BUG_ON(! pmd_hugepage(*pmd));
-
-	page = hugepte_page(*(hugepte_t *)pmd);
-	if (page)
-		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
-
-	return page;
+	BUG();
+	return NULL;
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma,
@@ -374,44 +401,38 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr;
-	hugepte_t *ptep;
+	pte_t *ptep;
 	struct page *page;
-	int cpu;
-	int local = 0;
-	cpumask_t tmp;
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON((start % HPAGE_SIZE) != 0);
 	BUG_ON((end % HPAGE_SIZE) != 0);
 
-	/* XXX are there races with checking cpu_vm_mask? - Anton */
-	cpu = get_cpu();
-	tmp = cpumask_of_cpu(cpu);
-	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
-		local = 1;
-
 	for (addr = start; addr < end; addr += HPAGE_SIZE) {
-		hugepte_t pte;
-
-		BUG_ON(!in_hugepage_area(mm->context, addr));
+		pte_t pte;
 
-		ptep = hugepte_offset(mm, addr);
-		if (!ptep || hugepte_none(*ptep))
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep || pte_none(*ptep))
 			continue;
 
 		pte = *ptep;
-		page = hugepte_page(pte);
-		teardown_huge_pte(ptep);
-		if (hugepte_val(pte) & _HUGEPAGE_HASHPTE)
-			flush_hash_hugepage(mm->context, addr,
-					    pte, local);
+		page = pte_page(pte);
+		pte_clear(ptep);
 
 		put_page(page);
 	}
-	put_cpu();
-
 	mm->rss -= (end - start) >> PAGE_SHIFT;
+	flush_tlb_pending();
+}
+
+void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
+			   unsigned long start, unsigned long end)
+{
+	/* Because the huge pgtables are only 2 level, they can take
+	 * at most around 4M, much less than one hugepage which the
+	 * process is presumably entitled to use.  So we don't bother
+	 * freeing up the pagetables on unmap, and wait until
+	 * destroy_context() to clean up the lot. */
 }
 
 int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
@@ -427,16 +448,14 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
 	spin_lock(&mm->page_table_lock);
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
 		unsigned long idx;
-		hugepte_t *pte = hugepte_alloc(mm, addr);
+		pte_t *pte = huge_pte_alloc(mm, addr);
 		struct page *page;
 
-		BUG_ON(!in_hugepage_area(mm->context, addr));
-
 		if (!pte) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		if (!hugepte_none(*pte))
+		if (! pte_none(*pte))
 			continue;
 
 		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
@@ -463,7 +482,7 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
 				goto out;
 			}
 		}
-		setup_huge_pte(mm, page, pte, vma->vm_flags & VM_WRITE);
+		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
 	}
 out:
 	spin_unlock(&mm->page_table_lock);
@@ -717,20 +736,55 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 }
 
+void hugetlb_mm_free_pgd(struct mm_struct *mm)
+{
+	int i;
+	pgd_t *pgdir;
+
+	spin_lock(&mm->page_table_lock);
+
+	pgdir = mm->context.huge_pgdir;
+	if (! pgdir)
+		return;
+
+	mm->context.huge_pgdir = NULL;
+
+	/* cleanup any hugepte pages leftover */
+	for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
+		pgd_t *pgd = pgdir + i;
+
+		if (! pgd_none(*pgd)) {
+			pte_t *pte = (pte_t *)pgd_page(*pgd);
+			struct page *ptepage = virt_to_page(pte);
+
+			ptepage->mapping = NULL;
+
+			BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
+			kmem_cache_free(zero_cache, pte);
+		}
+		pgd_clear(pgd);
+	}
+
+	BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
+	kmem_cache_free(zero_cache, pgdir);
+
+	spin_unlock(&mm->page_table_lock);
+}
+
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local)
 {
-	hugepte_t *ptep;
+	pte_t *ptep;
 	unsigned long va, vpn;
 	int is_write;
-	hugepte_t old_pte, new_pte;
-	unsigned long hpteflags, prpn, flags;
+	pte_t old_pte, new_pte;
+	unsigned long hpteflags, prpn;
 	long slot;
+	int err = 1;
+
+	spin_lock(&mm->page_table_lock);
 
-	/* We have to find the first hugepte in the batch, since
-	 * that's the one that will store the HPTE flags */
-	ea &= HPAGE_MASK;
-	ptep = hugepte_offset(mm, ea);
+	ptep = huge_pte_offset(mm, ea);
 
 	/* Search the Linux page table for a match with va */
 	va = (vsid << 28) | (ea & 0x0fffffff);
@@ -740,19 +794,18 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	 * If no pte found or not present, send the problem up to
 	 * do_page_fault
 	 */
-	if (unlikely(!ptep || hugepte_none(*ptep)))
-		return 1;
+	if (unlikely(!ptep || pte_none(*ptep)))
+		goto out;
 
-	BUG_ON(hugepte_bad(*ptep));
+	/* BUG_ON(pte_bad(*ptep)); */
 
 	/*
 	 * Check the user's access rights to the page.  If access should be
 	 * prevented then send the problem up to do_page_fault.
 	 */
 	is_write = access & _PAGE_RW;
-	if (unlikely(is_write && !(hugepte_val(*ptep) & _HUGEPAGE_RW)))
-		return 1;
+	if (unlikely(is_write && !(pte_val(*ptep) & _PAGE_RW)))
+		goto out;
 
 	/*
 	 * At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
@@ -765,41 +818,40 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	 * page is currently not DIRTY.
 	 */
 
-	spin_lock_irqsave(&mm->page_table_lock, flags);
-
 	old_pte = *ptep;
 	new_pte = old_pte;
 
-	hpteflags = 0x2 | (! (hugepte_val(new_pte) & _HUGEPAGE_RW));
+	hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
 
 	/* Check if pte already has an hpte (case 2) */
-	if (unlikely(hugepte_val(old_pte) & _HUGEPAGE_HASHPTE)) {
+	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
 		hash = hpt_hash(vpn, 1);
-		if (hugepte_val(old_pte) & _HUGEPAGE_SECONDARY)
+		if (pte_val(old_pte) & _PAGE_SECONDARY)
 			hash = ~hash;
 		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (hugepte_val(old_pte) & _HUGEPAGE_GROUP_IX) >> 5;
+		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
 
 		if (ppc_md.hpte_updatepp(slot, hpteflags, va, 1, local) == -1)
-			hugepte_val(old_pte) &= ~_HUGEPAGE_HPTEFLAGS;
+			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
 	}
 
-	if (likely(!(hugepte_val(old_pte) & _HUGEPAGE_HASHPTE))) {
+	if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
 		unsigned long hash = hpt_hash(vpn, 1);
 		unsigned long hpte_group;
 
-		prpn = hugepte_pfn(old_pte);
+		prpn = pte_pfn(old_pte);
 
 repeat:
 		hpte_group = ((hash & htab_data.htab_hash_mask) *
 			      HPTES_PER_GROUP) & ~0x7UL;
 
 		/* Update the linux pte with the HPTE slot */
-		hugepte_val(new_pte) &= ~_HUGEPAGE_HPTEFLAGS;
-		hugepte_val(new_pte) |= _HUGEPAGE_HASHPTE;
+		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
+		pte_val(new_pte) |= _PAGE_HASHPTE;
 
 		/* Add in WIMG bits */
 		/* XXX We should store these in the pte */
@@ -810,7 +862,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
-			hugepte_val(new_pte) |= _HUGEPAGE_SECONDARY;
+			pte_val(new_pte) |= _PAGE_SECONDARY;
 			hpte_group = ((~hash & htab_data.htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL;
 			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
@@ -827,39 +879,20 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		if (unlikely(slot == -2))
 			panic("hash_huge_page: pte_insert failed\n");
 
-		hugepte_val(new_pte) |= (slot<<5) & _HUGEPAGE_GROUP_IX;
+		pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
 
 		/*
 		 * No need to use ldarx/stdcx here because all who
 		 * might be updating the pte will hold the
-		 * page_table_lock or the hash_table_lock
-		 * (we hold both)
+		 * page_table_lock
 		 */
 		*ptep = new_pte;
 	}
 
-	spin_unlock_irqrestore(&mm->page_table_lock, flags);
-
-	return 0;
-}
-
-static void flush_hash_hugepage(mm_context_t context, unsigned long ea,
-				hugepte_t pte, int local)
-{
-	unsigned long vsid, vpn, va, hash, slot;
-
-	BUG_ON(hugepte_bad(pte));
-	BUG_ON(!in_hugepage_area(context, ea));
-
-	vsid = get_vsid(context.id, ea);
-	va = (vsid << 28) | (ea & 0x0fffffff);
-	vpn = va >> HPAGE_SHIFT;
-	hash = hpt_hash(vpn, 1);
-	if (hugepte_val(pte) & _HUGEPAGE_SECONDARY)
-		hash = ~hash;
-	slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-	slot += (hugepte_val(pte) & _HUGEPAGE_GROUP_IX) >> 5;
-
-	ppc_md.hpte_invalidate(slot, va, 1, local);
+	err = 0;
+
+ out:
+	spin_unlock(&mm->page_table_lock);
+
+	return err;
 }
@@ -478,6 +478,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	int index;
 	int err;
 
+#ifdef CONFIG_HUGETLB_PAGE
+	/* We leave htlb_segs as it was, but for a fork, we need to
+	 * clear the huge_pgdir. */
+	mm->context.huge_pgdir = NULL;
+#endif
+
 again:
 	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
 		return -ENOMEM;
@@ -508,6 +514,8 @@ void destroy_context(struct mm_struct *mm)
 	spin_unlock(&mmu_context_lock);
 
 	mm->context.id = NO_CONTEXT;
+
+	hugetlb_mm_free_pgd(mm);
 }
 
 static int __init mmu_context_init(void)
...
@@ -24,6 +24,7 @@ typedef unsigned long mm_context_id_t;
 typedef struct {
 	mm_context_id_t id;
 #ifdef CONFIG_HUGETLB_PAGE
+	pgd_t *huge_pgdir;
 	u16 htlb_segs; /* bitmask */
 #endif
 } mm_context_t;
...
@@ -64,7 +64,6 @@
 #define is_hugepage_only_range(addr, len) \
 	(touches_hugepage_high_range((addr), (len)) || \
 	 touches_hugepage_low_range((addr), (len)))
-#define hugetlb_free_pgtables free_pgtables
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 
 #define in_hugepage_area(context, addr) \
...
@@ -98,6 +98,7 @@
 #define _PAGE_BUSY	0x0800 /* software: PTE & hash are busy */
 #define _PAGE_SECONDARY	0x8000 /* software: HPTE is in secondary group */
 #define _PAGE_GROUP_IX	0x7000 /* software: HPTE index within group */
+#define _PAGE_HUGE	0x10000 /* 16MB page */
 /* Bits 0x7000 identify the index within an HPT Group */
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
 
 /* PAGE_MASK gives the right answer below, but only by accident */
@@ -157,19 +158,19 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #endif /* __ASSEMBLY__ */
 
 /* shift to put page number into pte */
-#define PTE_SHIFT (16)
+#define PTE_SHIFT (17)
 
 /* We allow 2^41 bytes of real memory, so we need 29 bits in the PMD
  * to give the PTE page number.  The bottom two bits are for flags. */
 #define PMD_TO_PTEPAGE_SHIFT (2)
 
 #ifdef CONFIG_HUGETLB_PAGE
-#define _PMD_HUGEPAGE	0x00000001U
-#define HUGEPTE_BATCH_SIZE	(1<<(HPAGE_SHIFT-PMD_SHIFT))
 
 #ifndef __ASSEMBLY__
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local);
+void hugetlb_mm_free_pgd(struct mm_struct *mm);
 #endif /* __ASSEMBLY__ */
 
 #define HAVE_ARCH_UNMAPPED_AREA
@@ -177,7 +178,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 #else
 
 #define hash_huge_page(mm,a,ea,vsid,local)	-1
-#define _PMD_HUGEPAGE	0
+#define hugetlb_mm_free_pgd(mm)			do {} while (0)
 
 #endif
@@ -213,10 +214,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 #define pmd_set(pmdp, ptep) 	\
 	(pmd_val(*(pmdp)) = (__ba_to_bpn(ptep) << PMD_TO_PTEPAGE_SHIFT))
 #define pmd_none(pmd)		(!pmd_val(pmd))
-#define	pmd_hugepage(pmd)	(!!(pmd_val(pmd) & _PMD_HUGEPAGE))
-#define	pmd_bad(pmd)		(((pmd_val(pmd)) == 0) || pmd_hugepage(pmd))
-#define	pmd_present(pmd)	((!pmd_hugepage(pmd)) \
-				 && (pmd_val(pmd) & ~_PMD_HUGEPAGE) != 0)
+#define	pmd_bad(pmd)		(pmd_val(pmd) == 0)
+#define	pmd_present(pmd)	(pmd_val(pmd) != 0)
 #define	pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
 #define pmd_page_kernel(pmd)	\
 	(__bpn_to_ba(pmd_val(pmd) >> PMD_TO_PTEPAGE_SHIFT))
@@ -269,6 +268,7 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;}
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
 static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
+static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;}
 
 static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
 static inline void pte_cache(pte_t pte)   { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -294,6 +294,8 @@ static inline pte_t pte_mkdirty(pte_t pte) {
 	pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte) {
 	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) {
+	pte_val(pte) |= _PAGE_HUGE; return pte; }
 
 /* Atomic PTE updates */
 static inline unsigned long pte_update(pte_t *p, unsigned long clr)
@@ -464,6 +466,10 @@ extern pgd_t ioremap_dir[1024];
 
 extern void paging_init(void);
 
+struct mmu_gather;
+void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
+			   unsigned long start, unsigned long end);
+
 /*
  * This gets called at the end of handling a page fault, when
  * the kernel has put a new PTE into the page table for the process.
...