Commit 3a0b82c0 authored by Ingo Molnar

adds simple support for atomically-mapped PTEs. On highmem systems this enables the allocation of the pagetables in highmem.

adds simple support for atomically-mapped PTEs. On highmem systems this enables the allocation of the pagetables in highmem.
parent 094686d3
......@@ -155,14 +155,25 @@ tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID
choice 'High Memory Support' \
"off CONFIG_NOHIGHMEM \
4GB CONFIG_HIGHMEM4G \
64GB CONFIG_HIGHMEM64G" off
4GB-highpte CONFIG_HIGHMEM4G_HIGHPTE \
64GB CONFIG_HIGHMEM64G \
64GB-highpte CONFIG_HIGHMEM64G_HIGHPTE" off
if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
define_bool CONFIG_HIGHMEM y
fi
if [ "$CONFIG_HIGHMEM4G_HIGHPTE" = "y" ]; then
define_bool CONFIG_HIGHMEM y
define_bool CONFIG_HIGHPTE y
fi
if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
define_bool CONFIG_HIGHMEM y
define_bool CONFIG_X86_PAE y
fi
if [ "$CONFIG_HIGHMEM64G_HIGHPTE" = "y" ]; then
define_bool CONFIG_HIGHMEM y
define_bool CONFIG_HIGHPTE y
define_bool CONFIG_X86_PAE y
fi
bool 'Math emulation' CONFIG_MATH_EMULATION
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
......@@ -397,11 +408,13 @@ comment 'Kernel hacking'
bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM
bool ' Debug memory allocations' CONFIG_DEBUG_SLAB
bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ
bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
if [ "$CONFIG_HIGHMEM" = "y" ]; then
bool ' Highmem debugging' CONFIG_DEBUG_HIGHMEM
fi
fi
endmenu
......
......@@ -145,7 +145,6 @@ void cpu_idle (void)
while (!need_resched())
idle();
schedule();
check_pgt_cache();
}
}
......
......@@ -146,10 +146,6 @@ void __init smp_store_cpu_info(int id)
struct cpuinfo_x86 *c = cpu_data + id;
*c = boot_cpu_data;
c->pte_quick = 0;
c->pmd_quick = 0;
c->pgd_quick = 0;
c->pgtable_cache_sz = 0;
identify_cpu(c);
/*
* Mask B, Pentium, but not Pentium MMX
......
......@@ -788,7 +788,7 @@ void __init trap_init_f00f_bug(void)
page = (unsigned long) vmalloc(PAGE_SIZE);
pgd = pgd_offset(&init_mm, page);
pmd = pmd_offset(pgd, page);
pte = pte_offset(pmd, page);
pte = pte_offset_kernel(pmd, page);
__free_page(pte_page(*pte));
*pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO);
/*
......
......@@ -93,7 +93,7 @@ static void mark_screen_rdonly(struct task_struct * tsk)
{
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
pte_t *pte, *mapped;
int i;
pgd = pgd_offset(tsk->mm, 0xA0000);
......@@ -112,12 +112,15 @@ static void mark_screen_rdonly(struct task_struct * tsk)
pmd_clear(pmd);
return;
}
pte = pte_offset(pmd, 0xA0000);
preempt_disable();
pte = mapped = pte_offset_map(pmd, 0xA0000);
for (i = 0; i < 32; i++) {
if (pte_present(*pte))
set_pte(pte, pte_wrprotect(*pte));
pte++;
}
pte_unmap(mapped);
preempt_enable();
flush_tlb();
}
......
......@@ -320,12 +320,20 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
asm("movl %%cr3,%0":"=r" (page));
page = ((unsigned long *) __va(page))[address >> 22];
printk(KERN_ALERT "*pde = %08lx\n", page);
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
* And lets rather not kmap-atomic the pte, just in case
* it's allocated already.
*/
#ifndef CONFIG_HIGHPTE
if (page & 1) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
printk(KERN_ALERT "*pte = %08lx\n", page);
}
#endif
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
......@@ -395,7 +403,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
goto no_context;
set_pmd(pmd, *pmd_k);
pte_k = pte_offset(pmd_k, address);
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
goto no_context;
return;
......
......@@ -43,28 +43,6 @@ unsigned long highstart_pfn, highend_pfn;
static unsigned long totalram_pages;
static unsigned long totalhigh_pages;
/*
 * Trim the per-CPU pagetable quicklists when they have grown past the
 * 'high' watermark, draining them until no more than 'low' cached
 * entries remain.  One pgd, pmd and pte (whichever lists are non-empty)
 * are released per pass.
 *
 * Returns the number of cached pagetable pages actually freed.
 */
int do_check_pgt_cache(int low, int high)
{
	int nr_freed = 0;

	if (pgtable_cache_size <= high)
		return 0;

	do {
		if (pgd_quicklist) {
			free_pgd_slow(get_pgd_fast());
			nr_freed++;
		}
		if (pmd_quicklist) {
			pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
			nr_freed++;
		}
		if (pte_quicklist) {
			pte_free_slow(pte_alloc_one_fast(NULL, 0));
			nr_freed++;
		}
	} while (pgtable_cache_size > low);

	return nr_freed;
}
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
......@@ -76,7 +54,7 @@ pte_t *kmap_pte;
pgprot_t kmap_prot;
#define kmap_get_fixmap_pte(vaddr) \
pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
void __init kmap_init(void)
{
......@@ -116,7 +94,6 @@ void show_mem(void)
printk("%d reserved pages\n",reserved);
printk("%d pages shared\n",shared);
printk("%d pages swap cached\n",cached);
printk("%ld pages in page table cache\n",pgtable_cache_size);
show_buffers();
}
......@@ -143,7 +120,7 @@ static inline void set_pte_phys (unsigned long vaddr,
printk("PAE BUG #01!\n");
return;
}
pte = pte_offset(pmd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
if (pte_val(*pte))
pte_ERROR(*pte);
pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
......@@ -196,7 +173,7 @@ static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t
if (pmd_none(*pmd)) {
pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
if (pte != pte_offset(pmd, 0))
if (pte != pte_offset_kernel(pmd, 0))
BUG();
}
vaddr += PMD_SIZE;
......@@ -267,7 +244,7 @@ static void __init pagetable_init (void)
*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
}
set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
if (pte_base != pte_offset(pmd, 0))
if (pte_base != pte_offset_kernel(pmd, 0))
BUG();
}
......@@ -289,7 +266,7 @@ static void __init pagetable_init (void)
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pmd = pmd_offset(pgd, vaddr);
pte = pte_offset(pmd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
#endif
......@@ -398,7 +375,7 @@ void __init test_wp_bit(void)
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pmd = pmd_offset(pgd, vaddr);
pte = pte_offset(pmd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
old_pte = *pte;
*pte = mk_pte_phys(0, PAGE_READONLY);
local_flush_tlb();
......
......@@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
if (address >= end)
BUG();
do {
pte_t * pte = pte_alloc(&init_mm, pmd, address);
pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
......
......@@ -449,7 +449,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
for (i = vma->vm_start; i < vma->vm_end; i += PAGE_SIZE) {
pgd = pgd_offset(vma->vm_mm, i);
pmd = pmd_offset(pgd, i);
pte = pte_offset(pmd, i);
preempt_disable();
pte = pte_offset_map(pmd, i);
if (pte_present(*pte)) {
address = __pa(pte_page(*pte))
+ (i & (PAGE_SIZE-1));
......@@ -465,6 +466,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
} else {
DRM_PROC_PRINT(" 0x%08lx\n", i);
}
pte_unmap(pte);
preempt_enable();
}
#endif
}
......
......@@ -143,9 +143,15 @@ int DRM(sg_alloc)( struct inode *inode, struct file *filp,
if ( !pmd_present( *pmd ) )
goto failed;
pte = pte_offset( pmd, i );
if ( !pte_present( *pte ) )
preempt_disable();
pte = pte_offset_map( pmd, i );
if ( !pte_present( *pte ) ) {
pte_unmap(pte);
preempt_enable();
goto failed;
}
pte_unmap(pte);
preempt_enable();
entry->pagelist[j] = pte_page( *pte );
......
......@@ -169,8 +169,15 @@ struct page *DRM(vm_shm_nopage)(struct vm_area_struct *vma,
if( !pgd_present( *pgd ) ) return NOPAGE_OOM;
pmd = pmd_offset( pgd, i );
if( !pmd_present( *pmd ) ) return NOPAGE_OOM;
pte = pte_offset( pmd, i );
if( !pte_present( *pte ) ) return NOPAGE_OOM;
preempt_disable();
pte = pte_offset_map( pmd, i );
if( !pte_present( *pte ) ) {
pte_unmap(pte);
preempt_enable();
return NOPAGE_OOM;
}
pte_unmap(pte);
preempt_enable();
page = pte_page(*pte);
get_page(page);
......
......@@ -215,8 +215,11 @@ static inline struct page *uvirt_to_page(pgd_t *pgd, unsigned long adr)
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, adr);
if (!pmd_none(*pmd)) {
ptep = pte_offset(pmd, adr);
preempt_disable();
ptep = pte_offset_map(pmd, adr);
pte = *ptep;
pte_unmap(pte);
preempt_enable();
if(pte_present(pte))
ret = pte_page(pte);
}
......
......@@ -219,6 +219,7 @@ sgi_graphics_nopage (struct vm_area_struct *vma, unsigned long address, int
int board = GRAPHICS_CARD (vma->vm_dentry->d_inode->i_rdev);
unsigned long virt_add, phys_add;
struct page * page;
#ifdef DEBUG
printk ("Got a page fault for board %d address=%lx guser=%lx\n", board,
......@@ -245,8 +246,10 @@ sgi_graphics_nopage (struct vm_area_struct *vma, unsigned long address, int
pgd = pgd_offset(current->mm, address);
pmd = pmd_offset(pgd, address);
pte = pte_offset(pmd, address);
return pte_page(*pte);
pte = pte_kmap_offset(pmd, address);
page = pte_page(*pte);
pte_kunmap(pte);
return page;
}
/*
......
......@@ -139,8 +139,11 @@ static inline unsigned long uvirt_to_kva (pgd_t * pgd, unsigned long adr)
if (!pgd_none (*pgd)) {
pmd = pmd_offset (pgd, adr);
if (!pmd_none (*pmd)) {
ptep = pte_offset (pmd, adr);
preempt_disable();
ptep = pte_offset_map (pmd, adr);
pte = *ptep;
pte_unmap(pte);
preempt_enable();
if (pte_present (pte)) {
ret = (unsigned long) page_address (pte_page (pte));
ret |= (adr & (PAGE_SIZE - 1));
......
......@@ -115,8 +115,11 @@ static inline unsigned long uvirt_to_kva(pgd_t *pgd, unsigned long adr)
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, adr);
if (!pmd_none(*pmd)) {
ptep = pte_offset(pmd, adr);
preempt_disable();
ptep = pte_offset_map(pmd, adr);
pte = *ptep;
pte_unmap(pte);
preempt_enable();
if(pte_present(pte)) {
ret = (unsigned long) page_address(pte_page(pte));
ret |= (adr & (PAGE_SIZE - 1));
......
......@@ -271,15 +271,18 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
pmd = pmd_alloc(tsk->mm, pgd, address);
if (!pmd)
goto out;
pte = pte_alloc(tsk->mm, pmd, address);
pte = pte_alloc_map(tsk->mm, pmd, address);
if (!pte)
goto out;
if (!pte_none(*pte))
if (!pte_none(*pte)) {
pte_unmap(pte);
goto out;
}
lru_cache_add(page);
flush_dcache_page(page);
flush_page_to_ram(page);
set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
pte_unmap(pte);
tsk->mm->rss++;
spin_unlock(&tsk->mm->page_table_lock);
......
......@@ -393,11 +393,10 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
return res;
}
static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
int * pages, int * shared, int * dirty, int * total)
static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, int * pages, int * shared, int * dirty, int * total)
{
pte_t * pte;
unsigned long end;
unsigned long end, pmd_end;
pte_t *pte;
if (pmd_none(*pmd))
return;
......@@ -406,11 +405,12 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
pmd_clear(pmd);
return;
}
pte = pte_offset(pmd, address);
address &= ~PMD_MASK;
preempt_disable();
pte = pte_offset_map(pmd, address);
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
pmd_end = (address + PMD_SIZE) & PMD_MASK;
if (end > pmd_end)
end = pmd_end;
do {
pte_t page = *pte;
struct page *ptpage;
......@@ -431,6 +431,8 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
if (page_count(pte_page(page)) > 1)
++*shared;
} while (address < end);
pte_unmap(pte - 1);
preempt_enable();
}
static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size,
......
......@@ -26,12 +26,6 @@
#include <asm/kmap_types.h>
#include <asm/pgtable.h>
#ifdef CONFIG_DEBUG_HIGHMEM
#define HIGHMEM_DEBUG 1
#else
#define HIGHMEM_DEBUG 0
#endif
/* declarations for highmem.c */
extern unsigned long highstart_pfn, highend_pfn;
......@@ -94,7 +88,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
#if HIGHMEM_DEBUG
#if CONFIG_DEBUG_HIGHMEM
if (!pte_none(*(kmap_pte-idx)))
BUG();
#endif
......@@ -106,8 +100,8 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
static inline void kunmap_atomic(void *kvaddr, enum km_type type)
{
#if HIGHMEM_DEBUG
unsigned long vaddr = (unsigned long) kvaddr;
#if CONFIG_DEBUG_HIGHMEM
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
if (vaddr < FIXADDR_START) { // FIXME
......
#ifndef _ASM_KMAP_TYPES_H
#define _ASM_KMAP_TYPES_H
#include <linux/config.h>
#if CONFIG_DEBUG_HIGHMEM
# define D(n) __KM_FENCE_##n ,
#else
# define D(n)
#endif
enum km_type {
KM_BOUNCE_READ,
KM_SKB_DATA,
KM_SKB_DATA_SOFTIRQ,
KM_USER0,
KM_USER1,
KM_BIO_IRQ,
KM_TYPE_NR
D(0) KM_BOUNCE_READ,
D(1) KM_SKB_DATA,
D(2) KM_SKB_DATA_SOFTIRQ,
D(3) KM_USER0,
D(4) KM_USER1,
D(5) KM_BIO_IRQ,
D(6) KM_PTE0,
D(7) KM_PTE1,
D(8) KM_TYPE_NR
};
#undef D
#endif
......@@ -5,15 +5,17 @@
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
#include <linux/highmem.h>
#define pgd_quicklist (current_cpu_data.pgd_quick)
#define pmd_quicklist (current_cpu_data.pmd_quick)
#define pte_quicklist (current_cpu_data.pte_quick)
#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
#define pmd_populate(mm, pmd, pte) \
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
/*
 * Point a pmd entry at a pte page given as a struct page.  The page
 * frame number is derived from the page's offset within mem_map and
 * widened to 64 bits before shifting, so the resulting physical
 * address cannot be truncated (relevant with PAE, where pte pages may
 * sit above 4GB).
 */
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
	unsigned long long pfn = (unsigned long long) (pte - mem_map);

	set_pmd(pmd, __pmd(_PAGE_TABLE + (pfn << (unsigned long long) PAGE_SHIFT)));
}
/*
* Allocate and free page tables.
*/
......@@ -29,7 +31,7 @@ extern void *kmem_cache_alloc(struct kmem_cache_s *, int);
extern void kmem_cache_free(struct kmem_cache_s *, void *);
static inline pgd_t *get_pgd_slow(void)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
......@@ -56,7 +58,7 @@ static inline pgd_t *get_pgd_slow(void)
#else
static inline pgd_t *get_pgd_slow(void)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
......@@ -71,33 +73,7 @@ static inline pgd_t *get_pgd_slow(void)
#endif /* CONFIG_X86_PAE */
/*
 * Grab a pgd from the per-CPU quicklist if one is cached, otherwise
 * fall back to a fresh allocation via get_pgd_slow().
 *
 * Preemption is disabled only around the quicklist manipulation so the
 * per-CPU list head and pgtable_cache_size counter stay consistent on
 * this CPU; it is re-enabled before the (possibly sleeping) slow path.
 *
 * Returns the pgd, or NULL if the slow-path allocation fails.
 */
static inline pgd_t *get_pgd_fast(void)
{
unsigned long *ret;
preempt_disable();
if ((ret = pgd_quicklist) != NULL) {
/* unlink the head: its first word links to the next cached pgd */
pgd_quicklist = (unsigned long *)(*ret);
ret[0] = 0; /* clear the link word before handing the pgd out */
pgtable_cache_size--;
preempt_enable();
} else {
preempt_enable();
ret = (unsigned long *)get_pgd_slow();
}
return (pgd_t *)ret;
}
/*
 * Push a pgd onto the per-CPU quicklist for later reuse instead of
 * freeing the page.  The first word of the pgd page itself is reused
 * as the link to the previous list head.  Preemption is disabled so
 * list head and counter are updated atomically w.r.t. this CPU.
 */
static inline void free_pgd_fast(pgd_t *pgd)
{
preempt_disable();
*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
pgd_quicklist = (unsigned long *) pgd;
pgtable_cache_size++;
preempt_enable();
}
static inline void free_pgd_slow(pgd_t *pgd)
static inline void pgd_free(pgd_t *pgd)
{
#if defined(CONFIG_X86_PAE)
int i;
......@@ -110,64 +86,64 @@ static inline void free_pgd_slow(pgd_t *pgd)
#endif
}
static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
/*
 * Allocate one zeroed page to be used as a kernel pagetable.  Kernel
 * pte pages are allocated from lowmem (plain GFP_KERNEL), so they are
 * always reachable through the direct mapping.
 *
 * On allocation failure the task sleeps for about one second (HZ
 * jiffies) and retries, up to 10 attempts; returns NULL if every
 * attempt fails.  May sleep — must not be called with spinlocks held.
 */
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
int count = 0;
pte_t *pte;
do {
pte = (pte_t *) __get_free_page(GFP_KERNEL);
if (pte)
clear_page(pte);
else {
/* back off briefly before retrying the allocation */
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ);
}
} while (!pte && (count++ < 10));
return pte;
}
static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
unsigned long address)
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *ret;
int count = 0;
struct page *pte;
preempt_disable();
if ((ret = (unsigned long *)pte_quicklist) != NULL) {
pte_quicklist = (unsigned long *)(*ret);
ret[0] = ret[1];
pgtable_cache_size--;
do {
#if CONFIG_HIGHPTE
pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0);
#else
pte = alloc_pages(GFP_KERNEL, 0);
#endif
if (pte)
clear_highpage(pte);
else {
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ);
}
preempt_enable();
return (pte_t *)ret;
} while (!pte && (count++ < 10));
return pte;
}
static inline void pte_free_fast(pte_t *pte)
static inline void pte_free_kernel(pte_t *pte)
{
preempt_disable();
*(unsigned long *)pte = (unsigned long) pte_quicklist;
pte_quicklist = (unsigned long *) pte;
pgtable_cache_size++;
preempt_enable();
free_page((unsigned long)pte);
}
static __inline__ void pte_free_slow(pte_t *pte)
static inline void pte_free(struct page *pte)
{
free_page((unsigned long)pte);
__free_page(pte);
}
#define pte_free(pte) pte_free_slow(pte)
#define pgd_free(pgd) free_pgd_slow(pgd)
#define pgd_alloc(mm) get_pgd_fast()
/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
* inside the pgd, so has no extra memory associated with it.
* (In the PAE case we free the pmds as part of the pgd.)
*/
#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); })
#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
#define pmd_free_slow(x) do { } while (0)
#define pmd_free_fast(x) do { } while (0)
#define pmd_free(x) do { } while (0)
#define pgd_populate(mm, pmd, pte) BUG()
extern int do_check_pgt_cache(int, int);
/*
* TLB flushing:
*
......
......@@ -316,9 +316,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define page_pte(page) page_pte_prot(page, __pgprot(0))
#define pmd_page(pmd) \
#define pmd_page_kernel(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) \
(mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
/* to find an entry in a page-table-directory. */
#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
......@@ -335,8 +338,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
/* Find an entry in the third-level page table.. */
#define __pte_offset(address) \
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
__pte_offset(address))
#define pte_offset_kernel(dir, address) \
((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address))
#define pte_offset_map(dir, address) \
((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + __pte_offset(address))
#define pte_offset_map2(dir, address) \
((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + __pte_offset(address))
#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
#define pte_unmap2(pte) kunmap_atomic(pte, KM_PTE1)
/*
* The i386 doesn't have any external MMU info: the kernel page
......
......@@ -49,10 +49,6 @@ struct cpuinfo_x86 {
int f00f_bug;
int coma_bug;
unsigned long loops_per_jiffy;
unsigned long *pgd_quick;
unsigned long *pmd_quick;
unsigned long *pte_quick;
unsigned long pgtable_cache_sz;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define X86_VENDOR_INTEL 0
......
......@@ -4,7 +4,6 @@
#include <linux/config.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <asm/pgalloc.h>
#ifdef CONFIG_HIGHMEM
......@@ -16,6 +15,7 @@ extern struct page *highmem_start_page;
unsigned int nr_free_highpages(void);
extern void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig);
extern void check_highmem_ptes(void);
static inline char *bh_kmap(struct buffer_head *bh)
{
......@@ -92,8 +92,9 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
static inline void clear_highpage(struct page *page)
{
clear_page(kmap(page));
kunmap(page);
void *kaddr = kmap_atomic(page, KM_USER0);
clear_page(kaddr);
kunmap_atomic(kaddr, KM_USER0);
}
/*
......@@ -101,15 +102,16 @@ static inline void clear_highpage(struct page *page)
*/
static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
{
char *kaddr;
void *kaddr;
if (offset + size > PAGE_SIZE)
BUG();
kaddr = kmap(page);
memset(kaddr + offset, 0, size);
kaddr = kmap_atomic(page, KM_USER0);
memset((char *)kaddr + offset, 0, size);
flush_dcache_page(page);
flush_page_to_ram(page);
kunmap(page);
kunmap_atomic(kaddr, KM_USER0);
}
static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr)
......
......@@ -364,7 +364,8 @@ extern int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, un
extern int vmtruncate(struct inode * inode, loff_t offset);
extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
......@@ -380,7 +381,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
/*
* On a two-level page table, this ends up being trivial. Thus the
* inlining and the symmetry break with pte_alloc() that does all
* inlining and the symmetry break with pte_alloc_map() that does all
* of this out-of-line.
*/
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
......@@ -390,9 +391,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long a
return pmd_offset(pgd, address);
}
extern int pgt_cache_water[2];
extern int check_pgt_cache(void);
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
unsigned long * zones_size, unsigned long zone_start_paddr,
......
......@@ -21,6 +21,7 @@
#include <linux/completion.h>
#include <asm/mmu_context.h>
#include <linux/kernel_stat.h>
#include <linux/highmem.h>
/*
* Priority of a process goes from 0 to 139. The 0-99
......@@ -761,6 +762,9 @@ asmlinkage void schedule(void)
if (unlikely(in_interrupt()))
BUG();
#if CONFIG_DEBUG_HIGHMEM
check_highmem_ptes();
#endif
need_resched:
preempt_disable();
prev = current;
......
......@@ -97,8 +97,6 @@ int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
extern int acct_parm[];
#endif
extern int pgt_cache_water[];
static int parse_table(int *, int, void *, size_t *, void *, size_t,
ctl_table *, void **);
static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
......@@ -268,8 +266,6 @@ static ctl_table vm_table[] = {
sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
{VM_PAGERDAEMON, "kswapd",
&pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec},
{VM_PGT_CACHE, "pagetable_cache",
&pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
{VM_PAGE_CLUSTER, "page-cluster",
&page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
......
......@@ -1974,7 +1974,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
/* Called with mm->page_table_lock held to protect against other
* threads/the swapper from ripping pte's out from under us.
*/
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
static inline int filemap_sync_pte(pte_t *ptep, pmd_t *pmdp, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
pte_t pte = *ptep;
......@@ -1990,11 +1990,10 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
}
static inline int filemap_sync_pte_range(pmd_t * pmd,
unsigned long address, unsigned long size,
struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
unsigned long address, unsigned long end,
struct vm_area_struct *vma, unsigned int flags)
{
pte_t * pte;
unsigned long end;
pte_t *pte;
int error;
if (pmd_none(*pmd))
......@@ -2004,27 +2003,26 @@ static inline int filemap_sync_pte_range(pmd_t * pmd,
pmd_clear(pmd);
return 0;
}
pte = pte_offset(pmd, address);
offset += address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
pte = pte_offset_map(pmd, address);
if ((address & PMD_MASK) != (end & PMD_MASK))
end = (address & PMD_MASK) + PMD_SIZE;
error = 0;
do {
error |= filemap_sync_pte(pte, vma, address + offset, flags);
error |= filemap_sync_pte(pte, pmd, vma, address, flags);
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
pte_unmap(pte - 1);
return error;
}
static inline int filemap_sync_pmd_range(pgd_t * pgd,
unsigned long address, unsigned long size,
unsigned long address, unsigned long end,
struct vm_area_struct *vma, unsigned int flags)
{
pmd_t * pmd;
unsigned long offset, end;
int error;
if (pgd_none(*pgd))
......@@ -2035,14 +2033,11 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd,
return 0;
}
pmd = pmd_offset(pgd, address);
offset = address & PGDIR_MASK;
address &= ~PGDIR_MASK;
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
end = (address & PGDIR_MASK) + PGDIR_SIZE;
error = 0;
do {
error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
......@@ -2062,11 +2057,11 @@ int filemap_sync(struct vm_area_struct * vma, unsigned long address,
spin_lock(&vma->vm_mm->page_table_lock);
dir = pgd_offset(vma->vm_mm, address);
flush_cache_range(vma, end - size, end);
flush_cache_range(vma, address, end);
if (address >= end)
BUG();
do {
error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
......
......@@ -20,6 +20,7 @@
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <asm/pgalloc.h>
static mempool_t *page_pool, *isa_page_pool;
......@@ -445,3 +446,19 @@ void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig)
bio->bi_private = *bio_orig;
*bio_orig = bio;
}
#if CONFIG_DEBUG_HIGHMEM
/*
 * Debug check, called from the scheduler: verify that this CPU holds
 * no atomic kmap slot across a reschedule.  A non-none pte in any of
 * this CPU's fixmap kmap slots means somebody scheduled while holding
 * a kmap_atomic() mapping, which is a bug.
 */
void check_highmem_ptes(void)
{
	int type;

	for (type = 0; type < KM_TYPE_NR; type++) {
		int idx = type + KM_TYPE_NR * smp_processor_id();

		if (pte_none(*(kmap_pte - idx)))
			continue;
		printk("scheduling with KM_TYPE %d held!\n", type);
		BUG();
	}
}
#endif
......@@ -90,7 +90,7 @@ void __free_pte(pte_t pte)
*/
static inline void free_one_pmd(pmd_t * dir)
{
pte_t * pte;
struct page *pte;
if (pmd_none(*dir))
return;
......@@ -99,7 +99,7 @@ static inline void free_one_pmd(pmd_t * dir)
pmd_clear(dir);
return;
}
pte = pte_offset(dir, 0);
pte = pmd_page(*dir);
pmd_clear(dir);
pte_free(pte);
}
......@@ -125,18 +125,6 @@ static inline void free_one_pgd(pgd_t * dir)
pmd_free(pmd);
}
/* Low and high watermarks for page table cache.
The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
*/
int pgt_cache_water[2] = { 25, 50 };
/* Returns the number of pages freed */
int check_pgt_cache(void)
{
return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
}
/*
* This function clears all user-level page tables of a process - this
* is needed by execve(), so that old pages aren't in the way.
......@@ -152,11 +140,59 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
page_dir++;
} while (--nr);
spin_unlock(&mm->page_table_lock);
}
/*
 * Make sure *pmd has a pte page, then return the pte entry for
 * 'address', mapped via pte_offset_map() (an atomic kmap when the pte
 * page lives in highmem).  The caller must hold mm->page_table_lock
 * and must pte_unmap() the returned pointer when done.
 *
 * page_table_lock is dropped across the (possibly sleeping) pte-page
 * allocation and retaken afterwards, hence the re-check of the pmd:
 * another thread may have populated it in the meantime.
 *
 * Returns NULL if the pte page could not be allocated.
 */
pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
if (!pmd_present(*pmd)) {
struct page *new;
/* keep the page table cache within bounds */
check_pgt_cache();
spin_unlock(&mm->page_table_lock);
new = pte_alloc_one(mm, address);
spin_lock(&mm->page_table_lock);
if (!new)
return NULL;
/*
 * Because we dropped the lock, we should re-check the
 * entry, as somebody else could have populated it..
 */
if (pmd_present(*pmd)) {
pte_free(new);
goto out;
}
pmd_populate(mm, pmd, new);
}
out:
/* defensive: the pmd should be present on every path reaching here */
if (pmd_present(*pmd))
return pte_offset_map(pmd, address);
return NULL;
}
/*
 * Make sure *pmd has a pte page for a kernel mapping, then return the
 * pte entry for 'address' via pte_offset_kernel().  Kernel pte pages
 * come from pte_alloc_one_kernel() (lowmem), so no kmap is involved
 * and the caller has nothing to unmap.
 *
 * Caller holds mm->page_table_lock; the lock is dropped across the
 * sleeping allocation and retaken, hence the re-check of the pmd.
 *
 * Returns NULL if the pte page could not be allocated.
 */
pte_t * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
if (!pmd_present(*pmd)) {
pte_t *new;
spin_unlock(&mm->page_table_lock);
new = pte_alloc_one_kernel(mm, address);
spin_lock(&mm->page_table_lock);
if (!new)
return NULL;
/*
 * Because we dropped the lock, we should re-check the
 * entry, as somebody else could have populated it..
 */
if (pmd_present(*pmd)) {
pte_free_kernel(new);
goto out;
}
pmd_populate_kernel(mm, pmd, new);
}
out:
return pte_offset_kernel(pmd, address);
}
#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t))
#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t))
......@@ -169,7 +205,7 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
* variable count and make things faster. -jj
*
* dst->page_table_lock is held on entry and exit,
* but may be dropped within pmd_alloc() and pte_alloc().
* but may be dropped within pmd_alloc() and pte_alloc_map().
*/
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
......@@ -221,12 +257,11 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
goto cont_copy_pmd_range;
}
src_pte = pte_offset(src_pmd, address);
dst_pte = pte_alloc(dst, dst_pmd, address);
dst_pte = pte_alloc_map(dst, dst_pmd, address);
if (!dst_pte)
goto nomem;
spin_lock(&src->page_table_lock);
src_pte = pte_offset_map2(src_pmd, address);
do {
pte_t pte = *src_pte;
struct page *ptepage;
......@@ -259,11 +294,16 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
cont_copy_pte_range: set_pte(dst_pte, pte);
cont_copy_pte_range_noset: address += PAGE_SIZE;
if (address >= end)
if (address >= end) {
pte_unmap2(src_pte);
pte_unmap(dst_pte);
goto out_unlock;
}
src_pte++;
dst_pte++;
} while ((unsigned long)src_pte & PTE_TABLE_MASK);
pte_unmap2(src_pte-1);
pte_unmap(dst_pte-1);
spin_unlock(&src->page_table_lock);
cont_copy_pmd_range: src_pmd++;
......@@ -292,7 +332,7 @@ static inline void forget_pte(pte_t page)
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{
unsigned long offset;
pte_t * ptep;
pte_t *ptep;
int freed = 0;
if (pmd_none(*pmd))
......@@ -302,7 +342,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
pmd_clear(pmd);
return 0;
}
ptep = pte_offset(pmd, address);
ptep = pte_offset_map(pmd, address);
offset = address & ~PMD_MASK;
if (offset + size > PMD_SIZE)
size = PMD_SIZE - offset;
......@@ -322,6 +362,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
pte_clear(ptep);
}
}
pte_unmap(ptep-1);
return freed;
}
......@@ -415,11 +456,16 @@ static struct page * follow_page(struct mm_struct *mm, unsigned long address, in
if (pmd_none(*pmd) || pmd_bad(*pmd))
goto out;
ptep = pte_offset(pmd, address);
if (!ptep)
preempt_disable();
ptep = pte_offset_map(pmd, address);
if (!ptep) {
preempt_enable();
goto out;
}
pte = *ptep;
pte_unmap(ptep);
preempt_enable();
if (pte_present(pte)) {
if (!write ||
(pte_write(pte) && pte_dirty(pte)))
......@@ -748,10 +794,11 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
pte_t * pte = pte_alloc(mm, pmd, address);
pte_t * pte = pte_alloc_map(mm, pmd, address);
if (!pte)
return -ENOMEM;
zeromap_pte_range(pte, address, end - address, prot);
pte_unmap(pte);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
......@@ -828,10 +875,11 @@ static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned lo
end = PGDIR_SIZE;
phys_addr -= address;
do {
pte_t * pte = pte_alloc(mm, pmd, address);
pte_t * pte = pte_alloc_map(mm, pmd, address);
if (!pte)
return -ENOMEM;
remap_pte_range(pte, address, end - address, address + phys_addr, prot);
pte_unmap(pte);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
......@@ -917,7 +965,7 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
* with the page_table_lock released.
*/
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte)
unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
{
struct page *old_page, *new_page;
......@@ -931,10 +979,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
if (reuse) {
flush_cache_page(vma, address);
establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return 1; /* Minor fault */
}
}
pte_unmap(page_table);
/*
* Ok, we need to copy. Oh, well..
......@@ -951,6 +1001,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
* Re-check the pte - we dropped the lock
*/
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (pte_same(*page_table, pte)) {
if (PageReserved(old_page))
++mm->rss;
......@@ -960,12 +1011,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
/* Free the old page.. */
new_page = old_page;
}
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
page_cache_release(new_page);
page_cache_release(old_page);
return 1; /* Minor fault */
bad_wp_page:
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
return -1;
......@@ -1086,13 +1139,14 @@ void swapin_readahead(swp_entry_t entry)
*/
static int do_swap_page(struct mm_struct * mm,
struct vm_area_struct * vma, unsigned long address,
pte_t * page_table, pte_t orig_pte, int write_access)
pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
{
struct page *page;
swp_entry_t entry = pte_to_swp_entry(orig_pte);
pte_t pte;
int ret = 1;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
page = lookup_swap_cache(entry);
if (!page) {
......@@ -1105,7 +1159,9 @@ static int do_swap_page(struct mm_struct * mm,
*/
int retval;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
retval = pte_same(*page_table, orig_pte) ? -1 : 1;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return retval;
}
......@@ -1121,7 +1177,9 @@ static int do_swap_page(struct mm_struct * mm,
* released the page table lock.
*/
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (!pte_same(*page_table, orig_pte)) {
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
unlock_page(page);
page_cache_release(page);
......@@ -1146,6 +1204,7 @@ static int do_swap_page(struct mm_struct * mm,
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return ret;
}
......@@ -1155,7 +1214,7 @@ static int do_swap_page(struct mm_struct * mm,
* spinlock held to protect against concurrent faults in
* multithreaded programs.
*/
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, pmd_t *pmd, int write_access, unsigned long addr)
{
pte_t entry;
......@@ -1167,6 +1226,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
struct page *page;
/* Allocate our own private page. */
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
page = alloc_page(GFP_HIGHUSER);
......@@ -1175,7 +1235,10 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
clear_user_highpage(page, addr);
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, addr);
if (!pte_none(*page_table)) {
pte_unmap(page_table);
page_cache_release(page);
spin_unlock(&mm->page_table_lock);
return 1;
......@@ -1187,6 +1250,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
}
set_pte(page_table, entry);
pte_unmap(page_table);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
......@@ -1210,13 +1274,14 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
* spinlock held. Exit with the spinlock released.
*/
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
{
struct page * new_page;
pte_t entry;
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table, write_access, address);
return do_anonymous_page(mm, vma, page_table, pmd, write_access, address);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
......@@ -1242,6 +1307,8 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
}
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
/*
* This silly early PAGE_DIRTY setting removes a race
* due to the bad i386 page protection. But it's valid
......@@ -1261,8 +1328,10 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
if (write_access)
entry = pte_mkwrite(pte_mkdirty(entry));
set_pte(page_table, entry);
pte_unmap(page_table);
} else {
/* One of our sibling threads was faster, back out. */
pte_unmap(page_table);
page_cache_release(new_page);
spin_unlock(&mm->page_table_lock);
return 1;
......@@ -1297,7 +1366,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
*/
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
int write_access, pte_t *pte, pmd_t *pmd)
{
pte_t entry;
......@@ -1309,18 +1378,19 @@ static inline int handle_pte_fault(struct mm_struct *mm,
* drop the lock.
*/
if (pte_none(entry))
return do_no_page(mm, vma, address, write_access, pte);
return do_swap_page(mm, vma, address, pte, entry, write_access);
return do_no_page(mm, vma, address, write_access, pte, pmd);
return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
}
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, entry);
return do_wp_page(mm, vma, address, pte, pmd, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
establish_pte(vma, address, pte, entry);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
return 1;
}
......@@ -1345,9 +1415,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
pmd = pmd_alloc(mm, pgd, address);
if (pmd) {
pte_t * pte = pte_alloc(mm, pmd, address);
pte_t * pte = pte_alloc_map(mm, pmd, address);
if (pte)
return handle_pte_fault(mm, vma, address, write_access, pte);
return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
}
spin_unlock(&mm->page_table_lock);
return -1;
......@@ -1366,9 +1436,6 @@ pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
pmd_t *new;
/* "fast" allocation can happen without dropping the lock.. */
new = pmd_alloc_one_fast(mm, address);
if (!new) {
spin_unlock(&mm->page_table_lock);
new = pmd_alloc_one(mm, address);
spin_lock(&mm->page_table_lock);
......@@ -1379,51 +1446,15 @@ pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (!pgd_none(*pgd)) {
if (pgd_present(*pgd)) {
pmd_free(new);
goto out;
}
}
pgd_populate(mm, pgd, new);
out:
return pmd_offset(pgd, address);
}
/*
 * Allocate a PTE page for the given pmd entry if none is present yet,
 * and return a pointer to the PTE slot covering @address within it
 * (NULL only on allocation failure).
 *
 * We've already handled the fast-path in-line, and we own the
 * page table lock.  The slow allocation path below drops that lock
 * temporarily, so the pmd entry must be re-validated afterwards.
 */
pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
	if (pmd_none(*pmd)) {
		pte_t *new;

		/* "fast" allocation can happen without dropping the lock.. */
		new = pte_alloc_one_fast(mm, address);
		if (!new) {
			/*
			 * ..the slow path may sleep, so release the
			 * page_table_lock across the allocation.
			 */
			spin_unlock(&mm->page_table_lock);
			new = pte_alloc_one(mm, address);
			spin_lock(&mm->page_table_lock);
			if (!new)
				return NULL;
			/*
			 * Because we dropped the lock, we should re-check the
			 * entry, as somebody else could have populated it..
			 */
			if (!pmd_none(*pmd)) {
				/* Lost the race: discard ours, use theirs. */
				pte_free(new);
				goto out;
			}
		}
		pmd_populate(mm, pmd, new);
	}
out:
	return pte_offset(pmd, address);
}
int make_pages_present(unsigned long addr, unsigned long end)
{
int ret, len, write;
......
......@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/highmem.h>
static inline void change_pte_range(pmd_t * pmd, unsigned long address,
unsigned long size, pgprot_t newprot)
......@@ -27,7 +28,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address,
pmd_clear(pmd);
return;
}
pte = pte_offset(pmd, address);
pte = pte_offset_map(pmd, address);
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
......@@ -46,6 +47,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address,
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
pte_unmap(pte - 1);
}
static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
......
......@@ -17,7 +17,7 @@
extern int vm_enough_memory(long pages);
static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
static inline pte_t *get_one_pte_map2(struct mm_struct *mm, unsigned long addr)
{
pgd_t * pgd;
pmd_t * pmd;
......@@ -41,21 +41,23 @@ static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
goto end;
}
pte = pte_offset(pmd, addr);
if (pte_none(*pte))
pte = pte_offset_map2(pmd, addr);
if (pte_none(*pte)) {
pte_unmap2(pte);
pte = NULL;
}
end:
return pte;
}
static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr)
static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
{
pmd_t * pmd;
pte_t * pte = NULL;
pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
if (pmd)
pte = pte_alloc(mm, pmd, addr);
pte = pte_alloc_map(mm, pmd, addr);
return pte;
}
......@@ -79,12 +81,16 @@ static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst)
static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr)
{
int error = 0;
pte_t * src;
pte_t *src, *dst;
spin_lock(&mm->page_table_lock);
src = get_one_pte(mm, old_addr);
if (src)
error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr));
src = get_one_pte_map2(mm, old_addr);
if (src) {
dst = alloc_one_pte_map(mm, new_addr);
error = copy_one_pte(mm, src, dst);
pte_unmap2(src);
pte_unmap(dst);
}
spin_unlock(&mm->page_table_lock);
return error;
}
......
......@@ -393,7 +393,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
pmd_clear(dir);
return;
}
pte = pte_offset(dir, address);
pte = pte_offset_map(dir, address);
offset += address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
......@@ -404,6 +404,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
pte_unmap(pte - 1);
}
/* mmlist_lock and vma->vm_mm->page_table_lock are held */
......
......@@ -30,7 +30,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo
pmd_clear(pmd);
return;
}
pte = pte_offset(pmd, address);
pte = pte_offset_kernel(pmd, address);
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
......@@ -125,7 +125,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
pte_t * pte = pte_alloc(&init_mm, pmd, address);
pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
if (!pte)
return -ENOMEM;
if (alloc_area_pte(pte, address, end - address, gfp_mask, prot))
......
......@@ -167,7 +167,7 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
return count;
}
pte = pte_offset(dir, address);
pte = pte_offset_map(dir, address);
pmd_end = (address + PMD_SIZE) & PMD_MASK;
if (end > pmd_end)
......@@ -181,6 +181,7 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
if (!count) {
address += PAGE_SIZE;
pte++;
break;
}
}
......@@ -188,6 +189,7 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
pte_unmap(pte - 1);
mm->swap_address = address;
return count;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment