Commit 3a0b82c0 authored by Ingo Molnar


adds simple support for atomically-mapped PTEs. On highmem systems this enables the allocation of the pagetables in highmem.
parent 094686d3
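The key API change in this commit: user page tables are no longer assumed to have a permanent kernel mapping. Code that used pte_offset() must now use pte_offset_kernel() for kernel page tables (which stay in lowmem), or bracket user-pte access with pte_offset_map()/pte_unmap(). A minimal sketch of the new access pattern, mirroring the driver conversions below; the helper and its name are illustrative, not part of the commit:

/* Sketch: snapshot a user pte under CONFIG_HIGHPTE rules.
 * read_user_pte() is a hypothetical helper, not from this patch. */
static pte_t read_user_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pmd_t *pmd;
	pte_t *ptep, pte = __pte(0);

	if (pgd_none(*pgd))
		return pte;
	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd))
		return pte;
	/* The pte page may live in highmem: map it with an atomic
	 * kmap via pte_offset_map() and unmap it again before
	 * returning. Atomic kmap slots are per-CPU, hence the
	 * preempt_disable()/preempt_enable() bracket. */
	preempt_disable();
	ptep = pte_offset_map(pmd, addr);
	pte = *ptep;
	pte_unmap(ptep);
	preempt_enable();
	return pte;	/* a snapshot; the live pte may change after unmap */
}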
@@ -153,16 +153,27 @@ tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR
 tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID
 choice 'High Memory Support' \
 	"off    CONFIG_NOHIGHMEM \
 	 4GB    CONFIG_HIGHMEM4G \
-	 64GB   CONFIG_HIGHMEM64G" off
+	 4GB-highpte   CONFIG_HIGHMEM4G_HIGHPTE \
+	 64GB   CONFIG_HIGHMEM64G \
+	 64GB-highpte  CONFIG_HIGHMEM64G_HIGHPTE" off
 if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
    define_bool CONFIG_HIGHMEM y
 fi
+if [ "$CONFIG_HIGHMEM4G_HIGHPTE" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+   define_bool CONFIG_HIGHPTE y
+fi
 if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
    define_bool CONFIG_HIGHMEM y
    define_bool CONFIG_X86_PAE y
 fi
+if [ "$CONFIG_HIGHMEM64G_HIGHPTE" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+   define_bool CONFIG_HIGHPTE y
+   define_bool CONFIG_X86_PAE y
+fi
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
@@ -397,11 +408,13 @@ comment 'Kernel hacking'
 bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
 if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
-   bool '  Debug high memory support' CONFIG_DEBUG_HIGHMEM
    bool '  Debug memory allocations' CONFIG_DEBUG_SLAB
    bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
    bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
    bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+   if [ "$CONFIG_HIGHMEM" = "y" ]; then
+      bool '  Highmem debugging' CONFIG_DEBUG_HIGHMEM
+   fi
 fi
 endmenu
...
@@ -145,7 +145,6 @@ void cpu_idle (void)
 		while (!need_resched())
 			idle();
 		schedule();
-		check_pgt_cache();
 	}
 }
...
@@ -146,10 +146,6 @@ void __init smp_store_cpu_info(int id)
 	struct cpuinfo_x86 *c = cpu_data + id;
 	*c = boot_cpu_data;
-	c->pte_quick = 0;
-	c->pmd_quick = 0;
-	c->pgd_quick = 0;
-	c->pgtable_cache_sz = 0;
 	identify_cpu(c);
 	/*
 	 * Mask B, Pentium, but not Pentium MMX
...
@@ -788,7 +788,7 @@ void __init trap_init_f00f_bug(void)
 	page = (unsigned long) vmalloc(PAGE_SIZE);
 	pgd = pgd_offset(&init_mm, page);
 	pmd = pmd_offset(pgd, page);
-	pte = pte_offset(pmd, page);
+	pte = pte_offset_kernel(pmd, page);
 	__free_page(pte_page(*pte));
 	*pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO);
 	/*
...
@@ -93,7 +93,7 @@ static void mark_screen_rdonly(struct task_struct * tsk)
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
-	pte_t *pte;
+	pte_t *pte, *mapped;
 	int i;
 	pgd = pgd_offset(tsk->mm, 0xA0000);
@@ -112,12 +112,15 @@ static void mark_screen_rdonly(struct task_struct * tsk)
 		pmd_clear(pmd);
 		return;
 	}
-	pte = pte_offset(pmd, 0xA0000);
+	preempt_disable();
+	pte = mapped = pte_offset_map(pmd, 0xA0000);
 	for (i = 0; i < 32; i++) {
 		if (pte_present(*pte))
 			set_pte(pte, pte_wrprotect(*pte));
 		pte++;
 	}
+	pte_unmap(mapped);
+	preempt_enable();
 	flush_tlb();
 }
...
@@ -320,12 +320,20 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		asm("movl %%cr3,%0":"=r" (page));
 		page = ((unsigned long *) __va(page))[address >> 22];
 		printk(KERN_ALERT "*pde = %08lx\n", page);
+		/*
+		 * We must not directly access the pte in the highpte
+		 * case, the page table might be allocated in highmem.
+		 * And lets rather not kmap-atomic the pte, just in case
+		 * it's allocated already.
+		 */
+#ifndef CONFIG_HIGHPTE
 		if (page & 1) {
 			page &= PAGE_MASK;
 			address &= 0x003ff000;
 			page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
 			printk(KERN_ALERT "*pte = %08lx\n", page);
 		}
+#endif
 		die("Oops", regs, error_code);
 		bust_spinlocks(0);
 		do_exit(SIGKILL);
@@ -395,7 +403,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 			goto no_context;
 		set_pmd(pmd, *pmd_k);
-		pte_k = pte_offset(pmd_k, address);
+		pte_k = pte_offset_kernel(pmd_k, address);
 		if (!pte_present(*pte_k))
 			goto no_context;
 		return;
...
@@ -43,28 +43,6 @@ unsigned long highstart_pfn, highend_pfn;
 static unsigned long totalram_pages;
 static unsigned long totalhigh_pages;
-int do_check_pgt_cache(int low, int high)
-{
-	int freed = 0;
-	if(pgtable_cache_size > high) {
-		do {
-			if (pgd_quicklist) {
-				free_pgd_slow(get_pgd_fast());
-				freed++;
-			}
-			if (pmd_quicklist) {
-				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
-				freed++;
-			}
-			if (pte_quicklist) {
-				pte_free_slow(pte_alloc_one_fast(NULL, 0));
-				freed++;
-			}
-		} while(pgtable_cache_size > low);
-	}
-	return freed;
-}
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -76,7 +54,7 @@ pte_t *kmap_pte;
 pgprot_t kmap_prot;
 #define kmap_get_fixmap_pte(vaddr) \
-	pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+	pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
 void __init kmap_init(void)
 {
@@ -116,7 +94,6 @@ void show_mem(void)
 	printk("%d reserved pages\n",reserved);
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
-	printk("%ld pages in page table cache\n",pgtable_cache_size);
 	show_buffers();
 }
@@ -143,7 +120,7 @@ static inline void set_pte_phys (unsigned long vaddr,
 		printk("PAE BUG #01!\n");
 		return;
 	}
-	pte = pte_offset(pmd, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
 	if (pte_val(*pte))
 		pte_ERROR(*pte);
 	pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
@@ -196,7 +173,7 @@ static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t
 		if (pmd_none(*pmd)) {
 			pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
-			if (pte != pte_offset(pmd, 0))
+			if (pte != pte_offset_kernel(pmd, 0))
 				BUG();
 		}
 		vaddr += PMD_SIZE;
@@ -267,7 +244,7 @@ static void __init pagetable_init (void)
 				*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
 			}
 			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
-			if (pte_base != pte_offset(pmd, 0))
+			if (pte_base != pte_offset_kernel(pmd, 0))
 				BUG();
 		}
@@ -289,7 +266,7 @@ static void __init pagetable_init (void)
 	pgd = swapper_pg_dir + __pgd_offset(vaddr);
 	pmd = pmd_offset(pgd, vaddr);
-	pte = pte_offset(pmd, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
 	pkmap_page_table = pte;
 #endif
@@ -398,7 +375,7 @@ void __init test_wp_bit(void)
 	pgd = swapper_pg_dir + __pgd_offset(vaddr);
 	pmd = pmd_offset(pgd, vaddr);
-	pte = pte_offset(pmd, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
 	old_pte = *pte;
 	*pte = mk_pte_phys(0, PAGE_READONLY);
 	local_flush_tlb();
...
@@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
...
@@ -449,7 +449,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
 			for (i = vma->vm_start; i < vma->vm_end; i += PAGE_SIZE) {
 				pgd = pgd_offset(vma->vm_mm, i);
 				pmd = pmd_offset(pgd, i);
-				pte = pte_offset(pmd, i);
+				preempt_disable();
+				pte = pte_offset_map(pmd, i);
 				if (pte_present(*pte)) {
 					address = __pa(pte_page(*pte))
 						+ (i & (PAGE_SIZE-1));
@@ -465,6 +466,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
 				} else {
 					DRM_PROC_PRINT(" 0x%08lx\n", i);
 				}
+				pte_unmap(pte);
+				preempt_enable();
 			}
 #endif
 		}
...
@@ -143,9 +143,15 @@ int DRM(sg_alloc)( struct inode *inode, struct file *filp,
 		if ( !pmd_present( *pmd ) )
 			goto failed;
-		pte = pte_offset( pmd, i );
-		if ( !pte_present( *pte ) )
+		preempt_disable();
+		pte = pte_offset_map( pmd, i );
+		if ( !pte_present( *pte ) ) {
+			pte_unmap(pte);
+			preempt_enable();
 			goto failed;
+		}
+		pte_unmap(pte);
+		preempt_enable();
 		entry->pagelist[j] = pte_page( *pte );
...
@@ -169,8 +169,15 @@ struct page *DRM(vm_shm_nopage)(struct vm_area_struct *vma,
 	if( !pgd_present( *pgd ) ) return NOPAGE_OOM;
 	pmd = pmd_offset( pgd, i );
 	if( !pmd_present( *pmd ) ) return NOPAGE_OOM;
-	pte = pte_offset( pmd, i );
-	if( !pte_present( *pte ) ) return NOPAGE_OOM;
+	preempt_disable();
+	pte = pte_offset_map( pmd, i );
+	if( !pte_present( *pte ) ) {
+		pte_unmap(pte);
+		preempt_enable();
+		return NOPAGE_OOM;
+	}
+	pte_unmap(pte);
+	preempt_enable();
 	page = pte_page(*pte);
 	get_page(page);
...
@@ -215,8 +215,11 @@ static inline struct page *uvirt_to_page(pgd_t *pgd, unsigned long adr)
 	if (!pgd_none(*pgd)) {
 		pmd = pmd_offset(pgd, adr);
 		if (!pmd_none(*pmd)) {
-			ptep = pte_offset(pmd, adr);
+			preempt_disable();
+			ptep = pte_offset_map(pmd, adr);
 			pte = *ptep;
+			pte_unmap(pte);
+			preempt_enable();
 			if(pte_present(pte))
 				ret = pte_page(pte);
 		}
...
@@ -219,6 +219,7 @@ sgi_graphics_nopage (struct vm_area_struct *vma, unsigned long address, int
 	int board = GRAPHICS_CARD (vma->vm_dentry->d_inode->i_rdev);
 	unsigned long virt_add, phys_add;
+	struct page * page;
 #ifdef DEBUG
 	printk ("Got a page fault for board %d address=%lx guser=%lx\n", board,
@@ -245,8 +246,10 @@ sgi_graphics_nopage (struct vm_area_struct *vma, unsigned long address, int
 	pgd = pgd_offset(current->mm, address);
 	pmd = pmd_offset(pgd, address);
-	pte = pte_offset(pmd, address);
-	return pte_page(*pte);
+	pte = pte_kmap_offset(pmd, address);
+	page = pte_page(*pte);
+	pte_kunmap(pte);
+	return page;
 }
 /*
...
@@ -139,8 +139,11 @@ static inline unsigned long uvirt_to_kva (pgd_t * pgd, unsigned long adr)
 	if (!pgd_none (*pgd)) {
 		pmd = pmd_offset (pgd, adr);
 		if (!pmd_none (*pmd)) {
-			ptep = pte_offset (pmd, adr);
+			preempt_disable();
+			ptep = pte_offset_map (pmd, adr);
 			pte = *ptep;
+			pte_unmap(pte);
+			preempt_enable();
 			if (pte_present (pte)) {
 				ret = (unsigned long) page_address (pte_page (pte));
 				ret |= (adr & (PAGE_SIZE - 1));
...
@@ -115,8 +115,11 @@ static inline unsigned long uvirt_to_kva(pgd_t *pgd, unsigned long adr)
 	if (!pgd_none(*pgd)) {
 		pmd = pmd_offset(pgd, adr);
 		if (!pmd_none(*pmd)) {
-			ptep = pte_offset(pmd, adr);
+			preempt_disable();
+			ptep = pte_offset_map(pmd, adr);
 			pte = *ptep;
+			pte_unmap(pte);
+			preempt_enable();
 			if(pte_present(pte)) {
 				ret = (unsigned long) page_address(pte_page(pte));
 				ret |= (adr & (PAGE_SIZE - 1));
...
@@ -271,15 +271,18 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
 	pmd = pmd_alloc(tsk->mm, pgd, address);
 	if (!pmd)
 		goto out;
-	pte = pte_alloc(tsk->mm, pmd, address);
+	pte = pte_alloc_map(tsk->mm, pmd, address);
 	if (!pte)
 		goto out;
-	if (!pte_none(*pte))
+	if (!pte_none(*pte)) {
+		pte_unmap(pte);
 		goto out;
+	}
 	lru_cache_add(page);
 	flush_dcache_page(page);
 	flush_page_to_ram(page);
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
+	pte_unmap(pte);
 	tsk->mm->rss++;
 	spin_unlock(&tsk->mm->page_table_lock);
...
@@ -393,11 +393,10 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
 	return res;
 }
-static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
-	int * pages, int * shared, int * dirty, int * total)
+static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, int * pages, int * shared, int * dirty, int * total)
 {
-	pte_t * pte;
-	unsigned long end;
+	unsigned long end, pmd_end;
+	pte_t *pte;
 	if (pmd_none(*pmd))
 		return;
@@ -406,11 +405,12 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
 		pmd_clear(pmd);
 		return;
 	}
-	pte = pte_offset(pmd, address);
-	address &= ~PMD_MASK;
+	preempt_disable();
+	pte = pte_offset_map(pmd, address);
 	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
+	pmd_end = (address + PMD_SIZE) & PMD_MASK;
+	if (end > pmd_end)
+		end = pmd_end;
 	do {
 		pte_t page = *pte;
 		struct page *ptpage;
@@ -431,6 +431,8 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
 		if (page_count(pte_page(page)) > 1)
 			++*shared;
 	} while (address < end);
+	pte_unmap(pte - 1);
+	preempt_enable();
 }
 static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size,
...
@@ -26,12 +26,6 @@
 #include <asm/kmap_types.h>
 #include <asm/pgtable.h>
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define HIGHMEM_DEBUG 1
-#else
-#define HIGHMEM_DEBUG 0
-#endif
 /* declarations for highmem.c */
 extern unsigned long highstart_pfn, highend_pfn;
@@ -94,7 +88,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#if HIGHMEM_DEBUG
+#if CONFIG_DEBUG_HIGHMEM
 	if (!pte_none(*(kmap_pte-idx)))
 		BUG();
 #endif
@@ -106,8 +100,8 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
 static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 {
-#if HIGHMEM_DEBUG
-	unsigned long vaddr = (unsigned long) kvaddr;
+#if CONFIG_DEBUG_HIGHMEM
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 	if (vaddr < FIXADDR_START) { // FIXME
...
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
+#include <linux/config.h>
+#if CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
 enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_IRQ,
-	KM_TYPE_NR
+D(0)	KM_BOUNCE_READ,
+D(1)	KM_SKB_DATA,
+D(2)	KM_SKB_DATA_SOFTIRQ,
+D(3)	KM_USER0,
+D(4)	KM_USER1,
+D(5)	KM_BIO_IRQ,
+D(6)	KM_PTE0,
+D(7)	KM_PTE1,
+D(8)	KM_TYPE_NR
 };
+#undef D
 #endif
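The D(n) fence trick: with CONFIG_DEBUG_HIGHMEM every real atomic-kmap slot gets an unused guard member in front of it, so an off-by-one use of a kmap index lands on a fixmap slot that should always be pte_none(), which the debug checks in kmap_atomic() and check_highmem_ptes() can then trip on. Roughly, the enum expands as in this sketch of the preprocessor output (the enum name and the value comments are illustrative only):

/* With CONFIG_DEBUG_HIGHMEM, "D(0) KM_BOUNCE_READ," expands to
 * "__KM_FENCE_0 , KM_BOUNCE_READ," and so on, giving: */
enum km_type_debug_expansion {			/* illustrative name */
	__KM_FENCE_0, KM_BOUNCE_READ,		/* = 0, 1 */
	__KM_FENCE_1, KM_SKB_DATA,		/* = 2, 3 */
	__KM_FENCE_2, KM_SKB_DATA_SOFTIRQ,	/* = 4, 5 */
	__KM_FENCE_3, KM_USER0,			/* = 6, 7 */
	__KM_FENCE_4, KM_USER1,			/* = 8, 9 */
	__KM_FENCE_5, KM_BIO_IRQ,		/* = 10, 11 */
	__KM_FENCE_6, KM_PTE0,			/* = 12, 13 */
	__KM_FENCE_7, KM_PTE1,			/* = 14, 15 */
	__KM_FENCE_8, KM_TYPE_NR		/* = 16, 17 */
};
/* Without the option, D(n) expands to nothing and the enum is dense. */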
@@ -5,15 +5,17 @@
 #include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <linux/threads.h>
+#include <linux/highmem.h>
-#define pgd_quicklist (current_cpu_data.pgd_quick)
-#define pmd_quicklist (current_cpu_data.pmd_quick)
-#define pte_quicklist (current_cpu_data.pte_quick)
-#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
-#define pmd_populate(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte) \
 		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+{
+	set_pmd(pmd, __pmd(_PAGE_TABLE +
+		((unsigned long long)(pte - mem_map) <<
+			(unsigned long long) PAGE_SHIFT)));
+}
 /*
  * Allocate and free page tables.
  */
@@ -29,7 +31,7 @@ extern void *kmem_cache_alloc(struct kmem_cache_s *, int);
 extern void kmem_cache_free(struct kmem_cache_s *, void *);
-static inline pgd_t *get_pgd_slow(void)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	int i;
 	pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
@@ -56,7 +58,7 @@ static inline pgd_t *get_pgd_slow(void)
 #else
-static inline pgd_t *get_pgd_slow(void)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
@@ -71,33 +73,7 @@ static inline pgd_t *get_pgd_slow(void)
 #endif /* CONFIG_X86_PAE */
-static inline pgd_t *get_pgd_fast(void)
-{
-	unsigned long *ret;
-	preempt_disable();
-	if ((ret = pgd_quicklist) != NULL) {
-		pgd_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-		preempt_enable();
-	} else {
-		preempt_enable();
-		ret = (unsigned long *)get_pgd_slow();
-	}
-	return (pgd_t *)ret;
-}
-static inline void free_pgd_fast(pgd_t *pgd)
-{
-	preempt_disable();
-	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
-	pgd_quicklist = (unsigned long *) pgd;
-	pgtable_cache_size++;
-	preempt_enable();
-}
-static inline void free_pgd_slow(pgd_t *pgd)
+static inline void pgd_free(pgd_t *pgd)
 {
 #if defined(CONFIG_X86_PAE)
 	int i;
@@ -110,64 +86,64 @@ static inline void free_pgd_slow(pgd_t *pgd)
 #endif
 }
-static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
+	int count = 0;
 	pte_t *pte;
-	pte = (pte_t *) __get_free_page(GFP_KERNEL);
-	if (pte)
-		clear_page(pte);
+	do {
+		pte = (pte_t *) __get_free_page(GFP_KERNEL);
+		if (pte)
+			clear_page(pte);
+		else {
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(HZ);
+		}
+	} while (!pte && (count++ < 10));
 	return pte;
 }
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
-					unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	unsigned long *ret;
-	preempt_disable();
-	if ((ret = (unsigned long *)pte_quicklist) != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = ret[1];
-		pgtable_cache_size--;
-	}
-	preempt_enable();
-	return (pte_t *)ret;
+	int count = 0;
+	struct page *pte;
+	do {
+#if CONFIG_HIGHPTE
+		pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0);
+#else
+		pte = alloc_pages(GFP_KERNEL, 0);
+#endif
+		if (pte)
+			clear_highpage(pte);
+		else {
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(HZ);
+		}
+	} while (!pte && (count++ < 10));
+	return pte;
 }
-static inline void pte_free_fast(pte_t *pte)
+static inline void pte_free_kernel(pte_t *pte)
 {
-	preempt_disable();
-	*(unsigned long *)pte = (unsigned long) pte_quicklist;
-	pte_quicklist = (unsigned long *) pte;
-	pgtable_cache_size++;
-	preempt_enable();
+	free_page((unsigned long)pte);
 }
-static __inline__ void pte_free_slow(pte_t *pte)
+static inline void pte_free(struct page *pte)
 {
-	free_page((unsigned long)pte);
+	__free_page(pte);
 }
-#define pte_free(pte)		pte_free_slow(pte)
-#define pgd_free(pgd)		free_pgd_slow(pgd)
-#define pgd_alloc(mm)		get_pgd_fast()
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
  * (In the PAE case we free the pmds as part of the pgd.)
  */
-#define pmd_alloc_one_fast(mm, addr)	({ BUG(); ((pmd_t *)1); })
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free_slow(x)		do { } while (0)
-#define pmd_free_fast(x)		do { } while (0)
 #define pmd_free(x)			do { } while (0)
 #define pgd_populate(mm, pmd, pte)	BUG()
-extern int do_check_pgt_cache(int, int);
 /*
  * TLB flushing:
 *
...
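Why pmd_populate() now takes a struct page instead of a virtual address: a pte page allocated with __GFP_HIGHMEM has no permanent kernel mapping, so __pa() on a kernel virtual address is unavailable; the physical address is derived from the struct page itself. A sketch of the arithmetic (assuming the flat i386 mem_map used here; the function name is hypothetical):

/* Sketch, not from the patch: building a pmd entry from a struct page
 * without ever touching a kernel virtual address. */
void sketch_pmd_entry(pmd_t *pmd, struct page *pte_page)
{
	unsigned long pfn = pte_page - mem_map;		/* page frame number */
	unsigned long long phys =
		(unsigned long long) pfn << PAGE_SHIFT;	/* physical address */

	/* _PAGE_TABLE supplies the present/rw/user bits; the 64-bit
	 * shift matters under PAE, where with 64GB-highpte the pte
	 * page can sit above 4GB. */
	set_pmd(pmd, __pmd(_PAGE_TABLE + phys));
}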
@@ -316,9 +316,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define page_pte(page) page_pte_prot(page, __pgprot(0))
-#define pmd_page(pmd) \
+#define pmd_page_kernel(pmd) \
 	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page(pmd) \
+	(mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -335,8 +338,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 /* Find an entry in the third-level page table.. */
 #define __pte_offset(address) \
 		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
-			__pte_offset(address))
+#define pte_offset_kernel(dir, address) \
+	((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address))
+#define pte_offset_map(dir, address) \
+	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + __pte_offset(address))
+#define pte_offset_map2(dir, address) \
+	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + __pte_offset(address))
+#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap2(pte) kunmap_atomic(pte, KM_PTE1)
 /*
  * The i386 doesn't have any external MMU info: the kernel page
...
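pte_offset_map() and pte_offset_map2() differ only in which atomic-kmap slot they use: KM_PTE0 versus KM_PTE1. Two slots exist so one code path can keep two pte pages mapped at once, as copy_page_range() does below with the source and destination page tables; pte_unmap()/pte_unmap2() must pair with the matching variant. A sketch of such a two-table walk (the function is illustrative, not from the commit; the caller is assumed to hold the page table lock, which also keeps the task on one CPU):

/* Sketch: copy one pte with both page tables atomically kmapped. */
void sketch_copy_one_pte(pmd_t *src_pmd, pmd_t *dst_pmd, unsigned long addr)
{
	pte_t *dst = pte_offset_map(dst_pmd, addr);	/* KM_PTE0 slot */
	pte_t *src = pte_offset_map2(src_pmd, addr);	/* KM_PTE1 slot */

	set_pte(dst, *src);
	pte_unmap2(src);	/* release KM_PTE1 */
	pte_unmap(dst);		/* release KM_PTE0 */
}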
@@ -49,10 +49,6 @@ struct cpuinfo_x86 {
 	int	f00f_bug;
 	int	coma_bug;
 	unsigned long loops_per_jiffy;
-	unsigned long *pgd_quick;
-	unsigned long *pmd_quick;
-	unsigned long *pte_quick;
-	unsigned long pgtable_cache_sz;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 #define X86_VENDOR_INTEL 0
...
@@ -4,7 +4,6 @@
 #include <linux/config.h>
 #include <linux/bio.h>
 #include <linux/fs.h>
-#include <asm/pgalloc.h>
 #ifdef CONFIG_HIGHMEM
@@ -16,6 +15,7 @@ extern struct page *highmem_start_page;
 unsigned int nr_free_highpages(void);
 extern void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig);
+extern void check_highmem_ptes(void);
 static inline char *bh_kmap(struct buffer_head *bh)
 {
@@ -92,8 +92,9 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
 static inline void clear_highpage(struct page *page)
 {
-	clear_page(kmap(page));
-	kunmap(page);
+	void *kaddr = kmap_atomic(page, KM_USER0);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr, KM_USER0);
 }
 /*
@@ -101,15 +102,16 @@ static inline void clear_highpage(struct page *page)
  */
 static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
 {
-	char *kaddr;
+	void *kaddr;
 	if (offset + size > PAGE_SIZE)
 		BUG();
-	kaddr = kmap(page);
-	memset(kaddr + offset, 0, size);
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset((char *)kaddr + offset, 0, size);
 	flush_dcache_page(page);
 	flush_page_to_ram(page);
-	kunmap(page);
+	kunmap_atomic(kaddr, KM_USER0);
 }
 static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr)
...
@@ -364,7 +364,8 @@ extern int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, un
 extern int vmtruncate(struct inode * inode, loff_t offset);
 extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
+extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
+extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
@@ -380,7 +381,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
 /*
  * On a two-level page table, this ends up being trivial. Thus the
- * inlining and the symmetry break with pte_alloc() that does all
+ * inlining and the symmetry break with pte_alloc_map() that does all
  * of this out-of-line.
  */
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
@@ -390,9 +391,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long a
 	return pmd_offset(pgd, address);
 }
-extern int pgt_cache_water[2];
-extern int check_pgt_cache(void);
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
 	unsigned long * zones_size, unsigned long zone_start_paddr,
...
@@ -21,6 +21,7 @@
 #include <linux/completion.h>
 #include <asm/mmu_context.h>
 #include <linux/kernel_stat.h>
+#include <linux/highmem.h>
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -761,6 +762,9 @@ asmlinkage void schedule(void)
 	if (unlikely(in_interrupt()))
 		BUG();
+#if CONFIG_DEBUG_HIGHMEM
+	check_highmem_ptes();
+#endif
 need_resched:
 	preempt_disable();
 	prev = current;
...
@@ -97,8 +97,6 @@ int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
 extern int acct_parm[];
 #endif
-extern int pgt_cache_water[];
 static int parse_table(int *, int, void *, size_t *, void *, size_t,
 		       ctl_table *, void **);
 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
@@ -268,8 +266,6 @@ static ctl_table vm_table[] = {
 	 sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
 	{VM_PAGERDAEMON, "kswapd",
 	 &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec},
-	{VM_PGT_CACHE, "pagetable_cache",
-	 &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
 	{VM_PAGE_CLUSTER, "page-cluster",
 	 &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
...
@@ -1974,7 +1974,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 /* Called with mm->page_table_lock held to protect against other
  * threads/the swapper from ripping pte's out from under us.
  */
-static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
+static inline int filemap_sync_pte(pte_t *ptep, pmd_t *pmdp, struct vm_area_struct *vma,
 	unsigned long address, unsigned int flags)
 {
 	pte_t pte = *ptep;
@@ -1990,11 +1990,10 @@
 }
 static inline int filemap_sync_pte_range(pmd_t * pmd,
-	unsigned long address, unsigned long size,
-	struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
+	unsigned long address, unsigned long end,
+	struct vm_area_struct *vma, unsigned int flags)
 {
-	pte_t * pte;
-	unsigned long end;
+	pte_t *pte;
 	int error;
 	if (pmd_none(*pmd))
@@ -2004,27 +2003,26 @@ static inline int filemap_sync_pte_range(pmd_t * pmd,
 		pmd_clear(pmd);
 		return 0;
 	}
-	pte = pte_offset(pmd, address);
-	offset += address & PMD_MASK;
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
+	pte = pte_offset_map(pmd, address);
+	if ((address & PMD_MASK) != (end & PMD_MASK))
+		end = (address & PMD_MASK) + PMD_SIZE;
 	error = 0;
 	do {
-		error |= filemap_sync_pte(pte, vma, address + offset, flags);
+		error |= filemap_sync_pte(pte, pmd, vma, address, flags);
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+	pte_unmap(pte - 1);
 	return error;
 }
 static inline int filemap_sync_pmd_range(pgd_t * pgd,
-	unsigned long address, unsigned long size,
+	unsigned long address, unsigned long end,
 	struct vm_area_struct *vma, unsigned int flags)
 {
 	pmd_t * pmd;
-	unsigned long offset, end;
 	int error;
 	if (pgd_none(*pgd))
@@ -2035,14 +2033,11 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd,
 		return 0;
 	}
 	pmd = pmd_offset(pgd, address);
-	offset = address & PGDIR_MASK;
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
+	if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
+		end = (address & PGDIR_MASK) + PGDIR_SIZE;
 	error = 0;
 	do {
-		error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
+		error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -2062,11 +2057,11 @@ int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 	spin_lock(&vma->vm_mm->page_table_lock);
 	dir = pgd_offset(vma->vm_mm, address);
-	flush_cache_range(vma, end - size, end);
+	flush_cache_range(vma, address, end);
 	if (address >= end)
 		BUG();
 	do {
-		error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
+		error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
...
@@ -20,6 +20,7 @@
 #include <linux/pagemap.h>
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
+#include <asm/pgalloc.h>
 static mempool_t *page_pool, *isa_page_pool;
@@ -445,3 +446,19 @@ void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig)
 	bio->bi_private = *bio_orig;
 	*bio_orig = bio;
 }
+#if CONFIG_DEBUG_HIGHMEM
+void check_highmem_ptes(void)
+{
+	int idx, type;
+
+	for (type = 0; type < KM_TYPE_NR; type++) {
+		idx = type + KM_TYPE_NR*smp_processor_id();
+		if (!pte_none(*(kmap_pte-idx))) {
+			printk("scheduling with KM_TYPE %d held!\n", type);
+			BUG();
+		}
+	}
+}
+#endif
...@@ -90,7 +90,7 @@ void __free_pte(pte_t pte) ...@@ -90,7 +90,7 @@ void __free_pte(pte_t pte)
*/ */
static inline void free_one_pmd(pmd_t * dir) static inline void free_one_pmd(pmd_t * dir)
{ {
pte_t * pte; struct page *pte;
if (pmd_none(*dir)) if (pmd_none(*dir))
return; return;
...@@ -99,7 +99,7 @@ static inline void free_one_pmd(pmd_t * dir) ...@@ -99,7 +99,7 @@ static inline void free_one_pmd(pmd_t * dir)
pmd_clear(dir); pmd_clear(dir);
return; return;
} }
pte = pte_offset(dir, 0); pte = pmd_page(*dir);
pmd_clear(dir); pmd_clear(dir);
pte_free(pte); pte_free(pte);
} }
...@@ -125,18 +125,6 @@ static inline void free_one_pgd(pgd_t * dir) ...@@ -125,18 +125,6 @@ static inline void free_one_pgd(pgd_t * dir)
pmd_free(pmd); pmd_free(pmd);
} }
/* Low and high watermarks for page table cache.
The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
*/
int pgt_cache_water[2] = { 25, 50 };
/* Returns the number of pages freed */
int check_pgt_cache(void)
{
return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
}
/* /*
* This function clears all user-level page tables of a process - this * This function clears all user-level page tables of a process - this
* is needed by execve(), so that old pages aren't in the way. * is needed by execve(), so that old pages aren't in the way.
...@@ -152,11 +140,59 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) ...@@ -152,11 +140,59 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
page_dir++; page_dir++;
} while (--nr); } while (--nr);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
}
pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
if (!pmd_present(*pmd)) {
struct page *new;
/* keep the page table cache within bounds */ spin_unlock(&mm->page_table_lock);
check_pgt_cache(); new = pte_alloc_one(mm, address);
spin_lock(&mm->page_table_lock);
if (!new)
return NULL;
/*
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (pmd_present(*pmd)) {
pte_free(new);
goto out;
}
pmd_populate(mm, pmd, new);
}
out:
if (pmd_present(*pmd))
return pte_offset_map(pmd, address);
return NULL;
} }
pte_t * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
if (!pmd_present(*pmd)) {
pte_t *new;
spin_unlock(&mm->page_table_lock);
new = pte_alloc_one_kernel(mm, address);
spin_lock(&mm->page_table_lock);
if (!new)
return NULL;
/*
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (pmd_present(*pmd)) {
pte_free_kernel(new);
goto out;
}
pmd_populate_kernel(mm, pmd, new);
}
out:
return pte_offset_kernel(pmd, address);
}
#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t)) #define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t))
#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t)) #define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t))
...@@ -169,7 +205,7 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) ...@@ -169,7 +205,7 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
* variable count and make things faster. -jj * variable count and make things faster. -jj
* *
* dst->page_table_lock is held on entry and exit, * dst->page_table_lock is held on entry and exit,
* but may be dropped within pmd_alloc() and pte_alloc(). * but may be dropped within pmd_alloc() and pte_alloc_map().
*/ */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src, int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma) struct vm_area_struct *vma)
...@@ -221,12 +257,11 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; ...@@ -221,12 +257,11 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
goto cont_copy_pmd_range; goto cont_copy_pmd_range;
} }
src_pte = pte_offset(src_pmd, address); dst_pte = pte_alloc_map(dst, dst_pmd, address);
dst_pte = pte_alloc(dst, dst_pmd, address);
if (!dst_pte) if (!dst_pte)
goto nomem; goto nomem;
spin_lock(&src->page_table_lock); spin_lock(&src->page_table_lock);
src_pte = pte_offset_map2(src_pmd, address);
do { do {
pte_t pte = *src_pte; pte_t pte = *src_pte;
struct page *ptepage; struct page *ptepage;
...@@ -259,11 +294,16 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; ...@@ -259,11 +294,16 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
cont_copy_pte_range: set_pte(dst_pte, pte); cont_copy_pte_range: set_pte(dst_pte, pte);
cont_copy_pte_range_noset: address += PAGE_SIZE; cont_copy_pte_range_noset: address += PAGE_SIZE;
if (address >= end) if (address >= end) {
pte_unmap2(src_pte);
pte_unmap(dst_pte);
goto out_unlock; goto out_unlock;
}
src_pte++; src_pte++;
dst_pte++; dst_pte++;
} while ((unsigned long)src_pte & PTE_TABLE_MASK); } while ((unsigned long)src_pte & PTE_TABLE_MASK);
pte_unmap2(src_pte-1);
pte_unmap(dst_pte-1);
spin_unlock(&src->page_table_lock); spin_unlock(&src->page_table_lock);
cont_copy_pmd_range: src_pmd++; cont_copy_pmd_range: src_pmd++;
...@@ -292,7 +332,7 @@ static inline void forget_pte(pte_t page) ...@@ -292,7 +332,7 @@ static inline void forget_pte(pte_t page)
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size) static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{ {
unsigned long offset; unsigned long offset;
pte_t * ptep; pte_t *ptep;
int freed = 0; int freed = 0;
if (pmd_none(*pmd)) if (pmd_none(*pmd))
...@@ -302,7 +342,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad ...@@ -302,7 +342,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
pmd_clear(pmd); pmd_clear(pmd);
return 0; return 0;
} }
ptep = pte_offset(pmd, address); ptep = pte_offset_map(pmd, address);
offset = address & ~PMD_MASK; offset = address & ~PMD_MASK;
if (offset + size > PMD_SIZE) if (offset + size > PMD_SIZE)
size = PMD_SIZE - offset; size = PMD_SIZE - offset;
...@@ -322,6 +362,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad ...@@ -322,6 +362,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
pte_clear(ptep); pte_clear(ptep);
} }
} }
pte_unmap(ptep-1);
return freed; return freed;
} }
...@@ -415,11 +456,16 @@ static struct page * follow_page(struct mm_struct *mm, unsigned long address, in ...@@ -415,11 +456,16 @@ static struct page * follow_page(struct mm_struct *mm, unsigned long address, in
if (pmd_none(*pmd) || pmd_bad(*pmd)) if (pmd_none(*pmd) || pmd_bad(*pmd))
goto out; goto out;
ptep = pte_offset(pmd, address); preempt_disable();
if (!ptep) ptep = pte_offset_map(pmd, address);
if (!ptep) {
preempt_enable();
goto out; goto out;
}
pte = *ptep; pte = *ptep;
pte_unmap(ptep);
preempt_enable();
if (pte_present(pte)) { if (pte_present(pte)) {
if (!write || if (!write ||
(pte_write(pte) && pte_dirty(pte))) (pte_write(pte) && pte_dirty(pte)))
...@@ -748,10 +794,11 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned ...@@ -748,10 +794,11 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
if (end > PGDIR_SIZE) if (end > PGDIR_SIZE)
end = PGDIR_SIZE; end = PGDIR_SIZE;
do { do {
pte_t * pte = pte_alloc(mm, pmd, address); pte_t * pte = pte_alloc_map(mm, pmd, address);
if (!pte) if (!pte)
return -ENOMEM; return -ENOMEM;
zeromap_pte_range(pte, address, end - address, prot); zeromap_pte_range(pte, address, end - address, prot);
pte_unmap(pte);
address = (address + PMD_SIZE) & PMD_MASK; address = (address + PMD_SIZE) & PMD_MASK;
pmd++; pmd++;
} while (address && (address < end)); } while (address && (address < end));
...@@ -828,10 +875,11 @@ static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned lo ...@@ -828,10 +875,11 @@ static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned lo
end = PGDIR_SIZE; end = PGDIR_SIZE;
phys_addr -= address; phys_addr -= address;
do { do {
pte_t * pte = pte_alloc(mm, pmd, address); pte_t * pte = pte_alloc_map(mm, pmd, address);
if (!pte) if (!pte)
return -ENOMEM; return -ENOMEM;
remap_pte_range(pte, address, end - address, address + phys_addr, prot); remap_pte_range(pte, address, end - address, address + phys_addr, prot);
pte_unmap(pte);
address = (address + PMD_SIZE) & PMD_MASK; address = (address + PMD_SIZE) & PMD_MASK;
pmd++; pmd++;
} while (address && (address < end)); } while (address && (address < end));
...@@ -917,7 +965,7 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page ...@@ -917,7 +965,7 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
* with the page_table_lock released. * with the page_table_lock released.
*/ */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte) unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
{ {
struct page *old_page, *new_page; struct page *old_page, *new_page;
...@@ -931,10 +979,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -931,10 +979,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
if (reuse) { if (reuse) {
flush_cache_page(vma, address); flush_cache_page(vma, address);
establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return 1; /* Minor fault */ return 1; /* Minor fault */
} }
} }
pte_unmap(page_table);
/* /*
* Ok, we need to copy. Oh, well.. * Ok, we need to copy. Oh, well..
...@@ -951,6 +1001,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -951,6 +1001,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
* Re-check the pte - we dropped the lock * Re-check the pte - we dropped the lock
*/ */
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (pte_same(*page_table, pte)) { if (pte_same(*page_table, pte)) {
if (PageReserved(old_page)) if (PageReserved(old_page))
++mm->rss; ++mm->rss;
...@@ -960,12 +1011,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -960,12 +1011,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
/* Free the old page.. */ /* Free the old page.. */
new_page = old_page; new_page = old_page;
} }
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
page_cache_release(new_page); page_cache_release(new_page);
page_cache_release(old_page); page_cache_release(old_page);
return 1; /* Minor fault */ return 1; /* Minor fault */
bad_wp_page: bad_wp_page:
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
return -1; return -1;
...@@ -1086,13 +1139,14 @@ void swapin_readahead(swp_entry_t entry) ...@@ -1086,13 +1139,14 @@ void swapin_readahead(swp_entry_t entry)
*/ */
static int do_swap_page(struct mm_struct * mm, static int do_swap_page(struct mm_struct * mm,
struct vm_area_struct * vma, unsigned long address, struct vm_area_struct * vma, unsigned long address,
pte_t * page_table, pte_t orig_pte, int write_access) pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
{ {
struct page *page; struct page *page;
swp_entry_t entry = pte_to_swp_entry(orig_pte); swp_entry_t entry = pte_to_swp_entry(orig_pte);
pte_t pte; pte_t pte;
int ret = 1; int ret = 1;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
page = lookup_swap_cache(entry); page = lookup_swap_cache(entry);
if (!page) { if (!page) {
...@@ -1105,7 +1159,9 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1105,7 +1159,9 @@ static int do_swap_page(struct mm_struct * mm,
*/ */
int retval; int retval;
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
retval = pte_same(*page_table, orig_pte) ? -1 : 1; retval = pte_same(*page_table, orig_pte) ? -1 : 1;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return retval; return retval;
} }
...@@ -1121,7 +1177,9 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1121,7 +1177,9 @@ static int do_swap_page(struct mm_struct * mm,
* released the page table lock. * released the page table lock.
*/ */
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (!pte_same(*page_table, orig_pte)) { if (!pte_same(*page_table, orig_pte)) {
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
...@@ -1146,6 +1204,7 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1146,6 +1204,7 @@ static int do_swap_page(struct mm_struct * mm,
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte); update_mmu_cache(vma, address, pte);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return ret; return ret;
} }
...@@ -1155,7 +1214,7 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1155,7 +1214,7 @@ static int do_swap_page(struct mm_struct * mm,
* spinlock held to protect against concurrent faults in * spinlock held to protect against concurrent faults in
* multithreaded programs. * multithreaded programs.
*/ */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, pmd_t *pmd, int write_access, unsigned long addr)
{ {
pte_t entry; pte_t entry;
...@@ -1167,6 +1226,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, ...@@ -1167,6 +1226,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
struct page *page; struct page *page;
/* Allocate our own private page. */ /* Allocate our own private page. */
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
page = alloc_page(GFP_HIGHUSER); page = alloc_page(GFP_HIGHUSER);
...@@ -1175,7 +1235,10 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, ...@@ -1175,7 +1235,10 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
clear_user_highpage(page, addr); clear_user_highpage(page, addr);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, addr);
if (!pte_none(*page_table)) { if (!pte_none(*page_table)) {
pte_unmap(page_table);
page_cache_release(page); page_cache_release(page);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return 1; return 1;
@@ -1187,6 +1250,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
 	}
 	set_pte(page_table, entry);
+	pte_unmap(page_table);

 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
@@ -1210,13 +1274,14 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
  * spinlock held. Exit with the spinlock released.
  */
 static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
-	unsigned long address, int write_access, pte_t *page_table)
+	unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
 {
 	struct page * new_page;
 	pte_t entry;

 	if (!vma->vm_ops || !vma->vm_ops->nopage)
-		return do_anonymous_page(mm, vma, page_table, write_access, address);
+		return do_anonymous_page(mm, vma, page_table, pmd, write_access, address);
+	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);

 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
@@ -1242,6 +1307,8 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 	}
 	spin_lock(&mm->page_table_lock);
+	page_table = pte_offset_map(pmd, address);
+
 	/*
 	 * This silly early PAGE_DIRTY setting removes a race
 	 * due to the bad i386 page protection. But it's valid
@@ -1261,8 +1328,10 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 		if (write_access)
 			entry = pte_mkwrite(pte_mkdirty(entry));
 		set_pte(page_table, entry);
+		pte_unmap(page_table);
 	} else {
 		/* One of our sibling threads was faster, back out. */
+		pte_unmap(page_table);
 		page_cache_release(new_page);
 		spin_unlock(&mm->page_table_lock);
 		return 1;
@@ -1297,7 +1366,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
  */
 static inline int handle_pte_fault(struct mm_struct *mm,
 	struct vm_area_struct * vma, unsigned long address,
-	int write_access, pte_t * pte)
+	int write_access, pte_t *pte, pmd_t *pmd)
 {
 	pte_t entry;
@@ -1309,18 +1378,19 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 		 * drop the lock.
 		 */
 		if (pte_none(entry))
-			return do_no_page(mm, vma, address, write_access, pte);
-		return do_swap_page(mm, vma, address, pte, entry, write_access);
+			return do_no_page(mm, vma, address, write_access, pte, pmd);
+		return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
 	}

 	if (write_access) {
 		if (!pte_write(entry))
-			return do_wp_page(mm, vma, address, pte, entry);
+			return do_wp_page(mm, vma, address, pte, pmd, entry);
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
 	establish_pte(vma, address, pte, entry);
+	pte_unmap(pte);
 	spin_unlock(&mm->page_table_lock);
 	return 1;
 }
@@ -1345,9 +1415,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 	pmd = pmd_alloc(mm, pgd, address);

 	if (pmd) {
-		pte_t * pte = pte_alloc(mm, pmd, address);
+		pte_t * pte = pte_alloc_map(mm, pmd, address);
 		if (pte)
-			return handle_pte_fault(mm, vma, address, write_access, pte);
+			return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
 	}
 	spin_unlock(&mm->page_table_lock);
 	return -1;
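The handle_mm_fault() change above shows the discipline every fault path now follows: map under the lock, unmap on every exit, never hold the mapping across a sleep. A hypothetical helper illustrating the pattern (the names are the kernel's; the function itself is not part of this commit):

/* Illustration only -- not part of this commit. */
static int example_touch_pte(struct mm_struct *mm, pmd_t *pmd,
                             unsigned long address)
{
        pte_t *pte;

        spin_lock(&mm->page_table_lock);
        pte = pte_offset_map(pmd, address);     /* atomic kmap, under the lock */
        if (pte_none(*pte)) {
                pte_unmap(pte);                 /* unmap on *every* exit path */
                spin_unlock(&mm->page_table_lock);
                return 0;
        }
        set_pte(pte, pte_mkyoung(*pte));        /* modify while still mapped */
        pte_unmap(pte);                         /* unmap before unlocking */
        spin_unlock(&mm->page_table_lock);
        return 1;
}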
@@ -1366,64 +1436,25 @@ pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
 	pmd_t *new;

-	/* "fast" allocation can happen without dropping the lock.. */
-	new = pmd_alloc_one_fast(mm, address);
-	if (!new) {
-		spin_unlock(&mm->page_table_lock);
-		new = pmd_alloc_one(mm, address);
-		spin_lock(&mm->page_table_lock);
-		if (!new)
-			return NULL;
+	spin_unlock(&mm->page_table_lock);
+	new = pmd_alloc_one(mm, address);
+	spin_lock(&mm->page_table_lock);
+	if (!new)
+		return NULL;

 	/*
 	 * Because we dropped the lock, we should re-check the
 	 * entry, as somebody else could have populated it..
 	 */
-	if (!pgd_none(*pgd)) {
+	if (pgd_present(*pgd)) {
 		pmd_free(new);
 		goto out;
-	}
 	}
 	pgd_populate(mm, pgd, new);
 out:
 	return pmd_offset(pgd, address);
 }
-/*
- * Allocate the page table directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
- */
-pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
-{
-	if (pmd_none(*pmd)) {
-		pte_t *new;
-
-		/* "fast" allocation can happen without dropping the lock.. */
-		new = pte_alloc_one_fast(mm, address);
-		if (!new) {
-			spin_unlock(&mm->page_table_lock);
-			new = pte_alloc_one(mm, address);
-			spin_lock(&mm->page_table_lock);
-			if (!new)
-				return NULL;
-
-			/*
-			 * Because we dropped the lock, we should re-check the
-			 * entry, as somebody else could have populated it..
-			 */
-			if (!pmd_none(*pmd)) {
-				pte_free(new);
-				goto out;
-			}
-		}
-		pmd_populate(mm, pmd, new);
-	}
-out:
-	return pte_offset(pmd, address);
-}
-
 int make_pages_present(unsigned long addr, unsigned long end)
 {
 	int ret, len, write;
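pte_alloc() disappears from mm/memory.c here; its replacement, pte_alloc_map(), returns a PTE that is already mapped. A hedged sketch of the shape such a helper takes, keeping the same drop-lock/re-check protocol as __pmd_alloc() above; it assumes pte_alloc_one() now hands back the PTE page as a struct page (so a highmem page never needs a permanent kernel mapping), and the committed definition may differ:

/* Sketch only -- the committed pte_alloc_map() may differ. */
pte_t *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
        if (pmd_none(*pmd)) {
                struct page *new;       /* assumed return type, see above */

                spin_unlock(&mm->page_table_lock);
                new = pte_alloc_one(mm, address);
                spin_lock(&mm->page_table_lock);
                if (!new)
                        return NULL;
                /*
                 * Because we dropped the lock, we should re-check the
                 * entry, as somebody else could have populated it..
                 */
                if (pmd_present(*pmd)) {
                        pte_free(new);
                        goto out;
                }
                pmd_populate(mm, pmd, new);
        }
out:
        return pte_offset_map(pmd, address);    /* returned PTE is mapped */
}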
...
@@ -13,6 +13,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include <linux/highmem.h>

 static inline void change_pte_range(pmd_t * pmd, unsigned long address,
 	unsigned long size, pgprot_t newprot)
@@ -27,7 +28,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address,
 		pmd_clear(pmd);
 		return;
 	}
-	pte = pte_offset(pmd, address);
+	pte = pte_offset_map(pmd, address);
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -46,6 +47,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address,
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+	pte_unmap(pte - 1);
 }

 static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
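A note on the pte_unmap(pte - 1) idiom introduced here (and in the swapfile and vmscan sweeps below): the do/while loop leaves pte pointing one entry past the last one visited, so pte - 1 still points into the mapped PTE page, and an address anywhere inside that page is all the unmap needs to identify which atomic kmap to drop.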
...
@@ -17,7 +17,7 @@
 extern int vm_enough_memory(long pages);

-static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
+static inline pte_t *get_one_pte_map2(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t * pgd;
 	pmd_t * pmd;
@@ -41,21 +41,23 @@ static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
 		goto end;
 	}

-	pte = pte_offset(pmd, addr);
-	if (pte_none(*pte))
+	pte = pte_offset_map2(pmd, addr);
+	if (pte_none(*pte)) {
+		pte_unmap2(pte);
 		pte = NULL;
+	}
 end:
 	return pte;
 }

-static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr)
+static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
 {
 	pmd_t * pmd;
 	pte_t * pte = NULL;

 	pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
 	if (pmd)
-		pte = pte_alloc(mm, pmd, addr);
+		pte = pte_alloc_map(mm, pmd, addr);
 	return pte;
 }
@@ -79,12 +81,16 @@ static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst)
 static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr)
 {
 	int error = 0;
-	pte_t * src;
+	pte_t *src, *dst;

 	spin_lock(&mm->page_table_lock);
-	src = get_one_pte(mm, old_addr);
-	if (src)
-		error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr));
+	src = get_one_pte_map2(mm, old_addr);
+	if (src) {
+		dst = alloc_one_pte_map(mm, new_addr);
+		error = copy_one_pte(mm, src, dst);
+		pte_unmap2(src);
+		pte_unmap(dst);
+	}
 	spin_unlock(&mm->page_table_lock);
 	return error;
 }
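move_one_page() is the one place that must hold two PTE mappings at once (source and destination), which is what the new *2 variants are for: they would claim a second atomic-kmap slot so the two mappings cannot collide. A sketch under that assumption (KM_PTE1 as the second slot is assumed; the committed macros may differ):

/* Sketch only -- assumes a second fixmap slot KM_PTE1 for nested maps. */
#define pte_offset_map2(dir, address) \
        ((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) + \
                __pte_offset(address))
#define pte_unmap2(pte) kunmap_atomic((pte), KM_PTE1)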
...
@@ -393,7 +393,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 		pmd_clear(dir);
 		return;
 	}
-	pte = pte_offset(dir, address);
+	pte = pte_offset_map(dir, address);
 	offset += address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;
@@ -404,6 +404,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+	pte_unmap(pte - 1);
 }

 /* mmlist_lock and vma->vm_mm->page_table_lock are held */
...
@@ -30,7 +30,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo
 		pmd_clear(pmd);
 		return;
 	}
-	pte = pte_offset(pmd, address);
+	pte = pte_offset_kernel(pmd, address);
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -125,7 +125,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 	do {
-		pte_t * pte = pte_alloc(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		if (alloc_area_pte(pte, address, end - address, gfp_mask, prot))
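The vmalloc paths switch to the _kernel variants rather than the mapping ones: init_mm's pagetables are not allocated in highmem, so they are always directly addressable and need no map/unmap pairing. This mirrors the pte_offset_kernel() use in the trap_init_f00f_bug() hunk earlier in the commit.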
...
@@ -167,7 +167,7 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
 		return count;
 	}

-	pte = pte_offset(dir, address);
+	pte = pte_offset_map(dir, address);

 	pmd_end = (address + PMD_SIZE) & PMD_MASK;
 	if (end > pmd_end)
@@ -181,6 +181,7 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
 			count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
 			if (!count) {
 				address += PAGE_SIZE;
+				pte++;
 				break;
 			}
@@ -188,6 +189,7 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+	pte_unmap(pte - 1);
 	mm->swap_address = address;
 	return count;
 }
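The extra pte++ ahead of the early break is not cosmetic: the trailing pte_unmap(pte - 1) assumes pte points one entry past the last one examined. Without the increment, an early exit at the very first entry of the PTE page would make pte - 1 fall outside the mapped page, handing the unmap an address in a different (possibly unmapped) page.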
...