Commit b2229e8d authored by Andrew Morton, committed by Linus Torvalds

[PATCH] hugetlb fixes and cleanups

huge_page_release()             -- hugepage refcounting
free_huge_page()                -- separates freeing from inode refcounting
unmap_hugepage_range()          -- unmapping refcounting hook when locked
zap_hugepage_range()            -- unmapping refcounting hook when unlocked
export setattr_mask()           -- hugetlbfs wants to call it
export destroy_inode()          -- hugetlbfs wants to use it
export unmap_vma()              -- hugetlbpage.c wants to use it
unlock_page() in hugetlbpage.c  -- fixes deadlock in hugetlbfs_truncate()
parent 5c7eb9d8
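For orientation, the refcounting split works roughly as sketched below. This is a condensed restatement of the new helpers in the hugetlbpage.c hunks further down (htlbpage_lock, htlbpage_freelist and htlbpagemem are the existing hugetlb freelist symbols already used in that file): free_huge_page() only returns a page to the freelist, huge_page_release() drops a reference and frees on the final put, and zap_hugepage_range() is the entry point for callers that do not already hold mm->page_table_lock.

/* Sketch, condensed from the diff below -- not a drop-in replacement. */
void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));	/* last reference already dropped */
	BUG_ON(page->mapping);		/* no longer in the page cache */
	INIT_LIST_HEAD(&page->list);

	spin_lock(&htlbpage_lock);
	list_add(&page->list, &htlbpage_freelist);	/* back on the huge page freelist */
	htlbpagemem++;
	spin_unlock(&htlbpage_lock);
}

void huge_page_release(struct page *page)
{
	if (!put_page_testzero(page))	/* free only on the final put */
		return;
	free_huge_page(page);
}

/* Unlocked hook: take page_table_lock, then call the locked helper. */
void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length)
{
	struct mm_struct *mm = vma->vm_mm;

	spin_lock(&mm->page_table_lock);
	unmap_hugepage_range(vma, start, start + length);
	spin_unlock(&mm->page_table_lock);
}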
......@@ -249,91 +249,68 @@ asmlinkage int sys_olduname(struct oldold_utsname * name)
}
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_ALIGN(x) (((unsigned long)x + (HPAGE_SIZE -1)) & HPAGE_MASK)
extern long sys_munmap(unsigned long, size_t);
/* get_addr function gets the currently unused virtaul range in
* current process's address space. It returns the LARGE_PAGE_SIZE
* current process's address space. It returns the HPAGE_SIZE
* aligned address (in cases of success). Other kernel generic
* routines only could gurantee that allocated address is PAGE_SIZSE aligned.
* routines only could gurantee that allocated address is PAGE_SIZE aligned.
*/
static unsigned long
get_addr(unsigned long addr, unsigned long len)
static unsigned long get_addr(unsigned long addr, unsigned long len)
{
struct vm_area_struct *vma;
struct vm_area_struct *vma;
if (addr) {
addr = HPAGE_ALIGN(addr);
addr = (addr + HPAGE_SIZE - 1) & HPAGE_MASK;
vma = find_vma(current->mm, addr);
if (((TASK_SIZE - len) >= addr) &&
(!vma || addr + len <= vma->vm_start))
if (TASK_SIZE > addr + len && !(vma && addr + len >= vma->vm_start))
goto found_addr;
}
addr = HPAGE_ALIGN(TASK_UNMAPPED_BASE);
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
if (TASK_SIZE - len < addr)
return -ENOMEM;
if (!vma || ((addr + len) < vma->vm_start))
addr = TASK_UNMAPPED_BASE;
for (vma = find_vma(current->mm, addr); TASK_SIZE > addr + len; vma = vma->vm_next) {
if (!vma || addr + len < vma->vm_start)
goto found_addr;
addr = HPAGE_ALIGN(vma->vm_end);
addr = (vma->vm_end + HPAGE_SIZE - 1) & HPAGE_MASK;
}
return -ENOMEM;
found_addr:
return addr;
}
asmlinkage unsigned long
sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag)
asmlinkage unsigned long sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
struct mm_struct *mm = current->mm;
unsigned long raddr;
int retval = 0;
extern int alloc_hugetlb_pages(int, unsigned long, unsigned long, int, int);
if (!(cpu_has_pse))
return -EINVAL;
if (key < 0)
return -EINVAL;
if (len & (HPAGE_SIZE - 1))
if (!cpu_has_pse || key < 0 || len & ~HPAGE_MASK)
return -EINVAL;
down_write(&mm->mmap_sem);
raddr = get_addr(addr, len);
if (raddr == -ENOMEM)
goto raddr_out;
retval = alloc_hugetlb_pages(key, raddr, len, prot, flag);
raddr_out: up_write(&mm->mmap_sem);
if (retval < 0)
return (unsigned long) retval;
return raddr;
if (raddr != -ENOMEM)
retval = alloc_hugetlb_pages(key, raddr, len, prot, flag);
up_write(&mm->mmap_sem);
return (retval < 0) ? (unsigned long)retval : raddr;
}
asmlinkage int
sys_free_hugepages(unsigned long addr)
asmlinkage int sys_free_hugepages(unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int retval;
extern int free_hugepages(struct vm_area_struct *);
struct vm_area_struct *vma;
int retval;
vma = find_vma(current->mm, addr);
if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr))
if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr)
return -EINVAL;
down_write(&mm->mmap_sem);
spin_lock(&mm->page_table_lock);
retval = free_hugepages(vma);
spin_unlock(&mm->page_table_lock);
retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr);
up_write(&mm->mmap_sem);
return retval;
}
#else
asmlinkage unsigned long
sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag)
asmlinkage unsigned long sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag)
{
return -ENOSYS;
}
asmlinkage int
sys_free_hugepages(unsigned long addr)
asmlinkage int sys_free_hugepages(unsigned long addr)
{
return -ENOSYS;
}
#endif
......@@ -31,8 +31,7 @@ struct htlbpagekey {
int key;
} htlbpagek[MAX_ID];
static struct inode *
find_key_inode(int key)
static struct inode *find_key_inode(int key)
{
int i;
......@@ -42,8 +41,8 @@ find_key_inode(int key)
}
return NULL;
}
static struct page *
alloc_hugetlb_page(void)
static struct page *alloc_hugetlb_page(void)
{
int i;
struct page *page;
......@@ -64,36 +63,7 @@ alloc_hugetlb_page(void)
return page;
}
static void
free_hugetlb_page(struct page *page)
{
spin_lock(&htlbpage_lock);
if ((page->mapping != NULL) && (page_count(page) == 2)) {
struct inode *inode = page->mapping->host;
int i;
ClearPageDirty(page);
remove_from_page_cache(page);
set_page_count(page, 1);
if ((inode->i_size -= HPAGE_SIZE) == 0) {
for (i = 0; i < MAX_ID; i++)
if (htlbpagek[i].key == inode->i_ino) {
htlbpagek[i].key = 0;
htlbpagek[i].in = NULL;
break;
}
kfree(inode);
}
}
if (put_page_testzero(page)) {
list_add(&page->list, &htlbpage_freelist);
htlbpagemem++;
}
spin_unlock(&htlbpage_lock);
}
static pte_t *
huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pmd_t *pmd = NULL;
......@@ -103,8 +73,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return (pte_t *) pmd;
}
static pte_t *
huge_pte_offset(struct mm_struct *mm, unsigned long addr)
static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pmd_t *pmd = NULL;
......@@ -116,9 +85,7 @@ huge_pte_offset(struct mm_struct *mm, unsigned long addr)
#define mk_pte_huge(entry) {entry.pte_low |= (_PAGE_PRESENT | _PAGE_PSE);}
static void
set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page, pte_t * page_table, int write_access)
static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access)
{
pte_t entry;
......@@ -131,24 +98,17 @@ set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
entry = pte_mkyoung(entry);
mk_pte_huge(entry);
set_pte(page_table, entry);
return;
}
static int
anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
int write_access, pte_t * page_table)
static int anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, int write_access, pte_t *page_table)
{
struct page *page;
page = alloc_hugetlb_page();
if (page == NULL)
return -1;
set_huge_pte(mm, vma, page, page_table, write_access);
return 1;
struct page *page = alloc_hugetlb_page();
if (page)
set_huge_pte(mm, vma, page, page_table, write_access);
return page ? 1 : -1;
}
int
make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags)
int make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags)
{
int write;
struct mm_struct *mm = current->mm;
......@@ -254,31 +214,61 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
return i;
}
void
zap_hugetlb_resources(struct vm_area_struct *mpnt)
void free_huge_page(struct page *page)
{
BUG_ON(page_count(page));
BUG_ON(page->mapping);
INIT_LIST_HEAD(&page->list);
spin_lock(&htlbpage_lock);
list_add(&page->list, &htlbpage_freelist);
htlbpagemem++;
spin_unlock(&htlbpage_lock);
}
void huge_page_release(struct page *page)
{
if (!put_page_testzero(page))
return;
free_huge_page(page);
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
struct mm_struct *mm = mpnt->vm_mm;
unsigned long len, addr, end;
pte_t *ptep;
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
pte_t *pte;
struct page *page;
addr = mpnt->vm_start;
end = mpnt->vm_end;
len = end - addr;
do {
ptep = huge_pte_offset(mm, addr);
page = pte_page(*ptep);
pte_clear(ptep);
free_hugetlb_page(page);
addr += HPAGE_SIZE;
} while (addr < end);
mm->rss -= (len >> PAGE_SHIFT);
mpnt->vm_ops = NULL;
flush_tlb_range(mpnt, end - len, end);
BUG_ON(start & (HPAGE_SIZE - 1));
BUG_ON(end & (HPAGE_SIZE - 1));
for (address = start; address < end; address += HPAGE_SIZE) {
pte = huge_pte_offset(mm, address);
page = pte_page(*pte);
huge_page_release(page);
pte_clear(pte);
}
mm->rss -= (end - start) >> PAGE_SHIFT;
flush_tlb_range(vma, start, end);
}
static void
unlink_vma(struct vm_area_struct *mpnt)
void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length)
{
struct mm_struct *mm = vma->vm_mm;
spin_lock(&mm->page_table_lock);
unmap_hugepage_range(vma, start, start + length);
spin_unlock(&mm->page_table_lock);
}
void zap_hugetlb_resources(struct vm_area_struct *vma)
{
zap_hugepage_range(vma, vma->vm_start, vma->vm_end);
}
static void unlink_vma(struct vm_area_struct *mpnt)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
......@@ -297,17 +287,7 @@ unlink_vma(struct vm_area_struct *mpnt)
mm->map_count--;
}
int
free_hugepages(struct vm_area_struct *mpnt)
{
unlink_vma(mpnt);
zap_hugetlb_resources(mpnt);
kmem_cache_free(vm_area_cachep, mpnt);
return 1;
}
static struct inode *
set_new_inode(unsigned long len, int prot, int flag, int key)
static struct inode *set_new_inode(unsigned long len, int prot, int flag, int key)
{
struct inode *inode;
int i;
......@@ -337,8 +317,7 @@ set_new_inode(unsigned long len, int prot, int flag, int key)
return inode;
}
static int
check_size_prot(struct inode *inode, unsigned long len, int prot, int flag)
static int check_size_prot(struct inode *inode, unsigned long len, int prot, int flag)
{
if (inode->i_uid != current->fsuid)
return -1;
......@@ -349,9 +328,7 @@ check_size_prot(struct inode *inode, unsigned long len, int prot, int flag)
return 0;
}
static int
alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long len,
int prot, int flag)
static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
......@@ -474,6 +451,7 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
goto out;
}
add_to_page_cache(page, mapping, idx);
unlock_page(page);
}
set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
}
......@@ -482,9 +460,7 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
return ret;
}
static int
alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len,
int prot, int flag)
static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
if (!capable(CAP_SYS_ADMIN)) {
if (!in_group_p(0))
......@@ -501,17 +477,14 @@ alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len,
return 0;
}
int
alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot,
int flag)
int alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
if (key > 0)
return alloc_shared_hugetlb_pages(key, addr, len, prot, flag);
return alloc_private_hugetlb_pages(key, addr, len, prot, flag);
}
int
set_hugetlb_mem_size(int count)
int set_hugetlb_mem_size(int count)
{
int j, lcount;
struct page *page, *map;
......@@ -564,5 +537,4 @@ set_hugetlb_mem_size(int count)
}
static struct vm_operations_struct hugetlb_vm_ops = {
.close = zap_hugetlb_resources,
};
......@@ -95,7 +95,7 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
return error;
}
static int setattr_mask(unsigned int ia_valid)
int setattr_mask(unsigned int ia_valid)
{
unsigned long dn_mask = 0;
......
......@@ -142,7 +142,7 @@ static struct inode *alloc_inode(struct super_block *sb)
return inode;
}
static void destroy_inode(struct inode *inode)
void destroy_inode(struct inode *inode)
{
if (inode_has_buffers(inode))
BUG();
......
......@@ -1147,6 +1147,7 @@ extern int filemap_fdatawrite(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(void);
extern sector_t bmap(struct inode *, sector_t);
extern int setattr_mask(unsigned int);
extern int notify_change(struct dentry *, struct iattr *);
extern int permission(struct inode *, int);
extern int vfs_permission(struct inode *, int);
......@@ -1225,6 +1226,7 @@ static inline struct inode *iget(struct super_block *sb, unsigned long ino)
extern void __iget(struct inode * inode);
extern void clear_inode(struct inode *);
extern void destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
extern void remove_suid(struct dentry *);
......
......@@ -8,41 +8,24 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
}
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **, unsigned long *, int *, int);
int free_hugepages(struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
void huge_page_release(struct page *);
#else /* !CONFIG_HUGETLB_PAGE */
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
{
return 0;
}
static inline int
copy_hugetlb_page_range(struct mm_struct *src, struct mm_struct *dst,
struct vm_area_struct *vma)
{
return -ENOSYS;
}
static inline int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *start, int *len, int i)
{
return -ENOSYS;
}
#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; })
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
#define zap_hugepage_range(vma, start, len) BUG()
#define unmap_hugepage_range(vma, start, end) BUG()
#define huge_page_release(page) BUG()
static inline int free_hugepages(struct vm_area_struct *vma)
{
return -EINVAL;
}
static inline int
hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
return -ENOSYS;
}
#endif /* !CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_HUGETLBFS
......@@ -50,29 +33,21 @@ extern struct file_operations hugetlbfs_file_operations;
extern struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_zero_setup(size_t);
static inline int is_file_hugetlb_page(struct file *file)
static inline int is_file_hugepages(struct file *file)
{
return file->f_op == &hugetlbfs_file_operations;
}
static inline void set_file_hugetlb_page(struct file *file)
static inline void set_file_hugepages(struct file *file)
{
file->f_op = &hugetlbfs_file_operations;
}
#else /* !CONFIG_HUGETLBFS */
static inline int is_file_hugetlb_page(struct file *file)
{
return 0;
}
static inline void set_file_hugetlb_page(struct file *file)
{
}
#define is_file_hugepages(file) 0
#define set_file_hugepages(file) BUG()
#define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS)
static inline struct file *hugetlb_zero_setup(size_t size)
{
return ERR_PTR(-ENOSYS);
}
#endif /* !CONFIG_HUGETLBFS */
#endif /* _LINUX_HUGETLB_H */
......@@ -519,6 +519,7 @@ extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned lon
struct vm_area_struct **pprev);
extern int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long addr, int new_below);
extern void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area);
/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
NULL if none. Assume start_addr < end_addr. */
......
......@@ -398,6 +398,11 @@ void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned lo
{
pgd_t * dir;
if (is_vm_hugetlb_page(vma)) {
unmap_hugepage_range(vma, address, end);
return;
}
BUG_ON(address >= end);
dir = pgd_offset(vma->vm_mm, address);
......@@ -437,6 +442,11 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
mmu_gather_t *tlb;
unsigned long end, block;
if (is_vm_hugetlb_page(vma)) {
zap_hugepage_range(vma, address, size);
return;
}
spin_lock(&mm->page_table_lock);
/*
......
......@@ -940,7 +940,7 @@ static void free_pgtables(mmu_gather_t *tlb, struct vm_area_struct *prev,
* By the time this function is called, the area struct has been
* removed from the process mapping list.
*/
static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
{
size_t len = area->vm_end - area->vm_start;
......@@ -1025,14 +1025,10 @@ static struct vm_area_struct *touched_by_munmap(struct mm_struct *mm,
touched = NULL;
do {
struct vm_area_struct *next = mpnt->vm_next;
if (!(is_vm_hugetlb_page(mpnt))) {
mpnt->vm_next = touched;
touched = mpnt;
rb_erase(&mpnt->vm_rb, &mm->mm_rb);
mm->map_count--;
}
else
free_hugepages(mpnt);
mpnt->vm_next = touched;
touched = mpnt;
rb_erase(&mpnt->vm_rb, &mm->mm_rb);
mm->map_count--;
mpnt = next;
} while (mpnt && mpnt->vm_start < end);
*npp = mpnt;
......@@ -1285,10 +1281,7 @@ void exit_mmap(struct mm_struct * mm)
vm_unacct_memory((end - start) >> PAGE_SHIFT);
mm->map_count--;
if (!(is_vm_hugetlb_page(mpnt)))
unmap_page_range(tlb, mpnt, start, end);
else
mpnt->vm_ops->close(mpnt);
unmap_page_range(tlb, mpnt, start, end);
mpnt = mpnt->vm_next;
}
......