Commit f19dc938 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] hugetlb bugfixes

From Rohit Seth

1) Bug fixes (mainly in the handling of unsuccessful hugepage allocation attempts).

   i) not modifying the value of key for unsuccessful key
      allocation

   ii) Correct usage of mmap_sem in free_hugepages

   iii) Proper unlocking of key->lock for partial hugepage
        allocations


2) Include the IPC_LOCK for permission to use hugepages via the
   syscall interface.  This brings the syscall interface into line with
   the hugetlbfs interface.

   It also permits users who are in the superuser group to
   access hugetlb resources.  This is so that database servers can run
   without elevated permissions.

3) Increment the key_counts during forks to correctly identify the
   number of processes referencing a key.
parent 0c74aabb
...@@ -298,17 +298,17 @@ asmlinkage int sys_free_hugepages(unsigned long addr) ...@@ -298,17 +298,17 @@ asmlinkage int sys_free_hugepages(unsigned long addr)
{ {
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
struct hugetlb_key *key;
int retval; int retval;
vma = find_vma(current->mm, addr);
if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr)
return -EINVAL;
down_write(&mm->mmap_sem); down_write(&mm->mmap_sem);
key = (struct hugetlb_key *)vma->vm_private_data; vma = find_vma(current->mm, addr);
if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr) {
retval = -EINVAL;
goto out;
}
retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr); retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr);
out:
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
hugetlb_release_key(key);
return retval; return retval;
} }
#else #else
......
...@@ -84,7 +84,7 @@ struct hugetlb_key *alloc_key(int key, unsigned long len, int prot, int flag) ...@@ -84,7 +84,7 @@ struct hugetlb_key *alloc_key(int key, unsigned long len, int prot, int flag)
spin_lock(&htlbpage_lock); spin_lock(&htlbpage_lock);
hugetlb_key = find_key(key); hugetlb_key = find_key(key);
if (!hugetlb_key) { if (!hugetlb_key) {
if (!capable(CAP_SYS_ADMIN) || !in_group_p(0)) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_IPC_LOCK) || !in_group_p(0))
hugetlb_key = ERR_PTR(-EPERM); hugetlb_key = ERR_PTR(-EPERM);
else if (!(flag & IPC_CREAT)) else if (!(flag & IPC_CREAT))
hugetlb_key = ERR_PTR(-ENOENT); hugetlb_key = ERR_PTR(-ENOENT);
...@@ -110,7 +110,6 @@ struct hugetlb_key *alloc_key(int key, unsigned long len, int prot, int flag) ...@@ -110,7 +110,6 @@ struct hugetlb_key *alloc_key(int key, unsigned long len, int prot, int flag)
hugetlb_key = ERR_PTR(-EAGAIN); hugetlb_key = ERR_PTR(-EAGAIN);
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
} else if (check_size_prot(hugetlb_key, len, prot, flag) < 0) { } else if (check_size_prot(hugetlb_key, len, prot, flag) < 0) {
hugetlb_key->key = 0;
hugetlb_key = ERR_PTR(-EINVAL); hugetlb_key = ERR_PTR(-EINVAL);
} }
} while (hugetlb_key == ERR_PTR(-EAGAIN)); } while (hugetlb_key == ERR_PTR(-EAGAIN));
...@@ -250,7 +249,7 @@ static int make_hugetlb_pages_present(unsigned long addr, unsigned long end, int ...@@ -250,7 +249,7 @@ static int make_hugetlb_pages_present(unsigned long addr, unsigned long end, int
vma->vm_end = end; vma->vm_end = end;
} }
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
out_error1: out_error1:
return -1; return -1;
} }
...@@ -262,7 +261,10 @@ copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -262,7 +261,10 @@ copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct page *ptepage; struct page *ptepage;
unsigned long addr = vma->vm_start; unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end; unsigned long end = vma->vm_end;
struct hugetlb_key *key = vma->vm_private_data;
if (key)
atomic_inc(&key->count);
while (addr < end) { while (addr < end) {
dst_pte = huge_pte_alloc(dst, addr); dst_pte = huge_pte_alloc(dst, addr);
if (!dst_pte) if (!dst_pte)
...@@ -355,6 +357,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig ...@@ -355,6 +357,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
for (address = start; address < end; address += HPAGE_SIZE) { for (address = start; address < end; address += HPAGE_SIZE) {
pte = huge_pte_offset(mm, address); pte = huge_pte_offset(mm, address);
if (pte_none(*pte))
continue;
page = pte_page(*pte); page = pte_page(*pte);
huge_page_release(page); huge_page_release(page);
pte_clear(pte); pte_clear(pte);
...@@ -429,7 +433,6 @@ static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma) ...@@ -429,7 +433,6 @@ static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma)
if (!page) { if (!page) {
page = alloc_hugetlb_page(); page = alloc_hugetlb_page();
if (!page) { if (!page) {
spin_unlock(&key->lock);
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
...@@ -437,8 +440,8 @@ static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma) ...@@ -437,8 +440,8 @@ static int prefault_key(struct hugetlb_key *key, struct vm_area_struct *vma)
} }
set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
} }
spin_unlock(&key->lock);
out: out:
spin_unlock(&key->lock);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return ret; return ret;
} }
...@@ -467,8 +470,6 @@ static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long ...@@ -467,8 +470,6 @@ static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long
} }
retval = prefault_key(hugetlb_key, vma); retval = prefault_key(hugetlb_key, vma);
if (retval)
goto out;
vma->vm_flags |= (VM_HUGETLB | VM_RESERVED); vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
vma->vm_ops = &hugetlb_vm_ops; vma->vm_ops = &hugetlb_vm_ops;
...@@ -477,17 +478,6 @@ static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long ...@@ -477,17 +478,6 @@ static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long
clear_key_busy(hugetlb_key); clear_key_busy(hugetlb_key);
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
return retval; return retval;
out:
if (addr > vma->vm_start) {
unsigned long raddr;
raddr = vma->vm_end;
vma->vm_end = addr;
zap_hugepage_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
vma->vm_end = raddr;
}
spin_lock(&mm->page_table_lock);
do_munmap(mm, vma->vm_start, len);
spin_unlock(&mm->page_table_lock);
out_release: out_release:
hugetlb_release_key(hugetlb_key); hugetlb_release_key(hugetlb_key);
return retval; return retval;
...@@ -536,10 +526,8 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) ...@@ -536,10 +526,8 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag) static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{ {
if (!capable(CAP_SYS_ADMIN)) { if (!capable(CAP_IPC_LOCK) && !in_group_p(0))
if (!in_group_p(0)) return -EPERM;
return -EPERM;
}
addr = do_mmap_pgoff(NULL, addr, len, prot, addr = do_mmap_pgoff(NULL, addr, len, prot,
MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0); MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0);
if (IS_ERR((void *) addr)) if (IS_ERR((void *) addr))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment