Commit f93fcfa9 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Fix futexes in huge pages

Using a futex in a huge page causes a kernel lockup in __pin_page():
__pin_page()'s page revalidation uses follow_page(), and follow_page()
doesn't work for hugepages.

The patch fixes up follow_page() to return the appropriate 4k page for
hugepages.

This incurs a vma lookup for each follow_page(), which is considerable
overhead in some situations.  We only _need_ to do this if the architecture
cannot determine a page's hugeness from the contents of the PMD.

So this patch is a "reference" implementation, needed by architectures which
cannot tell from the PMD alone - say, PPC BAT-based hugepages.
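
For architectures which _can_ tell, the lookup could instead be
short-circuited during the normal page-table walk.  A minimal sketch of that
idea, assuming hypothetical pmd_huge() and follow_huge_pmd() helpers (neither
is part of this patch):

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return NULL;

	pmd = pmd_offset(pgd, address);
	if (pmd_huge(*pmd))
		/*
		 * Hugeness is visible in the PMD itself: resolve the
		 * 4k subpage within the huge mapping, no vma needed.
		 */
		return follow_huge_pmd(mm, address, pmd, write);

	/* ... otherwise fall through to the normal pte walk ... */

Even the vma-based version below keeps the cost off most processes:
mark_mm_hugetlb() sets mm->used_hugetlb when a hugetlb vma is linked in, so
hugepage_vma() returns before calling find_vma() in any mm which has never
mapped a huge page.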
parent 08f16f8f
@@ -150,6 +150,35 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	return i;
 }
 
+struct page *
+follow_huge_addr(struct mm_struct *mm,
+	struct vm_area_struct *vma, unsigned long address, int write)
+{
+	unsigned long start = address;
+	int length = 1;
+	int nr;
+	struct page *page;
+
+	nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0);
+	if (nr == 1)
+		return page;
+	return NULL;
+}
+
+/*
+ * If virtual address `addr' lies within a huge page, return its controlling
+ * VMA, else NULL.
+ */
+struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr)
+{
+	if (mm->used_hugetlb) {
+		struct vm_area_struct *vma = find_vma(mm, addr);
+		if (vma && is_vm_hugetlb_page(vma))
+			return vma;
+	}
+	return NULL;
+}
+
 void free_huge_page(struct page *page)
 {
 	BUG_ON(page_count(page));
@@ -20,16 +20,28 @@ int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 void huge_page_release(struct page *);
 int hugetlb_report_meminfo(char *);
 int is_hugepage_mem_enough(size_t);
+struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, int write);
+struct vm_area_struct *hugepage_vma(struct mm_struct *mm,
+			unsigned long address);
 
 extern int htlbpage_max;
 
+static inline void
+mark_mm_hugetlb(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	if (is_vm_hugetlb_page(vma))
+		mm->used_hugetlb = 1;
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return 0;
 }
 #define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; })
+#define follow_huge_addr(mm, vma, addr, write) 0
 #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
 #define zap_hugepage_range(vma, start, len) BUG()
@@ -37,6 +49,8 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 #define huge_page_release(page) BUG()
 #define is_hugepage_mem_enough(size) 0
 #define hugetlb_report_meminfo(buf) 0
+#define hugepage_vma(mm, addr) 0
+#define mark_mm_hugetlb(mm, vma) do { } while (0)
 #endif /* !CONFIG_HUGETLB_PAGE */
@@ -201,7 +201,9 @@ struct mm_struct {
 	unsigned long swap_address;
 
 	unsigned dumpable:1;
+#ifdef CONFIG_HUGETLB_PAGE
+	int used_hugetlb;
+#endif
 	/* Architecture-specific MM context */
 	mm_context_t context;
@@ -607,6 +607,11 @@ follow_page(struct mm_struct *mm, unsigned long address, int write)
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	unsigned long pfn;
+	struct vm_area_struct *vma;
+
+	vma = hugepage_vma(mm, address);
+	if (vma)
+		return follow_huge_addr(mm, vma, address, write);
 
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || pgd_bad(*pgd))
@@ -362,6 +362,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (mapping)
 		up(&mapping->i_shared_sem);
 
+	mark_mm_hugetlb(mm, vma);
 	mm->map_count++;
 	validate_mm(mm);
 }
@@ -1423,7 +1424,6 @@ void exit_mmap(struct mm_struct *mm)
 		kmem_cache_free(vm_area_cachep, vma);
 		vma = next;
 	}
-
 }
 
 /* Insert vm structure into process list sorted by address