Commit b3a656b6 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] convert hugetlb code to use compound pages

The odd thing about hugetlb is that it maintains its own freelist of pages.
And it has to do that, else it would trivially run out of pages due to buddy
fragmentation.

So we don't want callers of put_page() to be passing those pages
to __free_pages_ok() on the final put().

So hugetlb installs a destructor in the compound pages to point at
free_huge_page(), which knows how to put these pages back onto the free list.
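
For illustration, a minimal sketch of the final-put path under this scheme.
It assumes the convention visible in the hunks below, where the head page of
a compound page keeps its destructor in page->lru.prev; the head-page lookup
via page->private and the put_page() body are simplified assumptions, not the
verbatim kernel code:

	void put_page(struct page *page)
	{
		if (PageCompound(page)) {
			/* assumed: tail pages point back at the head via page->private */
			page = (struct page *)page->private;
			if (put_page_testzero(page)) {
				void (*dtor)(struct page *);

				/* destructor installed by hugetlb, e.g. free_huge_page() */
				dtor = (void (*)(struct page *))page->lru.prev;
				(*dtor)(page);
			}
			return;
		}
		if (put_page_testzero(page))
			__free_pages_ok(page, 0);	/* ordinary buddy free */
	}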

Also, don't mark hugepages as all PageReserved any more.  That's preventing
callers from doing proper refcounting.  Any code which does a user pagetable
walk and hits part of a hugepage will now handle it transparently.
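
As a sketch of what such a walk now does (mirroring the follow_hugetlb_page()
hunks below; the helper name here is made up for illustration):

	/* Take a normal reference on the 4K constituent page of a hugepage. */
	static struct page *hugepage_subpage_get(pte_t pte, unsigned long addr)
	{
		struct page *page = pte_page(pte);

		page += (addr & ~HPAGE_MASK) >> PAGE_SHIFT;
		get_page(page);		/* balanced later by an ordinary put_page() */
		return page;
	}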
parent eefb08ee
@@ -46,6 +46,7 @@ static struct page *alloc_hugetlb_page(void)
 	htlbpagemem--;
 	spin_unlock(&htlbpage_lock);
 	set_page_count(page, 1);
+	page->lru.prev = (void *)huge_page_release;
 	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
 		clear_highpage(&page[i]);
 	return page;
@@ -134,6 +135,7 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(pte);
 		if (pages) {
 			page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
+			get_page(page);
 			pages[i] = page;
 		}
 		if (vmas)
@@ -218,8 +220,10 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 	struct page *page;
 
 	page = pte_page(*(pte_t *)pmd);
-	if (page)
+	if (page) {
 		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+		get_page(page);
+	}
 	return page;
 }
 #endif
@@ -372,8 +376,8 @@ int try_to_free_low(int count)
 
 int set_hugetlb_mem_size(int count)
 {
-	int j, lcount;
-	struct page *page, *map;
+	int lcount;
+	struct page *page;
 	extern long htlbzone_pages;
 	extern struct list_head htlbpage_freelist;
@@ -389,11 +393,6 @@ int set_hugetlb_mem_size(int count)
 			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
 			if (page == NULL)
 				break;
-			map = page;
-			for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
-				SetPageReserved(map);
-				map++;
-			}
 			spin_lock(&htlbpage_lock);
 			list_add(&page->list, &htlbpage_freelist);
 			htlbpagemem++;
@@ -415,7 +414,8 @@ int set_hugetlb_mem_size(int count)
 	return (int) htlbzone_pages;
 }
 
-int hugetlb_sysctl_handler(ctl_table *table, int write, struct file *file, void *buffer, size_t *length)
+int hugetlb_sysctl_handler(ctl_table *table, int write,
+		struct file *file, void *buffer, size_t *length)
 {
 	proc_dointvec(table, write, file, buffer, length);
 	htlbpage_max = set_hugetlb_mem_size(htlbpage_max);
@@ -432,15 +432,13 @@ __setup("hugepages=", hugetlb_setup);
 
 static int __init hugetlb_init(void)
 {
-	int i, j;
+	int i;
 	struct page *page;
 
 	for (i = 0; i < htlbpage_max; ++i) {
 		page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
 		if (!page)
 			break;
-		for (j = 0; j < HPAGE_SIZE/PAGE_SIZE; ++j)
-			SetPageReserved(&page[j]);
 		spin_lock(&htlbpage_lock);
 		list_add(&page->list, &htlbpage_freelist);
 		spin_unlock(&htlbpage_lock);
...
@@ -227,6 +227,7 @@ follow_hugetlb_page (struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(pte);
 		if (pages) {
 			page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
+			get_page(page);
 			pages[i] = page;
 		}
 		if (vmas)
@@ -303,11 +304,6 @@ set_hugetlb_mem_size (int count)
 			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
 			if (page == NULL)
 				break;
-			map = page;
-			for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
-				SetPageReserved(map);
-				map++;
-			}
 			spin_lock(&htlbpage_lock);
 			list_add(&page->list, &htlbpage_freelist);
 			htlbpagemem++;
@@ -327,7 +323,7 @@ set_hugetlb_mem_size (int count)
 	map = page;
 	for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
 		map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
-				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
+				1 << PG_dirty | 1 << PG_active |
 				1 << PG_private | 1<< PG_writeback);
 		map++;
 	}
...
@@ -288,6 +288,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(pte);
 		if (pages) {
 			page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
+			get_page(page);
 			pages[i] = page;
 		}
 		if (vmas)
@@ -584,11 +585,6 @@ int set_hugetlb_mem_size(int count)
 			page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER);
 			if (page == NULL)
 				break;
-			map = page;
-			for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
-				SetPageReserved(map);
-				map++;
-			}
 			spin_lock(&htlbpage_lock);
 			list_add(&page->list, &htlbpage_freelist);
 			htlbpagemem++;
@@ -613,7 +609,6 @@ int set_hugetlb_mem_size(int count)
 		map->flags &= ~(1UL << PG_locked | 1UL << PG_error |
 				1UL << PG_referenced |
 				1UL << PG_dirty | 1UL << PG_active |
-				1UL << PG_reserved |
 				1UL << PG_private | 1UL << PG_writeback);
 		set_page_count(page, 0);
 		map++;
...
@@ -134,6 +134,7 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(pte);
 		if (pages) {
 			page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
+			get_page(page);
 			pages[i] = page;
 		}
 		if (vmas)
@@ -263,11 +264,6 @@ int set_hugetlb_mem_size(int count)
 			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
 			if (page == NULL)
 				break;
-			map = page;
-			for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
-				SetPageReserved(map);
-				map++;
-			}
 			spin_lock(&htlbpage_lock);
 			list_add(&page->list, &htlbpage_freelist);
 			htlbpagemem++;
@@ -286,8 +282,9 @@ int set_hugetlb_mem_size(int count)
 	spin_unlock(&htlbpage_lock);
 	map = page;
 	for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
-		map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
-			1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
+		map->flags &= ~(1 << PG_locked | 1 << PG_error |
+			1 << PG_referenced |
+			1 << PG_dirty | 1 << PG_active |
 			1 << PG_private | 1<< PG_writeback);
 		set_page_count(map, 0);
 		map++;
...