Commit e5ff2159, authored by Andi Kleen, committed by Linus Torvalds

hugetlb: multiple hstates for multiple page sizes

Add basic support for more than one hstate in hugetlbfs.  This is the key
to supporting multiple hugetlbfs page sizes at once.

- Rather than a single hstate, we now have an array, with an iterator
- default_hstate continues to be the struct hstate which we use by default
- Add functions for architectures to register new hstates

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a5516438
...@@ -36,8 +36,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, ...@@ -36,8 +36,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma); struct vm_area_struct *vma);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
extern unsigned long max_huge_pages;
extern unsigned long sysctl_overcommit_huge_pages;
extern unsigned long hugepages_treat_as_movable; extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity; extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group; extern int sysctl_hugetlb_shm_group;
...@@ -181,7 +179,17 @@ struct hstate { ...@@ -181,7 +179,17 @@ struct hstate {
unsigned int surplus_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES];
}; };
extern struct hstate default_hstate; void __init hugetlb_add_hstate(unsigned order);
struct hstate *size_to_hstate(unsigned long size);
#ifndef HUGE_MAX_HSTATE
#define HUGE_MAX_HSTATE 1
#endif
extern struct hstate hstates[HUGE_MAX_HSTATE];
extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
static inline struct hstate *hstate_vma(struct vm_area_struct *vma) static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{ {
...@@ -230,6 +238,11 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) ...@@ -230,6 +238,11 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
#include <asm/hugetlb.h> #include <asm/hugetlb.h>
/*
 * Map a compound page to its hstate.
 *
 * The lookup key is PAGE_SIZE shifted left by the page's compound order;
 * whatever size_to_hstate() returns for that key is handed back unchanged
 * (including NULL when no registered hstate matches).
 */
static inline struct hstate *page_hstate(struct page *page)
{
	unsigned long compound_size = PAGE_SIZE << compound_order(page);

	return size_to_hstate(compound_size);
}
#else #else
struct hstate {}; struct hstate {};
#define hstate_file(f) NULL #define hstate_file(f) NULL
......
...@@ -959,7 +959,7 @@ static struct ctl_table vm_table[] = { ...@@ -959,7 +959,7 @@ static struct ctl_table vm_table[] = {
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
{ {
.procname = "nr_hugepages", .procname = "nr_hugepages",
.data = &max_huge_pages, .data = NULL,
.maxlen = sizeof(unsigned long), .maxlen = sizeof(unsigned long),
.mode = 0644, .mode = 0644,
.proc_handler = &hugetlb_sysctl_handler, .proc_handler = &hugetlb_sysctl_handler,
...@@ -985,10 +985,12 @@ static struct ctl_table vm_table[] = { ...@@ -985,10 +985,12 @@ static struct ctl_table vm_table[] = {
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
.procname = "nr_overcommit_hugepages", .procname = "nr_overcommit_hugepages",
.data = &sysctl_overcommit_huge_pages, .data = NULL,
.maxlen = sizeof(sysctl_overcommit_huge_pages), .maxlen = sizeof(unsigned long),
.mode = 0644, .mode = 0644,
.proc_handler = &hugetlb_overcommit_handler, .proc_handler = &hugetlb_overcommit_handler,
.extra1 = (void *)&hugetlb_zero,
.extra2 = (void *)&hugetlb_infinity,
}, },
#endif #endif
{ {
......
...@@ -22,12 +22,19 @@ ...@@ -22,12 +22,19 @@
#include "internal.h" #include "internal.h"
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
unsigned long max_huge_pages;
unsigned long sysctl_overcommit_huge_pages;
static gfp_t htlb_alloc_mask = GFP_HIGHUSER; static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable; unsigned long hugepages_treat_as_movable;
struct hstate default_hstate; static int max_hstate;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];
/* for command line parsing */
static struct hstate * __initdata parsed_hstate;
static unsigned long __initdata default_hstate_max_huge_pages;
#define for_each_hstate(h) \
for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)
/* /*
* Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
...@@ -454,13 +461,24 @@ static void update_and_free_page(struct hstate *h, struct page *page) ...@@ -454,13 +461,24 @@ static void update_and_free_page(struct hstate *h, struct page *page)
__free_pages(page, huge_page_order(h)); __free_pages(page, huge_page_order(h));
} }
struct hstate *size_to_hstate(unsigned long size)
{
struct hstate *h;
for_each_hstate(h) {
if (huge_page_size(h) == size)
return h;
}
return NULL;
}
static void free_huge_page(struct page *page) static void free_huge_page(struct page *page)
{ {
/* /*
* Can't pass hstate in here because it is called from the * Can't pass hstate in here because it is called from the
* compound page destructor. * compound page destructor.
*/ */
struct hstate *h = &default_hstate; struct hstate *h = page_hstate(page);
int nid = page_to_nid(page); int nid = page_to_nid(page);
struct address_space *mapping; struct address_space *mapping;
...@@ -887,39 +905,94 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, ...@@ -887,39 +905,94 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
return page; return page;
} }
static int __init hugetlb_init(void) static void __init hugetlb_init_one_hstate(struct hstate *h)
{ {
unsigned long i; unsigned long i;
struct hstate *h = &default_hstate;
if (HPAGE_SHIFT == 0)
return 0;
if (!h->order) {
h->order = HPAGE_SHIFT - PAGE_SHIFT;
h->mask = HPAGE_MASK;
}
for (i = 0; i < MAX_NUMNODES; ++i) for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&h->hugepage_freelists[i]); INIT_LIST_HEAD(&h->hugepage_freelists[i]);
h->hugetlb_next_nid = first_node(node_online_map); h->hugetlb_next_nid = first_node(node_online_map);
for (i = 0; i < max_huge_pages; ++i) { for (i = 0; i < h->max_huge_pages; ++i) {
if (!alloc_fresh_huge_page(h)) if (!alloc_fresh_huge_page(h))
break; break;
} }
max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n", }
h->free_huge_pages);
/*
 * Run hugetlb_init_one_hstate() on every registered hstate, in the order
 * for_each_hstate() visits them.
 */
static void __init hugetlb_init_hstates(void)
{
	struct hstate *cur;

	for_each_hstate(cur)
		hugetlb_init_one_hstate(cur);
}
/*
 * Log one line per registered hstate, giving h->free_huge_pages and the
 * size of one huge page of that hstate.
 *
 * A huge page is 2^(order + PAGE_SHIFT) bytes.  Sizes of 1MB and above are
 * printed in MB, exactly as before.  Smaller sizes are printed in KB:
 * evaluating "1 << (order + PAGE_SHIFT - 20)" for them would use a negative
 * (or, in unsigned arithmetic, enormous) shift count, which is undefined
 * behaviour in C.
 */
static void __init report_hugepages(void)
{
	struct hstate *h;

	for_each_hstate(h) {
		/* log2 of the huge page size */
		unsigned int shift = h->order + PAGE_SHIFT;

		if (shift >= 20) {
			printk(KERN_INFO "Total HugeTLB memory allocated, "
					 "%ld %dMB pages\n",
				h->free_huge_pages,
				1 << (shift - 20));
		} else {
			/*
			 * NOTE(review): assumes PAGE_SHIFT >= 10 so that
			 * shift - 10 cannot wrap -- confirm for all
			 * supported architectures.
			 */
			printk(KERN_INFO "Total HugeTLB memory allocated, "
					 "%ld %dKB pages\n",
				h->free_huge_pages,
				1 << (shift - 10));
		}
	}
}
static int __init hugetlb_init(void)
{
BUILD_BUG_ON(HPAGE_SHIFT == 0);
if (!size_to_hstate(HPAGE_SIZE)) {
hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
}
default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
hugetlb_init_hstates();
report_hugepages();
return 0; return 0;
} }
module_init(hugetlb_init); module_init(hugetlb_init);
/*
 * Register a new hstate for huge pages of the given @order.
 * Should be called on processing a hugepagesz=... option.
 *
 * If an hstate of size PAGE_SIZE << order is already registered, a warning
 * is printed and nothing else happens.  Otherwise the next free slot of
 * hstates[] is claimed (BUG if the table is full or @order is 0), its order
 * and mask are filled in, hugetlb_init_one_hstate() is run on it, and it is
 * recorded in parsed_hstate so that a following hugepages= parameter is
 * applied to it.
 */
void __init hugetlb_add_hstate(unsigned order)
{
	unsigned long requested_size = PAGE_SIZE << order;
	struct hstate *new_hstate;

	if (size_to_hstate(requested_size)) {
		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
		return;
	}

	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
	BUG_ON(order == 0);

	/* Claim the next unused entry of the hstate table. */
	new_hstate = &hstates[max_hstate];
	max_hstate++;

	new_hstate->order = order;
	/* Clear the low (order + PAGE_SHIFT) bits, keep everything above. */
	new_hstate->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);

	hugetlb_init_one_hstate(new_hstate);
	parsed_hstate = new_hstate;
}
static int __init hugetlb_setup(char *s) static int __init hugetlb_setup(char *s)
{ {
if (sscanf(s, "%lu", &max_huge_pages) <= 0) unsigned long *mhp;
max_huge_pages = 0;
/*
* !max_hstate means we haven't parsed a hugepagesz= parameter yet,
* so this hugepages= parameter goes to the "default hstate".
*/
if (!max_hstate)
mhp = &default_hstate_max_huge_pages;
else
mhp = &parsed_hstate->max_huge_pages;
if (sscanf(s, "%lu", mhp) <= 0)
*mhp = 0;
return 1; return 1;
} }
__setup("hugepages=", hugetlb_setup); __setup("hugepages=", hugetlb_setup);
...@@ -950,7 +1023,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count) ...@@ -950,7 +1023,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
if (PageHighMem(page)) if (PageHighMem(page))
continue; continue;
list_del(&page->lru); list_del(&page->lru);
update_and_free_page(page); update_and_free_page(h, page);
h->free_huge_pages--; h->free_huge_pages--;
h->free_huge_pages_node[page_to_nid(page)]--; h->free_huge_pages_node[page_to_nid(page)]--;
} }
...@@ -963,10 +1036,9 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count) ...@@ -963,10 +1036,9 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
#endif #endif
#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
static unsigned long set_max_huge_pages(unsigned long count) static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
{ {
unsigned long min_count, ret; unsigned long min_count, ret;
struct hstate *h = &default_hstate;
/* /*
* Increase the pool size * Increase the pool size
...@@ -1037,8 +1109,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, ...@@ -1037,8 +1109,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, struct file *file, void __user *buffer,
size_t *length, loff_t *ppos) size_t *length, loff_t *ppos)
{ {
struct hstate *h = &default_hstate;
unsigned long tmp;
if (!write)
tmp = h->max_huge_pages;
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
proc_doulongvec_minmax(table, write, file, buffer, length, ppos); proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
max_huge_pages = set_max_huge_pages(max_huge_pages);
if (write)
h->max_huge_pages = set_max_huge_pages(h, tmp);
return 0; return 0;
} }
...@@ -1059,10 +1142,21 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, ...@@ -1059,10 +1142,21 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
size_t *length, loff_t *ppos) size_t *length, loff_t *ppos)
{ {
struct hstate *h = &default_hstate; struct hstate *h = &default_hstate;
unsigned long tmp;
if (!write)
tmp = h->nr_overcommit_huge_pages;
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
proc_doulongvec_minmax(table, write, file, buffer, length, ppos); proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
if (write) {
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; h->nr_overcommit_huge_pages = tmp;
spin_unlock(&hugetlb_lock); spin_unlock(&hugetlb_lock);
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment