Commit 69fba2dd authored by KAMEZAWA Hiroyuki, committed by Linus Torvalds

[PATCH] no buddy bitmap patch revisit: for mm/page_alloc.c

This patch removes the buddy bitmaps from the page allocator in mm/page_alloc.c.

The buddy system now uses the page->private field to record a free page's order
instead of using bitmaps.

The algorithm of the buddy system is unchanged. Only bitmaps are removed.
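
As a rough, standalone illustration of that bookkeeping (a toy model, not the kernel code: struct toy_page and its fields are invented here to mirror the page_order()/set_page_order()/rmv_page_order() helpers the patch adds), the head page of each free block simply records its order and is flagged as a buddy-system head:

#include <assert.h>
#include <stdio.h>

/* Toy stand-in for struct page: only the fields this sketch needs. */
struct toy_page {
	unsigned long private;	/* order of the free block, if this page is its head */
	int pg_private;		/* stands in for the PG_private flag bit */
};

static unsigned long toy_page_order(const struct toy_page *p)
{
	return p->private;
}

static void toy_set_page_order(struct toy_page *p, int order)
{
	p->private = order;
	p->pg_private = 1;	/* mark p as the head of a free block */
}

static void toy_rmv_page_order(struct toy_page *p)
{
	p->pg_private = 0;
	p->private = 0;
}

int main(void)
{
	struct toy_page head = { 0, 0 };

	toy_set_page_order(&head, 3);	/* head of a free 2^3 = 8 page block */
	assert(toy_page_order(&head) == 3 && head.pg_private);
	printf("free block of %lu pages\n", 1UL << toy_page_order(&head));

	toy_rmv_page_order(&head);	/* block handed out: clear both marks */
	return 0;
}

In the real code these marks only change under zone->lock, which is why the patch can use the non-atomic __SetPagePrivate()/__ClearPagePrivate() variants.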

In this buddy system, two pages, a page and its "buddy", can be coalesced when

PagePrivate(buddy) &&
page_order(page) == page_order(buddy) &&
!PageReserved(buddy) &&
page_count(buddy) == 0

This also means that "buddy" is the head of a contiguous run of free pages
of length (1 << page_order(buddy)).
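
To make the index arithmetic behind that rule concrete, here is a small worked example (plain C, independent of the kernel sources): flipping bit "order" of a block's index yields its buddy's index, and ANDing the two gives the head of the merged block, which is what the patched __free_pages_bulk() computes with page_idx ^ (1 << order) and page_idx &= buddy_idx.

#include <assert.h>
#include <stdio.h>

/* Buddy-index arithmetic used by the patched __free_pages_bulk(). */
int main(void)
{
	unsigned long page_idx = 24;	/* block starts at page 24 ...  */
	unsigned int order = 3;		/* ... and spans 2^3 = 8 pages  */

	/* The buddy block lives at the index with bit 'order' flipped. */
	unsigned long buddy_idx = page_idx ^ (1UL << order);	/* -> 16 */

	/* If the buddy is also free, the merged 2^(order+1) block starts
	 * at the lower of the two indices; ANDing them gives exactly that,
	 * because the two blocks differ only in bit 'order'. */
	unsigned long merged_idx = page_idx & buddy_idx;	/* -> 16 */

	assert(buddy_idx == 16 && merged_idx == 16);
	printf("buddy of %lu (order %u) is %lu, merged head %lu (order %u)\n",
	       page_idx, order, buddy_idx, merged_idx, order + 1);
	return 0;
}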

bad_range() is called from the inner loop of __free_pages_bulk().
On many architectures, bad_range() is only a sanity check and always returns 0.
But if a zone's memmap has a hole, it can return 1.
An architecture with memory holes inside a zone has to define CONFIG_HOLES_IN_ZONE.
When CONFIG_HOLES_IN_ZONE is defined, pfn_valid() is called to check
whether a buddy page is valid.
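
A minimal sketch of why that check is needed (toy code; toy_pfn_valid() is an invented stand-in for the architecture's pfn_valid()): the XOR-derived buddy index can land inside a hole in the zone's memmap, so on CONFIG_HOLES_IN_ZONE configurations the buddy must be validated before its struct page is examined.

#include <stdbool.h>
#include <stdio.h>

#define ZONE_PAGES 32

/* Toy memmap: pages 16..23 are a hole (no valid struct page behind them). */
static bool toy_pfn_valid(unsigned long pfn)
{
	return pfn < ZONE_PAGES && !(pfn >= 16 && pfn < 24);
}

/* Before touching buddy->flags or buddy->private, make sure the buddy's
 * page frame actually exists; otherwise give up on coalescing. */
static bool buddy_usable(unsigned long page_idx, unsigned int order)
{
	unsigned long buddy_idx = page_idx ^ (1UL << order);
	return toy_pfn_valid(buddy_idx);
}

int main(void)
{
	/* Page 8, order 3: its buddy would start at 0 -> valid. */
	printf("buddy of 8 usable:  %d\n", buddy_usable(8, 3));
	/* Page 24, order 3: its buddy would start at 16 -> inside the hole. */
	printf("buddy of 24 usable: %d\n", buddy_usable(24, 3));
	return 0;
}
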
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6951e82f
@@ -71,6 +71,10 @@ static int bad_range(struct zone *zone, struct page *page)
 		return 1;
 	if (page_to_pfn(page) < zone->zone_start_pfn)
 		return 1;
+#ifdef CONFIG_HOLES_IN_ZONE
+	if (!pfn_valid(page_to_pfn(page)))
+		return 1;
+#endif
 	if (zone != page_zone(page))
 		return 1;
 	return 0;
@@ -158,6 +162,45 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 }
 #endif /* CONFIG_HUGETLB_PAGE */
 
+/*
+ * function for dealing with page's order in buddy system.
+ * zone->lock is already acquired when we use these.
+ * So, we don't need atomic page->flags operations here.
+ */
+static inline unsigned long page_order(struct page *page) {
+	return page->private;
+}
+
+static inline void set_page_order(struct page *page, int order) {
+	page->private = order;
+	__SetPagePrivate(page);
+}
+
+static inline void rmv_page_order(struct page *page)
+{
+	__ClearPagePrivate(page);
+	page->private = 0;
+}
+
+/*
+ * This function checks whether a page is free && is the buddy
+ * we can do coalesce a page and its buddy if
+ * (a) the buddy is free &&
+ * (b) the buddy is on the buddy system &&
+ * (c) a page and its buddy have the same order.
+ * for recording page's order, we use page->private and PG_private.
+ *
+ */
+static inline int page_is_buddy(struct page *page, int order)
+{
+	if (PagePrivate(page)           &&
+	    (page_order(page) == order) &&
+	    !PageReserved(page)         &&
+	     page_count(page) == 0)
+		return 1;
+	return 0;
+}
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -170,9 +213,10 @@ static void destroy_compound_page(struct page *page, unsigned long order)
  * at the bottom level available, and propagating the changes upward
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
- * At each level, we keep one bit for each pair of blocks, which
- * is set to 1 iff only one of the pair is allocated.  So when we
- * are allocating or freeing one, we can derive the state of the
+ * At each level, we keep a list of pages, which are heads of continuous
+ * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * order is recorded in page->private field.
+ * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were
  * free, the remainder of the region must be split into blocks.
  * If a block is freed, and its buddy is also free, then this
@@ -182,44 +226,44 @@ static void destroy_compound_page(struct page *page, unsigned long order)
  */
 
 static inline void __free_pages_bulk (struct page *page, struct page *base,
-		struct zone *zone, struct free_area *area, unsigned int order)
+		struct zone *zone, unsigned int order)
 {
-	unsigned long page_idx, index, mask;
+	unsigned long page_idx;
+	struct page *coalesced;
+	int order_size = 1 << order;
 
-	if (order)
+	if (unlikely(order))
 		destroy_compound_page(page, order);
-	mask = (~0UL) << order;
 	page_idx = page - base;
-	if (page_idx & ~mask)
-		BUG();
-	index = page_idx >> (1 + order);
 
-	zone->free_pages += 1 << order;
+	BUG_ON(page_idx & (order_size - 1));
+	BUG_ON(bad_range(zone, page));
 
+	zone->free_pages += order_size;
 	while (order < MAX_ORDER-1) {
-		struct page *buddy1, *buddy2;
+		struct free_area *area;
+		struct page *buddy;
+		int buddy_idx;
 
-		BUG_ON(area >= zone->free_area + MAX_ORDER);
-		if (!__test_and_change_bit(index, area->map))
-			/*
-			 * the buddy page is still allocated.
-			 */
+		buddy_idx = (page_idx ^ (1 << order));
+		buddy = base + buddy_idx;
+		if (bad_range(zone, buddy))
+			break;
+		if (!page_is_buddy(buddy, order))
 			break;
-
 		/* Move the buddy up one level. */
-		buddy1 = base + (page_idx ^ (1 << order));
-		buddy2 = base + page_idx;
-		BUG_ON(bad_range(zone, buddy1));
-		BUG_ON(bad_range(zone, buddy2));
-		list_del(&buddy1->lru);
+		list_del(&buddy->lru);
+		area = zone->free_area + order;
 		area->nr_free--;
-		mask <<= 1;
+		rmv_page_order(buddy);
+		page_idx &= buddy_idx;
 		order++;
-		area++;
-		index >>= 1;
-		page_idx &= mask;
 	}
-	list_add(&(base + page_idx)->lru, &area->free_list);
-	area->nr_free++;
+	coalesced = base + page_idx;
+	set_page_order(coalesced, order);
+	list_add(&coalesced->lru, &zone->free_area[order].free_list);
+	zone->free_area[order].nr_free++;
 }
 
 static inline void free_pages_check(const char *function, struct page *page)
@@ -257,12 +301,10 @@ free_pages_bulk(struct zone *zone, int count,
 		struct list_head *list, unsigned int order)
 {
 	unsigned long flags;
-	struct free_area *area;
 	struct page *base, *page = NULL;
 	int ret = 0;
 
 	base = zone->zone_mem_map;
-	area = zone->free_area + order;
 	spin_lock_irqsave(&zone->lock, flags);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
@@ -270,7 +312,7 @@ free_pages_bulk(struct zone *zone, int count,
 		page = list_entry(list->prev, struct page, lru);
 		/* have to delete it as __free_pages_bulk list manipulates */
 		list_del(&page->lru);
-		__free_pages_bulk(page, base, zone, area, order);
+		__free_pages_bulk(page, base, zone, order);
 		ret++;
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
@@ -299,8 +341,6 @@ void __free_pages_ok(struct page *page, unsigned int order)
 	free_pages_bulk(page_zone(page), 1, &list, order);
 }
 
-#define MARK_USED(index, order, area) \
-	__change_bit((index) >> (1+(order)), (area)->map)
 
 /*
  * The order of subdivision here is critical for the IO subsystem.
@@ -318,7 +358,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
  */
 static inline struct page *
 expand(struct zone *zone, struct page *page,
-	unsigned long index, int low, int high, struct free_area *area)
+	int low, int high, struct free_area *area)
 {
 	unsigned long size = 1 << high;
 
@@ -329,7 +369,7 @@ expand(struct zone *zone, struct page *page,
 		BUG_ON(bad_range(zone, &page[size]));
 		list_add(&page[size].lru, &area->free_list);
 		area->nr_free++;
-		MARK_USED(index + size, high, area);
+		set_page_order(&page[size], high);
 	}
 	return page;
 }
@@ -384,7 +424,6 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 	struct free_area * area;
 	unsigned int current_order;
 	struct page *page;
-	unsigned int index;
 
 	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
 		area = zone->free_area + current_order;
@@ -393,12 +432,10 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 		page = list_entry(area->free_list.next, struct page, lru);
 		list_del(&page->lru);
+		rmv_page_order(page);
 		area->nr_free--;
-		index = page - zone->zone_mem_map;
-		if (current_order != MAX_ORDER-1)
-			MARK_USED(index, current_order, area);
 		zone->free_pages -= 1UL << order;
-		return expand(zone, page, index, order, current_order, area);
+		return expand(zone, page, order, current_order, area);
 	}
 
 	return NULL;
 }
@@ -1473,49 +1510,12 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	}
 }
 
-/*
- * Page buddy system uses "index >> (i+1)", where "index" is
- * at most "size-1".
- *
- * The extra "+3" is to round down to byte size (8 bits per byte
- * assumption). Thus we get "(size-1) >> (i+4)" as the last byte
- * we can access.
- *
- * The "+1" is because we want to round the byte allocation up
- * rather than down. So we should have had a "+7" before we shifted
- * down by three. Also, we have to add one as we actually _use_ the
- * last bit (it's [0,n] inclusive, not [0,n[).
- *
- * So we actually had +7+1 before we shift down by 3. But
- * (n+8) >> 3 == (n >> 3) + 1 (modulo overflows, which we do not have).
- *
- * Finally, we LONG_ALIGN because all bitmap operations are on longs.
- */
-unsigned long pages_to_bitmap_size(unsigned long order, unsigned long nr_pages)
-{
-	unsigned long bitmap_size;
-
-	bitmap_size = (nr_pages-1) >> (order+4);
-	bitmap_size = LONG_ALIGN(bitmap_size+1);
-
-	return bitmap_size;
-}
-
-void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size)
+void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+				unsigned long size)
 {
 	int order;
-	for (order = 0; ; order++) {
-		unsigned long bitmap_size;
-
+	for (order = 0; order < MAX_ORDER ; order++) {
 		INIT_LIST_HEAD(&zone->free_area[order].free_list);
-		if (order == MAX_ORDER-1) {
-			zone->free_area[order].map = NULL;
-			break;
-		}
-
-		bitmap_size = pages_to_bitmap_size(order, size);
-		zone->free_area[order].map =
-		  (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
+		zone->free_area[order].nr_free = 0;
 	}
 }