Commit 5f2809e6, authored by Johannes Weiner, committed by Linus Torvalds

bootmem: clean up alloc_bootmem_core

alloc_bootmem_core has become quite nasty to read over time.  This is a
clean rewrite that keeps the semantics.

bdata->last_pos has been dropped.

bdata->last_success has been renamed to hint_idx and it is now an index
relative to the node's range.  Since further block searching might start
at this index, it is now set to the end of a successful allocation rather
than its beginning.

bdata->last_offset has been renamed to last_end_off to be more clear that
it represents the ending address of the last allocation relative to the
node.

[y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 41546c17
...@@ -31,10 +31,8 @@ typedef struct bootmem_data { ...@@ -31,10 +31,8 @@ typedef struct bootmem_data {
unsigned long node_boot_start; unsigned long node_boot_start;
unsigned long node_low_pfn; unsigned long node_low_pfn;
void *node_bootmem_map; void *node_bootmem_map;
unsigned long last_offset; unsigned long last_end_off;
unsigned long last_pos; unsigned long hint_idx;
unsigned long last_success; /* Previous allocation point. To speed
* up searching */
struct list_head list; struct list_head list;
} bootmem_data_t; } bootmem_data_t;
......
...@@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, ...@@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
* considered reserved. * considered reserved.
*/ */
if (addr >= bdata->node_boot_start && addr < bdata->last_success) if (addr >= bdata->node_boot_start &&
bdata->last_success = addr; PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx)
bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start);
/* /*
* Round up to index to the range. * Round up to index to the range.
...@@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, ...@@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
} }
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
/* static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
* We 'merge' subsequent allocations to save space. We might 'lose' unsigned long size, unsigned long align,
* some fraction of a page if allocations cannot be satisfied due to unsigned long goal, unsigned long limit)
* size constraints on boxes where there is physical RAM space
* fragmentation - in these cases (mostly large memory boxes) this
* is not a problem.
*
* On low memory boxes we get it right in 100% of the cases.
*
* alignment has to be a power of 2 value.
*
* NOTE: This function is _not_ reentrant.
*/
static void * __init
alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{ {
unsigned long areasize, preferred; unsigned long min, max, start, sidx, midx, step;
unsigned long i, start = 0, incr, eidx, end_pfn;
void *ret; BUG_ON(!size);
unsigned long node_boot_start; BUG_ON(align & (align - 1));
void *node_bootmem_map; BUG_ON(limit && goal + size > limit);
if (!size) {
printk("alloc_bootmem_core(): zero-sized request\n");
BUG();
}
BUG_ON(align & (align-1));
/* on nodes without memory - bootmem_map is NULL */
if (!bdata->node_bootmem_map) if (!bdata->node_bootmem_map)
return NULL; return NULL;
...@@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, ...@@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
align, goal, limit); align, goal, limit);
/* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ min = PFN_DOWN(bdata->node_boot_start);
node_boot_start = bdata->node_boot_start; max = bdata->node_low_pfn;
node_bootmem_map = bdata->node_bootmem_map;
if (align) {
node_boot_start = ALIGN(bdata->node_boot_start, align);
if (node_boot_start > bdata->node_boot_start)
node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
}
if (limit && node_boot_start >= limit) goal >>= PAGE_SHIFT;
limit >>= PAGE_SHIFT;
if (limit && max > limit)
max = limit;
if (max <= min)
return NULL; return NULL;
end_pfn = bdata->node_low_pfn; step = max(align >> PAGE_SHIFT, 1UL);
limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
end_pfn = limit;
eidx = end_pfn - PFN_DOWN(node_boot_start); if (goal && min < goal && goal < max)
start = ALIGN(goal, step);
else
start = ALIGN(min, step);
/* sidx = start - PFN_DOWN(bdata->node_boot_start);
* We try to allocate bootmem pages above 'goal' midx = max - PFN_DOWN(bdata->node_boot_start);
* first, then we try to allocate lower pages.
*/
preferred = 0;
if (goal && PFN_DOWN(goal) < end_pfn) {
if (goal > node_boot_start)
preferred = goal - node_boot_start;
if (bdata->last_success > node_boot_start &&
bdata->last_success - node_boot_start >= preferred)
if (!limit || (limit && limit > bdata->last_success))
preferred = bdata->last_success - node_boot_start;
}
preferred = PFN_DOWN(ALIGN(preferred, align)); if (bdata->hint_idx > sidx) {
areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; /* Make sure we retry on failure */
incr = align >> PAGE_SHIFT ? : 1; goal = 1;
sidx = ALIGN(bdata->hint_idx, step);
}
restart_scan: while (1) {
for (i = preferred; i < eidx;) { int merge;
unsigned long j; void *region;
unsigned long eidx, i, start_off, end_off;
find_block:
sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
sidx = ALIGN(sidx, step);
eidx = sidx + PFN_UP(size);
i = find_next_zero_bit(node_bootmem_map, eidx, i); if (sidx >= midx || eidx > midx)
i = ALIGN(i, incr);
if (i >= eidx)
break; break;
if (test_bit(i, node_bootmem_map)) {
i += incr;
continue;
}
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
if (test_bit(j, node_bootmem_map))
goto fail_block;
}
start = i;
goto found;
fail_block:
i = ALIGN(j, incr);
if (i == j)
i += incr;
}
if (preferred > 0) {
preferred = 0;
goto restart_scan;
}
return NULL;
found: for (i = sidx; i < eidx; i++)
bdata->last_success = PFN_PHYS(start) + node_boot_start; if (test_bit(i, bdata->node_bootmem_map)) {
BUG_ON(start >= eidx); sidx = ALIGN(i, step);
if (sidx == i)
sidx += step;
goto find_block;
}
/* if (bdata->last_end_off &&
* Is the next page of the previous allocation-end the start PFN_DOWN(bdata->last_end_off) + 1 == sidx)
* of this allocation's buffer? If yes then we can 'merge' start_off = ALIGN(bdata->last_end_off, align);
* the previous partial page with this allocation. else
*/ start_off = PFN_PHYS(sidx);
if (align < PAGE_SIZE &&
bdata->last_offset && bdata->last_pos+1 == start) { merge = PFN_DOWN(start_off) < sidx;
unsigned long offset, remaining_size; end_off = start_off + size;
offset = ALIGN(bdata->last_offset, align);
BUG_ON(offset > PAGE_SIZE); bdata->last_end_off = end_off;
remaining_size = PAGE_SIZE - offset; bdata->hint_idx = PFN_UP(end_off);
if (size < remaining_size) {
areasize = 0; /*
/* last_pos unchanged */ * Reserve the area now:
bdata->last_offset = offset + size; */
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + for (i = PFN_DOWN(start_off) + merge;
offset + node_boot_start); i < PFN_UP(end_off); i++)
} else { if (test_and_set_bit(i, bdata->node_bootmem_map))
remaining_size = size - remaining_size; BUG();
areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + region = phys_to_virt(bdata->node_boot_start + start_off);
offset + node_boot_start); memset(region, 0, size);
bdata->last_pos = start + areasize - 1; return region;
bdata->last_offset = remaining_size;
}
bdata->last_offset &= ~PAGE_MASK;
} else {
bdata->last_pos = start + areasize - 1;
bdata->last_offset = size & ~PAGE_MASK;
ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
} }
bdebug("nid=%td start=%lx end=%lx\n", if (goal) {
bdata - bootmem_node_data, goal = 0;
start + PFN_DOWN(bdata->node_boot_start), sidx = 0;
start + areasize + PFN_DOWN(bdata->node_boot_start)); goto find_block;
}
/* return NULL;
* Reserve the area now:
*/
for (i = start; i < start + areasize; i++)
if (unlikely(test_and_set_bit(i, node_bootmem_map)))
BUG();
memset(ret, 0, size);
return ret;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment