Commit ce71acce authored by Rusty Russell

alloc: first cut of new Tridge-inspired allocator

This version has limitations: pools must be at least 1MB, and allocations
are restricted to 1/1024 of the total pool size.
parent a18fb319
......@@ -101,6 +101,8 @@ int main(int argc, char *argv[])
if (strcmp(argv[1], "depends") == 0) {
printf("ccan/build_assert\n");
printf("ccan/likely\n");
printf("ccan/short_types\n");
return 0;
}
......
......@@ -6,886 +6,723 @@
#include <stdlib.h>
#include "alloc.h"
#include <ccan/build_assert/build_assert.h>
#include <ccan/alignof/alignof.h>
#include <ccan/likely/likely.h>
#include <ccan/short_types/short_types.h>
#include "config.h"
/*
   Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
   http://samba.org/~tridge/junkcode/alloc_mmap/

   Copyright (C) Andrew Tridgell 2007

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/* FIXME: We assume getpagesize() doesn't change.  Remapping file with
 * different pagesize should still work. */
/* FIXME: Doesn't handle non-page-aligned poolsize. */
/* FIXME: Reduce. */
#define MIN_SIZE (getpagesize() * 2)

/* What's the granularity of sub-page allocs? */
#define BITMAP_GRANULARITY 4

/* File layout:
 *
 *  file := pagestates pad uniform-cache metadata
 *  pagestates := pages * 2-bits-per-page
 *  pad := pad to next ALIGNOF(metaheader)
 *
 *  metadata := metalen next-ptr metabits
 *  metabits := freeblock | bitblock | uniformblock
 *  freeblock := FREE +
 *  bitblock := BITMAP + 2-bits-per-bit-in-page + pad-to-byte
 *  uniformblock := UNIFORM + 14-bit-byte-len + bits + pad-to-byte
 */

#define UNIFORM_CACHE_NUM 16
struct uniform_cache
{
	uint16_t size[UNIFORM_CACHE_NUM];
	/* These could be u32 if we're prepared to limit size. */
	unsigned long page[UNIFORM_CACHE_NUM];
};

struct metaheader
{
	/* Next meta header, or 0 */
	unsigned long next;
	/* Bits start here. */
};
#if 0 /* Until we have the tiny allocator working, go down to 1 MB */

/* We divide the pool into this many large pages (nearest power of 2) */
#define MAX_PAGES (1024UL)

/* 32 small pages == 1 large page. */
#define BITS_FROM_SMALL_TO_LARGE_PAGE 5

#else

#define MAX_PAGES (128UL)
#define BITS_FROM_SMALL_TO_LARGE_PAGE 4

#endif

/* Assumes a is a power of two. */
static unsigned long align_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

static unsigned long align_down(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

static unsigned long div_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) / a;
}

/* It turns out that we spend a lot of time dealing with bit pairs.
 * These routines manipulate them.
 */
static uint8_t get_bit_pair(const uint8_t *bits, unsigned long index)
{
	return bits[index * 2 / CHAR_BIT] >> (index * 2 % CHAR_BIT) & 3;
}

static void set_bit_pair(uint8_t *bits, unsigned long index, uint8_t val)
{
	bits[index * 2 / CHAR_BIT] &= ~(3 << (index * 2 % CHAR_BIT));
	bits[index * 2 / CHAR_BIT] |= (val << (index * 2 % CHAR_BIT));
}
/* This is used for page states and subpage allocations */
enum alloc_state
{
FREE,
TAKEN,
TAKEN_START,
SPECIAL, /* Sub-page allocation for page states. */
};
/* Smallest pool size for this scheme: 512-byte small pages. That's
* 4/8% overhead for 32/64 bit. */
#define MIN_USEFUL_SIZE (MAX_PAGES << (9 + BITS_FROM_SMALL_TO_LARGE_PAGE))
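A quick check of the arithmetic behind the commit message's 1MB floor (a worked example, not part of the patch; it assumes the MAX_PAGES=128 / BITS_FROM_SMALL_TO_LARGE_PAGE=4 branch above):

#include <assert.h>

/* Illustrative only: mirrors MIN_USEFUL_SIZE with the values chosen above. */
int main(void)
{
	unsigned long max_pages = 128;               /* MAX_PAGES */
	unsigned int sm_to_lg_bits = 4;              /* BITS_FROM_SMALL_TO_LARGE_PAGE */
	unsigned long small_page = 1UL << 9;         /* 512-byte small pages */

	unsigned long min_useful = max_pages << (9 + sm_to_lg_bits);

	assert(min_useful == 1024 * 1024);           /* 1MB, as the commit message says */
	assert(small_page << sm_to_lg_bits == 8192); /* so large pages are 8KB */
	return 0;
}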
/* The types for subpage metadata. */
enum sub_metadata_type
{
/* FREE is same as alloc state */
BITMAP = 1, /* bitmap allocated page */
UNIFORM, /* uniform size allocated page */
};
/* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
#define INTER_BUCKET_SPACE 4
/* Page states are represented by bitpairs, at the start of the pool. */
#define BITS_PER_PAGE 2
/* FIXME: Figure this out properly. */
#define MAX_SIZE (1 << 30)
/* How much metadata info per byte? */
#define METADATA_PER_BYTE (CHAR_BIT / 2)
/* How few objects fit in a page before we use a larger one? (8) */
#define MAX_PAGE_OBJECT_ORDER 3
static uint8_t *get_page_statebits(const void *pool)
{
return (uint8_t *)pool + sizeof(struct uniform_cache);
}
#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
static enum alloc_state get_page_state(const void *pool, unsigned long page)
{
return get_bit_pair(get_page_statebits(pool), page);
}
struct bucket_state {
unsigned long elements_per_page;
unsigned long page_list;
unsigned long full_list;
};
static void set_page_state(void *pool, unsigned long page, enum alloc_state s)
{
set_bit_pair(get_page_statebits(pool), page, s);
}
struct header {
/* 1024 bit bitmap of which pages are large. */
unsigned long pagesize[MAX_PAGES / BITS_PER_LONG];
/* The offset of metadata for a subpage allocation is found at the end
* of the subpage */
#define SUBPAGE_METAOFF (getpagesize() - sizeof(unsigned long))
/* List of unused small/large pages. */
unsigned long small_free_list;
unsigned long large_free_list;
/* This is the length of metadata in bits. It consists of two bits
* for every BITMAP_GRANULARITY of usable bytes in the page, then two
* bits for the tailer.. */
#define BITMAP_METABITLEN \
((div_up(SUBPAGE_METAOFF, BITMAP_GRANULARITY) + 1) * BITS_PER_PAGE)
/* This is less defined: we have two buckets for each power of 2 */
struct bucket_state bs[1];
};
/* This is the length in bytes. */
#define BITMAP_METALEN (div_up(BITMAP_METABITLEN, CHAR_BIT))
struct page_header {
unsigned long next, prev;
u32 elements_used;
/* FIXME: Pack this in somewhere... */
u8 bucket;
unsigned long used[1]; /* One bit per element. */
};
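As a reading aid (my sketch, not part of the patch): each bucket page appears to be laid out as the page_header above, then its used[] bitmap, then the fixed-size elements, which is what page_header_size() and elements_per_page() below size up.

/*
 * One bucket page (sketch only; exact sizes depend on alignment and ABI):
 *
 *  +--------------------+---------------------+--------+--------+-- --+----------+
 *  | struct page_header | used[] bitmap,      | elem 0 | elem 1 | ... | elem N-1 |
 *  | (next/prev/bucket) | one bit per element |        |        |     |          |
 *  +--------------------+---------------------+--------+--------+-- --+----------+
 *  <---- page_header_size(align_bits, N) -----><------ N * bucket_to_size() ----->
 */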
static struct metaheader *first_mheader(void *pool, unsigned long poolsize)
{
	unsigned int pagestatelen;

	pagestatelen = align_up(div_up(poolsize/getpagesize() * BITS_PER_PAGE,
				       CHAR_BIT),
				ALIGNOF(struct metaheader));
	return (struct metaheader *)(get_page_statebits(pool) + pagestatelen);
}

static struct metaheader *next_mheader(void *pool, struct metaheader *mh)
{
	if (!mh->next)
		return NULL;

	return (struct metaheader *)((char *)pool + mh->next);
}

static unsigned long pool_offset(void *pool, void *p)
{
	return (char *)p - (char *)pool;
}

void alloc_init(void *pool, unsigned long poolsize)
{
	/* FIXME: Alignment assumptions about pool. */
	unsigned long len, i;
	struct metaheader *mh;

	if (poolsize < MIN_SIZE)
		return;

	mh = first_mheader(pool, poolsize);

	/* Mark all page states FREE, all uniform caches zero, and all of
	 * metaheader bitmap which takes rest of first page. */
	len = align_up(pool_offset(pool, mh + 1), getpagesize());
	BUILD_ASSERT(FREE == 0);
	memset(pool, 0, len);

	/* Mark the pagestate and metadata page(s) allocated. */
	set_page_state(pool, 0, TAKEN_START);
	for (i = 1; i < div_up(len, getpagesize()); i++)
		set_page_state(pool, i, TAKEN);
}

/* 2 bits for every byte to allocate. */
static void tiny_alloc_init(void *pool, unsigned long poolsize)
{
	/* FIXME */
}

static void *tiny_alloc_get(void *pool, unsigned long poolsize,
			    unsigned long size, unsigned long align)
{
	/* FIXME */
	return NULL;
}

static void tiny_alloc_free(void *pool, unsigned long poolsize, void *free)
{
	/* FIXME */
}

static unsigned long tiny_alloc_size(void *pool, unsigned long poolsize,
				     void *p)
{
	/* FIXME */
	return 0;
}

static bool tiny_alloc_check(void *pool, unsigned long poolsize)
{
	/* FIXME */
	return true;
}
/* Two bits per element, representing page states. Returns 0 on fail.
* off is used to allocate from subpage bitmaps, which use the first 2
* bits as the type, so the real bitmap is offset by 1. */
static unsigned long alloc_from_bitmap(uint8_t *bits, unsigned long off,
unsigned long elems,
unsigned long want, unsigned long align)
static unsigned int fls(unsigned long val)
{
long i;
unsigned long free;
free = 0;
/* We allocate from far end, to increase ability to expand metadata. */
for (i = elems - 1; i >= 0; i--) {
switch (get_bit_pair(bits, off+i)) {
case FREE:
if (++free >= want) {
unsigned long j;
/* They might ask for large alignment. */
if (align && i % align)
continue;
#if HAVE_BUILTIN_CLZL
/* This is significantly faster! */
return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0;
#else
unsigned int r = 32;
set_bit_pair(bits, off+i, TAKEN_START);
for (j = i+1; j < i + want; j++)
set_bit_pair(bits, off+j, TAKEN);
return off+i;
if (!val)
return 0;
if (!(val & 0xffff0000u)) {
val <<= 16;
r -= 16;
}
break;
case SPECIAL:
case TAKEN_START:
case TAKEN:
free = 0;
break;
if (!(val & 0xff000000u)) {
val <<= 8;
r -= 8;
}
if (!(val & 0xf0000000u)) {
val <<= 4;
r -= 4;
}
if (!(val & 0xc0000000u)) {
val <<= 2;
r -= 2;
}
if (!(val & 0x80000000u)) {
val <<= 1;
r -= 1;
}
return r;
#endif
}
/* FIXME: Move to bitops. */
static unsigned int ffsl(unsigned long val)
{
#if HAVE_BUILTIN_FFSL
/* This is significantly faster! */
return __builtin_ffsl(val);
#else
unsigned int r = 1;
if (!val)
return 0;
if (sizeof(long) == sizeof(u64)) {
if (!(val & 0xffffffff)) {
/* Workaround gcc warning on 32-bit:
error: right shift count >= width of type */
u64 tmp = val;
tmp >>= 32;
val = tmp;
r += 32;
}
}
if (!(val & 0xffff)) {
val >>= 16;
r += 16;
}
if (!(val & 0xff)) {
val >>= 8;
r += 8;
}
if (!(val & 0xf)) {
val >>= 4;
r += 4;
}
if (!(val & 3)) {
val >>= 2;
r += 2;
}
if (!(val & 1)) {
val >>= 1;
r += 1;
}
return r;
#endif
}
static unsigned int popcount(unsigned long val)
{
#if HAVE_BUILTIN_POPCOUNTL
return __builtin_popcountl(val);
#else
	if (sizeof(long) == sizeof(u64)) {
		u64 v = val;
		v = (v & 0x5555555555555555ULL)
			+ ((v >> 1) & 0x5555555555555555ULL);
		v = (v & 0x3333333333333333ULL)
			+ ((v >> 2) & 0x3333333333333333ULL);
		v = (v & 0x0F0F0F0F0F0F0F0FULL)
			+ ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL);
		v = (v & 0x00FF00FF00FF00FFULL)
			+ ((v >> 8) & 0x00FF00FF00FF00FFULL);
		v = (v & 0x0000FFFF0000FFFFULL)
			+ ((v >> 16) & 0x0000FFFF0000FFFFULL);
		v = (v & 0x00000000FFFFFFFFULL)
			+ ((v >> 32) & 0x00000000FFFFFFFFULL);
		return v;
	}
	val = (val & 0x55555555ULL) + ((val >> 1) & 0x55555555ULL);
	val = (val & 0x33333333ULL) + ((val >> 2) & 0x33333333ULL);
	val = (val & 0x0F0F0F0FULL) + ((val >> 4) & 0x0F0F0F0FULL);
	val = (val & 0x00FF00FFULL) + ((val >> 8) & 0x00FF00FFULL);
	val = (val & 0x0000FFFFULL) + ((val >> 16) & 0x0000FFFFULL);
	return val;
#endif
}
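A few spot checks of the semantics these helpers implement (standalone sketch; it uses the GCC-style builtins that the HAVE_BUILTIN_* flags select, so the expected values hold independently of the fallback code above):

#include <assert.h>
#include <limits.h>

int main(void)
{
	/* fls: position of the highest set bit, 1-based; fls(0) == 0. */
	assert(sizeof(unsigned long) * CHAR_BIT - __builtin_clzl(5UL) == 3);    /* 0b101 */
	assert(sizeof(unsigned long) * CHAR_BIT - __builtin_clzl(4096UL) == 13);

	/* ffsl: position of the lowest set bit, 1-based; ffsl(0) == 0. */
	assert(__builtin_ffsl(0x8L) == 4);
	assert(__builtin_ffsl(0L) == 0);

	/* popcount: number of set bits. */
	assert(__builtin_popcountl(0xF0F0UL) == 8);
	return 0;
}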
/*
 * Every 4 buckets, the size doubles.
 * Between buckets, sizes increase linearly.
 *
 * eg. bucket 40 = 2^10              = 1024
 *     bucket 41 = 2^10 + 2^10/4     = 1024 + 256
 *     bucket 42 = 2^10 + 2*2^10/4   = 1024 + 512
 *     bucket 43 = 2^10 + 3*2^10/4   = 1024 + 768
 *     bucket 44 = 2^11              = 2048
 *
 * Care is taken to handle low numbered buckets, at cost of overflow.
 */
static unsigned long bucket_to_size(unsigned int bucket)
{
unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE);
return base + ((bucket % INTER_BUCKET_SPACE)
<< (bucket / INTER_BUCKET_SPACE))
/ INTER_BUCKET_SPACE;
}
static unsigned long alloc_get_pages(void *pool, unsigned long poolsize,
unsigned long pages, unsigned long align)
/*
* Say size is 10.
* fls(size/2) == 3. 1 << 3 == 8, so we're 2 too large, out of a possible
* 8 too large. That's 1/4 of the way to the next power of 2 == 1 bucket.
*
* We make sure we round up. Note that this fails on 32 bit at size
* 1879048193 (around bucket 120).
*/
static unsigned int size_to_bucket(unsigned long size)
{
return alloc_from_bitmap(get_page_statebits(pool),
0, poolsize / getpagesize(), pages,
align / getpagesize());
unsigned int base = fls(size/2);
unsigned long overshoot;
overshoot = size - (1 << base);
return base * INTER_BUCKET_SPACE
+ ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base);
}
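A worked example of the two mappings (an illustrative sketch; the helpers are re-stated locally, with a hypothetical fls_ built on the compiler builtin, so the snippet stands alone):

#include <assert.h>
#include <limits.h>

#define INTER_BUCKET_SPACE 4

static unsigned int fls_(unsigned long v)
{
	return v ? sizeof(long) * CHAR_BIT - __builtin_clzl(v) : 0;
}

static unsigned long bucket_to_size(unsigned int bucket)
{
	unsigned long base = 1UL << (bucket / INTER_BUCKET_SPACE);
	return base + ((bucket % INTER_BUCKET_SPACE)
		       << (bucket / INTER_BUCKET_SPACE)) / INTER_BUCKET_SPACE;
}

static unsigned int size_to_bucket(unsigned long size)
{
	unsigned int base = fls_(size / 2);
	unsigned long overshoot = size - (1UL << base);
	return base * INTER_BUCKET_SPACE
		+ ((overshoot * INTER_BUCKET_SPACE + (1UL << base) - 1) >> base);
}

int main(void)
{
	/* The comment's example sizes: buckets 40..44 are 1024,1280,1536,1792,2048. */
	assert(bucket_to_size(40) == 1024 && bucket_to_size(41) == 1280);
	assert(bucket_to_size(43) == 1792 && bucket_to_size(44) == 2048);

	/* A 112-byte request (e.g. 100 bytes aligned up to 16) lands in bucket 27,
	 * whose size is exactly 112, so nothing is wasted. */
	assert(size_to_bucket(112) == 27 && bucket_to_size(27) == 112);

	/* Rounding up: 113 bytes must go to the next bucket. */
	assert(bucket_to_size(size_to_bucket(113)) >= 113);
	return 0;
}

The last assert is the rounding-up property the comment above describes: any size that does not hit a bucket boundary exactly is pushed into the next bucket.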
/* Offset to metadata is at end of page. */
static unsigned long *metadata_off(void *pool, unsigned long page)
static unsigned int large_page_bits(unsigned long poolsize)
{
return (unsigned long *)
((char *)pool + (page+1)*getpagesize() - sizeof(unsigned long));
return fls(poolsize / MAX_PAGES / 2);
}
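For example (a worked sketch, not part of the patch), a 4MB pool under the MAX_PAGES=128 build gets 32KB large pages and 2KB small pages:

#include <assert.h>
#include <limits.h>

int main(void)
{
	unsigned long poolsize = 4UL << 20;                 /* 4MB pool */
	/* fls(poolsize / MAX_PAGES / 2) == fls(16384) == 15. */
	unsigned int lp_bits = sizeof(long) * CHAR_BIT
		- __builtin_clzl(poolsize / 128 / 2);
	unsigned int sp_bits = lp_bits - 4;                 /* BITS_FROM_SMALL_TO_LARGE_PAGE */

	assert(lp_bits == 15 && (1UL << lp_bits) == 32768); /* large pages: 32KB */
	assert(sp_bits == 11 && (1UL << sp_bits) == 2048);  /* small pages: 2KB */
	assert(poolsize / (1UL << lp_bits) == 128);         /* exactly MAX_PAGES of them */
	return 0;
}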
static uint8_t *get_page_metadata(void *pool, unsigned long page)
static unsigned long align_up(unsigned long x, unsigned long align)
{
return (uint8_t *)pool + *metadata_off(pool, page);
return (x + align - 1) & ~(align - 1);
}
static void set_page_metadata(void *pool, unsigned long page, uint8_t *meta)
static void *from_off(struct header *head, unsigned long off)
{
*metadata_off(pool, page) = meta - (uint8_t *)pool;
return (char *)head + off;
}
static unsigned long sub_page_alloc(void *pool, unsigned long page,
unsigned long size, unsigned long align)
static unsigned long to_off(struct header *head, void *p)
{
uint8_t *bits = get_page_metadata(pool, page);
unsigned long i;
enum sub_metadata_type type;
type = get_bit_pair(bits, 0);
/* If this is a uniform page, we can't allocate from it. */
if (type == UNIFORM)
return 0;
assert(type == BITMAP);
/* We use a standard bitmap, but offset because of that BITMAP
* header. */
i = alloc_from_bitmap(bits, 1, SUBPAGE_METAOFF/BITMAP_GRANULARITY,
div_up(size, BITMAP_GRANULARITY),
align / BITMAP_GRANULARITY);
/* Can't allocate? */
if (i == 0)
return 0;
/* i-1 because of the header. */
return page*getpagesize() + (i-1)*BITMAP_GRANULARITY;
return (char *)p - (char *)head;
}
/* We look at the page states to figure out where the allocation for this
* metadata ends. */
static unsigned long get_metalen(void *pool, unsigned long poolsize,
struct metaheader *mh)
static size_t used_size(unsigned int num_elements)
{
unsigned long i, first, pages = poolsize / getpagesize();
first = pool_offset(pool, mh + 1)/getpagesize();
for (i = first + 1; i < pages && get_page_state(pool,i) == TAKEN; i++);
return i * getpagesize() - pool_offset(pool, mh + 1);
return (num_elements + BITS_PER_LONG-1) / BITS_PER_LONG;
}
static unsigned int uniform_metalen(unsigned int usize)
/*
* We always align the first entry to the lower power of 2.
* eg. the 12-byte bucket gets 8-byte aligned. The 4096-byte bucket
* gets 4096-byte aligned.
*/
static unsigned long page_header_size(unsigned int align_bits,
unsigned long num_elements)
{
unsigned int metalen;
assert(usize < (1 << 14));
/* Two bits for the header, 14 bits for size, then one bit for each
* element the page can hold. Round up to number of bytes. */
metalen = div_up(2 + 14 + SUBPAGE_METAOFF / usize, CHAR_BIT);
unsigned long size;
/* To ensure metaheader is always aligned, round bytes up. */
metalen = align_up(metalen, ALIGNOF(struct metaheader));
return metalen;
size = sizeof(struct page_header)
- sizeof(((struct page_header *)0)->used)
+ used_size(num_elements);
return align_up(size, 1 << align_bits);
}
static unsigned int decode_usize(uint8_t *meta)
static void add_to_list(struct header *head,
unsigned long *list, struct page_header *ph)
{
return ((unsigned)meta[1] << (CHAR_BIT-2)) | (meta[0] >> 2);
}
unsigned long h = *list, offset = to_off(head, ph);
static void encode_usize(uint8_t *meta, unsigned int usize)
{
meta[0] = (UNIFORM | (usize << 2));
meta[1] = (usize >> (CHAR_BIT - 2));
ph->next = h;
if (h) {
struct page_header *prev = from_off(head, h);
assert(prev->prev == 0);
prev->prev = offset;
}
*list = offset;
ph->prev = 0;
}
static uint8_t *alloc_metaspace(void *pool, unsigned long poolsize,
struct metaheader *mh, unsigned long bytes,
enum sub_metadata_type type)
static void del_from_list(struct header *head,
unsigned long *list, struct page_header *ph)
{
uint8_t *meta = (uint8_t *)(mh + 1);
unsigned long free = 0, len, i, metalen;
metalen = get_metalen(pool, poolsize, mh);
/* Walk through metadata looking for free. */
for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) {
switch (get_bit_pair(meta, i)) {
case FREE:
len = 1;
free++;
if (free == bytes * METADATA_PER_BYTE) {
/* Mark this as a bitmap. */
set_bit_pair(meta, i - free + 1, type);
return meta + (i - free + 1)/METADATA_PER_BYTE;
}
break;
case BITMAP:
/* Skip over this allocated part. */
len = BITMAP_METALEN * METADATA_PER_BYTE;
free = 0;
break;
case UNIFORM:
/* Figure metalen given usize. */
len = decode_usize(meta + i / METADATA_PER_BYTE);
len = uniform_metalen(len) * METADATA_PER_BYTE;
free = 0;
break;
default:
assert(0);
return NULL;
/* Front of list? */
if (ph->prev == 0) {
*list = ph->next;
} else {
struct page_header *prev = from_off(head, ph->prev);
prev->next = ph->next;
}
if (ph->next != 0) {
struct page_header *next = from_off(head, ph->next);
next->prev = ph->prev;
}
return NULL;
}
/* We need this many bytes of metadata. */
static uint8_t *new_metadata(void *pool, unsigned long poolsize,
unsigned long bytes, enum sub_metadata_type type)
static unsigned long pop_from_list(struct header *head,
unsigned long *list)
{
struct metaheader *mh, *newmh;
unsigned long page;
uint8_t *meta;
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh))
if ((meta = alloc_metaspace(pool, poolsize, mh, bytes, type)))
return meta;
/* No room for metadata? Can we expand an existing one? */
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
unsigned long nextpage;
unsigned long h = *list;
struct page_header *ph = from_off(head, h);
/* We start on this page. */
nextpage = pool_offset(pool, (char *)(mh+1))/getpagesize();
/* Iterate through any other pages we own. */
while (get_page_state(pool, ++nextpage) == TAKEN);
/* Now, can we grab that page? */
if (get_page_state(pool, nextpage) != FREE)
continue;
/* OK, expand metadata, do it again. */
set_page_state(pool, nextpage, TAKEN);
BUILD_ASSERT(FREE == 0);
memset((char *)pool + nextpage*getpagesize(), 0, getpagesize());
return alloc_metaspace(pool, poolsize, mh, bytes, type);
if (likely(h)) {
*list = ph->next;
if (*list) {
struct page_header *next = from_off(head, *list);
next->prev = 0;
}
}
return h;
}
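Note the lists are doubly linked by byte offsets from the pool header rather than by raw pointers, so the pool stays valid if it is mapped at a different address later. A minimal standalone sketch of the same idea (hypothetical push() helper, not the patch's API):

#include <assert.h>

struct node { unsigned long next, prev; };       /* offsets; 0 terminates the list */

static void *from_off(void *base, unsigned long off) { return (char *)base + off; }
static unsigned long to_off(void *base, void *p)     { return (char *)p - (char *)base; }

static void push(void *base, unsigned long *list, struct node *n)
{
	n->next = *list;
	n->prev = 0;
	if (*list)
		((struct node *)from_off(base, *list))->prev = to_off(base, n);
	*list = to_off(base, n);
}

int main(void)
{
	unsigned long pool[512] = { 0 };             /* stand-in for the mapped pool */
	unsigned long list = 0;                      /* empty list, like small_free_list */
	struct node *a = (struct node *)((char *)pool + 512);
	struct node *b = (struct node *)((char *)pool + 1024);

	push(pool, &list, a);
	push(pool, &list, b);
	/* Head is b (offset 1024); its next is a (offset 512); a points back at b. */
	assert(list == 1024);
	assert(((struct node *)from_off(pool, list))->next == 512);
	assert(a->prev == 1024);
	return 0;
}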
/* No metadata left at all? */
page = alloc_get_pages(pool, poolsize, div_up(bytes, getpagesize()), 1);
if (!page)
return NULL;
static void add_small_page_to_freelist(struct header *head,
struct page_header *ph)
{
add_to_list(head, &head->small_free_list, ph);
}
newmh = (struct metaheader *)((char *)pool + page * getpagesize());
BUILD_ASSERT(FREE == 0);
memset(newmh + 1, 0, getpagesize() - sizeof(*mh));
static void add_large_page_to_freelist(struct header *head,
struct page_header *ph)
{
add_to_list(head, &head->large_free_list, ph);
}
/* Sew it into linked list */
mh = first_mheader(pool,poolsize);
newmh->next = mh->next;
mh->next = pool_offset(pool, newmh);
static void add_to_bucket_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{
add_to_list(head, &bs->page_list, ph);
}
return alloc_metaspace(pool, poolsize, newmh, bytes, type);
static void del_from_bucket_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{
del_from_list(head, &bs->page_list, ph);
}
static void alloc_free_pages(void *pool, unsigned long pagenum)
static void del_from_bucket_full_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{
assert(get_page_state(pool, pagenum) == TAKEN_START);
set_page_state(pool, pagenum, FREE);
while (get_page_state(pool, ++pagenum) == TAKEN)
set_page_state(pool, pagenum, FREE);
del_from_list(head, &bs->full_list, ph);
}
static void maybe_transform_uniform_page(void *pool, unsigned long offset)
static void add_to_bucket_full_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{
/* FIXME: If possible and page isn't full, change to a bitmap */
add_to_list(head, &bs->full_list, ph);
}
/* Returns 0 or the size of the uniform alloc to use */
static unsigned long suitable_for_uc(unsigned long size, unsigned long align)
static void clear_bit(unsigned long bitmap[], unsigned int off)
{
unsigned long num_elems, wastage, usize;
unsigned long bitmap_cost;
bitmap[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG));
}
if (size == 0)
size = 1;
static bool test_bit(const unsigned long bitmap[], unsigned int off)
{
return bitmap[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG));
}
/* Fix up silly alignments. */
usize = align_up(size, align);
static void set_bit(unsigned long bitmap[], unsigned int off)
{
bitmap[off / BITS_PER_LONG] |= (1UL << (off % BITS_PER_LONG));
}
/* How many can fit in this page? */
num_elems = SUBPAGE_METAOFF / usize;
/* There must be a bit to be found. */
static unsigned int find_free_bit(const unsigned long bitmap[])
{
unsigned int i;
/* Can happen with bigger alignments. */
if (!num_elems)
return 0;
for (i = 0; bitmap[i] == -1UL; i++);
return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1;
}
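find_free_bit() relies on the ffsl(~word) trick: inverting the word turns the first clear bit into the first set bit. A small standalone check (using the builtin that HAVE_BUILTIN_FFSL selects):

#include <assert.h>

int main(void)
{
	unsigned long used = 0x0000001FUL;   /* elements 0..4 already taken */

	/* First clear bit of 'used' == first set bit of '~used', 1-based, minus 1. */
	assert(__builtin_ffsl(~used) - 1 == 5);

	used |= 1UL << 5;                    /* mark it allocated, as set_bit() would */
	assert(__builtin_ffsl(~used) - 1 == 6);
	return 0;
}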
/* Usize maxes out at 14 bits. */
if (usize >= (1 << 14))
return 0;
/* How many elements can we fit in a page? */
static unsigned long elements_per_page(unsigned long align_bits,
unsigned long esize,
unsigned long psize)
{
unsigned long num, overhead;
/* How many bytes would be left at the end? */
wastage = SUBPAGE_METAOFF % usize;
/* First approximation: no extra room for bitmap. */
overhead = align_up(sizeof(struct page_header), 1 << align_bits);
num = (psize - overhead) / esize;
/* If we can get a larger allocation within alignment constraints, we
* should do it, otherwise might as well leave wastage at the end. */
usize += align_down(wastage / num_elems, align);
while (page_header_size(align_bits, num) + esize * num > psize)
num--;
return num;
}
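A standalone sketch of the same "guess high, then shrink until it fits" approach (the header and bitmap sizes here are illustrative assumptions, not the patch's exact struct layout):

#include <assert.h>

static unsigned long header_bytes(unsigned long nelems)
{
	unsigned long fixed = 24;                        /* say, next/prev/bucket fields */
	unsigned long bitmap = (nelems + 63) / 64 * 8;   /* one bit per element, in longs */
	return (fixed + bitmap + 7) & ~7UL;              /* keep elements 8-byte aligned */
}

int main(void)
{
	unsigned long psize = 512, esize = 16;
	unsigned long num = (psize - header_bytes(1)) / esize;     /* first approximation */

	while (header_bytes(num) + esize * num > psize)
		num--;

	assert(header_bytes(num) + esize * num <= psize);          /* fits... */
	assert(header_bytes(num + 1) + esize * (num + 1) > psize); /* ...and is maximal */
	return 0;
}

The loop rarely runs more than once, since the only thing the first guess ignores is the growth of the per-element bitmap.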
/* Bitmap allocation costs 2 bits per BITMAP_GRANULARITY bytes, plus
* however much we waste in rounding up to BITMAP_GRANULARITY. */
bitmap_cost = 2 * div_up(size, BITMAP_GRANULARITY)
+ CHAR_BIT * (align_up(size, BITMAP_GRANULARITY) - size);
static bool large_page_bucket(unsigned int bucket, unsigned long poolsize)
{
unsigned int sp_bits;
unsigned long max_smallsize;
/* Our cost is 1 bit, plus usize overhead */
if (bitmap_cost < 1 + (usize - size) * CHAR_BIT)
return 0;
sp_bits = large_page_bits(poolsize) - BITS_FROM_SMALL_TO_LARGE_PAGE;
/* Note: this doesn't take into account page header. */
max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;
return usize;
return bucket_to_size(bucket) > max_smallsize;
}
static unsigned long uniform_alloc(void *pool, unsigned long poolsize,
struct uniform_cache *uc,
unsigned long ucnum)
static unsigned int max_bucket(unsigned int lp_bits)
{
uint8_t *metadata = get_page_metadata(pool, uc->page[ucnum]) + 2;
unsigned long i, max;
/* Simple one-bit-per-object bitmap. */
max = SUBPAGE_METAOFF / uc->size[ucnum];
for (i = 0; i < max; i++) {
if (!(metadata[i / CHAR_BIT] & (1 << (i % CHAR_BIT)))) {
metadata[i / CHAR_BIT] |= (1 << (i % CHAR_BIT));
return uc->page[ucnum] * getpagesize()
+ i * uc->size[ucnum];
}
}
return 0;
return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
}
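This cap is where the commit message's "allocations are restricted to 1/1024 of the total pool size" comes from: nothing larger than bucket max_bucket()-1 is served, and that bucket is a bit under one eighth of a large page, i.e. just under poolsize/1024 for a power-of-two pool. A standalone check for a 1MB pool (a worked sketch, not part of the patch):

#include <assert.h>
#include <limits.h>

int main(void)
{
	unsigned long poolsize = 1UL << 20;                      /* 1MB */
	/* large_page_bits(): fls(poolsize / MAX_PAGES / 2) with MAX_PAGES == 128. */
	unsigned int lp_bits = sizeof(long) * CHAR_BIT
		- __builtin_clzl(poolsize / 128 / 2);            /* == 13 */
	/* max_bucket(): (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE. */
	unsigned int nbuckets = (lp_bits - 3) * 4;               /* == 40 */
	/* bucket_to_size(39): base 512 plus 3/4 of 512. */
	unsigned long largest = (1UL << 9) + 3 * (1UL << 9) / 4; /* == 896 */

	assert(lp_bits == 13 && nbuckets == 40);
	/* Bucket 40 would be exactly 1024 == poolsize/1024 and is refused. */
	assert(largest < poolsize / 1024);
	return 0;
}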
static unsigned long new_uniform_page(void *pool, unsigned long poolsize,
unsigned long usize)
void alloc_init(void *pool, unsigned long poolsize)
{
unsigned long page, metalen;
uint8_t *metadata;
/* FIXME: Walk metadata looking for an existing uniform page. */
page = alloc_get_pages(pool, poolsize, 1, 1);
if (page == 0)
return 0;
metalen = uniform_metalen(usize);
struct header *head = pool;
struct page_header *ph;
unsigned int lp_bits, sp_bits, num_buckets;
unsigned long header_size, i;
/* Get metadata for page. */
metadata = new_metadata(pool, poolsize, metalen, UNIFORM);
if (!metadata) {
alloc_free_pages(pool, page);
return 0;
if (poolsize < MIN_USEFUL_SIZE) {
tiny_alloc_init(pool, poolsize);
return;
}
encode_usize(metadata, usize);
lp_bits = large_page_bits(poolsize);
sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
BUILD_ASSERT(FREE == 0);
memset(metadata + 2, 0, metalen - 2);
num_buckets = max_bucket(lp_bits);
/* Actually, this is a subpage page now. */
set_page_state(pool, page, SPECIAL);
head = pool;
header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
/* Set metadata pointer for page. */
set_page_metadata(pool, page, metadata);
memset(head, 0, header_size);
for (i = 0; i < num_buckets; i++) {
unsigned long pagesize;
return page;
}
if (large_page_bucket(i, poolsize))
pagesize = 1UL << lp_bits;
else
pagesize = 1UL << sp_bits;
static unsigned long alloc_sub_page(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align)
{
unsigned long i, usize;
uint8_t *metadata;
struct uniform_cache *uc = pool;
usize = suitable_for_uc(size, align);
if (usize) {
static int random_entry;
/* Look for a uniform page. */
for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
if (uc->size[i] == usize) {
unsigned long ret;
ret = uniform_alloc(pool, poolsize, uc, i);
if (ret != 0)
return ret;
/* OK, that one is full, remove from cache. */
uc->size[i] = 0;
break;
}
head->bs[i].elements_per_page
= elements_per_page(i / INTER_BUCKET_SPACE,
bucket_to_size(i),
pagesize);
}
/* OK, try a new uniform page. Use random discard for now. */
i = (++random_entry % UNIFORM_CACHE_NUM);
maybe_transform_uniform_page(pool, uc->page[i]);
/* They start as all large pages. */
memset(head->pagesize, 0xFF, sizeof(head->pagesize));
/* FIXME: small pages for last bit? */
uc->page[i] = new_uniform_page(pool, poolsize, usize);
if (uc->page[i]) {
uc->size[i] = usize;
return uniform_alloc(pool, poolsize, uc, i);
}
uc->size[i] = 0;
}
/* Split first page into small pages. */
assert(header_size < (1UL << lp_bits));
clear_bit(head->pagesize, 0);
/* Look for partial page. */
for (i = 0; i < poolsize / getpagesize(); i++) {
unsigned long ret;
if (get_page_state(pool, i) != SPECIAL)
continue;
ret = sub_page_alloc(pool, i, size, align);
if (ret)
return ret;
/* Skip over page(s) used by header, add rest to free list */
for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits;
i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE);
i++) {
ph = from_off(head, i<<sp_bits);
ph->elements_used = 0;
add_small_page_to_freelist(head, ph);
}
/* Create new SUBPAGE page. */
i = alloc_get_pages(pool, poolsize, 1, 1);
if (i == 0)
return 0;
/* Get metadata for page. */
metadata = new_metadata(pool, poolsize, BITMAP_METALEN, BITMAP);
if (!metadata) {
alloc_free_pages(pool, i);
return 0;
/* Add the rest of the pages as large pages. */
i = (1 << lp_bits);
while (i + (1 << lp_bits) <= poolsize) {
ph = from_off(head, i);
ph->elements_used = 0;
add_large_page_to_freelist(head, ph);
i += (1 << lp_bits);
}
/* Actually, this is a subpage page now. */
set_page_state(pool, i, SPECIAL);
/* Set metadata pointer for page. */
set_page_metadata(pool, i, metadata);
/* Do allocation like normal */
return sub_page_alloc(pool, i, size, align);
}
static bool bitmap_page_is_empty(uint8_t *meta)
/* A large page worth of small pages are free: delete them from free list. */
static void del_large_from_small_free_list(struct header *head,
struct page_header *ph,
unsigned int sp_bits)
{
unsigned int i;
/* Skip the header (first bit of metadata). */
for (i = 1; i < SUBPAGE_METAOFF/BITMAP_GRANULARITY+1; i++)
if (get_bit_pair(meta, i) != FREE)
return false;
unsigned long i;
return true;
for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) {
del_from_list(head, &head->small_free_list,
(void *)ph + (i << sp_bits));
}
}
static bool uniform_page_is_empty(uint8_t *meta)
static bool all_empty(struct header *head, unsigned long off, unsigned sp_bits)
{
unsigned int i, metalen;
metalen = uniform_metalen(decode_usize(meta));
unsigned long i;
/* Skip the header (first two bytes of metadata). */
for (i = 2; i < metalen; i++) {
BUILD_ASSERT(FREE == 0);
if (meta[i])
for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) {
struct page_header *ph = from_off(head, off + (i << sp_bits));
if (ph->elements_used)
return false;
}
return true;
}
static bool special_page_is_empty(void *pool, unsigned long page)
static unsigned long get_large_page(struct header *head,
unsigned long poolsize)
{
uint8_t *meta;
enum sub_metadata_type type;
meta = get_page_metadata(pool, page);
type = get_bit_pair(meta, 0);
switch (type) {
case UNIFORM:
return uniform_page_is_empty(meta);
case BITMAP:
return bitmap_page_is_empty(meta);
default:
assert(0);
}
}
unsigned long lp_bits, sp_bits, i, page;
static void clear_special_metadata(void *pool, unsigned long page)
{
uint8_t *meta;
enum sub_metadata_type type;
meta = get_page_metadata(pool, page);
type = get_bit_pair(meta, 0);
switch (type) {
case UNIFORM:
/* First two bytes are the header, rest is already FREE */
BUILD_ASSERT(FREE == 0);
memset(meta, 0, 2);
break;
case BITMAP:
/* First two bits is the header. */
BUILD_ASSERT(BITMAP_METALEN > 1);
meta[0] = 0;
break;
default:
assert(0);
}
}
page = pop_from_list(head, &head->large_free_list);
if (likely(page))
return page;
/* Returns true if we cleaned any pages. */
static bool clean_empty_subpages(void *pool, unsigned long poolsize)
{
unsigned long i;
bool progress = false;
/* Look for small pages to coalesce, after first large page. */
lp_bits = large_page_bits(poolsize);
sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
for (i = 0; i < poolsize/getpagesize(); i++) {
if (get_page_state(pool, i) != SPECIAL)
for (i = (1 << lp_bits); i < poolsize; i += (1 << lp_bits)) {
/* Already a large page? */
if (test_bit(head->pagesize, i >> lp_bits))
continue;
if (special_page_is_empty(pool, i)) {
clear_special_metadata(pool, i);
set_page_state(pool, i, FREE);
progress = true;
if (all_empty(head, i, sp_bits)) {
struct page_header *ph = from_off(head, i);
set_bit(head->pagesize, i >> lp_bits);
del_large_from_small_free_list(head, ph, sp_bits);
add_large_page_to_freelist(head, ph);
}
}
return progress;
return pop_from_list(head, &head->large_free_list);
}
/* Returns true if we cleaned any pages. */
static bool clean_metadata(void *pool, unsigned long poolsize)
/* Returns small page. */
static unsigned long break_up_large_page(struct header *head,
unsigned long psize,
unsigned long lpage)
{
struct metaheader *mh, *prev_mh = NULL;
bool progress = false;
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
uint8_t *meta;
long i;
unsigned long metalen = get_metalen(pool, poolsize, mh);
meta = (uint8_t *)(mh + 1);
BUILD_ASSERT(FREE == 0);
for (i = metalen - 1; i > 0; i--)
if (meta[i] != 0)
break;
/* Completely empty? */
if (prev_mh && i == metalen) {
alloc_free_pages(pool,
pool_offset(pool, mh)/getpagesize());
prev_mh->next = mh->next;
mh = prev_mh;
progress = true;
} else {
uint8_t *p;
/* Some pages at end are free? */
for (p = (uint8_t *)(mh+1) + metalen - getpagesize();
p > meta + i;
p -= getpagesize()) {
set_page_state(pool,
pool_offset(pool, p)
/ getpagesize(),
FREE);
progress = true;
}
}
}
unsigned long lp_bits, sp_bits, i;
lp_bits = large_page_bits(psize);
sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
clear_bit(head->pagesize, lpage >> lp_bits);
return progress;
for (i = 1; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++)
add_small_page_to_freelist(head,
from_off(head,
lpage + (i<<sp_bits)));
return lpage;
}
void *alloc_get(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align)
static unsigned long get_small_page(struct header *head,
unsigned long poolsize)
{
bool subpage_clean = false, metadata_clean = false;
unsigned long ret;
if (poolsize < MIN_SIZE)
return NULL;
again:
/* Sub-page allocations have an overhead of ~12%. */
if (size + size/8 >= getpagesize() || align >= getpagesize()) {
unsigned long pages = div_up(size, getpagesize());
ret = alloc_get_pages(pool, poolsize, pages, align)
* getpagesize();
} else
ret = alloc_sub_page(pool, poolsize, size, align);
ret = pop_from_list(head, &head->small_free_list);
if (likely(ret))
return ret;
ret = get_large_page(head, poolsize);
if (likely(ret))
ret = break_up_large_page(head, poolsize, ret);
return ret;
}
if (ret != 0)
return (char *)pool + ret;
void *alloc_get(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align)
{
struct header *head = pool;
unsigned int bucket;
unsigned long i;
struct bucket_state *bs;
struct page_header *ph;
/* Allocation failed: garbage collection. */
if (!subpage_clean) {
subpage_clean = true;
if (clean_empty_subpages(pool, poolsize))
goto again;
if (poolsize < MIN_USEFUL_SIZE) {
return tiny_alloc_get(pool, poolsize, size, align);
}
if (!metadata_clean) {
metadata_clean = true;
if (clean_metadata(pool, poolsize))
goto again;
}
size = align_up(size, align);
if (unlikely(!size))
size = 1;
bucket = size_to_bucket(size);
/* FIXME: Compact metadata? */
if (bucket >= max_bucket(large_page_bits(poolsize))) {
/* FIXME: huge alloc. */
return NULL;
}
static void bitmap_free(void *pool, unsigned long pagenum, unsigned long off,
uint8_t *metadata)
{
assert(off % BITMAP_GRANULARITY == 0);
off /= BITMAP_GRANULARITY;
/* Offset by one because first bit is used for header. */
off++;
}
set_bit_pair(metadata, off++, FREE);
while (off <= SUBPAGE_METAOFF / BITMAP_GRANULARITY
&& get_bit_pair(metadata, off) == TAKEN)
set_bit_pair(metadata, off++, FREE);
}
bs = &head->bs[bucket];
static void uniform_free(void *pool, unsigned long pagenum, unsigned long off,
uint8_t *metadata)
{
unsigned int usize, bit;
if (!bs->page_list) {
struct page_header *ph;
usize = decode_usize(metadata);
/* Must have been this size. */
assert(off % usize == 0);
bit = off / usize;
if (large_page_bucket(bucket, poolsize))
bs->page_list = get_large_page(head, poolsize);
else
bs->page_list = get_small_page(head, poolsize);
/* FIXME: Try large-aligned alloc? Header stuffing? */
if (unlikely(!bs->page_list))
return NULL;
ph = from_off(head, bs->page_list);
ph->bucket = bucket;
ph->elements_used = 0;
ph->next = 0;
memset(ph->used, 0, used_size(bs->elements_per_page));
}
/* Skip header. */
metadata += 2;
ph = from_off(head, bs->page_list);
/* Must have been allocated. */
assert(metadata[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)));
metadata[bit / CHAR_BIT] &= ~(1 << (bit % CHAR_BIT));
}
i = find_free_bit(ph->used);
set_bit(ph->used, i);
ph->elements_used++;
static void subpage_free(void *pool, unsigned long pagenum, void *free)
{
unsigned long off = (unsigned long)free % getpagesize();
uint8_t *metadata = get_page_metadata(pool, pagenum);
enum sub_metadata_type type;
type = get_bit_pair(metadata, 0);
assert(off < SUBPAGE_METAOFF);
switch (type) {
case BITMAP:
bitmap_free(pool, pagenum, off, metadata);
break;
case UNIFORM:
uniform_free(pool, pagenum, off, metadata);
break;
default:
assert(0);
/* check if this page is now full */
if (unlikely(ph->elements_used == bs->elements_per_page)) {
del_from_bucket_list(head, bs, ph);
add_to_bucket_full_list(head, bs, ph);
}
return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
bs->elements_per_page)
+ i * bucket_to_size(bucket);
}
void alloc_free(void *pool, unsigned long poolsize, void *free)
{
unsigned long pagenum;
struct metaheader *mh;
struct header *head = pool;
struct bucket_state *bs;
unsigned int pagebits;
unsigned long i, pgoffset, offset = (char *)free - (char *)pool;
bool smallpage;
struct page_header *ph;
if (!free)
return;
if (poolsize < MIN_USEFUL_SIZE) {
return tiny_alloc_free(pool, poolsize, free);
}
assert(poolsize >= MIN_SIZE);
/* Get page header. */
pagebits = large_page_bits(poolsize);
if (!test_bit(head->pagesize, offset >> pagebits)) {
smallpage = true;
pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE;
} else
smallpage = false;
mh = first_mheader(pool, poolsize);
assert((char *)free >= (char *)(mh + 1));
assert((char *)pool + poolsize > (char *)free);
/* Step back to page header. */
ph = from_off(head, offset & ~((1UL << pagebits) - 1));
bs = &head->bs[ph->bucket];
pgoffset = (offset & ((1UL << pagebits) - 1))
- page_header_size(ph->bucket / INTER_BUCKET_SPACE,
bs->elements_per_page);
pagenum = pool_offset(pool, free) / getpagesize();
if (unlikely(ph->elements_used == bs->elements_per_page)) {
del_from_bucket_full_list(head, bs, ph);
add_to_bucket_list(head, bs, ph);
}
/* Which element are we? */
i = pgoffset / bucket_to_size(ph->bucket);
clear_bit(ph->used, i);
ph->elements_used--;
if (get_page_state(pool, pagenum) == SPECIAL)
subpage_free(pool, pagenum, free);
else {
assert((unsigned long)free % getpagesize() == 0);
alloc_free_pages(pool, pagenum);
if (unlikely(ph->elements_used == 0)) {
bs = &head->bs[ph->bucket];
del_from_bucket_list(head, bs, ph);
if (smallpage)
add_small_page_to_freelist(head, ph);
else
add_large_page_to_freelist(head, ph);
}
}
unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
{
unsigned long len, pagenum;
struct metaheader *mh;
assert(poolsize >= MIN_SIZE);
mh = first_mheader(pool, poolsize);
assert((char *)p >= (char *)(mh + 1));
assert((char *)pool + poolsize > (char *)p);
pagenum = pool_offset(pool, p) / getpagesize();
if (get_page_state(pool, pagenum) == SPECIAL) {
unsigned long off = (unsigned long)p % getpagesize();
uint8_t *metadata = get_page_metadata(pool, pagenum);
enum sub_metadata_type type = get_bit_pair(metadata, 0);
assert(off < SUBPAGE_METAOFF);
switch (type) {
case BITMAP:
assert(off % BITMAP_GRANULARITY == 0);
off /= BITMAP_GRANULARITY;
/* Offset by one because first bit used for header. */
off++;
len = BITMAP_GRANULARITY;
while (++off < SUBPAGE_METAOFF / BITMAP_GRANULARITY
&& get_bit_pair(metadata, off) == TAKEN)
len += BITMAP_GRANULARITY;
break;
case UNIFORM:
len = decode_usize(metadata);
break;
default:
assert(0);
}
} else {
len = getpagesize();
while (get_page_state(pool, ++pagenum) == TAKEN)
len += getpagesize();
}
return len;
}
struct header *head = pool;
unsigned int pagebits;
unsigned long offset = (char *)p - (char *)pool;
struct page_header *ph;
static bool is_metadata_page(void *pool, unsigned long poolsize,
unsigned long page)
{
struct metaheader *mh;
if (poolsize < MIN_USEFUL_SIZE)
return tiny_alloc_size(pool, poolsize, p);
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
unsigned long start, end;
/* Get page header. */
pagebits = large_page_bits(poolsize);
if (!test_bit(head->pagesize, offset >> pagebits))
pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE;
start = pool_offset(pool, mh);
end = pool_offset(pool, (char *)(mh+1)
+ get_metalen(pool, poolsize, mh));
if (page >= start/getpagesize() && page < end/getpagesize())
return true;
}
return false;
/* Step back to page header. */
ph = from_off(head, offset & ~((1UL << pagebits) - 1));
return bucket_to_size(ph->bucket);
}
/* Useful for gdb breakpoints. */
......@@ -894,293 +731,215 @@ static bool check_fail(void)
return false;
}
static bool check_bitmap_metadata(void *pool, unsigned long *mhoff)
static unsigned long count_bits(const unsigned long bitmap[],
unsigned long limit)
{
enum alloc_state last_state = FREE;
unsigned int i;
unsigned long i, count = 0;
for (i = 0; i < SUBPAGE_METAOFF / BITMAP_GRANULARITY; i++) {
enum alloc_state state;
/* +1 because header is the first byte. */
state = get_bit_pair((uint8_t *)pool + *mhoff, i+1);
switch (state) {
case SPECIAL:
return check_fail();
case TAKEN:
if (last_state == FREE)
return check_fail();
break;
default:
break;
while (limit >= BITS_PER_LONG) {
count += popcount(bitmap[0]);
bitmap++;
limit -= BITS_PER_LONG;
}
last_state = state;
}
return true;
for (i = 0; i < limit; i++)
if (test_bit(bitmap, i))
count++;
return count;
}
/* We don't know what alignment they asked for, but we can infer worst
* case from the size. */
static unsigned int max_align(unsigned int size)
static bool out_of_bounds(unsigned long off,
unsigned long pagesize,
unsigned long poolsize)
{
unsigned int align = 1;
while (size % (align * 2) == 0)
align *= 2;
return align;
return (off > poolsize || off + pagesize > poolsize);
}
static bool check_uniform_metadata(void *pool, unsigned long *mhoff)
static bool check_bucket(struct header *head,
unsigned long poolsize,
unsigned long pages[],
struct bucket_state *bs,
unsigned int bindex)
{
uint8_t *meta = (uint8_t *)pool + *mhoff;
unsigned int i, usize;
struct uniform_cache *uc = pool;
bool lp_bucket = large_page_bucket(bindex, poolsize);
struct page_header *ph;
unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;
usize = decode_usize(meta);
if (usize == 0 || suitable_for_uc(usize, max_align(usize)) != usize)
return check_fail();
lp_bits = large_page_bits(poolsize);
sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
/* If it's in uniform cache, make sure that agrees on size. */
for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
uint8_t *ucm;
pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);
if (!uc->size[i])
continue;
ucm = get_page_metadata(pool, uc->page[i]);
if (ucm != meta)
continue;
if (usize != uc->size[i])
/* This many elements fit? */
taken = page_header_size(bindex / INTER_BUCKET_SPACE,
bs->elements_per_page);
taken += bucket_to_size(bindex) * bs->elements_per_page;
if (taken > pagesize)
return check_fail();
}
return true;
}
static bool check_subpage(void *pool, unsigned long poolsize,
unsigned long page)
{
unsigned long *mhoff = metadata_off(pool, page);
if (*mhoff + sizeof(struct metaheader) > poolsize)
/* One more wouldn't fit? */
taken = page_header_size(bindex / INTER_BUCKET_SPACE,
bs->elements_per_page + 1);
taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
if (taken <= pagesize)
return check_fail();
if (*mhoff % ALIGNOF(struct metaheader) != 0)
/* Walk used list. */
prev = 0;
for (i = bs->page_list; i; i = ph->next) {
/* Bad pointer? */
if (out_of_bounds(i, pagesize, poolsize))
return check_fail();
/* It must point to a metadata page. */
if (!is_metadata_page(pool, poolsize, *mhoff / getpagesize()))
/* Wrong size page? */
if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket)
return check_fail();
/* Header at start of subpage allocation */
switch (get_bit_pair((uint8_t *)pool + *mhoff, 0)) {
case BITMAP:
return check_bitmap_metadata(pool, mhoff);
case UNIFORM:
return check_uniform_metadata(pool, mhoff);
default:
/* Not page boundary? */
if (i % pagesize)
return check_fail();
}
}
bool alloc_check(void *pool, unsigned long poolsize)
{
unsigned long i;
struct metaheader *mh;
enum alloc_state last_state = FREE;
bool was_metadata = false;
if (poolsize < MIN_SIZE)
return true;
if (get_page_state(pool, 0) != TAKEN_START)
ph = from_off(head, i);
/* Linked list corrupt? */
if (ph->prev != prev)
return check_fail();
/* First check metadata pages. */
/* Metadata pages will be marked TAKEN. */
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
unsigned long start, end;
start = pool_offset(pool, mh);
if (start + sizeof(*mh) > poolsize)
/* Already seen this page? */
if (test_bit(pages, i >> sp_bits))
return check_fail();
end = pool_offset(pool, (char *)(mh+1)
+ get_metalen(pool, poolsize, mh));
if (end > poolsize)
set_bit(pages, i >> sp_bits);
/* Empty or full? */
if (ph->elements_used == 0)
return check_fail();
/* Non-first pages should start on a page boundary. */
if (mh != first_mheader(pool, poolsize)
&& start % getpagesize() != 0)
if (ph->elements_used >= bs->elements_per_page)
return check_fail();
/* It should end on a page boundary. */
if (end % getpagesize() != 0)
/* Used bits don't agree? */
if (ph->elements_used != count_bits(ph->used,
bs->elements_per_page))
return check_fail();
/* Wrong bucket? */
if (ph->bucket != bindex)
return check_fail();
prev = i;
}
for (i = 0; i < poolsize / getpagesize(); i++) {
enum alloc_state state = get_page_state(pool, i);
bool is_metadata = is_metadata_page(pool, poolsize,i);
switch (state) {
case FREE:
/* metadata pages are never free. */
if (is_metadata)
/* Walk full list. */
prev = 0;
for (i = bs->full_list; i; i = ph->next) {
/* Bad pointer? */
if (out_of_bounds(i, pagesize, poolsize))
return check_fail();
case TAKEN_START:
break;
case TAKEN:
/* This should continue a previous block. */
if (last_state == FREE)
/* Wrong size page? */
if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket)
return check_fail();
if (is_metadata != was_metadata)
/* Not page boundary? */
if (i % pagesize)
return check_fail();
break;
case SPECIAL:
/* Check metadata pointer etc. */
if (!check_subpage(pool, poolsize, i))
ph = from_off(head, i);
/* Linked list corrupt? */
if (ph->prev != prev)
return check_fail();
}
last_state = state;
was_metadata = is_metadata;
/* Already seen this page? */
if (test_bit(pages, i >> sp_bits))
return check_fail();
set_bit(pages, i >> sp_bits);
/* Not full? */
if (ph->elements_used != bs->elements_per_page)
return check_fail();
/* Used bits don't agree? */
if (ph->elements_used != count_bits(ph->used,
bs->elements_per_page))
return check_fail();
/* Wrong bucket? */
if (ph->bucket != bindex)
return check_fail();
prev = i;
}
return true;
}
void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
bool alloc_check(void *pool, unsigned long poolsize)
{
struct metaheader *mh;
struct uniform_cache *uc = pool;
unsigned long pagebitlen, metadata_pages, count[1<<BITS_PER_PAGE], tot;
long i;
struct header *head = pool;
unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
struct page_header *ph;
unsigned long pages[(MAX_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)
/ BITS_PER_LONG] = { 0 };
if (poolsize < MIN_SIZE) {
fprintf(out, "Pool smaller than %u: no content\n", MIN_SIZE);
return;
}
tot = 0;
for (i = 0; i < UNIFORM_CACHE_NUM; i++)
tot += (uc->size[i] != 0);
fprintf(out, "Uniform cache (%lu entries):\n", tot);
for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
unsigned int j, total = 0;
uint8_t *meta;
if (poolsize < MIN_USEFUL_SIZE)
return tiny_alloc_check(pool, poolsize);
if (!uc->size[i])
continue;
lp_bits = large_page_bits(poolsize);
sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
/* First two bytes are header. */
meta = get_page_metadata(pool, uc->page[i]) + 2;
num_buckets = max_bucket(lp_bits);
for (j = 0; j < SUBPAGE_METAOFF / uc->size[i]; j++)
if (meta[j / 8] & (1 << (j % 8)))
total++;
header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
printf(" %u: %lu: %u/%zu (%zu%% density)\n",
uc->size[i], uc->page[i], total,
SUBPAGE_METAOFF / uc->size[i],
(total * 100) / (SUBPAGE_METAOFF / uc->size[i]));
}
/* First, set all bits taken by header. */
for (i = 0; i < header_size; i += (1UL << sp_bits))
set_bit(pages, i >> sp_bits);
memset(count, 0, sizeof(count));
for (i = 0; i < poolsize / getpagesize(); i++)
count[get_page_state(pool, i)]++;
mh = first_mheader(pool, poolsize);
pagebitlen = (uint8_t *)mh - get_page_statebits(pool);
fprintf(out, "%lu bytes of page bits: FREE/TAKEN/TAKEN_START/SUBPAGE = %lu/%lu/%lu/%lu\n",
pagebitlen, count[0], count[1], count[2], count[3]);
/* One metadata page for every page of page bits. */
metadata_pages = div_up(pagebitlen, getpagesize());
/* Now do each metadata page. */
for (; mh; mh = next_mheader(pool,mh)) {
unsigned long free = 0, bitmapblocks = 0, uniformblocks = 0,
len = 0, uniformlen = 0, bitmaplen = 0, metalen;
uint8_t *meta = (uint8_t *)(mh + 1);
metalen = get_metalen(pool, poolsize, mh);
metadata_pages += (sizeof(*mh) + metalen) / getpagesize();
for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) {
switch (get_bit_pair(meta, i)) {
case FREE:
len = 1;
free++;
break;
case BITMAP:
/* Skip over this allocated part. */
len = BITMAP_METALEN * METADATA_PER_BYTE;
bitmapblocks++;
bitmaplen += len;
break;
case UNIFORM:
/* Skip over this part. */
len = decode_usize(meta + i/METADATA_PER_BYTE);
len = uniform_metalen(len) * METADATA_PER_BYTE;
uniformblocks++;
uniformlen += len;
break;
default:
assert(0);
}
/* Check small page free list. */
prev = 0;
for (i = head->small_free_list; i; i = ph->next) {
/* Bad pointer? */
if (out_of_bounds(i, 1 << sp_bits, poolsize))
return check_fail();
/* Large page? */
if (test_bit(head->pagesize, i >> lp_bits))
return check_fail();
/* Not page boundary? */
if (i % (1 << sp_bits))
return check_fail();
ph = from_off(head, i);
/* Linked list corrupt? */
if (ph->prev != prev)
return check_fail();
/* Already seen this page? */
if (test_bit(pages, i >> sp_bits))
return check_fail();
set_bit(pages, i >> sp_bits);
prev = i;
}
fprintf(out, "Metadata %lu-%lu: %lu free, %lu bitmapblocks, %lu uniformblocks, %lu%% density\n",
pool_offset(pool, mh),
pool_offset(pool, (char *)(mh+1) + metalen),
free, bitmapblocks, uniformblocks,
(bitmaplen + uniformlen) * 100
/ (free + bitmaplen + uniformlen));
/* Check large page free list. */
prev = 0;
for (i = head->large_free_list; i; i = ph->next) {
/* Bad pointer? */
if (out_of_bounds(i, 1 << lp_bits, poolsize))
return check_fail();
/* Not large page? */
if (!test_bit(head->pagesize, i >> lp_bits))
return check_fail();
/* Not page boundary? */
if (i % (1 << lp_bits))
return check_fail();
ph = from_off(head, i);
/* Linked list corrupt? */
if (ph->prev != prev)
return check_fail();
/* Already seen this page? */
if (test_bit(pages, i >> sp_bits))
return check_fail();
set_bit(pages, i >> sp_bits);
prev = i;
}
/* Account for total pages allocated. */
tot = (count[1] + count[2] - metadata_pages) * getpagesize();
fprintf(out, "Total metadata bytes = %lu\n",
metadata_pages * getpagesize());
/* Now do every subpage. */
for (i = 0; i < poolsize / getpagesize(); i++) {
uint8_t *meta;
unsigned int j, allocated;
enum sub_metadata_type type;
/* Check the buckets. */
for (i = 0; i < max_bucket(lp_bits); i++) {
struct bucket_state *bs = &head->bs[i];
if (get_page_state(pool, i) != SPECIAL)
continue;
memset(count, 0, sizeof(count));
meta = get_page_metadata(pool, i);
type = get_bit_pair(meta, 0);
if (!check_bucket(head, poolsize, pages, bs, i))
return false;
}
if (type == BITMAP) {
for (j = 0; j < SUBPAGE_METAOFF/BITMAP_GRANULARITY; j++)
count[get_page_state(meta, j)]++;
allocated = (count[1] + count[2]) * BITMAP_GRANULARITY;
fprintf(out, "Subpage bitmap ");
} else {
unsigned int usize = decode_usize(meta);
assert(type == UNIFORM);
fprintf(out, "Subpage uniform (%u) ", usize);
meta += 2;
for (j = 0; j < SUBPAGE_METAOFF / usize; j++)
count[!!(meta[j / 8] & (1 << (j % 8)))]++;
allocated = count[1] * usize;
/* Make sure every page accounted for. */
for (i = 0; i < poolsize >> sp_bits; i++) {
if (!test_bit(pages, i))
return check_fail();
if (test_bit(head->pagesize,
i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
/* Large page, skip rest. */
i += (1 << BITS_FROM_SMALL_TO_LARGE_PAGE) - 1;
}
fprintf(out, "%lu: FREE/TAKEN/TAKEN_START = %lu/%lu/%lu %u%% density\n",
i, count[0], count[1], count[2],
allocated * 100 / getpagesize());
tot += allocated;
}
/* This is optimistic, since we overalloc in several cases. */
fprintf(out, "Best possible allocation density = %lu%%\n",
tot * 100 / poolsize);
return true;
}
......@@ -3,12 +3,131 @@
#include <stdio.h>
#include <stdbool.h>
/**
* alloc_init - initialize a pool of memory for the allocator.
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
*
* This stores all the setup state required to perform allocation within the
 * pool (there is no external state). Any previous contents of @pool are
* discarded.
*
* The same @pool and @poolsize arguments must be handed to the other alloc
* functions after this.
*
* If the pool is too small for meaningful allocations, alloc_get will fail.
*
* Example:
* void *pool = malloc(32*1024*1024);
* if (!pool)
* err(1, "Failed to allocate 32MB");
* alloc_init(pool, 32*1024*1024);
*/
void alloc_init(void *pool, unsigned long poolsize);
/**
* alloc_get - allocate some memory from the pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
* @size: the size of the desired allocation
* @align: the alignment of the desired allocation (0 or power of 2)
*
* This is "malloc" within an initialized pool.
*
* It will return a unique pointer within the pool (ie. between @pool
* and @pool+@poolsize) which meets the alignment requirements of
* @align. Note that the alignment is relative to the start of the pool,
 * so if @pool is not aligned, the pointer won't be either.
*
* Returns NULL if there is no contiguous room.
*
* Example:
* #include <ccan/alignof/alignof.h>
* ...
* double *d = alloc_get(pool, 32*1024*1024,
* sizeof(*d), ALIGNOF(*d));
* if (!d)
* err(1, "Failed to allocate a double");
*/
void *alloc_get(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align);
/**
* alloc_free - free some allocated memory from the pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
* @p: the non-NULL pointer returned from alloc_get.
*
* This is "free" within an initialized pool. A pointer should only be
* freed once, and must be a pointer returned from a successful alloc_get()
* call.
*
* Example:
* alloc_free(pool, 32*1024*1024, d);
*/
void alloc_free(void *pool, unsigned long poolsize, void *free);
/**
* alloc_size - get the actual size allocated by alloc_get
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
* @p: the non-NULL pointer returned from alloc_get.
*
* alloc_get() may overallocate, in which case you may use the extra
* space exactly as if you had asked for it.
*
* The return value will always be at least the @size passed to alloc_get().
*
* Example:
* printf("Allocating a double actually got me %lu bytes\n",
* alloc_size(pool, 32*1024*1024, d));
*/
unsigned long alloc_size(void *pool, unsigned long poolsize, void *p);
/**
* alloc_check - check the integrity of the allocation pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
*
* alloc_check() can be used for debugging suspected pool corruption. It may
* be quite slow, but provides some assistance for hard-to-find overruns or
* double-frees. Unlike the rest of the code, it will not crash on corrupted
* pools.
*
* There is an internal function check_fail() which this calls on failure which
* is useful for placing breakpoints and gaining more insight into the type
* of the corruption detected.
*
* Example:
* #include <assert.h>
*
* ...
* assert(alloc_check(pool, 32*1024*1024));
*/
bool alloc_check(void *pool, unsigned long poolsize);
/**
* alloc_visualize - dump information about the allocation pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
*
* When debugging the allocator itself, it's often useful to see how
* the pool is being used. alloc_visualize() does that, but makes
* assumptions about correctness (like the rest of the code) so if you
* suspect corruption call alloc_check() first.
*
* Example:
* #include <stdio.h>
*
* double *d = alloc_get(pool, 32*1024*1024,
* sizeof(*d), ALIGNOF(*d));
* if (!d) {
* fprintf(stderr, "Allocation failed!\n");
* if (!alloc_check(pool, 32*1024*1024))
* errx(1, "Allocation pool is corrupt");
 *		alloc_visualize(stderr, pool, 32*1024*1024);
* exit(1);
* }
*/
void alloc_visualize(FILE *out, void *pool, unsigned long poolsize);
#endif /* ALLOC_H */
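Putting the documented calls together, a minimal end-to-end use might look like this (a sketch based on the examples above; it assumes the usual ccan include path and, per the test suite's FIXME, a page-aligned pool):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>
#include <ccan/alloc/alloc.h>

#define POOLSIZE (32*1024*1024)

int main(void)
{
	void *pool;
	char *buf;

	/* The test suite notes the pool currently needs to be page aligned. */
	if (posix_memalign(&pool, getpagesize(), POOLSIZE) != 0)
		err(1, "Failed to allocate 32MB pool");
	alloc_init(pool, POOLSIZE);

	buf = alloc_get(pool, POOLSIZE, 100, 1);
	if (!buf)
		errx(1, "Pool allocation failed");
	printf("Asked for 100 bytes, actually got %lu\n",
	       alloc_size(pool, POOLSIZE, buf));

	alloc_free(pool, POOLSIZE, buf);
	if (!alloc_check(pool, POOLSIZE))
		errx(1, "Pool is corrupt");
	free(pool);
	return 0;
}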
......@@ -4,7 +4,7 @@
#include <stdlib.h>
#include <err.h>
#define POOL_ORD 16
#define POOL_ORD 20
#define POOL_SIZE (1 << POOL_ORD)
#define sort(p, num, cmp) \
......@@ -32,14 +32,14 @@ static bool free_every_second_one(void *mem, unsigned int num, void *p[])
/* Free every second one. */
for (i = 0; i < num; i += 2) {
alloc_free(mem, POOL_SIZE, p[i]);
}
if (!alloc_check(mem, POOL_SIZE))
return false;
}
for (i = 1; i < num; i += 2) {
alloc_free(mem, POOL_SIZE, p[i]);
}
if (!alloc_check(mem, POOL_SIZE))
return false;
}
return true;
}
......@@ -48,9 +48,9 @@ int main(int argc, char *argv[])
{
void *mem;
unsigned int i, num, max_size;
void *p[POOL_SIZE];
void **p = calloc(POOL_SIZE, sizeof(*p));
plan_tests(178);
plan_tests(120);
/* FIXME: Needs to be page aligned for now. */
if (posix_memalign(&mem, 1 << POOL_ORD, POOL_SIZE) != 0)
......@@ -134,7 +134,7 @@ int main(int argc, char *argv[])
ok1(alloc_check(mem, POOL_SIZE));
/* Alignment constraints should be met, as long as powers of two */
for (i = 0; i < POOL_ORD-1; i++) {
for (i = 0; i < /*FIXME: POOL_ORD-1*/ 10; i++) {
p[i] = alloc_get(mem, POOL_SIZE, i, 1 << i);
ok1(p[i]);
ok1(((unsigned long)p[i] % (1 << i)) == 0);
......@@ -142,13 +142,13 @@ int main(int argc, char *argv[])
ok1(alloc_size(mem, POOL_SIZE, p[i]) >= i);
}
for (i = 0; i < POOL_ORD-1; i++) {
for (i = 0; i < /*FIXME: POOL_ORD-1*/ 10; i++) {
alloc_free(mem, POOL_SIZE, p[i]);
ok1(alloc_check(mem, POOL_SIZE));
}
/* Alignment constraints for a single-byte allocation. */
for (i = 0; i < POOL_ORD; i++) {
for (i = 0; i < /*FIXME: POOL_ORD*/ 10; i++) {
p[0] = alloc_get(mem, POOL_SIZE, 1, 1 << i);
ok1(p[0]);
ok1(alloc_check(mem, POOL_SIZE));
......@@ -158,50 +158,11 @@ int main(int argc, char *argv[])
}
/* Alignment check for a 0-byte allocation. Corner case. */
p[0] = alloc_get(mem, POOL_SIZE, 0, 1 << (POOL_ORD - 1));
p[0] = alloc_get(mem, POOL_SIZE, 0, 1 << (/*FIXME: POOL_ORD - 1*/ 10));
ok1(alloc_check(mem, POOL_SIZE));
ok1(alloc_size(mem, POOL_SIZE, p[0]) < POOL_SIZE);
alloc_free(mem, POOL_SIZE, p[0]);
ok1(alloc_check(mem, POOL_SIZE));
/* Force the testing of split metadata. */
alloc_init(mem, POOL_SIZE);
for (i = 0; i < POOL_SIZE; i++) {
p[i] = alloc_get(mem, POOL_SIZE, getpagesize(), getpagesize());
if (!p[i])
break;
}
ok1(alloc_check(mem, POOL_SIZE));
ok1(alloc_size(mem, POOL_SIZE, p[i-1]) >= getpagesize());
/* Sort them. */
sort(p, i-1, addr_cmp);
/* Free all but the one next to the metadata. */
for (i = 1; p[i]; i++)
alloc_free(mem, POOL_SIZE, p[i]);
ok1(alloc_check(mem, POOL_SIZE));
ok1(alloc_size(mem, POOL_SIZE, p[0]) >= getpagesize());
/* Now do a whole heap of subpage allocs. */
for (i = 1; i < POOL_SIZE; i++) {
p[i] = alloc_get(mem, POOL_SIZE, 1, 1);
if (!p[i])
break;
}
ok1(alloc_check(mem, POOL_SIZE));
/* Free up our page next to metadata, and should be able to alloc */
alloc_free(mem, POOL_SIZE, p[0]);
ok1(alloc_check(mem, POOL_SIZE));
p[0] = alloc_get(mem, POOL_SIZE, 1, 1);
ok1(p[0]);
ok1(alloc_size(mem, POOL_SIZE, p[0]) >= 1);
/* Clean up. */
for (i = 0; p[i]; i++)
alloc_free(mem, POOL_SIZE, p[i]);
ok1(alloc_check(mem, POOL_SIZE));
return exit_status();
}
......@@ -13,3 +13,7 @@
#define HAVE_STATEMENT_EXPR 1
#define HAVE_TYPEOF 1
#define HAVE_UTIME 1
#define HAVE_BUILTIN_CLZL 1
#define HAVE_BUILTIN_FFSL 1
#define HAVE_BUILTIN_POPCOUNTL 1