Commit ce71acce authored by Rusty Russell's avatar Rusty Russell

alloc: first cut of new Tridge-inspired allocator

This version has limitations: pools must be at least 1MB, and allocations
are restricted to 1/1024 of the total pool size.
parent a18fb319
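For context, a minimal usage sketch pieced together from the alloc.h documentation added below; the 1MB pool and sub-1KB request stay inside the limitations noted above, and the names and sizes here are illustrative rather than part of this commit:

	#include "alloc.h"
	#include <err.h>
	#include <stdlib.h>

	#define POOLSIZE (1024 * 1024)		/* minimum useful pool size */

	int main(void)
	{
		void *pool = malloc(POOLSIZE);
		char *buf;

		if (!pool)
			err(1, "allocating pool");
		alloc_init(pool, POOLSIZE);

		/* Each object must stay under poolsize/1024 (1024 bytes here). */
		buf = alloc_get(pool, POOLSIZE, 512, 16);
		if (!buf)
			errx(1, "pool full");
		alloc_free(pool, POOLSIZE, buf);

		free(pool);
		return 0;
	}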
...@@ -101,6 +101,8 @@ int main(int argc, char *argv[])
	if (strcmp(argv[1], "depends") == 0) {
		printf("ccan/build_assert\n");
		printf("ccan/likely\n");
		printf("ccan/short_types\n");
		return 0;
	}
...
...@@ -6,886 +6,723 @@
#include <stdlib.h> #include <stdlib.h>
#include "alloc.h" #include "alloc.h"
#include <ccan/build_assert/build_assert.h> #include <ccan/build_assert/build_assert.h>
#include <ccan/alignof/alignof.h> #include <ccan/likely/likely.h>
#include <ccan/short_types/short_types.h>
#include "config.h" #include "config.h"
/* FIXME: We assume getpagesize() doesnt change. Remapping file with /*
* different pagesize should still work. */ Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
http://samba.org/~tridge/junkcode/alloc_mmap/
/* FIXME: Doesn't handle non-page-aligned poolsize. */ Copyright (C) Andrew Tridgell 2007
/* FIXME: Reduce. */ This library is free software; you can redistribute it and/or
#define MIN_SIZE (getpagesize() * 2) modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
/* What's the granularity of sub-page allocs? */ This library is distributed in the hope that it will be useful,
#define BITMAP_GRANULARITY 4 but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
/* File layout: You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* file := pagestates pad uniform-cache metadata Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* pagestates := pages * 2-bits-per-page
* pad := pad to next ALIGNOF(metaheader)
*
* metadata := metalen next-ptr metabits
* metabits := freeblock | bitblock | uniformblock
* freeblock := FREE +
* bitblock := BITMAP + 2-bits-per-bit-in-page + pad-to-byte
* uniformblock := UNIFORM + 14-bit-byte-len + bits + pad-to-byte
*/ */
#define UNIFORM_CACHE_NUM 16
struct uniform_cache
{
uint16_t size[UNIFORM_CACHE_NUM];
/* These could be u32 if we're prepared to limit size. */
unsigned long page[UNIFORM_CACHE_NUM];
};
struct metaheader #if 0 /* Until we have the tiny allocator working, go down to 1 MB */
{
/* Next meta header, or 0 */
unsigned long next;
/* Bits start here. */
};
/* Assumes a is a power of two. */ /* We divide the pool into this many large pages (nearest power of 2) */
static unsigned long align_up(unsigned long x, unsigned long a) #define MAX_PAGES (1024UL)
{
return (x + a - 1) & ~(a - 1);
}
static unsigned long align_down(unsigned long x, unsigned long a) /* 32 small pages == 1 large page. */
{ #define BITS_FROM_SMALL_TO_LARGE_PAGE 5
return x & ~(a - 1);
}
static unsigned long div_up(unsigned long x, unsigned long a) #else
{
return (x + a - 1) / a;
}
/* It turns out that we spend a lot of time dealing with bit pairs. #define MAX_PAGES (128UL)
* These routines manipulate them. #define BITS_FROM_SMALL_TO_LARGE_PAGE 4
*/
static uint8_t get_bit_pair(const uint8_t *bits, unsigned long index)
{
return bits[index * 2 / CHAR_BIT] >> (index * 2 % CHAR_BIT) & 3;
}
static void set_bit_pair(uint8_t *bits, unsigned long index, uint8_t val) #endif
{
bits[index * 2 / CHAR_BIT] &= ~(3 << (index * 2 % CHAR_BIT));
bits[index * 2 / CHAR_BIT] |= (val << (index * 2 % CHAR_BIT));
}
/* This is used for page states and subpage allocations */ /* Smallest pool size for this scheme: 512-byte small pages. That's
enum alloc_state * 4/8% overhead for 32/64 bit. */
{ #define MIN_USEFUL_SIZE (MAX_PAGES << (9 + BITS_FROM_SMALL_TO_LARGE_PAGE))
FREE,
TAKEN,
TAKEN_START,
SPECIAL, /* Sub-page allocation for page states. */
};
/* The types for subpage metadata. */ /* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
enum sub_metadata_type #define INTER_BUCKET_SPACE 4
{
/* FREE is same as alloc state */
BITMAP = 1, /* bitmap allocated page */
UNIFORM, /* uniform size allocated page */
};
/* Page states are represented by bitpairs, at the start of the pool. */ /* FIXME: Figure this out properly. */
#define BITS_PER_PAGE 2 #define MAX_SIZE (1 << 30)
/* How much metadata info per byte? */ /* How few objects must fit in a page before we use a larger one? (8) */
#define METADATA_PER_BYTE (CHAR_BIT / 2) #define MAX_PAGE_OBJECT_ORDER 3
static uint8_t *get_page_statebits(const void *pool) #define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
{
return (uint8_t *)pool + sizeof(struct uniform_cache);
}
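/*
 * Illustrative numbers for the #else constants above (MAX_PAGES == 128,
 * BITS_FROM_SMALL_TO_LARGE_PAGE == 4): MIN_USEFUL_SIZE == 128 << 13 == 1MB,
 * which is the 1MB floor mentioned in the commit message.  A minimum-sized
 * 1MB pool divides into 128 large pages of 8192 bytes, each of which can be
 * split into 16 small pages of 512 bytes.
 */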
static enum alloc_state get_page_state(const void *pool, unsigned long page) struct bucket_state {
{ unsigned long elements_per_page;
return get_bit_pair(get_page_statebits(pool), page); unsigned long page_list;
} unsigned long full_list;
};
static void set_page_state(void *pool, unsigned long page, enum alloc_state s) struct header {
{ /* 1024 bit bitmap of which pages are large. */
set_bit_pair(get_page_statebits(pool), page, s); unsigned long pagesize[MAX_PAGES / BITS_PER_LONG];
}
/* The offset of metadata for a subpage allocation is found at the end /* List of unused small/large pages. */
* of the subpage */ unsigned long small_free_list;
#define SUBPAGE_METAOFF (getpagesize() - sizeof(unsigned long)) unsigned long large_free_list;
/* This is the length of metadata in bits. It consists of two bits /* This is less defined: we have two buckets for each power of 2 */
* for every BITMAP_GRANULARITY of usable bytes in the page, then two struct bucket_state bs[1];
* bits for the tailer.. */ };
#define BITMAP_METABITLEN \
((div_up(SUBPAGE_METAOFF, BITMAP_GRANULARITY) + 1) * BITS_PER_PAGE)
/* This is the length in bytes. */ struct page_header {
#define BITMAP_METALEN (div_up(BITMAP_METABITLEN, CHAR_BIT)) unsigned long next, prev;
u32 elements_used;
/* FIXME: Pack this in somewhere... */
u8 bucket;
unsigned long used[1]; /* One bit per element. */
};
static struct metaheader *first_mheader(void *pool, unsigned long poolsize) /* 2 bits for every byte to allocate. */
static void tiny_alloc_init(void *pool, unsigned long poolsize)
{ {
unsigned int pagestatelen; /* FIXME */
pagestatelen = align_up(div_up(poolsize/getpagesize() * BITS_PER_PAGE,
CHAR_BIT),
ALIGNOF(struct metaheader));
return (struct metaheader *)(get_page_statebits(pool) + pagestatelen);
} }
static struct metaheader *next_mheader(void *pool, struct metaheader *mh) static void *tiny_alloc_get(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align)
{ {
if (!mh->next) /* FIXME */
return NULL; return NULL;
return (struct metaheader *)((char *)pool + mh->next);
} }
static unsigned long pool_offset(void *pool, void *p) static void tiny_alloc_free(void *pool, unsigned long poolsize, void *free)
{ {
return (char *)p - (char *)pool; /* FIXME */
} }
void alloc_init(void *pool, unsigned long poolsize) static unsigned long tiny_alloc_size(void *pool, unsigned long poolsize,
void *p)
{ {
/* FIXME: Alignment assumptions about pool. */ /* FIXME */
unsigned long len, i; return 0;
struct metaheader *mh; }
if (poolsize < MIN_SIZE)
return;
mh = first_mheader(pool, poolsize);
/* Mark all page states FREE, all uniform caches zero, and all of
* metaheader bitmap which takes rest of first page. */
len = align_up(pool_offset(pool, mh + 1), getpagesize());
BUILD_ASSERT(FREE == 0);
memset(pool, 0, len);
/* Mark the pagestate and metadata page(s) allocated. */ static bool tiny_alloc_check(void *pool, unsigned long poolsize)
set_page_state(pool, 0, TAKEN_START); {
for (i = 1; i < div_up(len, getpagesize()); i++) /* FIXME */
set_page_state(pool, i, TAKEN); return true;
} }
/* Two bits per element, representing page states. Returns 0 on fail. static unsigned int fls(unsigned long val)
* off is used to allocate from subpage bitmaps, which use the first 2
* bits as the type, so the real bitmap is offset by 1. */
static unsigned long alloc_from_bitmap(uint8_t *bits, unsigned long off,
unsigned long elems,
unsigned long want, unsigned long align)
{ {
long i; #if HAVE_BUILTIN_CLZL
unsigned long free; /* This is significantly faster! */
return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0;
free = 0; #else
/* We allocate from far end, to increase ability to expand metadata. */ unsigned int r = 32;
for (i = elems - 1; i >= 0; i--) {
switch (get_bit_pair(bits, off+i)) {
case FREE:
if (++free >= want) {
unsigned long j;
/* They might ask for large alignment. */
if (align && i % align)
continue;
set_bit_pair(bits, off+i, TAKEN_START); if (!val)
for (j = i+1; j < i + want; j++) return 0;
set_bit_pair(bits, off+j, TAKEN); if (!(val & 0xffff0000u)) {
return off+i; val <<= 16;
r -= 16;
} }
break; if (!(val & 0xff000000u)) {
case SPECIAL: val <<= 8;
case TAKEN_START: r -= 8;
case TAKEN:
free = 0;
break;
} }
if (!(val & 0xf0000000u)) {
val <<= 4;
r -= 4;
} }
if (!(val & 0xc0000000u)) {
val <<= 2;
r -= 2;
}
if (!(val & 0x80000000u)) {
val <<= 1;
r -= 1;
}
return r;
#endif
}
/* FIXME: Move to bitops. */
static unsigned int ffsl(unsigned long val)
{
#if HAVE_BUILTIN_FFSL
/* This is significantly faster! */
return __builtin_ffsl(val);
#else
unsigned int r = 1;
if (!val)
return 0; return 0;
if (sizeof(long) == sizeof(u64)) {
if (!(val & 0xffffffff)) {
/* Workaround gcc warning on 32-bit:
error: right shift count >= width of type */
u64 tmp = val;
tmp >>= 32;
val = tmp;
r += 32;
}
}
if (!(val & 0xffff)) {
val >>= 16;
r += 16;
}
if (!(val & 0xff)) {
val >>= 8;
r += 8;
}
if (!(val & 0xf)) {
val >>= 4;
r += 4;
}
if (!(val & 3)) {
val >>= 2;
r += 2;
}
if (!(val & 1)) {
val >>= 1;
r += 1;
}
return r;
#endif
}
static unsigned int popcount(unsigned long val)
{
#if HAVE_BUILTIN_POPCOUNTL
return __builtin_popcountl(val);
#else
if (sizeof(long) == sizeof(u64)) {
u64 v = val;
v = (v & 0x5555555555555555ULL)
+ ((v >> 1) & 0x5555555555555555ULL);
v = (v & 0x3333333333333333ULL)
+ ((v >> 2) & 0x3333333333333333ULL);
v = (v & 0x0F0F0F0F0F0F0F0FULL)
+ ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL);
v = (v & 0x00FF00FF00FF00FFULL)
+ ((v >> 8) & 0x00FF00FF00FF00FFULL);
v = (v & 0x0000FFFF0000FFFFULL)
+ ((v >> 16) & 0x0000FFFF0000FFFFULL);
v = (v & 0x00000000FFFFFFFFULL)
+ ((v >> 32) & 0x00000000FFFFFFFFULL);
return v;
}
val = (val & 0x55555555ULL) + ((val >> 1) & 0x55555555ULL);
val = (val & 0x33333333ULL) + ((val >> 2) & 0x33333333ULL);
val = (val & 0x0F0F0F0FULL) + ((val >> 4) & 0x0F0F0F0FULL);
val = (val & 0x00FF00FFULL) + ((val >> 8) & 0x00FF00FFULL);
val = (val & 0x0000FFFFULL) + ((val >> 16) & 0x0000FFFFULL);
return val;
#endif
}
/*
* Every 4 buckets, the size doubles.
* Between buckets, sizes increase linearly.
*
* eg. bucket 40 = 2^10 = 1024
* bucket 41 = 2^10 + 2^10/4 = 1024 + 256
* bucket 42 = 2^10 + 2*2^10/4 = 1024 + 512
* bucket 43 = 2^10 + 3*2^10/4 = 1024 + 768
* bucket 44 = 2^11 = 2048
*
* Care is taken to handle low numbered buckets, at cost of overflow.
*/
static unsigned long bucket_to_size(unsigned int bucket)
{
unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE);
return base + ((bucket % INTER_BUCKET_SPACE)
<< (bucket / INTER_BUCKET_SPACE))
/ INTER_BUCKET_SPACE;
} }
static unsigned long alloc_get_pages(void *pool, unsigned long poolsize, /*
unsigned long pages, unsigned long align) * Say size is 10.
* fls(size/2) == 3. 1 << 3 == 8, so we're 2 too large, out of a possible
* 8 too large. That's 1/4 of the way to the next power of 2 == 1 bucket.
*
* We make sure we round up. Note that this fails on 32 bit at size
* 1879048193 (around bucket 120).
*/
static unsigned int size_to_bucket(unsigned long size)
{ {
return alloc_from_bitmap(get_page_statebits(pool), unsigned int base = fls(size/2);
0, poolsize / getpagesize(), pages, unsigned long overshoot;
align / getpagesize());
overshoot = size - (1 << base);
return base * INTER_BUCKET_SPACE
+ ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base);
} }
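/*
 * Worked example (assuming INTER_BUCKET_SPACE == 4): for size 10,
 * fls(10/2) == 3 and overshoot == 2, so size_to_bucket(10) == 3*4 +
 * ((2*4 + 7) >> 3) == 13, and bucket_to_size(13) == 8 + (1 << 3)/4 == 10.
 * An exact power of 2 such as 8 maps to bucket 12, which is 8 bytes again.
 */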
/* Offset to metadata is at end of page. */ static unsigned int large_page_bits(unsigned long poolsize)
static unsigned long *metadata_off(void *pool, unsigned long page)
{ {
return (unsigned long *) return fls(poolsize / MAX_PAGES / 2);
((char *)pool + (page+1)*getpagesize() - sizeof(unsigned long));
} }
static uint8_t *get_page_metadata(void *pool, unsigned long page) static unsigned long align_up(unsigned long x, unsigned long align)
{ {
return (uint8_t *)pool + *metadata_off(pool, page); return (x + align - 1) & ~(align - 1);
} }
static void set_page_metadata(void *pool, unsigned long page, uint8_t *meta) static void *from_off(struct header *head, unsigned long off)
{ {
*metadata_off(pool, page) = meta - (uint8_t *)pool; return (char *)head + off;
} }
static unsigned long sub_page_alloc(void *pool, unsigned long page, static unsigned long to_off(struct header *head, void *p)
unsigned long size, unsigned long align)
{ {
uint8_t *bits = get_page_metadata(pool, page); return (char *)p - (char *)head;
unsigned long i;
enum sub_metadata_type type;
type = get_bit_pair(bits, 0);
/* If this is a uniform page, we can't allocate from it. */
if (type == UNIFORM)
return 0;
assert(type == BITMAP);
/* We use a standart bitmap, but offset because of that BITMAP
* header. */
i = alloc_from_bitmap(bits, 1, SUBPAGE_METAOFF/BITMAP_GRANULARITY,
div_up(size, BITMAP_GRANULARITY),
align / BITMAP_GRANULARITY);
/* Can't allocate? */
if (i == 0)
return 0;
/* i-1 because of the header. */
return page*getpagesize() + (i-1)*BITMAP_GRANULARITY;
} }
/* We look at the page states to figure out where the allocation for this static size_t used_size(unsigned int num_elements)
* metadata ends. */
static unsigned long get_metalen(void *pool, unsigned long poolsize,
struct metaheader *mh)
{ {
unsigned long i, first, pages = poolsize / getpagesize(); return (num_elements + BITS_PER_LONG-1) / BITS_PER_LONG;
first = pool_offset(pool, mh + 1)/getpagesize();
for (i = first + 1; i < pages && get_page_state(pool,i) == TAKEN; i++);
return i * getpagesize() - pool_offset(pool, mh + 1);
} }
static unsigned int uniform_metalen(unsigned int usize) /*
* We always align the first entry to the lower power of 2.
* eg. the 12-byte bucket gets 8-byte aligned. The 4096-byte bucket
* gets 4096-byte aligned.
*/
static unsigned long page_header_size(unsigned int align_bits,
unsigned long num_elements)
{ {
unsigned int metalen; unsigned long size;
assert(usize < (1 << 14));
/* Two bits for the header, 14 bits for size, then one bit for each
* element the page can hold. Round up to number of bytes. */
metalen = div_up(2 + 14 + SUBPAGE_METAOFF / usize, CHAR_BIT);
/* To ensure metaheader is always aligned, round bytes up. */ size = sizeof(struct page_header)
metalen = align_up(metalen, ALIGNOF(struct metaheader)); - sizeof(((struct page_header *)0)->used)
+ used_size(num_elements);
return metalen; return align_up(size, 1 << align_bits);
} }
static unsigned int decode_usize(uint8_t *meta) static void add_to_list(struct header *head,
unsigned long *list, struct page_header *ph)
{ {
return ((unsigned)meta[1] << (CHAR_BIT-2)) | (meta[0] >> 2); unsigned long h = *list, offset = to_off(head, ph);
}
static void encode_usize(uint8_t *meta, unsigned int usize) ph->next = h;
{ if (h) {
meta[0] = (UNIFORM | (usize << 2)); struct page_header *prev = from_off(head, h);
meta[1] = (usize >> (CHAR_BIT - 2)); assert(prev->prev == 0);
prev->prev = offset;
}
*list = offset;
ph->prev = 0;
} }
static uint8_t *alloc_metaspace(void *pool, unsigned long poolsize, static void del_from_list(struct header *head,
struct metaheader *mh, unsigned long bytes, unsigned long *list, struct page_header *ph)
enum sub_metadata_type type)
{ {
uint8_t *meta = (uint8_t *)(mh + 1); /* Front of list? */
unsigned long free = 0, len, i, metalen; if (ph->prev == 0) {
*list = ph->next;
metalen = get_metalen(pool, poolsize, mh); } else {
struct page_header *prev = from_off(head, ph->prev);
/* Walk through metadata looking for free. */ prev->next = ph->next;
for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) {
switch (get_bit_pair(meta, i)) {
case FREE:
len = 1;
free++;
if (free == bytes * METADATA_PER_BYTE) {
/* Mark this as a bitmap. */
set_bit_pair(meta, i - free + 1, type);
return meta + (i - free + 1)/METADATA_PER_BYTE;
}
break;
case BITMAP:
/* Skip over this allocated part. */
len = BITMAP_METALEN * METADATA_PER_BYTE;
free = 0;
break;
case UNIFORM:
/* Figure metalen given usize. */
len = decode_usize(meta + i / METADATA_PER_BYTE);
len = uniform_metalen(len) * METADATA_PER_BYTE;
free = 0;
break;
default:
assert(0);
return NULL;
} }
if (ph->next != 0) {
struct page_header *next = from_off(head, ph->next);
next->prev = ph->prev;
} }
return NULL;
} }
/* We need this many bytes of metadata. */ static unsigned long pop_from_list(struct header *head,
static uint8_t *new_metadata(void *pool, unsigned long poolsize, unsigned long *list)
unsigned long bytes, enum sub_metadata_type type)
{ {
struct metaheader *mh, *newmh; unsigned long h = *list;
unsigned long page; struct page_header *ph = from_off(head, h);
uint8_t *meta;
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh))
if ((meta = alloc_metaspace(pool, poolsize, mh, bytes, type)))
return meta;
/* No room for metadata? Can we expand an existing one? */
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
unsigned long nextpage;
/* We start on this page. */ if (likely(h)) {
nextpage = pool_offset(pool, (char *)(mh+1))/getpagesize(); *list = ph->next;
/* Iterate through any other pages we own. */ if (*list) {
while (get_page_state(pool, ++nextpage) == TAKEN); struct page_header *next = from_off(head, *list);
next->prev = 0;
/* Now, can we grab that page? */ }
if (get_page_state(pool, nextpage) != FREE)
continue;
/* OK, expand metadata, do it again. */
set_page_state(pool, nextpage, TAKEN);
BUILD_ASSERT(FREE == 0);
memset((char *)pool + nextpage*getpagesize(), 0, getpagesize());
return alloc_metaspace(pool, poolsize, mh, bytes, type);
} }
return h;
}
/* No metadata left at all? */ static void add_small_page_to_freelist(struct header *head,
page = alloc_get_pages(pool, poolsize, div_up(bytes, getpagesize()), 1); struct page_header *ph)
if (!page) {
return NULL; add_to_list(head, &head->small_free_list, ph);
}
newmh = (struct metaheader *)((char *)pool + page * getpagesize()); static void add_large_page_to_freelist(struct header *head,
BUILD_ASSERT(FREE == 0); struct page_header *ph)
memset(newmh + 1, 0, getpagesize() - sizeof(*mh)); {
add_to_list(head, &head->large_free_list, ph);
}
/* Sew it into linked list */ static void add_to_bucket_list(struct header *head,
mh = first_mheader(pool,poolsize); struct bucket_state *bs,
newmh->next = mh->next; struct page_header *ph)
mh->next = pool_offset(pool, newmh); {
add_to_list(head, &bs->page_list, ph);
}
return alloc_metaspace(pool, poolsize, newmh, bytes, type); static void del_from_bucket_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{
del_from_list(head, &bs->page_list, ph);
} }
static void alloc_free_pages(void *pool, unsigned long pagenum) static void del_from_bucket_full_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{ {
assert(get_page_state(pool, pagenum) == TAKEN_START); del_from_list(head, &bs->full_list, ph);
set_page_state(pool, pagenum, FREE);
while (get_page_state(pool, ++pagenum) == TAKEN)
set_page_state(pool, pagenum, FREE);
} }
static void maybe_transform_uniform_page(void *pool, unsigned long offset) static void add_to_bucket_full_list(struct header *head,
struct bucket_state *bs,
struct page_header *ph)
{ {
/* FIXME: If possible and page isn't full, change to a bitmap */ add_to_list(head, &bs->full_list, ph);
} }
/* Returns 0 or the size of the uniform alloc to use */ static void clear_bit(unsigned long bitmap[], unsigned int off)
static unsigned long suitable_for_uc(unsigned long size, unsigned long align)
{ {
unsigned long num_elems, wastage, usize; bitmap[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG));
unsigned long bitmap_cost; }
if (size == 0) static bool test_bit(const unsigned long bitmap[], unsigned int off)
size = 1; {
return bitmap[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG));
}
/* Fix up silly alignments. */ static void set_bit(unsigned long bitmap[], unsigned int off)
usize = align_up(size, align); {
bitmap[off / BITS_PER_LONG] |= (1UL << (off % BITS_PER_LONG));
}
/* How many can fit in this page? */ /* There must be a bit to be found. */
num_elems = SUBPAGE_METAOFF / usize; static unsigned int find_free_bit(const unsigned long bitmap[])
{
unsigned int i;
/* Can happen with bigger alignments. */ for (i = 0; bitmap[i] == -1UL; i++);
if (!num_elems) return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1;
return 0; }
/* Usize maxes out at 14 bits. */ /* How many elements can we fit in a page? */
if (usize >= (1 << 14)) static unsigned long elements_per_page(unsigned long align_bits,
return 0; unsigned long esize,
unsigned long psize)
{
unsigned long num, overhead;
/* How many bytes would be left at the end? */ /* First approximation: no extra room for bitmap. */
wastage = SUBPAGE_METAOFF % usize; overhead = align_up(sizeof(struct page_header), 1 << align_bits);
num = (psize - overhead) / esize;
/* If we can get a larger allocation within alignment constraints, we while (page_header_size(align_bits, num) + esize * num > psize)
* should do it, otherwise might as well leave wastage at the end. */ num--;
usize += align_down(wastage / num_elems, align); return num;
}
/* Bitmap allocation costs 2 bits per BITMAP_GRANULARITY bytes, plus static bool large_page_bucket(unsigned int bucket, unsigned long poolsize)
* however much we waste in rounding up to BITMAP_GRANULARITY. */ {
bitmap_cost = 2 * div_up(size, BITMAP_GRANULARITY) unsigned int sp_bits;
+ CHAR_BIT * (align_up(size, BITMAP_GRANULARITY) - size); unsigned long max_smallsize;
/* Our cost is 1 bit, plus usize overhead */ sp_bits = large_page_bits(poolsize) - BITS_FROM_SMALL_TO_LARGE_PAGE;
if (bitmap_cost < 1 + (usize - size) * CHAR_BIT) /* Note: this doesn't take into account page header. */
return 0; max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;
return usize; return bucket_to_size(bucket) > max_smallsize;
} }
static unsigned long uniform_alloc(void *pool, unsigned long poolsize, static unsigned int max_bucket(unsigned int lp_bits)
struct uniform_cache *uc,
unsigned long ucnum)
{ {
uint8_t *metadata = get_page_metadata(pool, uc->page[ucnum]) + 2; return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
unsigned long i, max;
/* Simple one-bit-per-object bitmap. */
max = SUBPAGE_METAOFF / uc->size[ucnum];
for (i = 0; i < max; i++) {
if (!(metadata[i / CHAR_BIT] & (1 << (i % CHAR_BIT)))) {
metadata[i / CHAR_BIT] |= (1 << (i % CHAR_BIT));
return uc->page[ucnum] * getpagesize()
+ i * uc->size[ucnum];
}
}
return 0;
} }
static unsigned long new_uniform_page(void *pool, unsigned long poolsize, void alloc_init(void *pool, unsigned long poolsize)
unsigned long usize)
{ {
unsigned long page, metalen; struct header *head = pool;
uint8_t *metadata; struct page_header *ph;
unsigned int lp_bits, sp_bits, num_buckets;
/* FIXME: Walk metadata looking for an existing uniform page. */ unsigned long header_size, i;
page = alloc_get_pages(pool, poolsize, 1, 1);
if (page == 0)
return 0;
metalen = uniform_metalen(usize);
/* Get metadata for page. */ if (poolsize < MIN_USEFUL_SIZE) {
metadata = new_metadata(pool, poolsize, metalen, UNIFORM); tiny_alloc_init(pool, poolsize);
if (!metadata) { return;
alloc_free_pages(pool, page);
return 0;
} }
encode_usize(metadata, usize); lp_bits = large_page_bits(poolsize);
sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
BUILD_ASSERT(FREE == 0); num_buckets = max_bucket(lp_bits);
memset(metadata + 2, 0, metalen - 2);
/* Actually, this is a subpage page now. */ head = pool;
set_page_state(pool, page, SPECIAL); header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
/* Set metadata pointer for page. */ memset(head, 0, header_size);
set_page_metadata(pool, page, metadata); for (i = 0; i < num_buckets; i++) {
unsigned long pagesize;
return page; if (large_page_bucket(i, poolsize))
} pagesize = 1UL << lp_bits;
else
pagesize = 1UL << sp_bits;
static unsigned long alloc_sub_page(void *pool, unsigned long poolsize, head->bs[i].elements_per_page
unsigned long size, unsigned long align) = elements_per_page(i / INTER_BUCKET_SPACE,
{ bucket_to_size(i),
unsigned long i, usize; pagesize);
uint8_t *metadata;
struct uniform_cache *uc = pool;
usize = suitable_for_uc(size, align);
if (usize) {
static int random_entry;
/* Look for a uniform page. */
for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
if (uc->size[i] == usize) {
unsigned long ret;
ret = uniform_alloc(pool, poolsize, uc, i);
if (ret != 0)
return ret;
/* OK, that one is full, remove from cache. */
uc->size[i] = 0;
break;
}
} }
/* OK, try a new uniform page. Use random discard for now. */ /* They start as all large pages. */
i = (++random_entry % UNIFORM_CACHE_NUM); memset(head->pagesize, 0xFF, sizeof(head->pagesize));
maybe_transform_uniform_page(pool, uc->page[i]); /* FIXME: small pages for last bit? */
uc->page[i] = new_uniform_page(pool, poolsize, usize); /* Split first page into small pages. */
if (uc->page[i]) { assert(header_size < (1UL << lp_bits));
uc->size[i] = usize; clear_bit(head->pagesize, 0);
return uniform_alloc(pool, poolsize, uc, i);
}
uc->size[i] = 0;
}
/* Look for partial page. */ /* Skip over page(s) used by header, add rest to free list */
for (i = 0; i < poolsize / getpagesize(); i++) { for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits;
unsigned long ret; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE);
if (get_page_state(pool, i) != SPECIAL) i++) {
continue; ph = from_off(head, i<<sp_bits);
ph->elements_used = 0;
ret = sub_page_alloc(pool, i, size, align); add_small_page_to_freelist(head, ph);
if (ret)
return ret;
} }
/* Create new SUBPAGE page. */ /* Add the rest of the pages as large pages. */
i = alloc_get_pages(pool, poolsize, 1, 1); i = (1 << lp_bits);
if (i == 0) while (i + (1 << lp_bits) <= poolsize) {
return 0; ph = from_off(head, i);
ph->elements_used = 0;
/* Get metadata for page. */ add_large_page_to_freelist(head, ph);
metadata = new_metadata(pool, poolsize, BITMAP_METALEN, BITMAP); i += (1 << lp_bits);
if (!metadata) {
alloc_free_pages(pool, i);
return 0;
} }
/* Actually, this is a subpage page now. */
set_page_state(pool, i, SPECIAL);
/* Set metadata pointer for page. */
set_page_metadata(pool, i, metadata);
/* Do allocation like normal */
return sub_page_alloc(pool, i, size, align);
} }
static bool bitmap_page_is_empty(uint8_t *meta) /* A large page worth of small pages are free: delete them from free list. */
static void del_large_from_small_free_list(struct header *head,
struct page_header *ph,
unsigned int sp_bits)
{ {
unsigned int i; unsigned long i;
/* Skip the header (first bit of metadata). */
for (i = 1; i < SUBPAGE_METAOFF/BITMAP_GRANULARITY+1; i++)
if (get_bit_pair(meta, i) != FREE)
return false;
return true; for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) {
del_from_list(head, &head->small_free_list,
(void *)ph + (i << sp_bits));
}
} }
static bool uniform_page_is_empty(uint8_t *meta) static bool all_empty(struct header *head, unsigned long off, unsigned sp_bits)
{ {
unsigned int i, metalen; unsigned long i;
metalen = uniform_metalen(decode_usize(meta));
/* Skip the header (first two bytes of metadata). */ for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) {
for (i = 2; i < metalen; i++) { struct page_header *ph = from_off(head, off + (i << sp_bits));
BUILD_ASSERT(FREE == 0); if (ph->elements_used)
if (meta[i])
return false; return false;
} }
return true; return true;
} }
static bool special_page_is_empty(void *pool, unsigned long page) static unsigned long get_large_page(struct header *head,
unsigned long poolsize)
{ {
uint8_t *meta; unsigned long lp_bits, sp_bits, i, page;
enum sub_metadata_type type;
meta = get_page_metadata(pool, page);
type = get_bit_pair(meta, 0);
switch (type) {
case UNIFORM:
return uniform_page_is_empty(meta);
case BITMAP:
return bitmap_page_is_empty(meta);
default:
assert(0);
}
}
static void clear_special_metadata(void *pool, unsigned long page) page = pop_from_list(head, &head->large_free_list);
{ if (likely(page))
uint8_t *meta; return page;
enum sub_metadata_type type;
meta = get_page_metadata(pool, page);
type = get_bit_pair(meta, 0);
switch (type) {
case UNIFORM:
/* First two bytes are the header, rest is already FREE */
BUILD_ASSERT(FREE == 0);
memset(meta, 0, 2);
break;
case BITMAP:
/* First two bits is the header. */
BUILD_ASSERT(BITMAP_METALEN > 1);
meta[0] = 0;
break;
default:
assert(0);
}
}
/* Returns true if we cleaned any pages. */ /* Look for small pages to coalesce, after first large page. */
static bool clean_empty_subpages(void *pool, unsigned long poolsize) lp_bits = large_page_bits(poolsize);
{ sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
unsigned long i;
bool progress = false;
for (i = 0; i < poolsize/getpagesize(); i++) { for (i = (1 << lp_bits); i < poolsize; i += (1 << lp_bits)) {
if (get_page_state(pool, i) != SPECIAL) /* Already a large page? */
if (test_bit(head->pagesize, i >> lp_bits))
continue; continue;
if (all_empty(head, i, sp_bits)) {
if (special_page_is_empty(pool, i)) { struct page_header *ph = from_off(head, i);
clear_special_metadata(pool, i); set_bit(head->pagesize, i >> lp_bits);
set_page_state(pool, i, FREE); del_large_from_small_free_list(head, ph, sp_bits);
progress = true; add_large_page_to_freelist(head, ph);
} }
} }
return progress;
return pop_from_list(head, &head->large_free_list);
} }
/* Returns true if we cleaned any pages. */ /* Returns small page. */
static bool clean_metadata(void *pool, unsigned long poolsize) static unsigned long break_up_large_page(struct header *head,
unsigned long psize,
unsigned long lpage)
{ {
struct metaheader *mh, *prev_mh = NULL; unsigned long lp_bits, sp_bits, i;
bool progress = false;
lp_bits = large_page_bits(psize);
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){ sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
uint8_t *meta; clear_bit(head->pagesize, lpage >> lp_bits);
long i;
unsigned long metalen = get_metalen(pool, poolsize, mh);
meta = (uint8_t *)(mh + 1);
BUILD_ASSERT(FREE == 0);
for (i = metalen - 1; i > 0; i--)
if (meta[i] != 0)
break;
/* Completely empty? */
if (prev_mh && i == metalen) {
alloc_free_pages(pool,
pool_offset(pool, mh)/getpagesize());
prev_mh->next = mh->next;
mh = prev_mh;
progress = true;
} else {
uint8_t *p;
/* Some pages at end are free? */
for (p = (uint8_t *)(mh+1) + metalen - getpagesize();
p > meta + i;
p -= getpagesize()) {
set_page_state(pool,
pool_offset(pool, p)
/ getpagesize(),
FREE);
progress = true;
}
}
}
return progress; for (i = 1; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++)
add_small_page_to_freelist(head,
from_off(head,
lpage + (i<<sp_bits)));
return lpage;
} }
void *alloc_get(void *pool, unsigned long poolsize, static unsigned long get_small_page(struct header *head,
unsigned long size, unsigned long align) unsigned long poolsize)
{ {
bool subpage_clean = false, metadata_clean = false;
unsigned long ret; unsigned long ret;
if (poolsize < MIN_SIZE) ret = pop_from_list(head, &head->small_free_list);
return NULL; if (likely(ret))
return ret;
again: ret = get_large_page(head, poolsize);
/* Sub-page allocations have an overhead of ~12%. */ if (likely(ret))
if (size + size/8 >= getpagesize() || align >= getpagesize()) { ret = break_up_large_page(head, poolsize, ret);
unsigned long pages = div_up(size, getpagesize()); return ret;
}
ret = alloc_get_pages(pool, poolsize, pages, align)
* getpagesize();
} else
ret = alloc_sub_page(pool, poolsize, size, align);
if (ret != 0) void *alloc_get(void *pool, unsigned long poolsize,
return (char *)pool + ret; unsigned long size, unsigned long align)
{
struct header *head = pool;
unsigned int bucket;
unsigned long i;
struct bucket_state *bs;
struct page_header *ph;
/* Allocation failed: garbage collection. */ if (poolsize < MIN_USEFUL_SIZE) {
if (!subpage_clean) { return tiny_alloc_get(pool, poolsize, size, align);
subpage_clean = true;
if (clean_empty_subpages(pool, poolsize))
goto again;
} }
if (!metadata_clean) { size = align_up(size, align);
metadata_clean = true; if (unlikely(!size))
if (clean_metadata(pool, poolsize)) size = 1;
goto again; bucket = size_to_bucket(size);
}
/* FIXME: Compact metadata? */ if (bucket >= max_bucket(large_page_bits(poolsize))) {
/* FIXME: huge alloc. */
return NULL; return NULL;
} }
static void bitmap_free(void *pool, unsigned long pagenum, unsigned long off,
uint8_t *metadata)
{
assert(off % BITMAP_GRANULARITY == 0);
off /= BITMAP_GRANULARITY;
/* Offset by one because first bit is used for header. */
off++;
set_bit_pair(metadata, off++, FREE); bs = &head->bs[bucket];
while (off <= SUBPAGE_METAOFF / BITMAP_GRANULARITY
&& get_bit_pair(metadata, off) == TAKEN)
set_bit_pair(metadata, off++, FREE);
}
static void uniform_free(void *pool, unsigned long pagenum, unsigned long off, if (!bs->page_list) {
uint8_t *metadata) struct page_header *ph;
{
unsigned int usize, bit;
usize = decode_usize(metadata); if (large_page_bucket(bucket, poolsize))
/* Must have been this size. */ bs->page_list = get_large_page(head, poolsize);
assert(off % usize == 0); else
bit = off / usize; bs->page_list = get_small_page(head, poolsize);
/* FIXME: Try large-aligned alloc? Header stuffing? */
if (unlikely(!bs->page_list))
return NULL;
ph = from_off(head, bs->page_list);
ph->bucket = bucket;
ph->elements_used = 0;
ph->next = 0;
memset(ph->used, 0, used_size(bs->elements_per_page));
}
/* Skip header. */ ph = from_off(head, bs->page_list);
metadata += 2;
/* Must have been allocated. */ i = find_free_bit(ph->used);
assert(metadata[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT))); set_bit(ph->used, i);
metadata[bit / CHAR_BIT] &= ~(1 << (bit % CHAR_BIT)); ph->elements_used++;
}
static void subpage_free(void *pool, unsigned long pagenum, void *free) /* check if this page is now full */
{ if (unlikely(ph->elements_used == bs->elements_per_page)) {
unsigned long off = (unsigned long)free % getpagesize(); del_from_bucket_list(head, bs, ph);
uint8_t *metadata = get_page_metadata(pool, pagenum); add_to_bucket_full_list(head, bs, ph);
enum sub_metadata_type type;
type = get_bit_pair(metadata, 0);
assert(off < SUBPAGE_METAOFF);
switch (type) {
case BITMAP:
bitmap_free(pool, pagenum, off, metadata);
break;
case UNIFORM:
uniform_free(pool, pagenum, off, metadata);
break;
default:
assert(0);
} }
return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
bs->elements_per_page)
+ i * bucket_to_size(bucket);
} }
void alloc_free(void *pool, unsigned long poolsize, void *free) void alloc_free(void *pool, unsigned long poolsize, void *free)
{ {
unsigned long pagenum; struct header *head = pool;
struct metaheader *mh; struct bucket_state *bs;
unsigned int pagebits;
unsigned long i, pgoffset, offset = (char *)free - (char *)pool;
bool smallpage;
struct page_header *ph;
if (!free) if (poolsize < MIN_USEFUL_SIZE) {
return; return tiny_alloc_free(pool, poolsize, free);
}
assert(poolsize >= MIN_SIZE); /* Get page header. */
pagebits = large_page_bits(poolsize);
if (!test_bit(head->pagesize, offset >> pagebits)) {
smallpage = true;
pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE;
} else
smallpage = false;
mh = first_mheader(pool, poolsize); /* Step back to page header. */
assert((char *)free >= (char *)(mh + 1)); ph = from_off(head, offset & ~((1UL << pagebits) - 1));
assert((char *)pool + poolsize > (char *)free); bs = &head->bs[ph->bucket];
pgoffset = (offset & ((1UL << pagebits) - 1))
- page_header_size(ph->bucket / INTER_BUCKET_SPACE,
bs->elements_per_page);
pagenum = pool_offset(pool, free) / getpagesize(); if (unlikely(ph->elements_used == bs->elements_per_page)) {
del_from_bucket_full_list(head, bs, ph);
add_to_bucket_list(head, bs, ph);
}
/* Which element are we? */
i = pgoffset / bucket_to_size(ph->bucket);
clear_bit(ph->used, i);
ph->elements_used--;
if (get_page_state(pool, pagenum) == SPECIAL) if (unlikely(ph->elements_used == 0)) {
subpage_free(pool, pagenum, free); bs = &head->bs[ph->bucket];
else { del_from_bucket_list(head, bs, ph);
assert((unsigned long)free % getpagesize() == 0); if (smallpage)
alloc_free_pages(pool, pagenum); add_small_page_to_freelist(head, ph);
else
add_large_page_to_freelist(head, ph);
} }
} }
unsigned long alloc_size(void *pool, unsigned long poolsize, void *p) unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
{ {
unsigned long len, pagenum; struct header *head = pool;
struct metaheader *mh; unsigned int pagebits;
unsigned long offset = (char *)p - (char *)pool;
assert(poolsize >= MIN_SIZE); struct page_header *ph;
mh = first_mheader(pool, poolsize);
assert((char *)p >= (char *)(mh + 1));
assert((char *)pool + poolsize > (char *)p);
pagenum = pool_offset(pool, p) / getpagesize();
if (get_page_state(pool, pagenum) == SPECIAL) {
unsigned long off = (unsigned long)p % getpagesize();
uint8_t *metadata = get_page_metadata(pool, pagenum);
enum sub_metadata_type type = get_bit_pair(metadata, 0);
assert(off < SUBPAGE_METAOFF);
switch (type) {
case BITMAP:
assert(off % BITMAP_GRANULARITY == 0);
off /= BITMAP_GRANULARITY;
/* Offset by one because first bit used for header. */
off++;
len = BITMAP_GRANULARITY;
while (++off < SUBPAGE_METAOFF / BITMAP_GRANULARITY
&& get_bit_pair(metadata, off) == TAKEN)
len += BITMAP_GRANULARITY;
break;
case UNIFORM:
len = decode_usize(metadata);
break;
default:
assert(0);
}
} else {
len = getpagesize();
while (get_page_state(pool, ++pagenum) == TAKEN)
len += getpagesize();
}
return len;
}
static bool is_metadata_page(void *pool, unsigned long poolsize, if (poolsize < MIN_USEFUL_SIZE)
unsigned long page) return tiny_alloc_size(pool, poolsize, p);
{
struct metaheader *mh;
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){ /* Get page header. */
unsigned long start, end; pagebits = large_page_bits(poolsize);
if (!test_bit(head->pagesize, offset >> pagebits))
pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE;
start = pool_offset(pool, mh); /* Step back to page header. */
end = pool_offset(pool, (char *)(mh+1) ph = from_off(head, offset & ~((1UL << pagebits) - 1));
+ get_metalen(pool, poolsize, mh)); return bucket_to_size(ph->bucket);
if (page >= start/getpagesize() && page < end/getpagesize())
return true;
}
return false;
} }
/* Useful for gdb breakpoints. */ /* Useful for gdb breakpoints. */
...@@ -894,293 +731,215 @@ static bool check_fail(void)
return false; return false;
} }
static bool check_bitmap_metadata(void *pool, unsigned long *mhoff) static unsigned long count_bits(const unsigned long bitmap[],
unsigned long limit)
{ {
enum alloc_state last_state = FREE; unsigned long i, count = 0;
unsigned int i;
for (i = 0; i < SUBPAGE_METAOFF / BITMAP_GRANULARITY; i++) { while (limit >= BITS_PER_LONG) {
enum alloc_state state; count += popcount(bitmap[0]);
bitmap++;
/* +1 because header is the first byte. */ limit -= BITS_PER_LONG;
state = get_bit_pair((uint8_t *)pool + *mhoff, i+1);
switch (state) {
case SPECIAL:
return check_fail();
case TAKEN:
if (last_state == FREE)
return check_fail();
break;
default:
break;
} }
last_state = state;
} for (i = 0; i < limit; i++)
return true; if (test_bit(bitmap, i))
count++;
return count;
} }
/* We don't know what alignment they asked for, but we can infer worst static bool out_of_bounds(unsigned long off,
* case from the size. */ unsigned long pagesize,
static unsigned int max_align(unsigned int size) unsigned long poolsize)
{ {
unsigned int align = 1; return (off > poolsize || off + pagesize > poolsize);
while (size % (align * 2) == 0)
align *= 2;
return align;
} }
static bool check_uniform_metadata(void *pool, unsigned long *mhoff) static bool check_bucket(struct header *head,
unsigned long poolsize,
unsigned long pages[],
struct bucket_state *bs,
unsigned int bindex)
{ {
uint8_t *meta = (uint8_t *)pool + *mhoff; bool lp_bucket = large_page_bucket(bindex, poolsize);
unsigned int i, usize; struct page_header *ph;
struct uniform_cache *uc = pool; unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;
usize = decode_usize(meta); lp_bits = large_page_bits(poolsize);
if (usize == 0 || suitable_for_uc(usize, max_align(usize)) != usize) sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
return check_fail();
/* If it's in uniform cache, make sure that agrees on size. */ pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);
for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
uint8_t *ucm;
if (!uc->size[i]) /* This many elements fit? */
continue; taken = page_header_size(bindex / INTER_BUCKET_SPACE,
bs->elements_per_page);
ucm = get_page_metadata(pool, uc->page[i]); taken += bucket_to_size(bindex) * bs->elements_per_page;
if (ucm != meta) if (taken > pagesize)
continue;
if (usize != uc->size[i])
return check_fail(); return check_fail();
}
return true;
}
static bool check_subpage(void *pool, unsigned long poolsize,
unsigned long page)
{
unsigned long *mhoff = metadata_off(pool, page);
if (*mhoff + sizeof(struct metaheader) > poolsize) /* One more wouldn't fit? */
taken = page_header_size(bindex / INTER_BUCKET_SPACE,
bs->elements_per_page + 1);
taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
if (taken <= pagesize)
return check_fail(); return check_fail();
if (*mhoff % ALIGNOF(struct metaheader) != 0) /* Walk used list. */
prev = 0;
for (i = bs->page_list; i; i = ph->next) {
/* Bad pointer? */
if (out_of_bounds(i, pagesize, poolsize))
return check_fail(); return check_fail();
/* Wrong size page? */
/* It must point to a metadata page. */ if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket)
if (!is_metadata_page(pool, poolsize, *mhoff / getpagesize()))
return check_fail(); return check_fail();
/* Not page boundary? */
/* Header at start of subpage allocation */ if (i % pagesize)
switch (get_bit_pair((uint8_t *)pool + *mhoff, 0)) {
case BITMAP:
return check_bitmap_metadata(pool, mhoff);
case UNIFORM:
return check_uniform_metadata(pool, mhoff);
default:
return check_fail(); return check_fail();
} ph = from_off(head, i);
/* Linked list corrupt? */
} if (ph->prev != prev)
bool alloc_check(void *pool, unsigned long poolsize)
{
unsigned long i;
struct metaheader *mh;
enum alloc_state last_state = FREE;
bool was_metadata = false;
if (poolsize < MIN_SIZE)
return true;
if (get_page_state(pool, 0) != TAKEN_START)
return check_fail(); return check_fail();
/* Already seen this page? */
/* First check metadata pages. */ if (test_bit(pages, i >> sp_bits))
/* Metadata pages will be marked TAKEN. */
for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
unsigned long start, end;
start = pool_offset(pool, mh);
if (start + sizeof(*mh) > poolsize)
return check_fail(); return check_fail();
set_bit(pages, i >> sp_bits);
end = pool_offset(pool, (char *)(mh+1) /* Empty or full? */
+ get_metalen(pool, poolsize, mh)); if (ph->elements_used == 0)
if (end > poolsize)
return check_fail(); return check_fail();
if (ph->elements_used >= bs->elements_per_page)
/* Non-first pages should start on a page boundary. */
if (mh != first_mheader(pool, poolsize)
&& start % getpagesize() != 0)
return check_fail(); return check_fail();
/* Used bits don't agree? */
/* It should end on a page boundary. */ if (ph->elements_used != count_bits(ph->used,
if (end % getpagesize() != 0) bs->elements_per_page))
return check_fail(); return check_fail();
/* Wrong bucket? */
if (ph->bucket != bindex)
return check_fail();
prev = i;
} }
for (i = 0; i < poolsize / getpagesize(); i++) { /* Walk full list. */
enum alloc_state state = get_page_state(pool, i); prev = 0;
bool is_metadata = is_metadata_page(pool, poolsize,i); for (i = bs->full_list; i; i = ph->next) {
/* Bad pointer? */
switch (state) { if (out_of_bounds(i, pagesize, poolsize))
case FREE:
/* metadata pages are never free. */
if (is_metadata)
return check_fail(); return check_fail();
case TAKEN_START: /* Wrong size page? */
break; if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket)
case TAKEN:
/* This should continue a previous block. */
if (last_state == FREE)
return check_fail(); return check_fail();
if (is_metadata != was_metadata) /* Not page boundary? */
if (i % pagesize)
return check_fail(); return check_fail();
break; ph = from_off(head, i);
case SPECIAL: /* Linked list corrupt? */
/* Check metadata pointer etc. */ if (ph->prev != prev)
if (!check_subpage(pool, poolsize, i))
return check_fail(); return check_fail();
} /* Already seen this page? */
last_state = state; if (test_bit(pages, i >> sp_bits))
was_metadata = is_metadata; return check_fail();
set_bit(pages, i >> sp_bits);
/* Not full? */
if (ph->elements_used != bs->elements_per_page)
return check_fail();
/* Used bits don't agree? */
if (ph->elements_used != count_bits(ph->used,
bs->elements_per_page))
return check_fail();
/* Wrong bucket? */
if (ph->bucket != bindex)
return check_fail();
prev = i;
} }
return true; return true;
} }
void alloc_visualize(FILE *out, void *pool, unsigned long poolsize) bool alloc_check(void *pool, unsigned long poolsize)
{ {
struct metaheader *mh; struct header *head = pool;
struct uniform_cache *uc = pool; unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
unsigned long pagebitlen, metadata_pages, count[1<<BITS_PER_PAGE], tot; struct page_header *ph;
long i; unsigned long pages[(MAX_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)
/ BITS_PER_LONG] = { 0 };
if (poolsize < MIN_SIZE) { if (poolsize < MIN_USEFUL_SIZE)
fprintf(out, "Pool smaller than %u: no content\n", MIN_SIZE); return tiny_alloc_check(pool, poolsize);
return;
}
tot = 0;
for (i = 0; i < UNIFORM_CACHE_NUM; i++)
tot += (uc->size[i] != 0);
fprintf(out, "Uniform cache (%lu entries):\n", tot);
for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
unsigned int j, total = 0;
uint8_t *meta;
if (!uc->size[i]) lp_bits = large_page_bits(poolsize);
continue; sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
/* First two bytes are header. */ num_buckets = max_bucket(lp_bits);
meta = get_page_metadata(pool, uc->page[i]) + 2;
for (j = 0; j < SUBPAGE_METAOFF / uc->size[i]; j++) header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
if (meta[j / 8] & (1 << (j % 8)))
total++;
printf(" %u: %lu: %u/%zu (%zu%% density)\n", /* First, set all bits taken by header. */
uc->size[i], uc->page[i], total, for (i = 0; i < header_size; i += (1UL << sp_bits))
SUBPAGE_METAOFF / uc->size[i], set_bit(pages, i >> sp_bits);
(total * 100) / (SUBPAGE_METAOFF / uc->size[i]));
}
memset(count, 0, sizeof(count)); /* Check small page free list. */
for (i = 0; i < poolsize / getpagesize(); i++) prev = 0;
count[get_page_state(pool, i)]++; for (i = head->small_free_list; i; i = ph->next) {
/* Bad pointer? */
mh = first_mheader(pool, poolsize); if (out_of_bounds(i, 1 << sp_bits, poolsize))
pagebitlen = (uint8_t *)mh - get_page_statebits(pool); return check_fail();
fprintf(out, "%lu bytes of page bits: FREE/TAKEN/TAKEN_START/SUBPAGE = %lu/%lu/%lu/%lu\n", /* Large page? */
pagebitlen, count[0], count[1], count[2], count[3]); if (test_bit(head->pagesize, i >> lp_bits))
return check_fail();
/* One metadata page for every page of page bits. */ /* Not page boundary? */
metadata_pages = div_up(pagebitlen, getpagesize()); if (i % (1 << sp_bits))
return check_fail();
/* Now do each metadata page. */ ph = from_off(head, i);
for (; mh; mh = next_mheader(pool,mh)) { /* Linked list corrupt? */
unsigned long free = 0, bitmapblocks = 0, uniformblocks = 0, if (ph->prev != prev)
len = 0, uniformlen = 0, bitmaplen = 0, metalen; return check_fail();
uint8_t *meta = (uint8_t *)(mh + 1); /* Already seen this page? */
if (test_bit(pages, i >> sp_bits))
metalen = get_metalen(pool, poolsize, mh); return check_fail();
metadata_pages += (sizeof(*mh) + metalen) / getpagesize(); set_bit(pages, i >> sp_bits);
prev = i;
for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) {
switch (get_bit_pair(meta, i)) {
case FREE:
len = 1;
free++;
break;
case BITMAP:
/* Skip over this allocated part. */
len = BITMAP_METALEN * METADATA_PER_BYTE;
bitmapblocks++;
bitmaplen += len;
break;
case UNIFORM:
/* Skip over this part. */
len = decode_usize(meta + i/METADATA_PER_BYTE);
len = uniform_metalen(len) * METADATA_PER_BYTE;
uniformblocks++;
uniformlen += len;
break;
default:
assert(0);
}
} }
fprintf(out, "Metadata %lu-%lu: %lu free, %lu bitmapblocks, %lu uniformblocks, %lu%% density\n", /* Check large page free list. */
pool_offset(pool, mh), prev = 0;
pool_offset(pool, (char *)(mh+1) + metalen), for (i = head->large_free_list; i; i = ph->next) {
free, bitmapblocks, uniformblocks, /* Bad pointer? */
(bitmaplen + uniformlen) * 100 if (out_of_bounds(i, 1 << lp_bits, poolsize))
/ (free + bitmaplen + uniformlen)); return check_fail();
/* Not large page? */
if (!test_bit(head->pagesize, i >> lp_bits))
return check_fail();
/* Not page boundary? */
if (i % (1 << lp_bits))
return check_fail();
ph = from_off(head, i);
/* Linked list corrupt? */
if (ph->prev != prev)
return check_fail();
/* Already seen this page? */
if (test_bit(pages, i >> sp_bits))
return check_fail();
set_bit(pages, i >> sp_bits);
prev = i;
} }
/* Account for total pages allocated. */ /* Check the buckets. */
tot = (count[1] + count[2] - metadata_pages) * getpagesize(); for (i = 0; i < max_bucket(lp_bits); i++) {
struct bucket_state *bs = &head->bs[i];
fprintf(out, "Total metadata bytes = %lu\n",
metadata_pages * getpagesize());
/* Now do every subpage. */
for (i = 0; i < poolsize / getpagesize(); i++) {
uint8_t *meta;
unsigned int j, allocated;
enum sub_metadata_type type;
if (get_page_state(pool, i) != SPECIAL) if (!check_bucket(head, poolsize, pages, bs, i))
continue; return false;
}
memset(count, 0, sizeof(count));
meta = get_page_metadata(pool, i);
type = get_bit_pair(meta, 0);
if (type == BITMAP) { /* Make sure every page accounted for. */
for (j = 0; j < SUBPAGE_METAOFF/BITMAP_GRANULARITY; j++) for (i = 0; i < poolsize >> sp_bits; i++) {
count[get_page_state(meta, j)]++; if (!test_bit(pages, i))
allocated = (count[1] + count[2]) * BITMAP_GRANULARITY; return check_fail();
fprintf(out, "Subpage bitmap "); if (test_bit(head->pagesize,
} else { i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
unsigned int usize = decode_usize(meta); /* Large page, skip rest. */
i += (1 << BITS_FROM_SMALL_TO_LARGE_PAGE) - 1;
assert(type == UNIFORM);
fprintf(out, "Subpage uniform (%u) ", usize);
meta += 2;
for (j = 0; j < SUBPAGE_METAOFF / usize; j++)
count[!!(meta[j / 8] & (1 << (j % 8)))]++;
allocated = count[1] * usize;
} }
fprintf(out, "%lu: FREE/TAKEN/TAKEN_START = %lu/%lu/%lu %u%% density\n",
i, count[0], count[1], count[2],
allocated * 100 / getpagesize());
tot += allocated;
} }
/* This is optimistic, since we overalloc in several cases. */ return true;
fprintf(out, "Best possible allocation density = %lu%%\n",
tot * 100 / poolsize);
} }
...@@ -3,12 +3,131 @@
#include <stdio.h> #include <stdio.h>
#include <stdbool.h> #include <stdbool.h>
/**
* alloc_init - initialize a pool of memory for the allocator.
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
*
* This stores all the setup state required to perform allocation within the
* pool (there is no external state). Any previous contents of @pool are
* discarded.
*
* The same @pool and @poolsize arguments must be handed to the other alloc
* functions after this.
*
* If the pool is too small for meaningful allocations, alloc_get will fail.
*
* Example:
* void *pool = malloc(32*1024*1024);
* if (!pool)
* err(1, "Failed to allocate 32MB");
* alloc_init(pool, 32*1024*1024);
*/
void alloc_init(void *pool, unsigned long poolsize); void alloc_init(void *pool, unsigned long poolsize);
/**
* alloc_get - allocate some memory from the pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
* @size: the size of the desired allocation
* @align: the alignment of the desired allocation (0 or power of 2)
*
* This is "malloc" within an initialized pool.
*
* It will return a unique pointer within the pool (ie. between @pool
* and @pool+@poolsize) which meets the alignment requirements of
* @align. Note that the alignment is relative to the start of the pool,
* so if @pool is not aligned, the pointer won't be either.
*
* Returns NULL if there is no contiguous room.
*
* Example:
* #include <ccan/alignof/alignof.h>
* ...
* double *d = alloc_get(pool, 32*1024*1024,
* sizeof(*d), ALIGNOF(*d));
* if (!d)
* err(1, "Failed to allocate a double");
*/
void *alloc_get(void *pool, unsigned long poolsize, void *alloc_get(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align); unsigned long size, unsigned long align);
/**
* alloc_free - free some allocated memory from the pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
* @p: the non-NULL pointer returned from alloc_get.
*
* This is "free" within an initialized pool. A pointer should only be
* freed once, and must be a pointer returned from a successful alloc_get()
* call.
*
* Example:
* alloc_free(pool, 32*1024*1024, d);
*/
void alloc_free(void *pool, unsigned long poolsize, void *free); void alloc_free(void *pool, unsigned long poolsize, void *free);
/**
* alloc_size - get the actual size allocated by alloc_get
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
* @p: the non-NULL pointer returned from alloc_get.
*
* alloc_get() may overallocate, in which case you may use the extra
* space exactly as if you had asked for it.
*
* The return value will always be at least the @size passed to alloc_get().
*
* Example:
* printf("Allocating a double actually got me %lu bytes\n",
* alloc_size(pool, 32*1024*1024, d));
*/
unsigned long alloc_size(void *pool, unsigned long poolsize, void *p); unsigned long alloc_size(void *pool, unsigned long poolsize, void *p);
/**
* alloc_check - check the integrity of the allocation pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
*
* alloc_check() can be used for debugging suspected pool corruption. It may
* be quite slow, but provides some assistance for hard-to-find overruns or
* double-frees. Unlike the rest of the code, it will not crash on corrupted
* pools.
*
* There is an internal function, check_fail(), called on every detected
* failure; it is a convenient breakpoint for gaining more insight into the
* type of corruption detected.
*
* Example:
* #include <assert.h>
*
* ...
* assert(alloc_check(pool, 32*1024*1024));
*/
bool alloc_check(void *pool, unsigned long poolsize); bool alloc_check(void *pool, unsigned long poolsize);
/**
* alloc_visualize - dump information about the allocation pool
* @pool: the contiguous bytes for the allocator to use
* @poolsize: the size of the pool
*
* When debugging the allocator itself, it's often useful to see how
* the pool is being used. alloc_visualize() does that, but makes
* assumptions about correctness (like the rest of the code) so if you
* suspect corruption call alloc_check() first.
*
* Example:
* #include <stdio.h>
*
* double *d = alloc_get(pool, 32*1024*1024,
* sizeof(*d), ALIGNOF(*d));
* if (!d) {
* fprintf(stderr, "Allocation failed!\n");
* if (!alloc_check(pool, 32*1024*1024))
* errx(1, "Allocation pool is corrupt");
* alloc_visualize(stderr, pool, 32*1024*1024);
* exit(1);
* }
*/
void alloc_visualize(FILE *out, void *pool, unsigned long poolsize); void alloc_visualize(FILE *out, void *pool, unsigned long poolsize);
#endif /* ALLOC_H */ #endif /* ALLOC_H */
...@@ -4,7 +4,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <err.h> #include <err.h>
#define POOL_ORD 16 #define POOL_ORD 20
#define POOL_SIZE (1 << POOL_ORD) #define POOL_SIZE (1 << POOL_ORD)
#define sort(p, num, cmp) \ #define sort(p, num, cmp) \
...@@ -32,14 +32,14 @@ static bool free_every_second_one(void *mem, unsigned int num, void *p[])
/* Free every second one. */ /* Free every second one. */
for (i = 0; i < num; i += 2) { for (i = 0; i < num; i += 2) {
alloc_free(mem, POOL_SIZE, p[i]); alloc_free(mem, POOL_SIZE, p[i]);
}
if (!alloc_check(mem, POOL_SIZE)) if (!alloc_check(mem, POOL_SIZE))
return false; return false;
}
for (i = 1; i < num; i += 2) { for (i = 1; i < num; i += 2) {
alloc_free(mem, POOL_SIZE, p[i]); alloc_free(mem, POOL_SIZE, p[i]);
}
if (!alloc_check(mem, POOL_SIZE)) if (!alloc_check(mem, POOL_SIZE))
return false; return false;
}
return true; return true;
} }
...@@ -48,9 +48,9 @@ int main(int argc, char *argv[])
{ {
void *mem; void *mem;
unsigned int i, num, max_size; unsigned int i, num, max_size;
void *p[POOL_SIZE]; void **p = calloc(POOL_SIZE, sizeof(*p));
plan_tests(178); plan_tests(120);
/* FIXME: Needs to be page aligned for now. */ /* FIXME: Needs to be page aligned for now. */
if (posix_memalign(&mem, 1 << POOL_ORD, POOL_SIZE) != 0) if (posix_memalign(&mem, 1 << POOL_ORD, POOL_SIZE) != 0)
...@@ -134,7 +134,7 @@ int main(int argc, char *argv[])
ok1(alloc_check(mem, POOL_SIZE)); ok1(alloc_check(mem, POOL_SIZE));
/* Alignment constraints should be met, as long as powers of two */ /* Alignment constraints should be met, as long as powers of two */
for (i = 0; i < POOL_ORD-1; i++) { for (i = 0; i < /*FIXME: POOL_ORD-1*/ 10; i++) {
p[i] = alloc_get(mem, POOL_SIZE, i, 1 << i); p[i] = alloc_get(mem, POOL_SIZE, i, 1 << i);
ok1(p[i]); ok1(p[i]);
ok1(((unsigned long)p[i] % (1 << i)) == 0); ok1(((unsigned long)p[i] % (1 << i)) == 0);
...@@ -142,13 +142,13 @@ int main(int argc, char *argv[])
ok1(alloc_size(mem, POOL_SIZE, p[i]) >= i); ok1(alloc_size(mem, POOL_SIZE, p[i]) >= i);
} }
for (i = 0; i < POOL_ORD-1; i++) { for (i = 0; i < /*FIXME: POOL_ORD-1*/ 10; i++) {
alloc_free(mem, POOL_SIZE, p[i]); alloc_free(mem, POOL_SIZE, p[i]);
ok1(alloc_check(mem, POOL_SIZE)); ok1(alloc_check(mem, POOL_SIZE));
} }
/* Alignment constraints for a single-byte allocation. */ /* Alignment constraints for a single-byte allocation. */
for (i = 0; i < POOL_ORD; i++) { for (i = 0; i < /*FIXME: POOL_ORD*/ 10; i++) {
p[0] = alloc_get(mem, POOL_SIZE, 1, 1 << i); p[0] = alloc_get(mem, POOL_SIZE, 1, 1 << i);
ok1(p[0]); ok1(p[0]);
ok1(alloc_check(mem, POOL_SIZE)); ok1(alloc_check(mem, POOL_SIZE));
...@@ -158,50 +158,11 @@ int main(int argc, char *argv[])
} }
/* Alignment check for a 0-byte allocation. Corner case. */ /* Alignment check for a 0-byte allocation. Corner case. */
p[0] = alloc_get(mem, POOL_SIZE, 0, 1 << (POOL_ORD - 1)); p[0] = alloc_get(mem, POOL_SIZE, 0, 1 << (/*FIXME: POOL_ORD - 1*/ 10));
ok1(alloc_check(mem, POOL_SIZE)); ok1(alloc_check(mem, POOL_SIZE));
ok1(alloc_size(mem, POOL_SIZE, p[0]) < POOL_SIZE); ok1(alloc_size(mem, POOL_SIZE, p[0]) < POOL_SIZE);
alloc_free(mem, POOL_SIZE, p[0]); alloc_free(mem, POOL_SIZE, p[0]);
ok1(alloc_check(mem, POOL_SIZE)); ok1(alloc_check(mem, POOL_SIZE));
/* Force the testing of split metadata. */
alloc_init(mem, POOL_SIZE);
for (i = 0; i < POOL_SIZE; i++) {
p[i] = alloc_get(mem, POOL_SIZE, getpagesize(), getpagesize());
if (!p[i])
break;
}
ok1(alloc_check(mem, POOL_SIZE));
ok1(alloc_size(mem, POOL_SIZE, p[i-1]) >= getpagesize());
/* Sort them. */
sort(p, i-1, addr_cmp);
/* Free all but the one next to the metadata. */
for (i = 1; p[i]; i++)
alloc_free(mem, POOL_SIZE, p[i]);
ok1(alloc_check(mem, POOL_SIZE));
ok1(alloc_size(mem, POOL_SIZE, p[0]) >= getpagesize());
/* Now do a whole heap of subpage allocs. */
for (i = 1; i < POOL_SIZE; i++) {
p[i] = alloc_get(mem, POOL_SIZE, 1, 1);
if (!p[i])
break;
}
ok1(alloc_check(mem, POOL_SIZE));
/* Free up our page next to metadata, and should be able to alloc */
alloc_free(mem, POOL_SIZE, p[0]);
ok1(alloc_check(mem, POOL_SIZE));
p[0] = alloc_get(mem, POOL_SIZE, 1, 1);
ok1(p[0]);
ok1(alloc_size(mem, POOL_SIZE, p[0]) >= 1);
/* Clean up. */
for (i = 0; p[i]; i++)
alloc_free(mem, POOL_SIZE, p[i]);
ok1(alloc_check(mem, POOL_SIZE));
return exit_status(); return exit_status();
} }
...@@ -13,3 +13,7 @@
#define HAVE_STATEMENT_EXPR 1 #define HAVE_STATEMENT_EXPR 1
#define HAVE_TYPEOF 1 #define HAVE_TYPEOF 1
#define HAVE_UTIME 1 #define HAVE_UTIME 1
#define HAVE_BUILTIN_CLZL 1
#define HAVE_BUILTIN_FFSL 1
#define HAVE_BUILTIN_POPCOUNTL 1