Commit d73e7c40 authored by Bradley C. Kuszmaul, committed by Yoni Fogel

The good stuff from #1884 up to changeset:14213. Refs #1884. [t:1884].

 * Block allocation is now first-fit (see the sketch below).
 * There is a better test for the block allocator.
 * brt_stat64 returns data in a struct instead of a bunch of arguments.
 * Nodes are set clean after serialization.
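
For context, a minimal sketch of the first-fit policy described above (illustrative only, not code from this commit; the names and types here are hypothetical, and alignment is omitted): scan the address-sorted block list and take the first gap large enough for the request, otherwise append after the last block.

/* Illustrative first-fit placement sketch; simplified, not the actual block_allocator code. */
#include <stdint.h>
#include <stddef.h>

struct blockpair_sketch { uint64_t offset, size; };

static uint64_t
first_fit_offset (const struct blockpair_sketch *blocks, size_t n_blocks,
                  uint64_t reserve_at_beginning, uint64_t size) {
    uint64_t candidate = reserve_at_beginning;          // first byte past the reserved header
    for (size_t i = 0; i < n_blocks; i++) {
        if (candidate + size <= blocks[i].offset)
            return candidate;                           // the gap before block i is big enough
        candidate = blocks[i].offset + blocks[i].size;  // otherwise try the gap after block i
    }
    return candidate;                                   // no gap fits: append after the last block
}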


git-svn-id: file:///svn/toku/tokudb@14214 c7de825b-a66e-492c-adef-691d508d4ae1
parent 12dc9822
......@@ -7,7 +7,7 @@
// Here's a very simple implementation.
// It's not very fast at allocating or freeing.
// The previous implementation used next_fit, but we now use first_fit, since we are moving blocks around to reduce file size.
struct blockpair {
u_int64_t offset;
......@@ -20,18 +20,21 @@ struct block_allocator {
u_int64_t n_blocks; // How many blocks
u_int64_t blocks_array_size; // How big is the blocks_array. Must be >= n_blocks.
struct blockpair *blocks_array; // These blocks are sorted by address.
u_int64_t next_fit_counter; // Used for the next_fit algorithm.
u_int64_t n_bytes_in_use; // including the reserve_at_beginning
};
void
block_allocator_validate (BLOCK_ALLOCATOR ba) {
u_int64_t i;
u_int64_t n_bytes_in_use = ba->reserve_at_beginning;
for (i=0; i<ba->n_blocks; i++) {
n_bytes_in_use += ba->blocks_array[i].size;
if (i>0) {
assert(ba->blocks_array[i].offset > ba->blocks_array[i-1].offset);
assert(ba->blocks_array[i].offset >= ba->blocks_array[i-1].offset + ba->blocks_array[i-1].size );
}
}
assert(n_bytes_in_use == ba->n_bytes_in_use);
}
#if 0
......@@ -60,7 +63,7 @@ create_block_allocator (BLOCK_ALLOCATOR *ba, u_int64_t reserve_at_beginning, u_i
result->n_blocks = 0;
result->blocks_array_size = 1;
XMALLOC_N(result->blocks_array_size, result->blocks_array);
result->next_fit_counter = 0;
result->n_bytes_in_use = reserve_at_beginning;
*ba = result;
VALIDATE(result);
}
......@@ -89,6 +92,7 @@ block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t of
assert(offset >= ba->reserve_at_beginning);
grow_blocks_array(ba);
// Just do a linear search for the block
ba->n_bytes_in_use += size;
for (i=0; i<ba->n_blocks; i++) {
if (ba->blocks_array[i].offset > offset) {
// allocate it in that slot
......@@ -110,27 +114,41 @@ block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t of
}
static inline u_int64_t
align (u_int64_t value, BLOCK_ALLOCATOR ba) {
align (u_int64_t value, BLOCK_ALLOCATOR ba)
// Effect: align a value by rounding up.
{
return ((value+ba->alignment-1)/ba->alignment)*ba->alignment;
}
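// Illustration (editor's note, not part of this commit): the round-up works because adding
// alignment-1 pushes any non-multiple past the next boundary before the integer division
// truncates.  For example, with ba->alignment == 512:
//   align(1,   ba) == ((1   + 511)/512)*512 == 512
//   align(512, ba) == ((512 + 511)/512)*512 == 512
//   align(513, ba) == ((513 + 511)/512)*512 == 1024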
void
block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offset) {
grow_blocks_array(ba);
ba->n_bytes_in_use += size;
if (ba->n_blocks==0) {
ba->blocks_array[0].offset = ba->reserve_at_beginning;
assert(ba->n_bytes_in_use == ba->reserve_at_beginning + size); // we know exactly how many are in use
ba->blocks_array[0].offset = align(ba->reserve_at_beginning, ba);
ba->blocks_array[0].size = size;
*offset = ba->reserve_at_beginning;
*offset = ba->blocks_array[0].offset;
ba->n_blocks++;
return;
}
u_int64_t i;
u_int64_t blocknum = ba->next_fit_counter;
// Implement next fit.
for (i=0; i<ba->n_blocks; i++, blocknum++) {
if (blocknum>=ba->n_blocks) blocknum=0;
// Implement first fit.
{
u_int64_t end_of_reserve = align(ba->reserve_at_beginning, ba);
if (end_of_reserve + size <= ba->blocks_array[0].offset ) {
// Check to see if the space immediately after the reserve is big enough to hold the new block.
struct blockpair *bp = &ba->blocks_array[0];
memmove(bp+1, bp, (ba->n_blocks)*sizeof(struct blockpair));
bp[0].offset = end_of_reserve;
bp[0].size = size;
ba->n_blocks++;
*offset = end_of_reserve;
VALIDATE(ba);
return;
}
}
for (u_int64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) {
// Consider the space after blocknum
if (blocknum+1 == ba->n_blocks) continue; // Can't use the space after the last block, since that would be new space.
struct blockpair *bp = &ba->blocks_array[blocknum];
u_int64_t this_offset = bp[0].offset;
u_int64_t this_size = bp[0].size;
......@@ -141,7 +159,6 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
bp[1].offset = answer_offset;
bp[1].size = size;
ba->n_blocks++;
ba->next_fit_counter = blocknum;
*offset = answer_offset;
VALIDATE(ba);
return;
......@@ -188,6 +205,7 @@ block_allocator_free_block (BLOCK_ALLOCATOR ba, u_int64_t offset) {
VALIDATE(ba);
int64_t bn = find_block(ba, offset);
assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists.
ba->n_bytes_in_use -= ba->blocks_array[bn].size;
memmove(&ba->blocks_array[bn], &ba->blocks_array[bn+1], (ba->n_blocks-bn-1) * sizeof(struct blockpair));
ba->n_blocks--;
VALIDATE(ba);
......@@ -208,3 +226,22 @@ block_allocator_allocated_limit (BLOCK_ALLOCATOR ba) {
return last->offset + last->size;
}
}
int
block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, u_int64_t b, u_int64_t *offset, u_int64_t *size)
// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block with that number; return nonzero if b is too big.
{
if (b==0) {
*offset=0;
*size =ba->reserve_at_beginning;
return 0;
} else if (b > ba->n_blocks) {
return -1;
} else {
*offset=ba->blocks_array[b-1].offset;
*size =ba->blocks_array[b-1].size;
return 0;
}
}
......@@ -42,7 +42,7 @@ create_block_allocator (BLOCK_ALLOCATOR * ba, u_int64_t reserve_at_beginning, u_
// Aborts if we run out of memory.
// Parameters
// ba (OUT): Result stored here.
// reserve_at_beginning (IN) Size of reserved block at beginning.
// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
// alignment (IN) Block alignment.
void
......@@ -73,7 +73,7 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
// The block address will be a multiple of the alignment.
// Parameters:
// ba (IN/OUT): The block allocator. (Modifies ba.)
// size (IN): The size of the block.
// size (IN): The size of the block. (The size does not have to be aligned.)
// offset (OUT): The location of the block.
void
......@@ -109,4 +109,11 @@ block_allocator_allocated_limit (BLOCK_ALLOCATOR ba);
// So we start at the "infinite" block, write the fifo, and then
// allocate_block_at of the correct size and offset to account for the root FIFO.
int
block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, u_int64_t b, u_int64_t *offset, u_int64_t *size);
// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block with that number; return nonzero if b is too big.
// This is probably only useful for tests.
#endif
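
As a quick illustration of the interface declared above, here is a minimal usage sketch (editor's note, not part of this commit; the asserted offsets assume the first-fit placement introduced by this change, a 100-byte reserve, and 4096-byte alignment):

/* Usage sketch only; includes (block_allocator.h, assert.h) and error handling omitted for brevity. */
static void
block_allocator_usage_sketch (void) {
    BLOCK_ALLOCATOR ba;
    u_int64_t a, b;
    create_block_allocator(&ba, 100, 4096);        // 100 reserved bytes, 4096-byte alignment
    block_allocator_alloc_block(ba, 200, &a);      // first block lands at the first aligned offset past the reserve
    block_allocator_alloc_block(ba, 200, &b);      // next block follows at the next aligned offset
    assert(a == 4096 && b == 2*4096);
    block_allocator_free_block(ba, a);             // free the first block ...
    block_allocator_alloc_block(ba, 200, &a);      // ... and first fit reuses that hole
    assert(a == 4096);
    destroy_block_allocator(&ba);
}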
......@@ -4578,12 +4578,12 @@ int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u
return 0;
}
int toku_brt_stat64 (BRT brt, TOKUTXN UU(txn), u_int64_t *nkeys, u_int64_t *ndata, u_int64_t *dsize, u_int64_t *fsize) {
int toku_brt_stat64 (BRT brt, TOKUTXN UU(txn), struct brtstat64_s *s) {
{
int64_t file_size;
int r = toku_os_get_file_size(toku_cachefile_fd(brt->cf), &file_size);
assert(r==0);
*fsize = file_size + toku_cachefile_size_in_memory(brt->cf);
s->fsize = file_size + toku_cachefile_size_in_memory(brt->cf);
}
assert(brt->h);
......@@ -4598,17 +4598,17 @@ int toku_brt_stat64 (BRT brt, TOKUTXN UU(txn), u_int64_t *nkeys, u_int64_t *ndat
BRTNODE node = node_v;
if (node->height==0) {
*nkeys = node->u.l.leaf_stats.nkeys;
*ndata = node->u.l.leaf_stats.ndata;
*dsize = node->u.l.leaf_stats.dsize;
s->nkeys = node->u.l.leaf_stats.nkeys;
s->ndata = node->u.l.leaf_stats.ndata;
s->dsize = node->u.l.leaf_stats.dsize;
} else {
*nkeys = *ndata = *dsize = 0;
s->nkeys = s->ndata = s->dsize = 0;
int i;
for (i=0; i<node->u.n.n_children; i++) {
struct subtree_estimates *se = &BNC_SUBTREE_ESTIMATES(node, i);
*nkeys += se->nkeys;
*ndata += se->ndata;
*dsize += se->dsize;
s->nkeys += se->nkeys;
s->ndata += se->ndata;
s->dsize += se->dsize;
}
}
......
......@@ -137,11 +137,15 @@ enum brt_header_flags {
};
int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater);
struct brtstat64_s {
u_int64_t nkeys; /* estimate how many unique keys (even when flattened this may be an estimate) */
u_int64_t ndata; /* estimate the number of pairs (exact when flattened and committed) */
u_int64_t dsize; /* estimate the sum of the sizes of the pairs (exact when flattened and committed) */
u_int64_t fsize; /* the size of the underlying file */
u_int64_t ffree; /* Number of free bytes in the underlying file */
};
int toku_brt_stat64 (BRT, TOKUTXN,
u_int64_t *nkeys, /* estimate how many unique keys (even when flattened this may be an estimate) */
u_int64_t *ndata, /* estimate the number of pairs (exact when flattened and committed) */
u_int64_t *dsize, /* estimate the sum of the sizes of the pairs (exact when flattened and committed) */
u_int64_t *fsize /* the size of the underlying file */
struct brtstat64_s *stat
);
int toku_brt_init(void (*ydb_lock_callback)(void), void (*ydb_unlock_callback)(void));
......
......@@ -1196,7 +1196,9 @@ int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE cachefile, CACHEKEY key,
}
int toku_cachetable_unpin(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, enum cachetable_dirty dirty, long size) {
int toku_cachetable_unpin(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, enum cachetable_dirty dirty, long size)
// size==0 means that the size didn't change.
{
CACHETABLE ct = cachefile->cachetable;
PAIR p;
WHEN_TRACE_CT(printf("%s:%d unpin(%lld)", __FILE__, __LINE__, key));
......
......@@ -22,15 +22,41 @@ static void ba_free (BLOCK_ALLOCATOR ba, u_int64_t offset) {
block_allocator_validate(ba);
}
static void
ba_check_l (BLOCK_ALLOCATOR ba, u_int64_t blocknum_in_layout_order, u_int64_t expected_offset, u_int64_t expected_size)
{
u_int64_t actual_offset, actual_size;
int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size);
assert(r==0);
assert(expected_offset == actual_offset);
assert(expected_size == actual_size);
}
static void
ba_check_none (BLOCK_ALLOCATOR ba, u_int64_t blocknum_in_layout_order)
{
u_int64_t actual_offset, actual_size;
int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size);
assert(r==-1);
}
// Simple block allocator test
static void
test_ba0 (void) {
BLOCK_ALLOCATOR ba;
u_int64_t b0, b1;
create_block_allocator(&ba, 100, 1);
assert(block_allocator_allocated_limit(ba)==100);
ba_alloc_at(ba, 50, 100);
assert(block_allocator_allocated_limit(ba)==150);
ba_alloc_at(ba, 25, 150);
ba_alloc (ba, 10, &b0);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, 100, 50);
ba_check_l (ba, 2, 150, 25);
ba_check_l (ba, 3, b0, 10);
ba_check_none (ba, 4);
assert(b0==175);
ba_free(ba, 150);
ba_alloc_at(ba, 10, 150);
......@@ -101,6 +127,135 @@ test_ba1 (int n_initial) {
assert(ba==0);
}
// Check which fit policy the allocator uses (first fit vs. the old next fit).
static void
test_ba2 (void)
{
BLOCK_ALLOCATOR ba;
u_int64_t b[6];
enum { BSIZE = 1024 };
create_block_allocator(&ba, 100, BSIZE);
assert(block_allocator_allocated_limit(ba)==100);
ba_check_l (ba, 0, 0, 100);
ba_check_none (ba, 1);
ba_alloc (ba, 100, &b[0]);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_none (ba, 2);
ba_alloc (ba, BSIZE+100, &b[1]);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_none (ba, 3);
ba_alloc (ba, 100, &b[2]);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 3, 4*BSIZE, 100);
ba_check_none (ba, 4);
ba_alloc (ba, 100, &b[3]);
ba_alloc (ba, 100, &b[4]);
ba_alloc (ba, 100, &b[5]);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 3, 4*BSIZE, 100);
ba_check_l (ba, 4, 5*BSIZE, 100);
ba_check_l (ba, 5, 6*BSIZE, 100);
ba_check_l (ba, 6, 7*BSIZE, 100);
ba_check_none (ba, 7);
ba_free (ba, 4*BSIZE);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 3, 5*BSIZE, 100);
ba_check_l (ba, 4, 6*BSIZE, 100);
ba_check_l (ba, 5, 7*BSIZE, 100);
ba_check_none (ba, 6);
u_int64_t b2;
ba_alloc(ba, 100, &b2);
assert(b2==4*BSIZE);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 3, 4*BSIZE, 100);
ba_check_l (ba, 4, 5*BSIZE, 100);
ba_check_l (ba, 5, 6*BSIZE, 100);
ba_check_l (ba, 6, 7*BSIZE, 100);
ba_check_none (ba, 7);
ba_free (ba, BSIZE);
ba_free (ba, 5*BSIZE);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 2, 4*BSIZE, 100);
ba_check_l (ba, 3, 6*BSIZE, 100);
ba_check_l (ba, 4, 7*BSIZE, 100);
ba_check_none (ba, 5);
// This alloc will allocate the first block after the reserve space in the case of first fit.
u_int64_t b3;
ba_alloc(ba, 100, &b3);
assert(b3== BSIZE); // First fit.
// if (b3==5*BSIZE) then it is next fit.
// Now 5*BSIZE is free
u_int64_t b5;
ba_alloc(ba, 100, &b5);
assert(b5==5*BSIZE);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 3, 4*BSIZE, 100);
ba_check_l (ba, 4, 5*BSIZE, 100);
ba_check_l (ba, 5, 6*BSIZE, 100);
ba_check_l (ba, 6, 7*BSIZE, 100);
ba_check_none (ba, 7);
// Now all blocks are busy
u_int64_t b6, b7, b8;
ba_alloc(ba, 100, &b6);
ba_alloc(ba, 100, &b7);
ba_alloc(ba, 100, &b8);
assert(b6==8*BSIZE);
assert(b7==9*BSIZE);
assert(b8==10*BSIZE);
ba_check_l (ba, 0, 0, 100);
ba_check_l (ba, 1, BSIZE, 100);
ba_check_l (ba, 2, 2*BSIZE, BSIZE+100);
ba_check_l (ba, 3, 4*BSIZE, 100);
ba_check_l (ba, 4, 5*BSIZE, 100);
ba_check_l (ba, 5, 6*BSIZE, 100);
ba_check_l (ba, 6, 7*BSIZE, 100);
ba_check_l (ba, 7, 8*BSIZE, 100);
ba_check_l (ba, 8, 9*BSIZE, 100);
ba_check_l (ba, 9, 10*BSIZE, 100);
ba_check_none (ba, 10);
ba_free(ba, 9*BSIZE);
ba_free(ba, 7*BSIZE);
u_int64_t b9;
ba_alloc(ba, 100, &b9);
assert(b9==7*BSIZE);
ba_free(ba, 5*BSIZE);
ba_free(ba, 2*BSIZE);
u_int64_t b10, b11;
ba_alloc(ba, 100, &b10);
assert(b10==2*BSIZE);
ba_alloc(ba, 100, &b11);
assert(b11==3*BSIZE);
ba_alloc(ba, 100, &b11);
assert(b11==5*BSIZE);
destroy_block_allocator(&ba);
}
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
......@@ -108,5 +263,6 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
test_ba1(0);
test_ba1(10);
test_ba1(20);
test_ba2();
return 0;
}
......@@ -3558,14 +3558,21 @@ static int toku_db_set_pagesize(DB *db, u_int32_t pagesize) {
static int toku_db_stat64(DB * db, DB_TXN *txn, DB_BTREE_STAT64 *s) {
HANDLE_PANICKED_DB(db);
HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
return toku_brt_stat64(db->i->brt, db_txn_struct_i(txn)->tokutxn, &s->bt_nkeys, &s->bt_ndata, &s->bt_dsize, &s->bt_fsize);
struct brtstat64_s brtstat;
int r = toku_brt_stat64(db->i->brt, db_txn_struct_i(txn)->tokutxn, &brtstat);
if (r==0) {
s->bt_nkeys = brtstat.nkeys;
s->bt_ndata = brtstat.ndata;
s->bt_dsize = brtstat.dsize;
s->bt_fsize = brtstat.fsize;
}
return r;
}
static int locked_db_stat64 (DB *db, DB_TXN *txn, DB_BTREE_STAT64 *s) {
toku_ydb_lock();
int r = toku_db_stat64(db, txn, s);
toku_ydb_unlock();
return r;
}
static int toku_db_key_range64(DB* db, DB_TXN* txn __attribute__((__unused__)), DBT* key, u_int64_t* less, u_int64_t* equal, u_int64_t* greater, int* is_exact) {
......