From af3ad719d81f5ddd8a3588637dd47a171dea77f8 Mon Sep 17 00:00:00 2001 From: "Bradley C. Kuszmaul" <bradley@tokutek.com> Date: Thu, 18 Sep 2008 00:04:10 +0000 Subject: [PATCH] Allocate blocks on alignment and don't write the root FIFO over the translate block. Addresses #1080, #1000, #1131. git-svn-id: file:///svn/tokudb.1131b+1080a@6167 c7de825b-a66e-492c-adef-691d508d4ae1 --- newbrt/block_allocator.c | 15 +++++++++++---- newbrt/block_allocator.h | 6 +++++- newbrt/brt-internal.h | 2 ++ newbrt/brt-serialize.c | 7 ++++--- newbrt/brt.c | 6 ++++-- newbrt/tests/block_allocator_test.c | 4 ++-- newbrt/tests/brt-serialize-test.c | 2 +- 7 files changed, 29 insertions(+), 13 deletions(-) diff --git a/newbrt/block_allocator.c b/newbrt/block_allocator.c index 254156e134..723a6f8a3b 100644 --- a/newbrt/block_allocator.c +++ b/newbrt/block_allocator.c @@ -17,6 +17,7 @@ struct blockpair { struct block_allocator { u_int64_t reserve_at_beginning; // How much to reserve at the beginning + u_int64_t alignment; // Block alignment u_int64_t n_blocks; // How many blocks u_int64_t blocks_array_size; // How big is the blocks_array. Must be >= n_blocks. struct blockpair *blocks_array; // These blocks are sorted by address. 
@@ -53,9 +54,10 @@ block_allocator_print (BLOCK_ALLOCATOR ba) { #endif void -create_block_allocator (BLOCK_ALLOCATOR *ba, u_int64_t reserve_at_beginning) { +create_block_allocator (BLOCK_ALLOCATOR *ba, u_int64_t reserve_at_beginning, u_int64_t alignment) { BLOCK_ALLOCATOR XMALLOC(result); result->reserve_at_beginning = reserve_at_beginning; + result->alignment = alignment; result->n_blocks = 0; result->blocks_array_size = 1; XMALLOC_N(result->blocks_array_size, result->blocks_array); @@ -82,6 +84,7 @@ grow_blocks_array (BLOCK_ALLOCATOR ba) { void block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t offset) { + assert(offset%ba->alignment == 0); u_int64_t i; VALIDATE(ba); assert(offset >= ba->reserve_at_beginning); @@ -107,6 +110,10 @@ block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t of VALIDATE(ba); } +static u_int64_t align (u_int64_t value, BLOCK_ALLOCATOR ba) { + return ((value+ba->alignment-1)/ba->alignment)*ba->alignment; +} + void block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offset) { grow_blocks_array(ba); @@ -123,11 +130,11 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs for (i=0; i<ba->n_blocks; i++, blocknum++) { if (blocknum>=ba->n_blocks) blocknum=0; // Consider the space after blocknum - if (blocknum+1 ==ba->n_blocks) continue; // Can't use the space after the last block, since that would be new space. + if (blocknum+1 == ba->n_blocks) continue; // Can't use the space after the last block, since that would be new space. struct blockpair *bp = &ba->blocks_array[blocknum]; u_int64_t this_offset = bp[0].offset; u_int64_t this_size = bp[0].size; - u_int64_t answer_offset = this_offset + this_size; + u_int64_t answer_offset = align(this_offset + this_size, ba); if (answer_offset + size > bp[1].offset) continue; // The block we want doesn't fit after this block. // It fits, so allocate it here. 
memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(struct blockpair)); @@ -141,7 +148,7 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs } // It didn't fit anywhere, so fit it on the end. struct blockpair *bp = &ba->blocks_array[ba->n_blocks]; - u_int64_t answer_offset = bp[-1].offset+bp[-1].size; + u_int64_t answer_offset = align(bp[-1].offset+bp[-1].size, ba); bp->offset = answer_offset; bp->size = size; ba->n_blocks++; diff --git a/newbrt/block_allocator.h b/newbrt/block_allocator.h index 219e7437a1..f203209cbb 100644 --- a/newbrt/block_allocator.h +++ b/newbrt/block_allocator.h @@ -23,12 +23,14 @@ typedef struct block_allocator *BLOCK_ALLOCATOR; void -create_block_allocator (BLOCK_ALLOCATOR * ba, u_int64_t reserve_at_beginning); +create_block_allocator (BLOCK_ALLOCATOR * ba, u_int64_t reserve_at_beginning, u_int64_t alignment); // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. +// All blocks start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters // ba (OUT): Result stored here. // reserve_at_beginning (IN) Size of reserved block at beginning. +// alignment (IN) Block alignment. void destroy_block_allocator (BLOCK_ALLOCATOR *ba); @@ -44,6 +46,7 @@ block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t of // Effect: Allocate a block of the specified size at a particular offset. // Aborts if anything goes wrong. // Requires: The resulting block may not overlap any other allocated block. +// And the offset must be a multiple of the block alignment. // Parameters: // ba (IN/OUT): The block allocator. (Modifies ba.) // size (IN): The size of the block. @@ -54,6 +57,7 @@ void block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offset); // Effect: Allocate a block of the specified size at an address chosen by the allocator. // Aborts if anything goes wrong. 
+// The block address will be a multiple of the alignment. // Parameters: // ba (IN/OUT): The block allocator. (Modifies ba.) // size (IN): The size of the block. diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h index 8a8b9482c9..0977f13ca8 100644 --- a/newbrt/brt-internal.h +++ b/newbrt/brt-internal.h @@ -281,4 +281,6 @@ enum brt_layout_version_e { void toku_brtheader_free (struct brt_header *h); int toku_brtheader_close (CACHEFILE cachefile, void *header_v); +#define BLOCK_ALLOCATOR_ALIGNMENT 4096 + #endif diff --git a/newbrt/brt-serialize.c b/newbrt/brt-serialize.c index 45243796e9..0165bad500 100644 --- a/newbrt/brt-serialize.c +++ b/newbrt/brt-serialize.c @@ -620,7 +620,8 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h) if (h->block_translation_address_on_disk != 0) { block_allocator_free_block(h->block_allocator, h->block_translation_address_on_disk); } - block_allocator_alloc_block(h->block_allocator, 4 + 8*h->translated_blocknum_limit, &h->block_translation_address_on_disk); + block_allocator_alloc_block(h->block_allocator, 4 + 16*h->translated_blocknum_limit, &h->block_translation_address_on_disk); + //printf("%s:%d bta=%lu size=%lu\n", __FILE__, __LINE__, h->block_translation_address_on_disk, 4 + 16*h->translated_blocknum_limit); wbuf_ulonglong(wbuf, h->translated_blocknum_limit); wbuf_DISKOFF(wbuf, h->block_translation_address_on_disk); if (h->n_named_roots>=0) { @@ -702,7 +703,7 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade h->block_translation_size_on_disk = 4 + 16 * h->translated_blocknum_limit; h->block_translation_address_on_disk = rbuf_diskoff(&rc); // Set up the the block translation buffer. 
- create_block_allocator(&h->block_allocator, h->nodesize); + create_block_allocator(&h->block_allocator, h->nodesize, BLOCK_ALLOCATOR_ALIGNMENT); // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, h->translated_blocknum_limit, h->block_translation_address_on_disk); if (h->block_translation_address_on_disk == 0) { h->block_translation = 0; @@ -718,7 +719,7 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade // check the checksum u_int32_t x1764 = x1764_memory(tbuf, h->block_translation_size_on_disk - 4); u_int64_t offset = h->block_translation_size_on_disk - 4; - // printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, h->block_translation_address_on_disk, offset, h->block_translation_size_on_disk); + //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, h->block_translation_address_on_disk, offset, h->block_translation_size_on_disk); u_int32_t stored_x1764 = ntohl(*(int*)(tbuf + offset)); assert(x1764 == stored_x1764); } diff --git a/newbrt/brt.c b/newbrt/brt.c index 6fb4274fc2..b3a642c50e 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -222,7 +222,9 @@ int toku_brtheader_close (CACHEFILE cachefile, void *header_v) { // block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize); if (h->dirty) { toku_serialize_brt_header_to(toku_cachefile_fd(cachefile), h); - toku_serialize_fifo_at(toku_cachefile_fd(cachefile), block_allocator_allocated_limit(h->block_allocator), h->fifo); + u_int64_t write_to = block_allocator_allocated_limit(h->block_allocator); // Must compute this after writing the header. 
+ //printf("%s:%d fifo written to %lu\n", __FILE__, __LINE__, write_to); + toku_serialize_fifo_at(toku_cachefile_fd(cachefile), write_to, h->fifo); } toku_brtheader_free(h); return 0; @@ -2162,7 +2164,7 @@ static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) { t->h->block_translation_size_on_disk = 0; t->h->block_translation_address_on_disk = 0; // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, t->h->translated_blocknum_limit, t->h->block_translation_address_on_disk); - create_block_allocator(&t->h->block_allocator, t->nodesize); + create_block_allocator(&t->h->block_allocator, t->nodesize, BLOCK_ALLOCATOR_ALIGNMENT); toku_fifo_create(&t->h->fifo); t->root_put_counter = global_root_put_counter++; if (dbname) { diff --git a/newbrt/tests/block_allocator_test.c b/newbrt/tests/block_allocator_test.c index 7be0f7b071..205ab52591 100644 --- a/newbrt/tests/block_allocator_test.c +++ b/newbrt/tests/block_allocator_test.c @@ -27,7 +27,7 @@ static void test_ba0 (void) { BLOCK_ALLOCATOR ba; u_int64_t b0, b1; - create_block_allocator(&ba, 100); + create_block_allocator(&ba, 100, 1); ba_alloc_at(ba, 50, 100); ba_alloc_at(ba, 25, 150); ba_alloc (ba, 10, &b0); @@ -75,7 +75,7 @@ test_ba0 (void) { static void test_ba1 (int n_initial) { BLOCK_ALLOCATOR ba; - create_block_allocator(&ba, 0); + create_block_allocator(&ba, 0, 1); int i; int n_blocks=0; u_int64_t blocks[1000]; diff --git a/newbrt/tests/brt-serialize-test.c b/newbrt/tests/brt-serialize-test.c index b93fb16b3a..86e0cf86b1 100644 --- a/newbrt/tests/brt-serialize-test.c +++ b/newbrt/tests/brt-serialize-test.c @@ -62,7 +62,7 @@ static void test_serialize(void) { brt_h->block_translation = btps; brt_h->block_translation[20].diskoff = 4096; brt_h->block_translation[20].size = 100; - create_block_allocator(&brt_h->block_allocator, 4096); + create_block_allocator(&brt_h->block_allocator, 4096, BLOCK_ALLOCATOR_ALIGNMENT); { u_int64_t b; 
block_allocator_alloc_block(brt_h->block_allocator, 100, &b); -- 2.30.9