From af3ad719d81f5ddd8a3588637dd47a171dea77f8 Mon Sep 17 00:00:00 2001
From: "Bradley C. Kuszmaul" <bradley@tokutek.com>
Date: Thu, 18 Sep 2008 00:04:10 +0000
Subject: [PATCH] Allocate blocks on alignment and don't write the root FIFO
 over the translate block.  Addresses #1080, #1000, #1131.

git-svn-id: file:///svn/tokudb.1131b+1080a@6167 c7de825b-a66e-492c-adef-691d508d4ae1
---
 newbrt/block_allocator.c            | 15 +++++++++++----
 newbrt/block_allocator.h            |  6 +++++-
 newbrt/brt-internal.h               |  2 ++
 newbrt/brt-serialize.c              |  7 ++++---
 newbrt/brt.c                        |  6 ++++--
 newbrt/tests/block_allocator_test.c |  4 ++--
 newbrt/tests/brt-serialize-test.c   |  2 +-
 7 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/newbrt/block_allocator.c b/newbrt/block_allocator.c
index 254156e134..723a6f8a3b 100644
--- a/newbrt/block_allocator.c
+++ b/newbrt/block_allocator.c
@@ -17,6 +17,7 @@ struct blockpair {
 
 struct block_allocator {
     u_int64_t reserve_at_beginning; // How much to reserve at the beginning
+    u_int64_t alignment;            // Block alignment
     u_int64_t n_blocks; // How many blocks
     u_int64_t blocks_array_size; // How big is the blocks_array.  Must be >= n_blocks. 
     struct blockpair *blocks_array; // These blocks are sorted by address.
@@ -53,9 +54,10 @@ block_allocator_print (BLOCK_ALLOCATOR ba) {
 #endif
 
 void
-create_block_allocator (BLOCK_ALLOCATOR *ba, u_int64_t reserve_at_beginning) {
+create_block_allocator (BLOCK_ALLOCATOR *ba, u_int64_t reserve_at_beginning, u_int64_t alignment) {
     BLOCK_ALLOCATOR XMALLOC(result);    
     result->reserve_at_beginning = reserve_at_beginning;
+    result->alignment = alignment;
     result->n_blocks = 0;
     result->blocks_array_size = 1;
     XMALLOC_N(result->blocks_array_size, result->blocks_array);
@@ -82,6 +84,7 @@ grow_blocks_array (BLOCK_ALLOCATOR ba) {
 
 void
 block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t offset) {
+    assert(offset%ba->alignment == 0);
     u_int64_t i;
     VALIDATE(ba);
     assert(offset >= ba->reserve_at_beginning);
@@ -107,6 +110,10 @@ block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t of
     VALIDATE(ba);
 }
     
+static u_int64_t align (u_int64_t value, BLOCK_ALLOCATOR ba) { // Effect: Return VALUE rounded up to a multiple of ba->alignment (VALUE itself if it is already a multiple).
+    return ((value+ba->alignment-1)/ba->alignment)*ba->alignment; // Adding alignment-1 before the truncating division rounds up.  Requires ba->alignment != 0 (it is the divisor).
+}
+
 void
 block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offset) {
     grow_blocks_array(ba);
@@ -123,11 +130,11 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
     for (i=0; i<ba->n_blocks; i++, blocknum++) {
 	if (blocknum>=ba->n_blocks) blocknum=0;
 	// Consider the space after blocknum
-	if (blocknum+1 ==ba->n_blocks) continue; // Can't use the space after the last block, since that would be new space.
+	if (blocknum+1 == ba->n_blocks) continue; // Can't use the space after the last block, since that would be new space.
 	struct blockpair *bp = &ba->blocks_array[blocknum];
 	u_int64_t this_offset = bp[0].offset;
 	u_int64_t this_size   = bp[0].size;
-	u_int64_t answer_offset = this_offset + this_size;
+	u_int64_t answer_offset = align(this_offset + this_size, ba);
 	if (answer_offset + size > bp[1].offset) continue; // The block we want doesn't fit after this block.
 	// It fits, so allocate it here.
 	memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(struct blockpair));
@@ -141,7 +148,7 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
     }
     // It didn't fit anywhere, so fit it on the end.
     struct blockpair *bp = &ba->blocks_array[ba->n_blocks];
-    u_int64_t answer_offset = bp[-1].offset+bp[-1].size;
+    u_int64_t answer_offset = align(bp[-1].offset+bp[-1].size, ba);
     bp->offset = answer_offset;
     bp->size   = size;
     ba->n_blocks++;
diff --git a/newbrt/block_allocator.h b/newbrt/block_allocator.h
index 219e7437a1..f203209cbb 100644
--- a/newbrt/block_allocator.h
+++ b/newbrt/block_allocator.h
@@ -23,12 +23,14 @@
 typedef struct block_allocator *BLOCK_ALLOCATOR;
 
 void
-create_block_allocator (BLOCK_ALLOCATOR * ba, u_int64_t reserve_at_beginning);
+create_block_allocator (BLOCK_ALLOCATOR * ba, u_int64_t reserve_at_beginning, u_int64_t alignment);
 // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
+//  All blocks start on a multiple of ALIGNMENT.
 //  Aborts if we run out of memory.
 // Parameters
 //  ba (OUT):                        Result stored here.
 //  reserve_at_beginning (IN)        Size of reserved block at beginning.
+//  alignment (IN)                   Block alignment.
 
 void
 destroy_block_allocator (BLOCK_ALLOCATOR *ba);
@@ -44,6 +46,7 @@ block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t of
 // Effect: Allocate a block of the specified size at a particular offset.
 //  Aborts if anything goes wrong.
 // Requires: The resulting block may not overlap any other allocated block.
+//  And the offset must be a multiple of the block alignment.
 // Parameters:
 //  ba (IN/OUT): The block allocator.  (Modifies ba.)
 //  size (IN):   The size of the block.
@@ -54,6 +57,7 @@ void
 block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offset);
 // Effect: Allocate a block of the specified size at an address chosen by the allocator.
 //  Aborts if anything goes wrong.
+//  The block address will be a multiple of the alignment.
 // Parameters:
 //  ba (IN/OUT):  The block allocator.   (Modifies ba.)
 //  size (IN):    The size of the block.
diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h
index 8a8b9482c9..0977f13ca8 100644
--- a/newbrt/brt-internal.h
+++ b/newbrt/brt-internal.h
@@ -281,4 +281,6 @@ enum brt_layout_version_e {
 void toku_brtheader_free (struct brt_header *h);
 int toku_brtheader_close (CACHEFILE cachefile, void *header_v);
 
+#define BLOCK_ALLOCATOR_ALIGNMENT 4096
+
 #endif
diff --git a/newbrt/brt-serialize.c b/newbrt/brt-serialize.c
index 45243796e9..0165bad500 100644
--- a/newbrt/brt-serialize.c
+++ b/newbrt/brt-serialize.c
@@ -620,7 +620,8 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h)
     if (h->block_translation_address_on_disk != 0) {
 	block_allocator_free_block(h->block_allocator, h->block_translation_address_on_disk);
     }
-    block_allocator_alloc_block(h->block_allocator, 4 + 8*h->translated_blocknum_limit, &h->block_translation_address_on_disk);
+    block_allocator_alloc_block(h->block_allocator, 4 + 16*h->translated_blocknum_limit, &h->block_translation_address_on_disk);
+    //printf("%s:%d bta=%lu size=%lu\n", __FILE__, __LINE__, h->block_translation_address_on_disk, 4 + 16*h->translated_blocknum_limit);
     wbuf_ulonglong(wbuf, h->translated_blocknum_limit);
     wbuf_DISKOFF(wbuf, h->block_translation_address_on_disk);
     if (h->n_named_roots>=0) {
@@ -702,7 +703,7 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade
     h->block_translation_size_on_disk    = 4 + 16 * h->translated_blocknum_limit;
     h->block_translation_address_on_disk = rbuf_diskoff(&rc);
     // Set up the the block translation buffer.
-    create_block_allocator(&h->block_allocator, h->nodesize);
+    create_block_allocator(&h->block_allocator, h->nodesize, BLOCK_ALLOCATOR_ALIGNMENT);
     // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, h->translated_blocknum_limit, h->block_translation_address_on_disk);
     if (h->block_translation_address_on_disk == 0) {
 	h->block_translation = 0;
@@ -718,7 +719,7 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade
 	    // check the checksum
 	    u_int32_t x1764 = x1764_memory(tbuf, h->block_translation_size_on_disk - 4);
 	    u_int64_t offset = h->block_translation_size_on_disk - 4;
-	    // printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, h->block_translation_address_on_disk, offset, h->block_translation_size_on_disk);
+	    //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, h->block_translation_address_on_disk, offset, h->block_translation_size_on_disk);
 	    u_int32_t stored_x1764 = ntohl(*(int*)(tbuf + offset));
 	    assert(x1764 == stored_x1764);
 	}
diff --git a/newbrt/brt.c b/newbrt/brt.c
index 6fb4274fc2..b3a642c50e 100644
--- a/newbrt/brt.c
+++ b/newbrt/brt.c
@@ -222,7 +222,9 @@ int toku_brtheader_close (CACHEFILE cachefile, void *header_v) {
     //       block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize);
     if (h->dirty) {
 	toku_serialize_brt_header_to(toku_cachefile_fd(cachefile), h);
-	toku_serialize_fifo_at(toku_cachefile_fd(cachefile), block_allocator_allocated_limit(h->block_allocator), h->fifo);
+	u_int64_t write_to = block_allocator_allocated_limit(h->block_allocator); // Must compute this after writing the header.
+	//printf("%s:%d fifo written to %lu\n", __FILE__, __LINE__, write_to);
+	toku_serialize_fifo_at(toku_cachefile_fd(cachefile), write_to, h->fifo);
     }
     toku_brtheader_free(h);
     return 0;
@@ -2162,7 +2164,7 @@ static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) {
     t->h->block_translation_size_on_disk = 0;
     t->h->block_translation_address_on_disk = 0;
     // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, t->h->translated_blocknum_limit, t->h->block_translation_address_on_disk);
-    create_block_allocator(&t->h->block_allocator, t->nodesize);
+    create_block_allocator(&t->h->block_allocator, t->nodesize, BLOCK_ALLOCATOR_ALIGNMENT);
     toku_fifo_create(&t->h->fifo);
     t->root_put_counter = global_root_put_counter++; 
     if (dbname) {
diff --git a/newbrt/tests/block_allocator_test.c b/newbrt/tests/block_allocator_test.c
index 7be0f7b071..205ab52591 100644
--- a/newbrt/tests/block_allocator_test.c
+++ b/newbrt/tests/block_allocator_test.c
@@ -27,7 +27,7 @@ static void
 test_ba0 (void) {
     BLOCK_ALLOCATOR ba;
     u_int64_t b0, b1;
-    create_block_allocator(&ba, 100);
+    create_block_allocator(&ba, 100, 1);
     ba_alloc_at(ba, 50, 100);
     ba_alloc_at(ba, 25, 150);
     ba_alloc   (ba, 10, &b0);
@@ -75,7 +75,7 @@ test_ba0 (void) {
 static void
 test_ba1 (int n_initial) {
     BLOCK_ALLOCATOR ba;
-    create_block_allocator(&ba, 0);
+    create_block_allocator(&ba, 0, 1);
     int i;
     int n_blocks=0;
     u_int64_t blocks[1000];
diff --git a/newbrt/tests/brt-serialize-test.c b/newbrt/tests/brt-serialize-test.c
index b93fb16b3a..86e0cf86b1 100644
--- a/newbrt/tests/brt-serialize-test.c
+++ b/newbrt/tests/brt-serialize-test.c
@@ -62,7 +62,7 @@ static void test_serialize(void) {
     brt_h->block_translation = btps;
     brt_h->block_translation[20].diskoff = 4096;
     brt_h->block_translation[20].size    = 100;
-    create_block_allocator(&brt_h->block_allocator, 4096);
+    create_block_allocator(&brt_h->block_allocator, 4096, BLOCK_ALLOCATOR_ALIGNMENT);
     {
 	u_int64_t b;
 	block_allocator_alloc_block(brt_h->block_allocator, 100, &b);
-- 
2.30.9