Merge pull request #261 from Tokutek/variable_key_overwrite_dmt

Variable key overwrite dmt

Merge pull request #261 from Tokutek/variable_key_overwrite_dmt
Variable key overwrite dmt
77ec78cc · zkasheff · 4962274a · 521a9ec1 · 77ec78cc · 77ec78cc
Commit 77ec78cc authored Jun 09, 2014 by zkasheff
21 changed files
--- a/ft/bndata.cc
+++ b/ft/bndata.cc
@@ -441,6 +441,7 @@ void bn_data::get_space_for_overwrite(
    uint32_t idx,
    const void* keyp UU(),
    uint32_t keylen UU(),
+    uint32_t old_keylen,
    uint32_t old_le_size,
    uint32_t new_size,
    LEAFENTRY* new_le_space,
@@ -455,8 +456,8 @@ void bn_data::get_space_for_overwrite(
    int r = m_buffer.fetch(idx, &klpair_len, &klp);
    invariant_zero(r);
    paranoid_invariant(klp!=nullptr);
-    // Key never changes.
-    paranoid_invariant(keylen_from_klpair_len(klpair_len) == keylen);
+    // Old key length should be consistent with what is stored in the DMT
+    invariant(keylen_from_klpair_len(klpair_len) == old_keylen);

    size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le);
    paranoid_invariant(new_le_offset <= UINT32_MAX - new_size);  // Not using > 4GB

--- a/ft/bndata.h
+++ b/ft/bndata.h
@@ -304,7 +304,8 @@ class bn_data {
    // Allocates space in the mempool to store a new leafentry.
    // This may require reorganizing the mempool and updating the dmt.
    __attribute__((__nonnull__))
-    void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_size, uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free);
+    void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size,
+                                 uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free);

    // Allocates space in the mempool to store a new leafentry
    // and inserts a new key into the dmt

--- a/ft/ft-internal.h
+++ b/ft/ft-internal.h
@@ -1224,6 +1224,7 @@ toku_ft_bn_apply_msg_once(
    BASEMENTNODE bn,
    const FT_MSG msg,
    uint32_t idx,
+    uint32_t le_keylen,
    LEAFENTRY le,
    txn_gc_info *gc_info,
    uint64_t *workdonep,

--- a/ft/ft-ops.cc
+++ b/ft/ft-ops.cc
@@ -1740,6 +1740,7 @@ toku_ft_bn_apply_msg_once (
    BASEMENTNODE bn,
    const FT_MSG msg,
    uint32_t idx,
+    uint32_t le_keylen,
    LEAFENTRY le,
    txn_gc_info *gc_info,
    uint64_t *workdone,
@@ -1767,6 +1768,7 @@ toku_ft_bn_apply_msg_once (
        le,
        &bn->data_buffer,
        idx,
+        le_keylen,
        gc_info, 
        &new_le, 
        &numbytes_delta
@@ -1816,6 +1818,7 @@ struct setval_extra_s {
    XIDS xids;
    const DBT *key;
    uint32_t idx;
+    uint32_t le_keylen;
    LEAFENTRY le;
    txn_gc_info *gc_info;
    uint64_t * workdone;  // set by toku_ft_bn_apply_msg_once()
@@ -1849,7 +1852,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
            msg.u.id.val = &val;
        }
        toku_ft_bn_apply_msg_once(svextra->bn, &msg,
-                                  svextra->idx, svextra->le,
+                                  svextra->idx, svextra->le_keylen, svextra->le,
                                  svextra->gc_info,
                                  svextra->workdone, svextra->stats_to_update);
        svextra->setval_r = 0;
@@ -1909,7 +1912,7 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn
    le_for_update = le;

    struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg->msn, msg->xids,
-                                          keyp, idx, le_for_update, gc_info,
+                                          keyp, idx, keylen, le_for_update, gc_info,
                                          workdone, stats_to_update};
    // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun()
    FAKE_DB(db, desc);
@@ -1980,7 +1983,7 @@ toku_ft_bn_apply_msg (
        } else {
            assert_zero(r);
        }
-        toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update);
+        toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update);

        // if the insertion point is within a window of the right edge of
        // the leaf then it is sequential
@@ -2012,7 +2015,7 @@ toku_ft_bn_apply_msg (
            );
        if (r == DB_NOTFOUND) break;
        assert_zero(r);
-        toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update);
+        toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update);

        break;
    }
@@ -2034,7 +2037,7 @@ toku_ft_bn_apply_msg (
            msg->u.id.key = &curr_keydbt;
            int deleted = 0;
            if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
-                toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update);
+                toku_ft_bn_apply_msg_once(bn, msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update);
                // at this point, we cannot trust msg->u.id.key to be valid.
                uint32_t new_dmt_size = bn->data_buffer.num_klpairs();
                if (new_dmt_size != num_klpairs) {
@@ -2067,7 +2070,7 @@ toku_ft_bn_apply_msg (
            msg->u.id.key = &curr_keydbt;
            int deleted = 0;
            if (le_has_xids(storeddata, msg->xids)) {
-                toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update);
+                toku_ft_bn_apply_msg_once(bn, msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update);
                uint32_t new_dmt_size = bn->data_buffer.num_klpairs();
                if (new_dmt_size != num_klpairs) {
                    paranoid_invariant(new_dmt_size + 1 == num_klpairs);

--- a/ft/ftloader.cc
+++ b/ft/ftloader.cc
@@ -2948,7 +2948,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
    uint64_t workdone=0;
    // there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
-    toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, &workdone, stats_to_update);
+    toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update);
 }

 static int write_literal(struct dbout *out, void*data,  size_t len) {

--- a/ft/leafentry.h
+++ b/ft/leafentry.h
@@ -246,6 +246,7 @@ toku_le_apply_msg(FT_MSG   msg,
                  LEAFENTRY old_leafentry, // NULL if there was no stored data.
                  bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
                  uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
+                  uint32_t old_keylen,
                  txn_gc_info *gc_info,
                  LEAFENTRY *new_leafentry_p,
                  int64_t * numbytes_delta_p);

--- a/ft/tests/make-tree.cc
+++ b/ft/tests/make-tree.cc
@@ -126,7 +126,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    // apply an insert to the leaf node
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} };
-    toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    leafnode->max_msn_applied_to_node_on_disk = msn;


--- a/ft/tests/mempool-115.cc
+++ b/ft/tests/mempool-115.cc
@@ -123,6 +123,7 @@ le_overwrite(bn_data* bn, uint32_t idx, const  char *key, int keysize, const cha
        idx, 
        key,
        keysize,
+        keysize, // old_keylen
        size_needed, // old_le_size
        size_needed,
        &r,

--- a/ft/tests/orthopush-flush.cc
+++ b/ft/tests/orthopush-flush.cc
@@ -218,7 +218,7 @@ insert_random_message_to_bn(
    *keylenp = keydbt->size;
    *keyp = toku_xmemdup(keydbt->data, keydbt->size);
    int64_t numbytes;
-    toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
+    toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes);
    toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL);
    if (msn.msn > blb->max_msn_applied.msn) {
        blb->max_msn_applied = msn;
@@ -268,7 +268,7 @@ insert_same_message_to_bns(
    *keylenp = keydbt->size;
    *keyp = toku_xmemdup(keydbt->data, keydbt->size);
    int64_t numbytes;
-    toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
+    toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes);
    toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL);
    if (msn.msn > blb1->max_msn_applied.msn) {
        blb1->max_msn_applied = msn;

--- a/ft/tests/test-leafentry-nested.cc
+++ b/ft/tests/test-leafentry-nested.cc
@@ -213,7 +213,7 @@ test_le_offsets (void) {
 static void
 test_ule_packs_to_nothing (ULE ule) {
    LEAFENTRY le;
-    int r = le_pack(ule, NULL, 0, NULL, 0, 0, &le, nullptr);
+    int r = le_pack(ule, NULL, 0, NULL, 0, 0, 0, &le, nullptr);
    assert(r==0);
    assert(le==NULL);
 }
@@ -319,7 +319,7 @@ test_le_pack_committed (void) {

        size_t memsize;
        LEAFENTRY le;
-        int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le, nullptr);
+        int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr);
        assert(r==0);
        assert(le!=NULL);
        memsize = le_memsize_from_ule(&ule);
@@ -329,7 +329,7 @@ test_le_pack_committed (void) {
        verify_ule_equal(&ule, &tmp_ule);
        LEAFENTRY tmp_le;
        size_t    tmp_memsize;
-        r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le, nullptr);
+        r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr);
        tmp_memsize = le_memsize_from_ule(&tmp_ule);
        assert(r==0);
        assert(tmp_memsize == memsize);
@@ -377,7 +377,7 @@ test_le_pack_uncommitted (uint8_t committed_type, uint8_t prov_type, int num_pla

        size_t memsize;
        LEAFENTRY le;
-        int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le, nullptr);
+        int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr);
        assert(r==0);
        assert(le!=NULL);
        memsize = le_memsize_from_ule(&ule);
@@ -387,7 +387,7 @@ test_le_pack_uncommitted (uint8_t committed_type, uint8_t prov_type, int num_pla
        verify_ule_equal(&ule, &tmp_ule);
        LEAFENTRY tmp_le;
        size_t    tmp_memsize;
-        r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le, nullptr);
+        r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr);
        tmp_memsize = le_memsize_from_ule(&tmp_ule);
        assert(r==0);
        assert(tmp_memsize == memsize);
@@ -448,7 +448,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
    LEAFENTRY le_expected;
    LEAFENTRY le_result;

-    r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, &le_initial, nullptr);
+    r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, 0, &le_initial, nullptr);
    CKERR(r);

    size_t result_memsize = 0;
@@ -458,6 +458,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
                      le_initial,
                      nullptr,
                      0,
+                      0,
                      &gc_info,
                      &le_result,
                      &ignoreme);
@@ -467,7 +468,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
    }

    size_t expected_memsize = 0;
-    r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, &le_expected, nullptr);
+    r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, 0, &le_expected, nullptr);
    CKERR(r);
    if (le_expected) {
        expected_memsize = leafentry_memsize(le_expected);
@@ -749,7 +750,7 @@ test_le_apply_messages(void) {

 static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_referenced_xid_known) {
    LEAFENTRY le;
-    int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); CKERR(r);
+    int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); CKERR(r);
    invariant_notnull(le);
    txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true);
    bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info);

--- a/ft/tests/verify-bad-msn.cc
+++ b/ft/tests/verify-bad-msn.cc
@@ -129,7 +129,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    // apply an insert to the leaf node
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // Create bad tree (don't do following):
    // leafnode->max_msn_applied_to_node = msn;

--- a/ft/tests/verify-bad-pivots.cc
+++ b/ft/tests/verify-bad-pivots.cc
@@ -117,7 +117,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    MSN msn = next_dummymsn();
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // dont forget to dirty the node
    leafnode->dirty = 1;

--- a/ft/tests/verify-dup-in-leaf.cc
+++ b/ft/tests/verify-dup-in-leaf.cc
@@ -118,7 +118,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    MSN msn = next_dummymsn();
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // dont forget to dirty the node
    leafnode->dirty = 1;

--- a/ft/tests/verify-dup-pivots.cc
+++ b/ft/tests/verify-dup-pivots.cc
@@ -117,7 +117,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    MSN msn = next_dummymsn();
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // dont forget to dirty the node
    leafnode->dirty = 1;

--- a/ft/tests/verify-misrouted-msgs.cc
+++ b/ft/tests/verify-misrouted-msgs.cc
@@ -118,7 +118,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    MSN msn = next_dummymsn();
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // dont forget to dirty the node
    leafnode->dirty = 1;

--- a/ft/tests/verify-unsorted-leaf.cc
+++ b/ft/tests/verify-unsorted-leaf.cc
@@ -120,7 +120,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    MSN msn = next_dummymsn();
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // dont forget to dirty the node
    leafnode->dirty = 1;

--- a/ft/tests/verify-unsorted-pivots.cc
+++ b/ft/tests/verify-unsorted-pivots.cc
@@ -117,7 +117,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
    MSN msn = next_dummymsn();
    FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
-    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL);
+    toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL);

    // dont forget to dirty the node
    leafnode->dirty = 1;

--- a/ft/ule-internal.h
+++ b/ft/ule-internal.h
@@ -148,6 +148,7 @@ le_pack(ULE ule, // data to be packed into new leafentry
        uint32_t idx,
        void* keyp,
        uint32_t keylen,
+        uint32_t old_keylen,
        uint32_t old_le_size,
        LEAFENTRY * const new_leafentry_p, // this is what this function creates
        void **const maybe_free

--- a/ft/ule.cc
+++ b/ft/ule.cc
@@ -256,6 +256,7 @@ static void get_space_for_le(
    uint32_t idx,
    void* keyp,
    uint32_t keylen,
+    uint32_t old_keylen,
    uint32_t old_le_size,
    size_t size,
    LEAFENTRY* new_le_space,
@@ -268,7 +269,7 @@ static void get_space_for_le(
    else {
        // this means we are overwriting something
        if (old_le_size > 0) {
-            data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_le_size, size, new_le_space, maybe_free);
+            data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_keylen, old_le_size, size, new_le_space, maybe_free);
        }
        // this means we are inserting something new
        else {
@@ -496,6 +497,7 @@ toku_le_apply_msg(FT_MSG   msg,
                  LEAFENTRY old_leafentry, // NULL if there was no stored data.
                  bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
                  uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
+                  uint32_t old_keylen, // length of the key currently stored in data_buffer
                  txn_gc_info *gc_info,
                  LEAFENTRY *new_leafentry_p,
                  int64_t * numbytes_delta_p) {  // change in total size of key and val, not including any overhead
@@ -552,6 +554,7 @@ toku_le_apply_msg(FT_MSG   msg,
        idx,
        ft_msg_get_key(msg), // contract of this function is caller has this set, always
        keylen, // contract of this function is caller has this set, always
+        old_keylen,
        oldmemsize,
        new_leafentry_p,
        &maybe_free
@@ -655,6 +658,7 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
        idx,
        keyp,
        keylen,
+        keylen, // old_keylen, same because the key isn't going to change for gc
        old_mem_size,
        new_leaf_entry,
        &maybe_free
@@ -974,6 +978,7 @@ le_pack(ULE ule, // data to be packed into new leafentry
        uint32_t idx,
        void* keyp,
        uint32_t keylen,
+        uint32_t old_keylen,
        uint32_t old_le_size,
        LEAFENTRY * const new_leafentry_p, // this is what this function creates
        void **const maybe_free
@@ -996,7 +1001,8 @@ le_pack(ULE ule, // data to be packed into new leafentry
            }
        }
        if (data_buffer && old_le_size > 0) {
-            data_buffer->delete_leafentry(idx, keylen, old_le_size);
+            // must pass old_keylen and old_le_size, since that's what is actually stored in data_buffer
+            data_buffer->delete_leafentry(idx, old_keylen, old_le_size);
        }
        *new_leafentry_p = NULL;
        rval = 0;
@@ -1005,7 +1011,7 @@ le_pack(ULE ule, // data to be packed into new leafentry
 found_insert:
    memsize = le_memsize_from_ule(ule);
    LEAFENTRY new_leafentry;
-    get_space_for_le(data_buffer, idx, keyp, keylen, old_le_size, memsize, &new_leafentry, maybe_free);
+    get_space_for_le(data_buffer, idx, keyp, keylen, old_keylen, old_le_size, memsize, &new_leafentry, maybe_free);

    //p always points to first unused byte after leafentry we are packing
    uint8_t *p;
@@ -2467,6 +2473,7 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry,
                   nullptr, //only matters if we are passing in a bn_data
                   0, //only matters if we are passing in a bn_data
                   0, //only matters if we are passing in a bn_data
+                   0, //only matters if we are passing in a bn_data
                   new_leafentry_p,
                   nullptr //only matters if we are passing in a bn_data
                   );

--- a/src/tests/test_keylen_diff.cc
+++ b/src/tests/test_keylen_diff.cc
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+
+/*
+COPYING CONDITIONS NOTICE:
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation, and provided that the
+  following conditions are met:
+
+      * Redistributions of source code must retain this COPYING
+        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
+        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
+        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
+        GRANT (below).
+
+      * Redistributions in binary form must reproduce this COPYING
+        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
+        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
+        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
+        GRANT (below) in the documentation and/or other materials
+        provided with the distribution.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301, USA.
+
+COPYRIGHT NOTICE:
+
+  TokuDB, Tokutek Fractal Tree Indexing Library.
+  Copyright (C) 2014 Tokutek, Inc.
+
+DISCLAIMER:
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+UNIVERSITY PATENT NOTICE:
+
+  The technology is licensed by the Massachusetts Institute of
+  Technology, Rutgers State University of New Jersey, and the Research
+  Foundation of State University of New York at Stony Brook under
+  United States of America Serial No. 11/760379 and to the patents
+  and/or patent applications resulting from it.
+
+PATENT MARKING NOTICE:
+
+  This software is covered by US Patent No. 8,185,551.
+  This software is covered by US Patent No. 8,489,638.
+
+PATENT RIGHTS GRANT:
+
+  "THIS IMPLEMENTATION" means the copyrightable works distributed by
+  Tokutek as part of the Fractal Tree project.
+
+  "PATENT CLAIMS" means the claims of patents that are owned or
+  licensable by Tokutek, both currently or in the future; and that in
+  the absence of this license would be infringed by THIS
+  IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
+
+  "PATENT CHALLENGE" shall mean a challenge to the validity,
+  patentability, enforceability and/or non-infringement of any of the
+  PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
+
+  Tokutek hereby grants to you, for the term and geographical scope of
+  the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
+  irrevocable (except as stated in this section) patent license to
+  make, have made, use, offer to sell, sell, import, transfer, and
+  otherwise run, modify, and propagate the contents of THIS
+  IMPLEMENTATION, where such license applies only to the PATENT
+  CLAIMS.  This grant does not include claims that would be infringed
+  only as a consequence of further modifications of THIS
+  IMPLEMENTATION.  If you or your agent or licensee institute or order
+  or agree to the institution of patent litigation against any entity
+  (including a cross-claim or counterclaim in a lawsuit) alleging that
+  THIS IMPLEMENTATION constitutes direct or contributory patent
+  infringement, or inducement of patent infringement, then any rights
+  granted to you under this License shall terminate as of the date
+  such litigation is filed.  If you or your agent or exclusive
+  licensee institute or order or agree to the institution of a PATENT
+  CHALLENGE, then Tokutek may terminate any rights granted to you
+  under this License.
+*/
+
+#include "test.h"
+
+// test a comparison function that treats certain keys of different lengths as equal
+
+// A test key: a one-byte type tag followed by a numeric payload.
+//   type == 0: the payload is an int, copied into the first sizeof(int) bytes of k
+//   type == 1: the payload is a double, copied into the first sizeof(double) bytes of k
+struct packed_key {
+    char type;
+    char k[8];
+    // Builds a key tagged as an int. Only the first sizeof(int) bytes of k are written;
+    // the remaining bytes of k are left uninitialized.
+    static packed_key as_int(int v) {
+        packed_key k;
+        k.type = 0;
+        memcpy(k.k, &v, sizeof(int));
+        return k;
+    }
+    // Builds a key tagged as a double; the first sizeof(double) bytes of k are written.
+    static packed_key as_double(double v) {
+        packed_key k;
+        k.type = 1;
+        memcpy(k.k, &v, sizeof(double));
+        return k;
+    }
+    // Number of meaningful bytes in this key: the tag byte plus the payload,
+    // i.e. 5 for an int key and 9 for a double key.
+    // NOTE(review): the literals assume sizeof(int) == 4 and sizeof(double) == 8 - confirm for the target platforms.
+    size_t size() const {
+        assert(type == 0 || type == 1);
+        return type == 0 ? 5 : 9;
+    }
+};
+
+// the point is that keys can be packed as integers or doubles, but
+// we'll treat them both as doubles for the sake of comparison.
+// this means a 4 byte number could equal an 8 byte number.
+//
+// a, b: packed keys; each must be 5 bytes (tag 0, int payload) or 9 bytes (tag 1, double payload).
+// returns 1 if a > b, -1 if a < b, and 0 otherwise, comparing the payloads as doubles.
+static int packed_key_cmp(DB *UU(db), const DBT *a, const DBT *b) {
+    assert(a->size == 5 || a->size == 9);
+    assert(b->size == 5 || b->size == 9);
+    const char *k1 = static_cast<const char *>(a->data);
+    const char *k2 = static_cast<const char *>(b->data);
+    assert(*k1 == 0 || *k1 == 1);
+    assert(*k2 == 0 || *k2 == 1);
+
+    // The payload starts one byte into the key, so it is not suitably aligned for an int or a
+    // double. Copy it out with memcpy instead of dereferencing a reinterpret_cast'ed pointer,
+    // which would be an unaligned (and aliasing-violating) read.
+    auto payload_as_double = [](const char *key) -> double {
+        if (*key == 0) {
+            int as_int;
+            memcpy(&as_int, key + 1, sizeof(as_int));
+            return static_cast<double>(as_int);
+        }
+        double as_double;
+        memcpy(&as_double, key + 1, sizeof(as_double));
+        return as_double;
+    };
+
+    const double v1 = payload_as_double(k1);
+    const double v2 = payload_as_double(k2);
+    if (v1 > v2) {
+        return 1;
+    } else if (v1 < v2) {
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+// Update function registered with the environment in test_keylen_diff().
+//
+// Both the old value and the extra must be present and zero-length. The extra's data pointer
+// selects the action: a null data pointer results in set_val(nullptr, ...), anything else
+// results in set_val() being called with a fresh zero-length value.
+// NOTE(review): presumably set_val(nullptr, ...) requests deletion of the row, as the
+// VIA_UPDATE_DELETE* method names suggest - confirm against the update API.
+// Always returns 0.
+static int update_callback(DB *UU(db), const DBT *UU(key), const DBT *old_val, const DBT *extra,
+                           void (*set_val)(const DBT *new_val, void *setval_extra), void *setval_extra) {
+    assert(extra != nullptr);
+    assert(old_val != nullptr);
+    assert(extra->size == 0);
+    assert(old_val->size == 0);
+
+    const bool null_extra = (extra->data == nullptr);
+    if (null_extra) {
+        set_val(nullptr, setval_extra);
+        return 0;
+    }
+
+    // non-null extra: hand back an empty (zero-length) value backed by a local byte
+    char placeholder;
+    DBT replacement;
+    dbt_init(&replacement, &placeholder, 0);
+    set_val(&replacement, setval_extra);
+    return 0;
+}
+
+// The operation test_keylen_diff() uses to revisit keys that were already inserted.
+enum overwrite_method { 
+    VIA_UPDATE_OVERWRITE_BROADCAST, // db->update_broadcast() with an empty (non-null) extra
+    VIA_UPDATE_DELETE_BROADCAST,    // db->update_broadcast() with a null extra
+    VIA_UPDATE_OVERWRITE,           // db->update() with an empty (non-null) extra
+    VIA_UPDATE_DELETE,              // db->update() with a null extra
+    VIA_DELETE,                     // db->del()
+    VIA_INSERT,                     // db->put()
+    NUM_OVERWRITE_METHODS           // number of methods above; not a method itself
+};
+
+// Inserts num_keys keys packed as ints, then revisits every key in random order using `method`,
+// alternately aborting and committing the transaction that wraps each operation.
+//
+// method:       the operation used for the second pass over the keys
+// control_test: when true the second pass packs keys as ints, exactly like the inserts;
+//               when false it packs them as doubles, so the key in the message has a
+//               different length than the stored key that packed_key_cmp treats as equal
+//
+// Every return code is checked with CKERR so that a failing call is reported where it happens.
+static void test_keylen_diff(enum overwrite_method method, bool control_test) {
+    int r;
+
+    DB_ENV *env;
+    r = db_env_create(&env, 0); CKERR(r);
+    r = env->set_default_bt_compare(env, packed_key_cmp); CKERR(r);
+    r = env->set_update(env, update_callback); CKERR(r);
+    r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r);
+
+    DB *db;
+    r = db_create(&db, env, 0); CKERR(r);
+    r = db->set_pagesize(db, 16 * 1024); CKERR(r); // smaller pages so we get a more lush tree
+    r = db->set_readpagesize(db, 1 * 1024); CKERR(r); // smaller basements so we get more per leaf
+    r = db->open(db, nullptr, "db", nullptr, DB_BTREE, DB_CREATE, 0666); CKERR(r);
+
+    DBT null_dbt, empty_dbt;
+    char empty_v = 0;
+    dbt_init(&empty_dbt, &empty_v, 0);
+    dbt_init(&null_dbt, nullptr, 0);
+
+    const int num_keys = 256 * 1000;
+
+    for (int i = 0; i < num_keys; i++) {
+        // insert it using a 4 byte key ..
+        packed_key key = packed_key::as_int(i);
+
+        DBT dbt;
+        dbt_init(&dbt, &key, key.size());
+        r = db->put(db, nullptr, &dbt, &empty_dbt, 0); CKERR(r);
+    }
+
+    // overwrite keys randomly, so we induce flushes and get better / realistic coverage
+    int *XMALLOC_N(num_keys, shuffled_keys);
+    for (int i = 0; i < num_keys; i++) {
+        shuffled_keys[i] = i;
+    }
+    // Fisher-Yates style shuffle: swap each slot with a random slot at or below it
+    for (int i = num_keys - 1; i >= 1; i--) {
+        long rnd = random64() % (i + 1);
+        int tmp = shuffled_keys[rnd];
+        shuffled_keys[rnd] = shuffled_keys[i];
+        shuffled_keys[i] = tmp;
+    }
+
+    for (int i = 0; i < num_keys; i++) {
+        // for the control test, delete it using the same length key
+        //
+        // .. otherwise, delete it with an 8 byte key
+        packed_key key = control_test ? packed_key::as_int(shuffled_keys[i]) :
+                                        packed_key::as_double(shuffled_keys[i]);
+
+        DBT dbt;
+        dbt_init(&dbt, &key, key.size());
+        DB_TXN *txn;
+        r = env->txn_begin(env, nullptr, &txn, DB_TXN_NOSYNC); CKERR(r);
+        switch (method) {
+            case VIA_INSERT: {
+                r = db->put(db, txn, &dbt, &empty_dbt, 0); CKERR(r);
+                break;
+            }
+            case VIA_DELETE: {
+                // we purposefully do not pass DB_DELETE_ANY because the hidden query acts as
+                // a sanity check for the control test and, overall, gives better code coverage
+                r = db->del(db, txn, &dbt, 0); CKERR(r);
+                break;
+            }
+            case VIA_UPDATE_OVERWRITE:
+            case VIA_UPDATE_DELETE: {
+                r = db->update(db, txn, &dbt, method == VIA_UPDATE_DELETE ? &null_dbt : &empty_dbt, 0); CKERR(r);
+                break;
+            }
+            case VIA_UPDATE_OVERWRITE_BROADCAST:
+            case VIA_UPDATE_DELETE_BROADCAST: {
+                r = db->update_broadcast(db, txn, method == VIA_UPDATE_DELETE_BROADCAST ? &null_dbt : &empty_dbt, 0); CKERR(r);
+                // a broadcast touches every key, so there is no need to repeat it per key:
+                // iteration 0 ends in an abort and iteration 1 in a commit, which covers both
+                // outcomes. from iteration 2 on, abort the txn we just opened and stop.
+                if (i > 1) {
+                    r = txn->abort(txn); CKERR(r);
+                    goto done;
+                }
+                break;
+            }
+            default: {
+                assert(false);
+            }
+        }
+        // alternate between aborting and committing so both paths get exercised
+        // (named should_abort so that it does not shadow libc's abort())
+        const bool should_abort = i % 2 == 0;
+        if (should_abort) {
+            r = txn->abort(txn); CKERR(r);
+        } else {
+            r = txn->commit(txn, 0); CKERR(r);
+        }
+    }
+
+done:
+    toku_free(shuffled_keys);
+
+    // optimize before close to ensure that all messages are applied and any potential bugs are exposed
+    r = db->optimize(db); CKERR(r);
+    r = db->close(db, 0); CKERR(r);
+    r = env->close(env, 0); CKERR(r);
+}
+
+// Test entry point: recreates the test directory, then runs every overwrite method twice -
+// first as a control (same key length as the inserts), then with mixed key lengths.
+int
+test_main(int argc, char *const argv[]) {
+    parse_args(argc, argv);
+
+    // start from an empty test directory
+    toku_os_recursive_delete(TOKU_TEST_FILENAME);
+    int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
+
+    int method_id = 0;
+    while (method_id < NUM_OVERWRITE_METHODS) {
+        const enum overwrite_method method = static_cast<enum overwrite_method>(method_id);
+
+        // control test - must pass for the 'real' test below to be interesting
+        printf("testing method %d (control)\n", method_id);
+        test_keylen_diff(method, true);
+
+        // real test, actually mixes key lengths
+        printf("testing method %d (real)\n", method_id);
+        test_keylen_diff(method, false);
+
+        method_id++;
+    }
+
+    return 0;
+}
--- a/util/mempool.cc
+++ b/util/mempool.cc
@@ -232,7 +232,8 @@ void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) {
 void toku_mempool_mfree(struct mempool *mp, void *vp, size_t size) {
    if (vp) { paranoid_invariant(toku_mempool_inrange(mp, vp, size)); }
    mp->frag_size += size;
-    paranoid_invariant(mp->frag_size <= mp->size);
+    invariant(mp->frag_size <= mp->free_offset);
+    invariant(mp->frag_size <= mp->size);
 }