From cdfc66f84c276a0cdac13033f57d0011120d609b Mon Sep 17 00:00:00 2001 From: Zardosht Kasheff <zardosht@tokutek.com> Date: Wed, 17 Apr 2013 00:01:49 -0400 Subject: [PATCH] addresses #1655 add comments, rename some functions git-svn-id: file:///svn/mysql/tokudb-engine/src@11083 c7de825b-a66e-492c-adef-691d508d4ae1 --- storage/tokudb/ha_tokudb.cc | 6 ++-- storage/tokudb/hatoku_cmp.cc | 59 ++++++++++++++++++++++++++++++------ storage/tokudb/hatoku_cmp.h | 4 +-- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 2ff7722e3dc..07b25e7c6d9 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -3740,7 +3740,7 @@ int ha_tokudb::create(const char *name, TABLE * form, HA_CREATE_INFO * create_in // prim_key = (hidden_primary_key) ? NULL : &form->s->key_info[primary_key]; row_descriptor.data = row_desc_buff; - row_descriptor.size = create_toku_descriptor( + row_descriptor.size = create_toku_key_descriptor( row_desc_buff, hidden_primary_key, false, @@ -3770,7 +3770,7 @@ int ha_tokudb::create(const char *name, TABLE * form, HA_CREATE_INFO * create_in // // setup the row descriptor // - row_descriptor.size = create_toku_descriptor( + row_descriptor.size = create_toku_key_descriptor( row_desc_buff, false, form->key_info[i].flags & HA_CLUSTERING, @@ -4334,7 +4334,7 @@ int ha_tokudb::add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys) { // // setup the row descriptor // - row_descriptor.size = create_toku_descriptor( + row_descriptor.size = create_toku_key_descriptor( row_desc_buff, false, key_info[i].flags & HA_CLUSTERING, diff --git a/storage/tokudb/hatoku_cmp.cc b/storage/tokudb/hatoku_cmp.cc index d12d2f19f14..b29d8d993dc 100755 --- a/storage/tokudb/hatoku_cmp.cc +++ b/storage/tokudb/hatoku_cmp.cc @@ -694,7 +694,14 @@ inline int tokudb_compare_two_hidden_keys( } // -// returns number of bytes to jump over +// Returns number of bytes used for a given TOKU_TYPE +// in a key 
descriptor. The number of bytes returned +// here MUST match the number of bytes used for the encoding +// in create_toku_key_descriptor_for_key +// Parameters: +// [in] row_desc - buffer that contains portion of descriptor +// created in create_toku_key_descriptor_for_key. The first +// byte points to the TOKU_TYPE. // u_int32_t skip_field_in_descriptor(uchar* row_desc) { uchar* row_desc_pos = row_desc; @@ -727,10 +734,11 @@ u_int32_t skip_field_in_descriptor(uchar* row_desc) { } // -// outputs a descriptor for key into buf. num_bytes returns number of bytes used in buf -// to store the descriptor +// outputs a descriptor for key into buf. Returns number of bytes used in buf +// to store the descriptor. Number of bytes used MUST match number of bytes +// we would skip in skip_field_in_descriptor // -int create_toku_key_descriptor(KEY* key, uchar* buf) { +int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) { int ret_val = 0; uchar* pos = buf; u_int32_t num_bytes_in_field = 0; @@ -739,6 +747,8 @@ int create_toku_key_descriptor(KEY* key, uchar* buf) { Field* field = key->key_part[i].field; // // The first byte states if there is a null byte + // 0 means no null byte, non-zero means there + // is one // *pos = field->null_bit; pos++; @@ -753,8 +763,6 @@ int create_toku_key_descriptor(KEY* key, uchar* buf) { // // based on the type, extra data follows afterwards - // doubles and floats have no extra information - // after it // switch (type) { // @@ -817,7 +825,34 @@ int create_toku_key_descriptor(KEY* key, uchar* buf) { return pos - buf; } -int create_toku_descriptor( + +// +// Creates a descriptor for a DB that contains all information necessary +// to do both key comparisons and data comparisons (for dup-sort databases). +// +// There are three types of descriptors we care about: +// 1) Primary key, (in a no-dup database) +// 2) secondary key, followed by primary key (for secondary indexes that are +// dup-sort databases). 
+// 3) clustering keys, which are a secondary key followed by a primary key, +// but in a no-dup database. +// +// I realize this may be confusing, but here is how it works. +// All DBs have a key compare, and some have a data compare. +// The format of the descriptor must be able to handle both. +// +// The descriptor handles having two pieces of information appended +// to each other. The first piece is used for key comparisons (and used +// in tokudb_cmp_dbt_key), and the second piece is used for data +// comparisons (used in tokudb_cmp_dbt_data). These pieces are +// generated by create_toku_key_descriptor_for_key. +// +// The first four bytes store an offset into the descriptor to the second piece +// used for data comparisons. For cases 1 and 3 above, where no data comparison +// exists, this offset is equal to the entire length of the descriptor. +// +// +int create_toku_key_descriptor( uchar* buf, bool is_first_hpk, bool is_clustering_key, @@ -826,15 +861,21 @@ int create_toku_descriptor( KEY* second_key ) { + // + // The first four bytes always contain the offset of where the first key + // ends. 
+ // uchar* pos = buf + 4; u_int32_t num_bytes = 0; u_int32_t offset = 0; // + // sanity check: // assert that if the first key is a hpk, then it is not a clustering key // assert(!(is_first_hpk && is_clustering_key)); // + // sanity check: // assert that if it is a clustering key, then a second key exists // assert(!(is_clustering_key && !is_second_hpk && second_key == NULL)); @@ -851,7 +892,7 @@ int create_toku_descriptor( // pos[0] = 1; //say there is an infinity byte pos++; - num_bytes = create_toku_key_descriptor(first_key, pos); + num_bytes = create_toku_key_descriptor_for_key(first_key, pos); pos += num_bytes; } @@ -895,7 +936,7 @@ int create_toku_descriptor( // // second key is NOT a hidden primary key, so we now pack second_key // - num_bytes = create_toku_key_descriptor(second_key, pos); + num_bytes = create_toku_key_descriptor_for_key(second_key, pos); pos += num_bytes; } diff --git a/storage/tokudb/hatoku_cmp.h b/storage/tokudb/hatoku_cmp.h index 006595d12d5..ce8a32a9ea4 100755 --- a/storage/tokudb/hatoku_cmp.h +++ b/storage/tokudb/hatoku_cmp.h @@ -100,9 +100,7 @@ int tokudb_cmp_dbt_data(DB *file, const DBT *keya, const DBT *keyb); //TODO: QQQ Only do one direction for prefix. int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb); -int create_toku_key_descriptor(KEY* key, uchar* buf); - -int create_toku_descriptor( +int create_toku_key_descriptor( uchar* buf, bool is_first_hpk, bool is_clustering_key, -- 2.30.9