ha_tokudb_alter_56.cc 54.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:

  This program is free software; you can redistribute it and/or modify
  it under the terms of version 2 of the GNU General Public License as
  published by the Free Software Foundation, and provided that the
  following conditions are met:

      * Redistributions of source code must retain this COPYING
        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
        GRANT (below).

      * Redistributions in binary form must reproduce this COPYING
        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
        GRANT (below) in the documentation and/or other materials
        provided with the distribution.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  02110-1301, USA.

COPYRIGHT NOTICE:

  TokuDB, Tokutek Fractal Tree Indexing Library.
  Copyright (C) 2007-2013 Tokutek, Inc.

DISCLAIMER:

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

UNIVERSITY PATENT NOTICE:

  The technology is licensed by the Massachusetts Institute of
  Technology, Rutgers State University of New Jersey, and the Research
  Foundation of State University of New York at Stony Brook under
  United States of America Serial No. 11/760379 and to the patents
  and/or patent applications resulting from it.

PATENT MARKING NOTICE:

  This software is covered by US Patent No. 8,185,551.
  This software is covered by US Patent No. 8,489,638.

PATENT RIGHTS GRANT:

  "THIS IMPLEMENTATION" means the copyrightable works distributed by
  Tokutek as part of the Fractal Tree project.

  "PATENT CLAIMS" means the claims of patents that are owned or
  licensable by Tokutek, both currently or in the future; and that in
  the absence of this license would be infringed by THIS
  IMPLEMENTATION or by using or running THIS IMPLEMENTATION.

  "PATENT CHALLENGE" shall mean a challenge to the validity,
  patentability, enforceability and/or non-infringement of any of the
  PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.

  Tokutek hereby grants to you, for the term and geographical scope of
  the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
  irrevocable (except as stated in this section) patent license to
  make, have made, use, offer to sell, sell, import, transfer, and
  otherwise run, modify, and propagate the contents of THIS
  IMPLEMENTATION, where such license applies only to the PATENT
  CLAIMS.  This grant does not include claims that would be infringed
  only as a consequence of further modifications of THIS
  IMPLEMENTATION.  If you or your agent or licensee institute or order
  or agree to the institution of patent litigation against any entity
  (including a cross-claim or counterclaim in a lawsuit) alleging that
  THIS IMPLEMENTATION constitutes direct or contributory patent
  infringement, or inducement of patent infringement, then any rights
  granted to you under this License shall terminate as of the date
  such litigation is filed.  If you or your agent or exclusive
  licensee institute or order or agree to the institution of a PATENT
  CHALLENGE, then Tokutek may terminate any rights granted to you
  under this License.
*/

#ident "Copyright (c) 2007-2013 Tokutek Inc.  All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
91
#if TOKU_INCLUDE_ALTER_56
92

93
#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100099
94
#define TOKU_ALTER_RENAME ALTER_RENAME
95 96 97
#define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
#elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
      (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
98
#define TOKU_ALTER_RENAME ALTER_RENAME
99
#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
100
#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
101
#define TOKU_ALTER_RENAME ALTER_RENAME_56
102
#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
103 104 105 106
#else
#error
#endif

107
#include "ha_tokudb_alter_common.cc"
108
#include <sql_array.h>
109
#include <sql_base.h>
110

111 112
// The tokudb alter context contains the alter state that is set in the check if supported method and used
// later when the alter operation is executed.
113 114
class tokudb_alter_ctx : public inplace_alter_handler_ctx {
public:
115 116 117 118 119
    tokudb_alter_ctx() :
        handler_flags(0),
        alter_txn(NULL),
        add_index_changed(false),
        drop_index_changed(false),
120
        reset_card(false),
121 122 123
        compression_changed(false),
        expand_varchar_update_needed(false),
        expand_fixed_update_needed(false),
124
        expand_blob_update_needed(false),
125 126
        table_kc_info(NULL),
        altered_table_kc_info(NULL) {
127 128 129 130 131 132 133 134 135 136 137
    }
    ~tokudb_alter_ctx() {
        if (altered_table_kc_info)
            free_key_and_col_info(altered_table_kc_info);
    }
public:
    ulong handler_flags;
    DB_TXN *alter_txn;
    bool add_index_changed;
    bool incremented_num_DBs, modified_DBs;
    bool drop_index_changed;
138
    bool reset_card;
139 140
    bool compression_changed;
    enum toku_compression_method orig_compression_method;
141 142
    bool expand_varchar_update_needed;
    bool expand_fixed_update_needed;
143
    bool expand_blob_update_needed;
144 145 146 147 148
    Dynamic_array<uint> changed_fields;
    KEY_AND_COL_INFO *table_kc_info;
    KEY_AND_COL_INFO *altered_table_kc_info;
    KEY_AND_COL_INFO altered_table_kc_info_base;
};
149

150
// Debug function to print out an alter table operation
151
void ha_tokudb::print_alter_info(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
152 153 154 155 156 157
    printf("***are keys of two tables same? %d\n", tables_have_same_keys(table, altered_table, false, false));
    if (ha_alter_info->handler_flags) {
        printf("***alter flags set ***\n");
        for (int i = 0; i < 32; i++) {
            if (ha_alter_info->handler_flags & (1 << i))
                printf("%d\n", i);
158 159 160
        }
    }

161 162 163 164
    // everyone calculates data by doing some default_values - record[0], but I do not see why
    // that is necessary
    printf("******\n");
    printf("***orig table***\n");
165
    for (uint i = 0; i < table->s->fields; i++) {
166 167 168 169 170 171
      //
      // make sure to use table->field, and NOT table->s->field
      //
      Field* curr_field = table->field[i];
      uint null_offset = get_null_offset(table, curr_field);
      printf(
172
          "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, pack_length %u\n", 
173
          curr_field->field_name, 
174
          curr_field->real_type(), mysql_to_toku_type(curr_field),
175 176
          curr_field->null_bit,
          null_offset,
177
          curr_field->real_maybe_null(),
178 179
          curr_field->real_maybe_null() ? table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff,
          curr_field->pack_length()
180
          );
181
    }
182 183 184 185 186 187
    printf("******\n");
    printf("***altered table***\n");
    for (uint i = 0; i < altered_table->s->fields; i++) {
      Field* curr_field = altered_table->field[i];
      uint null_offset = get_null_offset(altered_table, curr_field);
      printf(
188 189 190
         "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, pack_length %u\n", 
         curr_field->field_name,
         curr_field->real_type(), mysql_to_toku_type(curr_field),
191 192
         curr_field->null_bit,
         null_offset,
193
         curr_field->real_maybe_null(),
194 195
         curr_field->real_maybe_null() ? altered_table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff,
         curr_field->pack_length()
196
         );
197
    }
198
    printf("******\n");
199 200
}

201 202 203
// Given two tables with equal number of fields, find all of the fields with different types
// and return the indexes of the different fields in the changed_fields array. This function ignores field
// name differences.
204
static int find_changed_fields(TABLE *table_a, TABLE *table_b, Dynamic_array<uint> &changed_fields) {
205
    for (uint i = 0; i < table_a->s->fields; i++) {
206 207 208
        Field *field_a = table_a->field[i];
        Field *field_b = table_b->field[i];
        if (!fields_are_same_type(field_a, field_b)) 
209 210
            changed_fields.append(i);
    }
211
    return changed_fields.elements();
212 213
}

214
// Forward declarations: both predicates are called from check_if_supported_inplace_alter
// before their definitions appear.
static bool change_length_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx);

static bool change_type_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx);
217

218
// The ha_alter_info->handler_flags can not be trusted.  This function maps the bogus handler flags to something we like.
219
static ulong fix_handler_flags(THD *thd, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
220
    ulong handler_flags = ha_alter_info->handler_flags;
221 222 223 224 225 226 227

    // workaround for fill_alter_inplace_info bug (#5193)
    // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a column addition that does not
    // change the keys.  the following code turns the ADD_INDEX and DROP_INDEX flags so that we can do hot
    // column addition later.
    if (handler_flags & (Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::DROP_COLUMN)) {
        if (handler_flags & (Alter_inplace_info::ADD_INDEX + Alter_inplace_info::DROP_INDEX)) {
228
            if (tables_have_same_keys(table, altered_table, THDVAR(thd, alter_print_error) != 0, false)) {
229 230 231 232
                handler_flags &= ~(Alter_inplace_info::ADD_INDEX + Alter_inplace_info::DROP_INDEX);
            }
        }
    }
233 234 235 236 237 238

    // always allow rename table + any other operation, so turn off the rename flag
    if (handler_flags & Alter_inplace_info::TOKU_ALTER_RENAME) {
        handler_flags &= ~Alter_inplace_info::TOKU_ALTER_RENAME;
    }

239 240 241 242 243 244 245
    // ALTER_COLUMN_TYPE may be set when no columns have been changed, so turn off the flag
    if (handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) {
        if (all_fields_are_same_type(table, altered_table)) {
            handler_flags &= ~Alter_inplace_info::ALTER_COLUMN_TYPE;
        }
    }

246 247 248
    return handler_flags;
}

249
// Require that there is no intersection of add and drop names.
250
static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
251 252 253 254 255 256 257 258 259 260 261 262
    for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
        KEY *drop_key = ha_alter_info->index_drop_buffer[d];
        for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
            KEY *add_key = &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
            if (strcmp(drop_key->name, add_key->name) == 0) {
                return false;
            }
        }
    }
    return true;
}

263
// Return true if some bit in mask is set and no bit in ~mask is set, otherwise return false.
// In other words: bits is a non-empty subset of mask.
static bool only_flags(ulong bits, ulong mask) {
    return (bits & mask) != 0 && (bits & ~mask) == 0;
}

268 269 270
// Check if an alter table operation on this table and described by the alter table parameters is supported inplace
// and if so, what type of locking is needed to execute it.
// return values:
271 272

// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an inplace operation, a table copy is required
273
// HA_ALTER_ERROR: the alter table operation should fail
274 275 276 277

// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X

// HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE: prepare runs with MDL X, alter runs with MDL SNW
278
// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW, concurrent reads, no writes
279 280

// HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE: prepare runs with MDL X, alter runs with MDL SW
281 282 283
// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW, concurrent reads, writes.
//                           must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid deadlocks
//                           with the MDL lock and the table lock
284
enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
285 286 287 288 289 290 291
    TOKUDB_DBUG_ENTER("check_if_supported_alter");

    if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
        print_alter_info(altered_table, ha_alter_info);
    }

    enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED; // default is NOT inplace
292
    THD *thd = ha_thd();
293

294 295 296
    // setup context
    tokudb_alter_ctx *ctx = new tokudb_alter_ctx;
    ha_alter_info->handler_ctx = ctx;
297
    ctx->handler_flags = fix_handler_flags(thd, table, altered_table, ha_alter_info);
298 299 300
    ctx->table_kc_info = &share->kc_info;
    ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
    memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));
301

302 303 304
    if (get_disable_hot_alter(thd)) {
        ; // do nothing
    } else
305 306 307
    // add or drop index
    if (only_flags(ctx->handler_flags, Alter_inplace_info::DROP_INDEX + Alter_inplace_info::DROP_UNIQUE_INDEX + 
                   Alter_inplace_info::ADD_INDEX + Alter_inplace_info::ADD_UNIQUE_INDEX)) {
308
        if ((ha_alter_info->index_add_count > 0 || ha_alter_info->index_drop_count > 0) &&
309
            !tables_have_same_keys(table, altered_table, THDVAR(thd, alter_print_error) != 0, false) &&
310
            is_disjoint_add_drop(ha_alter_info)) {
311

312 313 314 315 316
            if (ctx->handler_flags & (Alter_inplace_info::DROP_INDEX + Alter_inplace_info::DROP_UNIQUE_INDEX)) {
                // the fractal tree can not handle dropping an index concurrent with querying with the index.
                // we grab an exclusive MDL for the drop index.
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            } else {
317
                result = HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE;
318 319 320

                // someday, allow multiple hot indexes via alter table add key. don't forget to change the store_lock function.
                // for now, hot indexing is only supported via session variable with the create index sql command
321 322 323 324
                if (ha_alter_info->index_add_count == 1 && ha_alter_info->index_drop_count == 0 &&  // only one add or drop
                    ctx->handler_flags == Alter_inplace_info::ADD_INDEX &&                          // must be add index not add unique index
                    thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&                                  // must be a create index command
                    get_create_index_online(thd)) {                                                 // must be enabled
325
                    // external_lock set WRITE_ALLOW_WRITE which allows writes concurrent with the index creation
326
                    result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE; 
327
                }
328 329 330
            }
        }
    } else
331
    // column default
332
    if (only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
333
        result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
334
    } else
335
    // column rename
336 337
    if (ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME &&
        only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_NAME + Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
338 339
        // we have identified a possible column rename, 
        // but let's do some more checks
340
        
341
        // we will only allow an hcr if there are no changes
342
        // in column positions (ALTER_COLUMN_ORDER is not set)
343
        
344 345 346
        // now need to verify that one and only one column
        // has changed only its name. If we find anything to
        // the contrary, we don't allow it, also check indexes
347
        bool cr_supported = column_rename_supported(table, altered_table, (ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_ORDER) != 0);
348
        if (cr_supported)
349
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
350
    } else    
351
    // add column
352
    if (ctx->handler_flags & Alter_inplace_info::ADD_COLUMN &&
353 354 355
        only_flags(ctx->handler_flags, Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::ALTER_COLUMN_ORDER) &&
        setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

Yoni Fogel's avatar
Yoni Fogel committed
356 357
        uint32_t added_columns[altered_table->s->fields];
        uint32_t num_added_columns = 0;
358 359 360
        int r = find_changed_columns(added_columns, &num_added_columns, table, altered_table);
        if (r == 0) {
            if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
Yoni Fogel's avatar
Yoni Fogel committed
361 362
                for (uint32_t i = 0; i < num_added_columns; i++) {
                    uint32_t curr_added_index = added_columns[i];
363 364 365 366 367 368 369
                    Field* curr_added_field = altered_table->field[curr_added_index];
                    printf("Added column: index %d, name %s\n", curr_added_index, curr_added_field->field_name);
                }
            }
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else
370
    // drop column
371
    if (ctx->handler_flags & Alter_inplace_info::DROP_COLUMN &&
372 373 374
        only_flags(ctx->handler_flags, Alter_inplace_info::DROP_COLUMN + Alter_inplace_info::ALTER_COLUMN_ORDER) &&
        setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

Yoni Fogel's avatar
Yoni Fogel committed
375 376
        uint32_t dropped_columns[table->s->fields];
        uint32_t num_dropped_columns = 0;
377 378 379
        int r = find_changed_columns(dropped_columns, &num_dropped_columns, altered_table, table);
        if (r == 0) {
            if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
Yoni Fogel's avatar
Yoni Fogel committed
380 381
                for (uint32_t i = 0; i < num_dropped_columns; i++) {
                    uint32_t curr_dropped_index = dropped_columns[i];
382 383 384 385 386 387
                    Field* curr_dropped_field = table->field[curr_dropped_index];
                    printf("Dropped column: index %d, name %s\n", curr_dropped_index, curr_dropped_field->field_name);
                }
            }
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
388
    } else
389
    // change column length
390
    if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) && 
391 392
        only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH + Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
        table->s->fields == altered_table->s->fields &&
393 394 395 396 397
        find_changed_fields(table, altered_table, ctx->changed_fields) > 0 && 
        setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        if (change_length_is_supported(table, altered_table, ha_alter_info, ctx)) {
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
398 399 400
        }
    } else
    // change column type
401
    if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) &&
402 403
        only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_TYPE + Alter_inplace_info::ALTER_COLUMN_DEFAULT) && 
        table->s->fields == altered_table->s->fields && 
404 405
        find_changed_fields(table, altered_table, ctx->changed_fields) > 0 &&
        setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
406
        
407 408
        if (change_type_is_supported(table, altered_table, ha_alter_info, ctx)) {
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
409 410 411
        }
    } else
    if (only_flags(ctx->handler_flags, Alter_inplace_info::CHANGE_CREATE_OPTION)) {
412
        HA_CREATE_INFO *create_info = ha_alter_info->create_info;
413 414
        // alter auto_increment
        if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
415
            // do a sanity check that the table is what we think it is
416
            if (tables_have_same_keys_and_columns(table, altered_table, THDVAR(thd, alter_print_error) != 0)) {
417 418 419
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            }
        }
420
        // alter row_format
421
        else if (only_flags(create_info->used_fields, HA_CREATE_USED_ROW_FORMAT)) {
422
            // do a sanity check that the table is what we think it is
423
            if (tables_have_same_keys_and_columns(table, altered_table, THDVAR(thd, alter_print_error) != 0)) {
424 425
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            }
426
        }
427
    }
428

429
    // turn a not supported result into an error if the slow alter table (copy) is disabled
430 431 432 433
    if (result == HA_ALTER_INPLACE_NOT_SUPPORTED && get_disable_slow_alter(thd)) {
        print_error(HA_ERR_UNSUPPORTED, MYF(0));
        result = HA_ALTER_ERROR;
    }
434 435 436 437
    
    DBUG_RETURN(result);
}

438
// Prepare for the alter operations
439
bool ha_tokudb::prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
440
    TOKUDB_DBUG_ENTER("prepare_inplace_alter_table");
441
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
442 443
    assert(transaction); // transaction must exist after table is locked
    ctx->alter_txn = transaction;
444
    bool result = false; // success
445 446 447
    DBUG_RETURN(result);
}

448
// Execute the alter operations.
449
bool ha_tokudb::inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
450 451
    TOKUDB_DBUG_ENTER("inplace_alter_table");

452
    int error = 0;
453
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
454
    HA_CREATE_INFO *create_info = ha_alter_info->create_info;
455

456
    if (error == 0 && (ctx->handler_flags & (Alter_inplace_info::DROP_INDEX + Alter_inplace_info::DROP_UNIQUE_INDEX))) {
457
        error = alter_table_drop_index(altered_table, ha_alter_info);
458
    }
459
    if (error == 0 && (ctx->handler_flags & (Alter_inplace_info::ADD_INDEX + Alter_inplace_info::ADD_UNIQUE_INDEX))) {
460
        error = alter_table_add_index(altered_table, ha_alter_info);
461
    }
462
    if (error == 0 && (ctx->handler_flags & (Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::DROP_COLUMN))) { 
463
        error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
464
    }
465
    if (error == 0 && (ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) && (create_info->used_fields & HA_CREATE_USED_AUTO)) {
466
        error = write_auto_inc_create(share->status_block, create_info->auto_increment_value, ctx->alter_txn);
467
    }
468 469
    if (error == 0 && (ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) && (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
        // Get the current compression
470 471 472
        DB *db = share->key_file[0];
        error = db->get_compression_method(db, &ctx->orig_compression_method);
        assert(error == 0);
473

474
        // Set the new compression
475
        enum toku_compression_method method = row_type_to_compression_method(create_info->row_type);
Rich Prohaska's avatar
Rich Prohaska committed
476
        uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
Yoni Fogel's avatar
Yoni Fogel committed
477
        for (uint32_t i = 0; i < curr_num_DBs; i++) {
478
            db = share->key_file[i];
479
            error = db->change_compression_method(db, method);
480
            if (error)
481
                break;
482
            ctx->compression_changed = true;
483
        }
484
    }
485 486

    // note: only one column expansion is allowed
487 488

    if (error == 0 && ctx->expand_fixed_update_needed)
489
        error = alter_table_expand_columns(altered_table, ha_alter_info);
490

491 492 493 494 495 496
    if (error == 0 && ctx->expand_varchar_update_needed)
        error = alter_table_expand_varchar_offsets(altered_table, ha_alter_info);

    if (error == 0 && ctx->expand_blob_update_needed) 
        error = alter_table_expand_blobs(altered_table, ha_alter_info);

497 498 499
    if (error == 0 && ctx->reset_card)
        tokudb::set_card_from_status(share->status_block, ctx->alter_txn, table->s, altered_table->s);

500 501
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
502 503 504 505 506
    if (error == 0 && (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
        error = write_frm_data(share->status_block, ctx->alter_txn, altered_table->s->path.str);
    }
#endif

507 508 509 510
    bool result = false; // success
    if (error) {
        print_error(error, MYF(0));
        result = true;  // failure
511 512 513 514 515
    }

    DBUG_RETURN(result);
}

516
// Add the indexes listed in ha_alter_info->index_add_buffer to the table.
// Builds a temporary KEY array in add order, hands it to tokudb_add_index() under the alter
// transaction and, on success, marks the context so that cardinality data gets reset.
// Returns 0 on success, ENOMEM if the temporary array can not be allocated, otherwise the error
// from tokudb_add_index().
int ha_tokudb::alter_table_add_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {

    // sort keys in add index order
    KEY *key_info = (KEY*) tokudb_my_malloc(sizeof (KEY) * ha_alter_info->index_add_count, MYF(MY_WME));
    if (key_info == NULL)
        return ENOMEM; // nothing has been changed yet, so there is nothing to undo
    for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
        KEY *key = &key_info[i];
        *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
        // point every key part at the field object of the table being altered
        for (KEY_PART_INFO *key_part= key->key_part; key_part < key->key_part + get_key_parts(key); key_part++)
            key_part->field = table->field[key_part->fieldnr];
    }

    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
    ctx->add_index_changed = true;
    int error = tokudb_add_index(table, key_info, ha_alter_info->index_add_count, ctx->alter_txn, &ctx->incremented_num_DBs, &ctx->modified_DBs);
    if (error == HA_ERR_FOUND_DUPP_KEY) {
        // hack for now, in case of duplicate key error,
        // because at the moment we cannot display the right key
        // information to the user, so that he knows potentially what went
        // wrong.
        last_dup_key = MAX_KEY;
    }

    tokudb_my_free(key_info);

    if (error == 0)
        ctx->reset_card = true;

    return error;
}

546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
static bool find_index_of_key(const char *key_name, TABLE *table, uint *index_offset_ptr) {
    for (uint i = 0; i < table->s->keys; i++) {
        if (strcmp(key_name, table->key_info[i].name) == 0) {
            *index_offset_ptr = i;
            return true;
        }
    }
    return false;
}

static bool find_index_of_key(const char *key_name, KEY *key_info, uint key_count, uint *index_offset_ptr) {
    for (uint i = 0; i < key_count; i++) {
        if (strcmp(key_name, key_info[i].name) == 0) {
            *index_offset_ptr = i;
            return true;
        }
    }
    return false;
}

566
// Drop the indexes listed in ha_alter_info->index_drop_buffer.
// Each name is translated to an offset into a key array, the indexes are dropped under the alter
// transaction and, on success, the context is marked so that cardinality data gets reset.
// Returns the result of drop_indexes().
int ha_tokudb::alter_table_drop_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
    KEY *key_info = table->key_info;
    // translate key names to indexes into the key_info array
    uint index_drop_offsets[ha_alter_info->index_drop_count];
    for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
        bool found;
        found = find_index_of_key(ha_alter_info->index_drop_buffer[i]->name, table, &index_drop_offsets[i]);
        if (!found) {
            // undo of add key in partition engine
            found = find_index_of_key(ha_alter_info->index_drop_buffer[i]->name, ha_alter_info->key_info_buffer, ha_alter_info->key_count, &index_drop_offsets[i]);
            assert(found);
            // the offset now refers to key_info_buffer, so drop_indexes must use that array
            key_info = ha_alter_info->key_info_buffer;
        }
    }

    // drop indexes
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
    ctx->drop_index_changed = true;

    int error = drop_indexes(table, index_drop_offsets, ha_alter_info->index_drop_count, key_info, ctx->alter_txn);

    if (error == 0)
        ctx->reset_card = true;

    return error;
}

593
// Apply a column addition or a column drop to every dictionary of the table.
// For each DB the row descriptor is replaced; for the primary key and for clustering keys a row
// mutator message is additionally broadcast so that existing rows get rewritten.
// Returns 0 on success, ENOMEM if the message buffer can not be allocated, otherwise the first
// error reported by new_row_descriptor(), change_descriptor() or update_broadcast().
int ha_tokudb::alter_table_add_or_drop_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
    int error;
    uchar *column_extra = NULL;
    uint32_t max_column_extra_size;
    uint32_t num_column_extra;
    uint32_t num_columns = 0;
    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);

    uint32_t columns[table->s->fields + altered_table->s->fields]; // set size such that we know it is big enough for both cases
    memset(columns, 0, sizeof(columns));

    // generate the array of columns
    if (ha_alter_info->handler_flags & Alter_inplace_info::DROP_COLUMN) {
        find_changed_columns(
                             columns,
                             &num_columns,
                             altered_table,
                             table
                             );
    } else
    if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) {
        find_changed_columns(
                             columns,
                             &num_columns,
                             table,
                             altered_table
                             );
    } else
        assert(0);
    // upper bound for the size of the message built by fill_row_mutator
    max_column_extra_size =
        STATIC_ROW_MUTATOR_SIZE + //max static row_mutator
        4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + // max dynamic row_mutator
        (4 + share->kc_info.num_blobs) + // max static blob size
        (num_columns*(1+4+1+4)); // max dynamic blob size
    column_extra = (uchar *)tokudb_my_malloc(max_column_extra_size, MYF(MY_WME));
    if (column_extra == NULL) { error = ENOMEM; goto cleanup; }

    for (uint32_t i = 0; i < curr_num_DBs; i++) {
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(table, altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            goto cleanup;
        error = share->key_file[i]->change_descriptor(share->key_file[i], ctx->alter_txn, &row_descriptor, 0);
        // the descriptor buffer is released whether or not the change succeeded
        tokudb_my_free(row_descriptor.data);
        if (error)
            goto cleanup;

        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
            num_column_extra = fill_row_mutator(
                                                column_extra,
                                                columns,
                                                num_columns,
                                                altered_table,
                                                ctx->altered_table_kc_info,
                                                i,
                                                (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) != 0 // true if adding columns, otherwise is a drop
                                                );

            DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
            column_dbt.data = column_extra;
            column_dbt.size = num_column_extra;
            DBUG_ASSERT(num_column_extra <= max_column_extra_size);
            error = share->key_file[i]->update_broadcast(
                                                         share->key_file[i],
                                                         ctx->alter_txn,
                                                         &column_dbt,
                                                         DB_IS_RESETTING_OP
                                                         );
            if (error) { goto cleanup; }
        }
    }

    error = 0;
 cleanup:
    // column_extra is still NULL here when its allocation failed
    tokudb_my_free(column_extra);
    return error;
}

673 674 675
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter transaction.
// If abort then abort the alter transaction and try to rollback the non-transactional changes.
676
bool ha_tokudb::commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit) {
677
    TOKUDB_DBUG_ENTER("commit_inplace_alter_table");
678 679
    
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
680
    bool result = false; // success
681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
    THD *thd = ha_thd();
    MDL_ticket *ticket = table->mdl_ticket;
    if (ticket->get_type() != MDL_EXCLUSIVE) {
        // get exclusive lock no matter what
#if defined(MARIADB_BASE_VERSION)
        killed_state saved_killed_state = thd->killed;
        thd->killed = NOT_KILLED;
        while (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) && thd->killed)
            thd->killed = NOT_KILLED;
        assert(ticket->get_type() == MDL_EXCLUSIVE);
        if (thd->killed == NOT_KILLED)
            thd->killed = saved_killed_state;
#else
        THD::killed_state saved_killed_state = thd->killed;
        thd->killed = THD::NOT_KILLED;
        while (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) && thd->killed)
            thd->killed = THD::NOT_KILLED;
        assert(ticket->get_type() == MDL_EXCLUSIVE);
        if (thd->killed == THD::NOT_KILLED)
            thd->killed = saved_killed_state;
#endif
    }
703

704
    if (commit) {
705 706
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
707 708 709 710
        if (ha_alter_info->group_commit_ctx) {
            ha_alter_info->group_commit_ctx = NULL;
        }
#endif
711
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599)
712 713
        if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
            int error = write_frm_data(share->status_block, ctx->alter_txn, altered_table->s->path.str);
714 715 716
            if (error) {
                commit = false; 
                result = true;
717
                print_error(error, MYF(0));
718
            }
719
        }
720
#endif
721 722
    }

723
    if (!commit) {
724
        // abort the alter transaction NOW so that any alters are rolled back. this allows the following restores to work.
725 726 727
        tokudb_trx_data *trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
        assert(ctx->alter_txn == trx->stmt);
        assert(trx->tokudb_lock_count > 0);
728 729
        // for partitioned tables, we use a single transaction to do all of the partition changes.  the tokudb_lock_count
        // is a reference count for each of the handlers to the same transaction.  obviously, we want to only abort once.
730 731 732 733 734 735 736
        if (!--trx->tokudb_lock_count) {
            abort_txn(ctx->alter_txn);
            ctx->alter_txn = NULL;
            trx->stmt = NULL;
            trx->sub_sp_level = NULL;
        }
        transaction = NULL;
737

738
        if (ctx->add_index_changed) {
739
            restore_add_index(table, ha_alter_info->index_add_count, ctx->incremented_num_DBs, ctx->modified_DBs);
740
        }
741
        if (ctx->drop_index_changed) {
742
            // translate key names to indexes into the key_info array
743
            uint index_drop_offsets[ha_alter_info->index_drop_count];
744 745 746 747
            for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
                bool found = find_index_of_key(ha_alter_info->index_drop_buffer[i]->name, table, &index_drop_offsets[i]);
                assert(found);
            }
748 749
            restore_drop_indexes(table, index_drop_offsets, ha_alter_info->index_drop_count);
        }
750
        if (ctx->compression_changed) {
Rich Prohaska's avatar
Rich Prohaska committed
751
            uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
Yoni Fogel's avatar
Yoni Fogel committed
752
            for (uint32_t i = 0; i < curr_num_DBs; i++) {
753 754 755 756 757
                DB *db = share->key_file[i];
                int error = db->change_compression_method(db, ctx->orig_compression_method);
                assert(error == 0);
            }
        }
758 759
    }
    
760 761 762
    DBUG_RETURN(result);
}

763
// Setup the altered table's key and col info.
764
int ha_tokudb::setup_kc_info(TABLE *altered_table, KEY_AND_COL_INFO *altered_kc_info) {
765
    int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
766 767 768 769 770
    if (error == 0)
        error = initialize_key_and_col_info(altered_table->s, altered_table, altered_kc_info, hidden_primary_key, primary_key);
    return error;
}

771
// Expand the variable length fields offsets from 1 to 2 bytes.
772
int ha_tokudb::alter_table_expand_varchar_offsets(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
773 774 775
    int error = 0;
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);

Rich Prohaska's avatar
Rich Prohaska committed
776
    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
777
    for (uint32_t i = 0; i < curr_num_DBs; i++) {
778 779 780 781 782 783
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(table, altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            break;
        error = share->key_file[i]->change_descriptor(share->key_file[i], ctx->alter_txn, &row_descriptor, 0);
784
        tokudb_my_free(row_descriptor.data);
785 786 787
        if (error)
            break;

788
        // for all trees that have values, make an update variable offsets message and broadcast it into the tree
789
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
790 791
            uint32_t offset_start = table_share->null_bytes + share->kc_info.mcp_info[i].fixed_field_size;
            uint32_t offset_end = offset_start + share->kc_info.mcp_info[i].len_of_offsets;
792
            uint32_t number_of_offsets = offset_end - offset_start;
793

794
            // make the expand variable offsets message
795
            DBT expand; memset(&expand, 0, sizeof expand);
796
            expand.size = sizeof (uchar) + sizeof offset_start + sizeof offset_end;
797
            expand.data = tokudb_my_malloc(expand.size, MYF(MY_WME));
798 799 800 801 802
            if (!expand.data) {
                error = ENOMEM;
                break;
            }
            uchar *expand_ptr = (uchar *)expand.data;
803
            expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
804 805
            expand_ptr += sizeof (uchar);
        
806 807 808
            memcpy(expand_ptr, &number_of_offsets, sizeof number_of_offsets);
            expand_ptr += sizeof number_of_offsets;

809 810 811 812 813
            memcpy(expand_ptr, &offset_start, sizeof offset_start);
            expand_ptr += sizeof offset_start;

            // and broadcast it into the tree
            error = share->key_file[i]->update_broadcast(share->key_file[i], ctx->alter_txn, &expand, DB_IS_RESETTING_OP);
814
            tokudb_my_free(expand.data);
815 816 817
            if (error)
                break;
        }
818 819 820 821 822 823
    }

    return error;
}

// Return true if a field is part of a key
824
static bool field_in_key(KEY *key, Field *field) {
825
    for (uint i = 0; i < get_key_parts(key); i++) {
826
        KEY_PART_INFO *key_part = &key->key_part[i];
827
        if (strcmp(key_part->field->field_name, field->field_name) == 0)
828 829 830 831 832 833
            return true;
    }
    return false;
}

// Return true if a field is part of any key
834
static bool field_in_key_of_table(TABLE *table, Field *field) {
835 836 837 838 839 840 841
    for (uint i = 0; i < table->s->keys; i++) {
        if (field_in_key(&table->key_info[i], field))
            return true;
    }
    return false;
}

842
// Return true if all changed varchar/varbinary field lengths can be changed inplace, otherwise return false
843
static bool change_varchar_length_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
844 845 846 847 848
    if (old_field->real_type() != MYSQL_TYPE_VARCHAR || 
        new_field->real_type() != MYSQL_TYPE_VARCHAR || 
        old_field->binary() != new_field->binary() || 
        old_field->charset()->number != new_field->charset()->number ||
        old_field->field_length > new_field->field_length)
849 850
        return false;
    if (ctx->table_kc_info->num_offset_bytes > ctx->altered_table_kc_info->num_offset_bytes)
851
        return false; // shrink is not supported
852 853
    if (ctx->table_kc_info->num_offset_bytes < ctx->altered_table_kc_info->num_offset_bytes)
        ctx->expand_varchar_update_needed = true; // sum of varchar lengths changed from 1 to 2
854 855 856 857
    return true;
}

// Return true if all changed field lengths can be changed inplace, otherwise return false
858
static bool change_length_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
859 860
    if (table->s->fields != altered_table->s->fields)
        return false;
861 862
    if (table->s->null_bytes != altered_table->s->null_bytes)
        return false;
863 864
    if (ctx->changed_fields.elements() > 1)
        return false; // only support one field change
865
    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0; ai < ctx->changed_fields.elements(); ai++) {
866 867 868
        uint i = ctx->changed_fields.at(ai);
        Field *old_field = table->field[i];
        Field *new_field = altered_table->field[i];
869 870 871 872 873 874
        if (old_field->real_type() != new_field->real_type())
            return false; // no type conversions
        if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
            return false; // only varchar
        if (field_in_key_of_table(table, old_field) || field_in_key_of_table(altered_table, new_field))
            return false; // not in any key
875
        if (!change_varchar_length_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
876 877 878
            return false;
    }

879
    return true;
880 881
}

882
// Debug function that ensures that the array is sorted
883
static bool is_sorted(Dynamic_array<uint> &a) {
884 885 886
    bool r = true;
    if (a.elements() > 0) {
        uint lastelement = a.at(0);
887
        for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++)
888 889 890 891 892 893
            if (lastelement > a.at(i))
                r = false;
    }
    return r;
}

894
// Expand every changed field of the alter, one column at a time, stopping at the first error.
// Returns 0 on success, otherwise the error from alter_table_expand_one_column.
int ha_tokudb::alter_table_expand_columns(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);

    // since we build the changed_fields array in field order, it must be sorted
    assert(is_sorted(ctx->changed_fields));

    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0; ai < ctx->changed_fields.elements(); ai++) {
        uint field_num = ctx->changed_fields.at(ai);
        int r = alter_table_expand_one_column(altered_table, ha_alter_info, field_num);
        if (r != 0)
            return r;
    }

    return 0;
}

906 907 908 909 910
// Return true if the field has the UNSIGNED_FLAG set in its flags
static bool is_unsigned(Field *f) {
    return (f->flags & UNSIGNED_FLAG) ? true : false;
}

911 912
// Return the starting offset in the value for a particular index (selected by idx) of a
// particular field (selected by expand_field_num)
913 914
// TODO: replace this?
static uint32_t alter_table_field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, int idx, int expand_field_num) {
915
    uint32_t offset = null_bytes;
916 917 918
    for (int i = 0; i < expand_field_num; i++) {
        if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
            continue;
919
        offset += kc_info->field_lengths[i];
920
    }
921 922 923 924
    return offset;
}

// Send an expand message into all clustered indexes including the primary
925
int ha_tokudb::alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info, int expand_field_num) {
926 927 928 929 930 931 932 933 934
    int error = 0;
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);

    Field *old_field = table->field[expand_field_num];
    TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
    Field *new_field = altered_table->field[expand_field_num];
    TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
    assert(old_field_type == new_field_type);

935
    uchar operation;
936
    uchar pad_char;
937 938 939 940 941 942 943
    switch (old_field_type) {
    case toku_type_int:
        assert(is_unsigned(old_field) == is_unsigned(new_field));
        if (is_unsigned(old_field))
            operation = UPDATE_OP_EXPAND_UINT;
        else
            operation = UPDATE_OP_EXPAND_INT;
944
        pad_char = 0;
945 946 947
        break;
    case toku_type_fixstring:
        operation = UPDATE_OP_EXPAND_CHAR;
948
        pad_char = old_field->charset()->pad_char;
949 950 951
        break;
    case toku_type_fixbinary:
        operation = UPDATE_OP_EXPAND_BINARY;
952
        pad_char = 0;
953 954 955 956 957
        break;
    default:
        assert(0);
    }

Rich Prohaska's avatar
Rich Prohaska committed
958
    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
959
    for (uint32_t i = 0; i < curr_num_DBs; i++) {
960 961 962 963 964 965
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(table, altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            break;
        error = share->key_file[i]->change_descriptor(share->key_file[i], ctx->alter_txn, &row_descriptor, 0);
966
        tokudb_my_free(row_descriptor.data);
967 968 969
        if (error)
            break;

970
        // for all trees that have values, make an expand update message and broadcast it into the tree
971
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
972 973
            uint32_t old_offset = alter_table_field_offset(table_share->null_bytes, ctx->table_kc_info, i, expand_field_num);
            uint32_t new_offset = alter_table_field_offset(table_share->null_bytes, ctx->altered_table_kc_info, i, expand_field_num);
974 975 976 977 978 979 980 981
            assert(old_offset <= new_offset);

            uint32_t old_length = ctx->table_kc_info->field_lengths[expand_field_num];
            assert(old_length == old_field->pack_length());

            uint32_t new_length = ctx->altered_table_kc_info->field_lengths[expand_field_num];
            assert(new_length == new_field->pack_length());

982
            DBT expand; memset(&expand, 0, sizeof expand);
983 984 985
            expand.size = sizeof operation + sizeof new_offset + sizeof old_length + sizeof new_length;
            if (operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY)
                expand.size += sizeof pad_char;
986
            expand.data = tokudb_my_malloc(expand.size, MYF(MY_WME));
987 988 989 990 991 992
            if (!expand.data) {
                error = ENOMEM;
                break;
            }
            uchar *expand_ptr = (uchar *)expand.data;
            expand_ptr[0] = operation;
993
            expand_ptr += sizeof operation;
994 995 996 997 998

            // for the first altered field, old_offset == new_offset.  for the subsequent altered fields, the new_offset
            // should be used as it includes the length changes from the previous altered fields.
            memcpy(expand_ptr, &new_offset, sizeof new_offset);
            expand_ptr += sizeof new_offset;
999 1000 1001 1002 1003 1004 1005

            memcpy(expand_ptr, &old_length, sizeof old_length);
            expand_ptr += sizeof old_length;

            memcpy(expand_ptr, &new_length, sizeof new_length);
            expand_ptr += sizeof new_length;

1006 1007 1008 1009 1010
            if (operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY) {
                memcpy(expand_ptr, &pad_char, sizeof pad_char);
                expand_ptr += sizeof pad_char;
            }

1011 1012 1013 1014
            assert(expand_ptr == (uchar *)expand.data + expand.size);

            // and broadcast it into the tree
            error = share->key_file[i]->update_broadcast(share->key_file[i], ctx->alter_txn, &expand, DB_IS_RESETTING_OP);
1015
            tokudb_my_free(expand.data);
1016 1017 1018 1019 1020 1021 1022 1023
            if (error)
                break;
        }
    }

    return error;
}

1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036
// Append to the buffer one byte per blob: the row pack length of each of the first n blob fields
// listed in kc_info->blob_fields, looked up in the given table.
static void marshall_blob_lengths(tokudb::buffer &b, uint32_t n, TABLE *table, KEY_AND_COL_INFO *kc_info) {
    uint blob = 0;
    while (blob < n) {
        uint field_index = kc_info->blob_fields[blob];
        assert(field_index < table->s->fields);
        uint8_t length_byte = table->s->field[field_index]->row_pack_length();
        b.append(&length_byte, sizeof length_byte);
        blob++;
    }
}

// Install the altered table's row descriptor in every dictionary and, for the dictionaries that
// have values (the primary key and the clustering keys), broadcast an UPDATE_OP_EXPAND_BLOB
// message.  The message holds: the operation byte, the offset of the variable offsets (null bytes
// plus fixed field size), the length of the offsets, the number of offset bytes (0 when there are
// no offsets), the number of blobs, the old blob lengths and the new blob lengths.
// Returns 0 on success, otherwise the first error encountered.
int ha_tokudb::alter_table_expand_blobs(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
    uint32_t num_dbs = table->s->keys + tokudb_test(hidden_primary_key);

    for (uint32_t idx = 0; idx < num_dbs; idx++) {
        // change to a new descriptor
        DBT desc; memset(&desc, 0, sizeof desc);
        int r = new_row_descriptor(table, altered_table, ha_alter_info, idx, &desc);
        if (r != 0)
            return r;
        r = share->key_file[idx]->change_descriptor(share->key_file[idx], ctx->alter_txn, &desc, 0);
        tokudb_my_free(desc.data);
        if (r != 0)
            return r;

        // only trees that have values need the update blobs message
        if (idx != primary_key && !key_is_clustering(&table_share->key_info[idx]))
            continue;

        tokudb::buffer msg;
        uint8_t op = UPDATE_OP_EXPAND_BLOB;
        msg.append(&op, sizeof op);
        msg.append_ui<uint32_t>(table->s->null_bytes + ctx->table_kc_info->mcp_info[idx].fixed_field_size);
        uint32_t var_offset_bytes = ctx->table_kc_info->mcp_info[idx].len_of_offsets;
        msg.append_ui<uint32_t>(var_offset_bytes);
        msg.append_ui<uint32_t>(var_offset_bytes == 0 ? 0 : ctx->table_kc_info->num_offset_bytes);

        // add blobs info: count, then the old lengths followed by the new lengths
        uint32_t num_blobs = ctx->table_kc_info->num_blobs;
        msg.append_ui<uint32_t>(num_blobs);
        marshall_blob_lengths(msg, num_blobs, table, ctx->table_kc_info);
        marshall_blob_lengths(msg, num_blobs, altered_table, ctx->altered_table_kc_info);

        // and broadcast it into the tree
        DBT expand; memset(&expand, 0, sizeof expand);
        expand.data = msg.data();
        expand.size = msg.size();
        r = share->key_file[idx]->update_broadcast(share->key_file[idx], ctx->alter_txn, &expand, DB_IS_RESETTING_OP);
        if (r != 0)
            return r;
    }

    return 0;
}

1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089
// Return true if the change between two fixed length fields can be done inplace.
// An unchanged pack length is supported as is, a larger pack length is supported and sets
// expand_fixed_update_needed in the ctx, and a smaller pack length (shrink) is not supported.
static bool change_fixed_length_is_supported(TABLE *table, TABLE *altered_table, Field *old_field, Field *new_field, tokudb_alter_ctx *ctx) {
    if (new_field->pack_length() < old_field->pack_length())
        return false;
    if (new_field->pack_length() > old_field->pack_length())
        ctx->expand_fixed_update_needed = true;
    return true;
}

1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
// Return true if the change between two blob fields can be done inplace:
//   blob -> longer or equal length blob, or
//   text -> longer or equal length text with the same charset.
// When the change is supported, expand_blob_update_needed is set in the ctx.
static bool change_blob_length_is_supported(TABLE *table, TABLE *altered_table, Field *old_field, Field *new_field, tokudb_alter_ctx *ctx) {
    bool supported;
    if (old_field->binary() && new_field->binary()) {
        supported = old_field->pack_length() <= new_field->pack_length();
    } else if (!old_field->binary() && !new_field->binary()) {
        supported = old_field->pack_length() <= new_field->pack_length() &&
                    old_field->charset()->number == new_field->charset()->number;
    } else {
        supported = false; // blob <-> text
    }

    if (supported)
        ctx->expand_blob_update_needed = true;
    return supported;
}

1106
// Return true if the MySQL type is an int or unsigned int type
1107
static bool is_int_type(enum_field_types t) {
1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
    switch (t) {
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
        return true;
    default:
        return false;
    }
}

1120
// Return true if two field types can be changed inplace
1121
static bool change_field_type_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
1122 1123
    enum_field_types old_type = old_field->real_type();
    enum_field_types new_type = new_field->real_type();
1124
    if (is_int_type(old_type)) {
1125
        // int and unsigned int expansion
1126 1127 1128 1129 1130
        if (is_int_type(new_type) && is_unsigned(old_field) == is_unsigned(new_field))
            return change_fixed_length_is_supported(table, altered_table, old_field, new_field, ctx);
        else
            return false;
    } else if (old_type == MYSQL_TYPE_STRING) {
1131
        // char(X) -> char(Y) and binary(X) -> binary(Y) expansion
1132 1133 1134
        if (new_type == MYSQL_TYPE_STRING && 
            old_field->binary() == new_field->binary() && 
            old_field->charset()->number == new_field->charset()->number)
1135 1136 1137
            return change_fixed_length_is_supported(table, altered_table, old_field, new_field, ctx);
        else
            return false;
1138 1139 1140 1141
    } else if (old_type == MYSQL_TYPE_VARCHAR) {
        // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion where X < 256 <= Y
        // the ALTER_COLUMN_TYPE handler flag is set for these cases
        return change_varchar_length_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx);
1142 1143
    } else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
        return change_blob_length_is_supported(table, altered_table, old_field, new_field, ctx);
1144
    } else
1145 1146 1147 1148
        return false;
}

// Return true if all changed field types can be changed inplace
1149
static bool change_type_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
1150 1151
    if (table->s->null_bytes != altered_table->s->null_bytes)
        return false;
1152 1153
    if (table->s->fields != altered_table->s->fields)
        return false;
1154 1155
    if (ctx->changed_fields.elements() > 1)
        return false; // only support one field change
1156
    for (DYNAMIC_ARRAY_ELEMENTS_TYPE  ai = 0; ai < ctx->changed_fields.elements(); ai++) {
1157 1158 1159
        uint i = ctx->changed_fields.at(ai);
        Field *old_field = table->field[i];
        Field *new_field = altered_table->field[i];
1160
        if (field_in_key_of_table(table, old_field) || field_in_key_of_table(altered_table, new_field))
1161
            return false;
1162
        if (!change_field_type_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
1163 1164 1165 1166 1167
            return false;            
    }
    return true;
}

1168 1169 1170
// Allocate and initialize a new descriptor for a dictionary in the altered table identified with idx.
// Return the new descriptor in the row_descriptor DBT.
// Return non-zero on error.
1171
int ha_tokudb::new_row_descriptor(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, uint32_t idx, DBT *row_descriptor) {
1172 1173 1174
    int error = 0;
    tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
    row_descriptor->size = get_max_desc_size(ctx->altered_table_kc_info, altered_table);
1175
    row_descriptor->data = (uchar *) tokudb_my_malloc(row_descriptor->size, MYF(MY_WME));
1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
    if (row_descriptor->data == NULL) {
        error = ENOMEM;
    } else {
        KEY* prim_key = hidden_primary_key ? NULL : &altered_table->s->key_info[primary_key];
        if (idx == primary_key) {
            row_descriptor->size = create_main_key_descriptor((uchar *)row_descriptor->data,
                                                              prim_key,
                                                              hidden_primary_key,
                                                              primary_key,
                                                              altered_table,
                                                              ctx->altered_table_kc_info);
        } else {
            row_descriptor->size = create_secondary_key_descriptor((uchar *)row_descriptor->data,
                                                                   &altered_table->key_info[idx],
                                                                   prim_key,
                                                                   hidden_primary_key,
                                                                   altered_table,
                                                                   primary_key,
                                                                   idx,
                                                                   ctx->altered_table_kc_info);
        }
        error = 0;
    }
    return error;
}

1202
#endif