ha_maria.cc 105 KB
Newer Older
1 2 3 4
/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */


#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation                          // gcc: Class implementation
#endif

21
#define MYSQL_SERVER 1
22
#include "mysql_priv.h"
23
#include <mysql/plugin.h>
24
#include <m_ctype.h>
25
#include <my_dir.h>
26
#include <myisampack.h>
unknown's avatar
unknown committed
27
#include <my_bit.h>
28
#include "ha_maria.h"
29
#include "trnman_public.h"
30

31
C_MODE_START
32 33
#include "maria_def.h"
#include "ma_rt_index.h"
34
#include "ma_blockrec.h"
unknown's avatar
unknown committed
35 36
#include "ma_checkpoint.h"
#include "ma_recovery.h"
37
C_MODE_END
38

unknown's avatar
unknown committed
39 40 41 42
/*
  Note that in future versions, only *transactional* Maria tables can
  rollback, so this flag should be up or down conditionally.
*/
unknown's avatar
unknown committed
43 44 45 46
/*
  MARIA_CANNOT_ROLLBACK expands to the HA_NO_TRANSACTIONS table flag and is
  OR-ed into the handler's table flags in the ha_maria constructor.
  While it is defined, every trans_register_ha() call in this file is
  replaced by an empty statement.
*/
#define MARIA_CANNOT_ROLLBACK HA_NO_TRANSACTIONS
#ifdef MARIA_CANNOT_ROLLBACK
#define trans_register_ha(A, B, C)  do { /* nothing */ } while(0)
#endif
47
/*
  Lvalue of type TRN* read from the slot returned by
  thd_ha_data(thd, maria_hton). Requires a variable named 'thd' in scope at
  the point of use.
*/
#define THD_TRN (*(TRN **)thd_ha_data(thd, maria_hton))
unknown's avatar
unknown committed
48

unknown's avatar
unknown committed
49 50 51
/*
  Storage for the pagecache_division_limit, pagecache_age_threshold and
  pagecache_buffer_size system variables declared further down in this file.
*/
ulong pagecache_division_limit, pagecache_age_threshold;
ulonglong pagecache_buffer_size;

unknown's avatar
unknown committed
52
/**
53 54 55 56 57
   As the auto-repair is initiated when opened from the SQL layer
   (open_unireg_entry(), check_and_repair()), it does not happen when Maria's
   Recovery internally opens the table to apply log records to it, which is
   good. It would happen only after Recovery, if the table is still
   corrupted.
unknown's avatar
unknown committed
58
*/
59
ulong maria_recover_options= HA_RECOVER_NONE;
unknown's avatar
unknown committed
60
handlerton *maria_hton;
61 62 63 64

/* bits in maria_recover_options */
const char *maria_recover_names[]=
{
65 66 67 68 69 70 71 72
  /*
    Compared to MyISAM, "default" was renamed to "normal" as it collided with
    SET var=default which sets to the var's default i.e. what happens when the
    var is not set i.e. HA_RECOVER_NONE.
    Another change is that OFF is used to disable, not ""; this is to have OFF
    display in SHOW VARIABLES which is better than "".
  */
  "OFF", "NORMAL", "BACKUP", "FORCE", "QUICK", NullS
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
};
TYPELIB maria_recover_typelib=
{
  array_elements(maria_recover_names) - 1, "",
  maria_recover_names, NULL
};

/*
  Names accepted by the stats_method variable declared further down; the
  list is NullS-terminated.
*/
const char *maria_stats_method_names[]=
{
  "nulls_unequal", "nulls_equal",
  "nulls_ignored", NullS
};
/* The count excludes the terminating NullS entry */
TYPELIB maria_stats_method_typelib=
{
  array_elements(maria_stats_method_names) - 1, "",
  maria_stats_method_names, NULL
};

91 92 93 94 95 96 97 98 99 100
/*
  Transaction log purge modes: names accepted by the log_purge_type variable
  declared further down; the list is NullS-terminated.
*/
const char *maria_translog_purge_type_names[]=
{
  "immediate", "external", "at_flush", NullS
};
/* The count excludes the terminating NullS entry */
TYPELIB maria_translog_purge_type_typelib=
{
  array_elements(maria_translog_purge_type_names) - 1, "",
  maria_translog_purge_type_names, NULL
};
101 102 103 104 105 106 107 108 109 110 111
/*
  Names accepted by the sync_log_dir variable declared further down; the
  list is NullS-terminated.
*/
const char *maria_sync_log_dir_names[]=
{
  "NEVER", "NEWFILE", "ALWAYS", NullS
};

/* The count excludes the terminating NullS entry */
TYPELIB maria_sync_log_dir_typelib=
{
  array_elements(maria_sync_log_dir_names) - 1, "",
  maria_sync_log_dir_names, NULL
};

112
/** Interval between background checkpoints in seconds */
unknown's avatar
unknown committed
113 114 115
static ulong checkpoint_interval;
static void update_checkpoint_interval(MYSQL_THD thd,
                                       struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
116
                                       void *var_ptr, const void *save);
117 118
/** After that many consecutive recovery failures, remove logs */
static ulong force_start_after_recovery_failures;
119 120
static void update_log_file_size(MYSQL_THD thd,
                                 struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
121
                                 void *var_ptr, const void *save);
unknown's avatar
unknown committed
122

123 124 125 126 127 128
/*
  block_size: read-only variable bound to maria_block_size.
  The four trailing arguments are the default, minimum, maximum and
  block-size (rounding step) values.
*/
static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Block size to be used for MARIA index pages.", 0, 0,
       MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
       MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);

unknown's avatar
unknown committed
129
static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
unknown's avatar
unknown committed
130
       PLUGIN_VAR_RQCMDARG,
unknown's avatar
unknown committed
131 132
       "Interval between automatic checkpoints, in seconds; 0 means"
       " 'no automatic checkpoints' which makes sense only for testing.",
unknown's avatar
unknown committed
133
       NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1);
unknown's avatar
unknown committed
134

135 136 137 138 139 140 141 142 143 144 145
/*
  force_start_after_recovery_failures: bound to the static variable of the
  same name. Default 0, range 0..UINT_MAX8.
*/
static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures,
       force_start_after_recovery_failures,
       /*
         Read-only because setting it on the fly has no useful effect,
         should be set on command-line.
       */
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Number of consecutive log recovery failures after which logs will be"
       " automatically deleted to cure the problem; 0 (the default) disables"
       " the feature.", NULL, NULL, 0, 0, UINT_MAX8, 1);

unknown's avatar
unknown committed
146 147 148 149
/*
  page_checksum: boolean variable bound to maria_page_checksums; the last
  argument (1) is the default value.
*/
static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
       "Maintain page checksums (can be overridden per table "
       "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1);

150 151 152 153 154 155 156
/*
  log_dir_path: command-line-only option (PLUGIN_VAR_NOSYSVAR), bound to
  maria_data_root and defaulting to mysql_real_data_home.
*/
static MYSQL_SYSVAR_STR(log_dir_path, maria_data_root,
       PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG,
       "Path to the directory where to store transactional log",
       NULL, NULL, mysql_real_data_home);


157 158 159 160 161 162 163 164 165 166 167 168 169 170
/*
  log_file_size: bound to log_file_size; changes are delivered through
  update_log_file_size(). Default TRANSLOG_FILE_SIZE, range
  TRANSLOG_MIN_FILE_SIZE..0xffffffff, in steps of TRANSLOG_PAGE_SIZE.
*/
static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
       PLUGIN_VAR_RQCMDARG,
       "Limit for transaction log size",
       NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
       TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);

/*
  log_purge_type: enum variable bound to log_purge_type; its values are
  named by maria_translog_purge_type_typelib and the default is
  TRANSLOG_PURGE_IMMIDIATE.
*/
static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
       PLUGIN_VAR_RQCMDARG,
       "Specifies how maria transactional log will be purged. "
       "Possible values of name are \"immediate\", \"external\" "
       "and \"at_flush\"",
       NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
       &maria_translog_purge_type_typelib);

171 172 173 174 175 176
/*
  max_sort_file_size: bound to maria_max_temp_length.
  Default MAX_FILE_SIZE, range 0..MAX_FILE_SIZE, in steps of 1MB.
*/
static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
       maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
       "Don't use the fast sort index method to create index if the "
       "temporary file would get bigger than this.",
       0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024);

unknown's avatar
unknown committed
177 178 179 180 181 182 183 184 185 186 187
/*
  pagecache_age_threshold: bound to the global pagecache_age_threshold.
  Default 300, minimum 100, in steps of 100.
*/
static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
       pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
       "This characterizes the number of hits a hot block has to be untouched "
       "until it is considered aged enough to be downgraded to a warm block. "
       "This specifies the percentage ratio of that number of hits to the "
       "total number of blocks in the page cache.", 0, 0,
        300, 100, ~0L, 100);

static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "The size of the buffer used for index blocks for Maria tables. "
unknown's avatar
unknown committed
188 189
       "Increase this to get better index handling (for all reads and "
       "multiple writes) to as much as you can afford.", 0, 0,
unknown's avatar
unknown committed
190 191 192 193 194 195 196
       KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, IO_SIZE);

/*
  pagecache_division_limit: bound to the global pagecache_division_limit.
  Default 100, range 1..100.
*/
static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
       PLUGIN_VAR_RQCMDARG,
       "The minimum percentage of warm blocks in key cache", 0, 0,
       100,  1, 100, 1);

197 198 199 200 201 202
/*
  recover: enum variable bound to maria_recover_options; its values are
  named by maria_recover_typelib. The option argument is optional
  (PLUGIN_VAR_OPCMDARG).
  NOTE(review): the help text calls "NORMAL" the default while the default
  registered here is HA_RECOVER_NONE - confirm which one is intended.
*/
static MYSQL_SYSVAR_ENUM(recover, maria_recover_options, PLUGIN_VAR_OPCMDARG,
       "Specifies how corrupted tables should be automatically repaired."
       " Possible values are \"NORMAL\" (the default), \"BACKUP\", \"FORCE\","
       " \"QUICK\", or \"OFF\" which is like not using the option.",
       NULL, NULL, HA_RECOVER_NONE, &maria_recover_typelib);

203 204 205 206 207 208 209 210 211 212 213
/*
  repair_threads: per-session (THDVAR) variable. Default 1, minimum 1.
*/
static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
       "Number of threads to use when repairing maria tables. The value of 1 "
       "disables parallel repair.",
       0, 0, 1, 1, ~0L, 1);

/*
  sort_buffer_size: per-session (THDVAR) variable. Default 8192*1024,
  minimum 4.
*/
static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
       "The buffer that is allocated when sorting the index when doing a "
       "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.",
       0, 0, 8192*1024, 4, ~0L, 1);

/*
  stats_method: per-session (THDVAR) enum variable; its values are named by
  maria_stats_method_typelib and the default is 0, the first name in it.
*/
static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
       "Specifies how maria index statistics collection code should treat "
       "NULLs. Possible values are \"nulls_unequal\", \"nulls_equal\", "
       "and \"nulls_ignored\".",
       0, 0, 0, &maria_stats_method_typelib);
217

218 219
static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
       "Controls syncing directory after log file growth and new file "
unknown's avatar
unknown committed
220 221
       "creation. Possible values are \"never\", \"newfile\" and "
       "\"always\").", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
222 223
       &maria_sync_log_dir_typelib);

224 225 226 227
/*****************************************************************************
** MARIA tables
*****************************************************************************/

228 229 230
/*
  Allocate a new ha_maria handler for the given table share, using placement
  new on the supplied memory root.
*/
static handler *maria_create_handler(handlerton *hton,
                                     TABLE_SHARE * table,
                                     MEM_ROOT *mem_root)
{
  handler *created= new (mem_root) ha_maria(hton, table);
  return created;
}


/*
  Collect messages printed by maria_check routines

  SYNOPSIS
    _ma_check_print_msg()
      param      in    Check descriptor; thd, testflag, db_name, table_name
                       and op_name are read from it.
      msg_type   in    Message class ("error", "info", "warning"); also used
                       as the DBUG keyword.
      fmt        in    my_vsnprintf() format string.
      args       in    Arguments for fmt.

  DESCRIPTION
    The message is formatted into a local buffer and then routed to one of
    three places:
    - the server error log, if the connection has no usable vio;
    - my_message(ER_NOT_KEYFILE), if one of T_CREATE_MISSING_KEYS,
      T_SAFE_REPAIR or T_AUTO_REPAIR is set in param->testflag;
    - otherwise a (table, op, msg_type, msg_text) row sent to the client
      through the protocol; if that write fails the message goes to the
      error log instead.
*/

static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
                                const char *fmt, va_list args)
{
  THD *thd= (THD *) param->thd;
  Protocol *protocol= thd->protocol;
  uint length, msg_length;
  char msgbuf[HA_MAX_MSG_BUF];
  char name[NAME_LEN * 2 + 2];

  msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
  msgbuf[sizeof(msgbuf) - 1]= 0;                // healthy paranoia

  DBUG_PRINT(msg_type, ("message: %s", msgbuf));

  if (!thd->vio_ok())
  {
    /*
      msgbuf is already formatted and may contain '%' characters (for
      example from table or file names), so it must never be used as a
      format string itself.
    */
    sql_print_error("%s", msgbuf);
    return;
  }

  if (param->testflag &
      (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
  {
    my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME));
    return;
  }
  /* Build "db.table"; strxmov() returns a pointer to the end of the result */
  length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
                          NullS) - name);
  /*
    TODO: switch from protocol to push_warning here. The main reason we didn't
    it yet is parallel repair. Due to following trace:
    ma_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.

    Also we likely need to lock mutex here (in both cases with protocol and
    push_warning).
  */
  protocol->prepare_for_resend();
  protocol->store(name, length, system_charset_info);
  protocol->store(param->op_name, system_charset_info);
  protocol->store(msg_type, system_charset_info);
  protocol->store(msgbuf, msg_length, system_charset_info);
  if (protocol->write())
    sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
                    msgbuf);
  return;
}

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300

/*
  Convert TABLE object to Maria key and column definition

  SYNOPSIS
    table2maria()
      table_arg   in     TABLE object.
      row_type    in     Row format. For BLOCK_RECORD, HA_OPTION_PACK_RECORD
                         is added to the table options used when choosing
                         column types.
      keydef_out  out    Maria key definition.
      recinfo_out out    Maria column definition.
      records_out out    Number of column definitions stored in *recinfo_out.
      create_info out    Its null_bytes member is set from the table share.

  DESCRIPTION
    This function will allocate and initialize Maria key and column
    definition for further use in ma_create or for a check for underlying
    table conformance in merge engine.

    The caller needs to free *recinfo_out after use. Since *recinfo_out
    and *keydef_out are allocated with a my_multi_malloc, *keydef_out
    is freed automatically when *recinfo_out is freed.

  RETURN VALUE
    0  OK
    # error code
*/

310 311 312 313
static int table2maria(TABLE *table_arg, data_file_type row_type,
                       MARIA_KEYDEF **keydef_out,
                       MARIA_COLUMNDEF **recinfo_out, uint *records_out,
                       MARIA_CREATE_INFO *create_info)
{
  uint i, j, recpos, minpos, fieldpos, temp_length, length;
  enum ha_base_keytype type= HA_KEYTYPE_BINARY;
  uchar *record;
  KEY *pos;
  MARIA_KEYDEF *keydef;
  MARIA_COLUMNDEF *recinfo, *recinfo_pos;
  HA_KEYSEG *keyseg;
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
  DBUG_ENTER("table2maria");

  /* Block-record rows are always handled as packed records */
  if (row_type == BLOCK_RECORD)
    options|= HA_OPTION_PACK_RECORD;

  /* One allocation holds column definitions, key definitions and segments */
  if (!(my_multi_malloc(MYF(MY_WME),
          recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
          keydef_out, share->keys * sizeof(MARIA_KEYDEF),
          &keyseg,
          (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
          NullS)))
    DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
  keydef= *keydef_out;
  recinfo= *recinfo_out;

  /* Pass 1: translate every key and its key parts */
  pos= table_arg->key_info;
  for (i= 0; i < share->keys; i++, pos++)
  {
    MARIA_KEYDEF *key_def= keydef + i;

    key_def->flag= (uint16) (pos->flags & (HA_NOSAME | HA_FULLTEXT |
                                           HA_SPATIAL));
    if (pos->algorithm == HA_KEY_ALG_UNDEF)
      key_def->key_alg= (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE :
                         HA_KEY_ALG_BTREE);
    else
      key_def->key_alg= pos->algorithm;
    key_def->block_length= pos->block_size;
    key_def->seg= keyseg;
    key_def->keysegs= pos->key_parts;

    for (j= 0; j < pos->key_parts; j++)
    {
      /* key_def->seg is keyseg, so this is key_def->seg[j] */
      HA_KEYSEG *seg= keyseg + j;
      Field *field= pos->key_part[j].field;

      type= field->key_type();
      seg->flag= pos->key_part[j].key_part_flag;

      if (options & HA_OPTION_PACK_KEYS ||
          (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
                         HA_SPACE_PACK_USED)))
      {
        if (pos->key_part[j].length > 8 &&
            (type == HA_KEYTYPE_TEXT ||
             type == HA_KEYTYPE_NUM ||
             (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
        {
          /* No blobs here */
          if (j == 0)
            key_def->flag|= HA_PACK_KEY;
          if (!(field->flags & ZEROFILL_FLAG) &&
              (field->type() == MYSQL_TYPE_STRING ||
               field->type() == MYSQL_TYPE_VAR_STRING ||
               ((int) (pos->key_part[j].length - field->decimals())) >= 4))
            seg->flag|= HA_SPACE_PACK;
        }
        else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
          key_def->flag|= HA_BINARY_PACK_KEY;
      }

      seg->type= (int) type;
      seg->start= pos->key_part[j].offset;
      seg->length= pos->key_part[j].length;
      seg->bit_length= 0;
      seg->bit_end= 0;
      seg->bit_start= 0;
      seg->bit_pos= 0;
      seg->language= field->charset()->number;

      if (field->null_ptr)
      {
        seg->null_bit= field->null_bit;
        seg->null_pos= (uint) (field->null_ptr-
                               (uchar*) table_arg->record[0]);
      }
      else
      {
        seg->null_bit= 0;
        seg->null_pos= 0;
      }

      if (field->type() == MYSQL_TYPE_BLOB ||
          field->type() == MYSQL_TYPE_GEOMETRY)
      {
        seg->flag|= HA_BLOB_PART;
        /* save number of bytes used to pack length */
        seg->bit_start= (uint) (field->pack_length() -
                                share->blob_ptr_size);
      }
      else if (field->type() == MYSQL_TYPE_BIT)
      {
        seg->bit_length= ((Field_bit *) field)->bit_len;
        seg->bit_start= ((Field_bit *) field)->bit_ofs;
        seg->bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
                              (uchar*) table_arg->record[0]);
      }
    }
    keyseg+= pos->key_parts;
  }
  if (table_arg->found_next_number_field)
    keydef[share->next_number_index].flag|= HA_AUTO_KEY;

  /* Pass 2: emit column definitions in increasing record offset order */
  record= table_arg->record[0];
  recpos= 0;
  recinfo_pos= recinfo;
  create_info->null_bytes= table_arg->s->null_bytes;

  while (recpos < (uint) share->reclength)
  {
    Field **field, *found= 0;
    minpos= share->reclength;
    length= 0;

    /*
      Pick the field with the lowest offset at or after recpos; on equal
      offsets the shorter field is preferred.
    */
    for (field= table_arg->field; *field; field++)
    {
      fieldpos= (*field)->offset(record);
      if (fieldpos < recpos || fieldpos > minpos)
        continue;
      temp_length= (*field)->pack_length_in_rec();
      if (!temp_length)
        continue; /* Skip null-fields */
      if (! found || fieldpos < minpos ||
          (fieldpos == minpos && temp_length < length))
      {
        minpos= fieldpos;
        found= *field;
        length= temp_length;
      }
    }
    DBUG_PRINT("loop", ("found: 0x%lx  recpos: %d  minpos: %d  length: %d",
                        (long) found, recpos, minpos, length));
    if (!found)
      break;

    /* Choose the Maria column type */
    if (found->flags & BLOB_FLAG)
      recinfo_pos->type= FIELD_BLOB;
    else if (found->type() == MYSQL_TYPE_VARCHAR)
      recinfo_pos->type= FIELD_VARCHAR;
    else if (!(options & HA_OPTION_PACK_RECORD) ||
             (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
      recinfo_pos->type= FIELD_NORMAL;
    else if (found->zero_pack())
      recinfo_pos->type= FIELD_SKIP_ZERO;
    else if (length <= 3 || (found->flags & ZEROFILL_FLAG))
      recinfo_pos->type= FIELD_NORMAL;
    else if (found->type() == MYSQL_TYPE_STRING ||
             found->type() == MYSQL_TYPE_VAR_STRING)
      recinfo_pos->type= FIELD_SKIP_ENDSPACE;
    else
      recinfo_pos->type= FIELD_SKIP_PRESPACE;

    if (found->null_ptr)
    {
      recinfo_pos->null_bit= found->null_bit;
      recinfo_pos->null_pos= (uint) (found->null_ptr -
                                     (uchar*) table_arg->record[0]);
    }
    else
    {
      recinfo_pos->null_bit= 0;
      recinfo_pos->null_pos= 0;
    }
    recinfo_pos->length= (uint16) length;
    recinfo_pos++;
    recpos= minpos + length;
    DBUG_PRINT("loop", ("length: %d  type: %d",
                        recinfo_pos[-1].length,recinfo_pos[-1].type));
  }
  *records_out= (uint) (recinfo_pos - recinfo);
  DBUG_RETURN(0);
}


/*
  Check for underlying table conformance

  SYNOPSIS
    maria_check_definition()
      t1_keyinfo       in    First table key definition
      t1_recinfo       in    First table record definition
      t1_keys          in    Number of keys in first table
      t1_recs          in    Number of records in first table
      t2_keyinfo       in    Second table key definition
      t2_recinfo       in    Second table record definition
      t2_keys          in    Number of keys in second table
      t2_recs          in    Number of records in second table
      strict           in    Strict check switch

  DESCRIPTION
    This function compares two Maria definitions. By intention it was done
    to compare merge table definition against underlying table definition.
    It may also be used to compare dot-frm and MAI definitions of Maria
    table as well to compare different Maria table definitions.

    For merge table it is not required that number of keys in merge table
    must exactly match number of keys in underlying table. When calling this
    function for underlying table conformance check, 'strict' flag must be
    set to false, and converted merge definition must be passed as t1_*.

    Otherwise 'strict' flag must be set to 1 and it is not required to pass
    converted dot-frm definition as t1_*.

  RETURN VALUE
    0 - Equal definitions.
    1 - Different definitions.

519 520 521 522
  TODO
    - compare FULLTEXT keys;
    - compare SPATIAL keys;
    - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
      (should be correctly detected in table2maria).
*/
525

526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550
int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
                           MARIA_COLUMNDEF *t1_recinfo,
                           uint t1_keys, uint t1_recs,
                           MARIA_KEYDEF *t2_keyinfo,
                           MARIA_COLUMNDEF *t2_recinfo,
                           uint t2_keys, uint t2_recs, bool strict)
{
  uint i, j;
  bool key_count_differs;
  DBUG_ENTER("maria_check_definition");

  /* In non-strict mode t2 is allowed to have more keys than t1 */
  if (strict)
    key_count_differs= (t1_keys != t2_keys);
  else
    key_count_differs= (t1_keys > t2_keys);
  if (key_count_differs)
  {
    DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
                         t1_keys, t2_keys));
    DBUG_RETURN(1);
  }
  if (t1_recs != t2_recs)
  {
    DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
                         t1_recs, t2_recs));
    DBUG_RETURN(1);
  }

  /* Compare the first t1_keys key definitions */
  for (i= 0; i < t1_keys; i++)
  {
    MARIA_KEYDEF *key1= t1_keyinfo + i;
    MARIA_KEYDEF *key2= t2_keyinfo + i;
    HA_KEYSEG *t1_keysegs= key1->seg;
    HA_KEYSEG *t2_keysegs= key2->seg;

    /* FULLTEXT keys: only check that both sides are (or are not) FULLTEXT */
    if ((key1->flag & HA_FULLTEXT) || (key2->flag & HA_FULLTEXT))
    {
      if ((key1->flag & HA_FULLTEXT) && (key2->flag & HA_FULLTEXT))
        continue;
      DBUG_PRINT("error", ("Key %d has different definition", i));
      DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
                           test(key1->flag & HA_FULLTEXT),
                           test(key2->flag & HA_FULLTEXT)));
      DBUG_RETURN(1);
    }
    /* SPATIAL keys: same treatment as FULLTEXT */
    if ((key1->flag & HA_SPATIAL) || (key2->flag & HA_SPATIAL))
    {
      if ((key1->flag & HA_SPATIAL) && (key2->flag & HA_SPATIAL))
        continue;
      DBUG_PRINT("error", ("Key %d has different definition", i));
      DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
                           test(key1->flag & HA_SPATIAL),
                           test(key2->flag & HA_SPATIAL)));
      DBUG_RETURN(1);
    }
    if (key1->keysegs != key2->keysegs ||
        key1->key_alg != key2->key_alg)
    {
      DBUG_PRINT("error", ("Key %d has different definition", i));
      DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
                           key1->keysegs, key1->key_alg));
      DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
                           key2->keysegs, key2->key_alg));
      DBUG_RETURN(1);
    }

    /* Walk the key segments from the last one down to the first */
    for (j=  key1->keysegs; j--;)
    {
      HA_KEYSEG *seg1= t1_keysegs + j;
      HA_KEYSEG *seg2= t2_keysegs + j;
      uint8 t1_keysegs_j__type= seg1->type;
      /*
        Table migration from 4.1 to 5.1. In 5.1 a *TEXT key part is
        always HA_KEYTYPE_VARTEXT2. In 4.1 we had only the equivalent of
        HA_KEYTYPE_VARTEXT1. Since we treat both the same on MyISAM
        level, we can ignore a mismatch between these types.
      */
      if ((seg1->flag & HA_BLOB_PART) && (seg2->flag & HA_BLOB_PART))
      {
        if ((t1_keysegs_j__type == HA_KEYTYPE_VARTEXT2) &&
            (seg2->type == HA_KEYTYPE_VARTEXT1))
          t1_keysegs_j__type= HA_KEYTYPE_VARTEXT1; /* purecov: tested */
        else if ((t1_keysegs_j__type == HA_KEYTYPE_VARBINARY2) &&
                 (seg2->type == HA_KEYTYPE_VARBINARY1))
          t1_keysegs_j__type= HA_KEYTYPE_VARBINARY1; /* purecov: inspected */
      }

      if (t1_keysegs_j__type != seg2->type ||
          seg1->language != seg2->language ||
          seg1->null_bit != seg2->null_bit ||
          seg1->length != seg2->length)
      {
        DBUG_PRINT("error", ("Key segment %d (key %d) has different "
                             "definition", j, i));
        DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
                             "t1_length=%d",
                             seg1->type, seg1->language,
                             seg1->null_bit, seg1->length));
        DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
                             "t2_length=%d",
                             seg2->type, seg2->language,
                             seg2->null_bit, seg2->length));

        DBUG_RETURN(1);
      }
    }
  }

  /* Compare the column definitions one by one */
  for (i= 0; i < t1_recs; i++)
  {
    MARIA_COLUMNDEF *t1_rec= t1_recinfo + i;
    MARIA_COLUMNDEF *t2_rec= t2_recinfo + i;
    /*
      FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
      see NOTE1 in ma_create.c
    */
    if ((t1_rec->type != t2_rec->type &&
         !(t1_rec->type == (int) FIELD_SKIP_ZERO &&
           t1_rec->length == 1 &&
           t2_rec->type == (int) FIELD_NORMAL)) ||
        t1_rec->length != t2_rec->length ||
        t1_rec->null_bit != t2_rec->null_bit)
    {
      DBUG_PRINT("error", ("Field %d has different definition", i));
      DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
                           t1_rec->type, t1_rec->length, t1_rec->null_bit));
      DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
                           t2_rec->type, t2_rec->length, t2_rec->null_bit));
      DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}


651 652 653 654 655 656 657 658 659 660 661
/*
  Callbacks with C linkage: access to the killed flag of the checking thread
  and the error/info/warning reporters, which forward to
  _ma_check_print_msg().
*/
extern "C" {

/* Return the address of the 'killed' member of the THD stored in param */
volatile int *_ma_killed_ptr(HA_CHECK *param)
{
  THD *thd= (THD *) (param->thd);
  /* In theory Unsafe conversion, but should be ok for now */
  return (int*) &(thd->killed);
}


/* Report an error; flags the check as having printed an error */
void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
{
  va_list ap;
  DBUG_ENTER("_ma_check_print_error");

  param->out_flag |= O_DATA_LOST;
  param->error_printed |= 1;

  va_start(ap, fmt);
  _ma_check_print_msg(param, "error", fmt, ap);
  va_end(ap);
  DBUG_VOID_RETURN;
}


/* Report an informational message; no flags in param are changed */
void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
{
  va_list ap;
  DBUG_ENTER("_ma_check_print_info");

  va_start(ap, fmt);
  _ma_check_print_msg(param, "info", fmt, ap);
  va_end(ap);
  DBUG_VOID_RETURN;
}


/* Report a warning; flags the check as having printed a warning */
void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
{
  va_list ap;
  DBUG_ENTER("_ma_check_print_warning");

  param->out_flag |= O_DATA_LOST;
  param->warning_printed= 1;

  va_start(ap, fmt);
  _ma_check_print_msg(param, "warning", fmt, ap);
  va_end(ap);
  DBUG_VOID_RETURN;
}

}

698 699 700 701 702 703 704
/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and with repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_REPAIR    1
/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and without repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR 2
/**
  Neither BULK_INSERT_SINGLE_UNDO_AND_REPAIR nor
  BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR.
*/
#define BULK_INSERT_NONE      0
713

714 715
/*
  Construct a handler with no open Maria file (file is 0), the fixed set of
  table flags below, index enabling allowed and no bulk insert in progress.
*/
ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg)
  :handler(hton, table_arg),
   file(0),
   int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
                   HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
                   HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
                   HA_FILE_BASED | HA_CAN_GEOMETRY | MARIA_CANNOT_ROLLBACK |
                   HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS |
                   HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT),
   can_enable_indexes(1),
   bulk_insert_single_undo(BULK_INSERT_NONE)
{}


726 727 728 729 730 731 732 733 734
/*
  Clone this handler through handler::clone() and make the copy's file use
  the same state pointer as this handler's file.
  Returns the result of handler::clone() unchanged when it is NULL.
*/
handler *ha_maria::clone(MEM_ROOT *mem_root)
{
  ha_maria *copy= static_cast <ha_maria *>(handler::clone(mem_root));
  if (!copy)
    return copy;
  copy->file->state= file->state;
  return copy;
}


735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
/*
  NullS-terminated list of file extensions of a Maria table, as returned by
  ha_maria::bas_ext().
*/
static const char *ha_maria_exts[]=
{
  MARIA_NAME_IEXT,
  MARIA_NAME_DEXT,
  NullS
};


/* Return the NullS-terminated list of file extensions of a Maria table */
const char **ha_maria::bas_ext() const
{
  const char **extensions= ha_maria_exts;
  return extensions;
}


/*
  Return the display name of the index type of key 'key_number'.
  The FULLTEXT and SPATIAL flags take precedence over the key algorithm;
  anything that is not an R-tree is reported as "BTREE".
*/
const char *ha_maria::index_type(uint key_number)
{
  const KEY *key= table->key_info + key_number;

  if (key->flags & HA_FULLTEXT)
    return "FULLTEXT";
  if (key->flags & HA_SPATIAL)
    return "SPATIAL";
  if (key->algorithm == HA_KEY_ALG_RTREE)
    return "RTREE";
  return "BTREE";
}


unknown's avatar
unknown committed
760 761 762 763 764 765 766
/*
  Estimate the cost of a full table scan.
  For BLOCK_RECORD tables this is the data file length minus one block,
  divided by the larger of half a block and IO_SIZE, plus 2. All other row
  formats use the generic handler estimate.
*/
double ha_maria::scan_time()
{
  if (file->s->data_file_type != BLOCK_RECORD)
    return handler::scan_time();

  double data_bytes= ulonglong2double(stats.data_file_length -
                                      file->s->block_size);
  return data_bytes / max(file->s->block_size / 2, IO_SIZE) + 2;
}

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786
/*
  We need to be able to store at least two keys on an index page as the
  splitting algorithms depends on this. (With only one key on a page
  we also can't use any compression, which may make the index file much
  larger)
  We use HA_MAX_KEY_BUFF as this is a stack restriction imposed by the
  handler interface.

  We also need to reserve place for a record pointer (8) and 3 bytes
  per key segment to store the length of the segment + possible null bytes.
  These extra bytes are required here so that maria_create() will surely
  accept any key created with the returned key data storage length.
*/

/*
  Return the largest key length this handler accepts: the Maria maximum
  reduced by 8 bytes and by 3 bytes for each of HA_MAX_KEY_SEG segments,
  capped at HA_MAX_KEY_BUFF.
*/
uint ha_maria::max_supported_key_length() const
{
  uint usable= (maria_max_key_length() - 8 - HA_MAX_KEY_SEG*3);
  if (HA_MAX_KEY_BUFF < usable)
    return HA_MAX_KEY_BUFF;
  return usable;
}

unknown's avatar
unknown committed
787

788 789 790
#ifdef HAVE_REPLICATION
/*
  Read a table dump from the network and write it to the data file.

  The data file is positioned at offset 0 and every packet returned by
  my_net_read() is written to it, until a zero-length packet is received.

  RETURN
    0      ok
    -1     a packet could not be read
    other  errno of the failed write (-1 if errno was not set)
*/

int ha_maria::net_read_dump(NET * net)
{
  int data_fd= file->dfile.file;
  int error= 0;

  my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
  for (;;)
  {
    ulong packet_len= my_net_read(net);
    if (!packet_len)
      break;                                    // end of file
    if (packet_len == packet_error)
    {
      sql_print_error("ha_maria::net_read_dump - read error ");
      error= -1;
      break;
    }
    if (my_write(data_fd, (uchar *) net->read_pos, (uint) packet_len,
                 MYF(MY_WME | MY_FNABP)))
    {
      /* A failed write must never be reported as success */
      error= errno ? errno : -1;
      break;
    }
  }
  return error;
}


/*
  Send the contents of the data file to a file descriptor or to the client.

  SYNOPSIS
    dump()
    thd   Thread handle; thd->net is used when fd < 0
    fd    Descriptor to write to, or a negative value to send the data
          with my_net_write()

  DESCRIPTION
    share->state.state.data_file_length bytes are read from the start of
    the data file, one block (share->block_size) at a time. When sending
    over the network, a zero-length packet is sent after the data and the
    net buffer is flushed.

  RETURN
    0       ok
    ENOMEM  the block buffer could not be allocated
    EIO     the data file ended before data_file_length bytes were read,
            or a read failed without setting errno
    other   errno of the failed read/write (EPIPE if a write failed
            without setting errno)
*/

int ha_maria::dump(THD * thd, int fd)
{
  MARIA_SHARE *share= file->s;
  NET *net= &thd->net;
  uint block_size= share->block_size;
  my_off_t bytes_to_read= share->state.state.data_file_length;
  int data_fd= file->dfile.file;
  uchar *buf= (uchar *) my_malloc(block_size, MYF(MY_WME));
  if (!buf)
    return ENOMEM;

  int error= 0;
  my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
  while (bytes_to_read > 0)
  {
    size_t bytes= my_read(data_fd, buf, block_size, MYF(MY_WME));
    if (bytes == MY_FILE_ERROR)
    {
      /* A failed read must never be reported as success */
      error= errno ? errno : EIO;
      goto err;
    }
    if (!bytes)
    {
      /*
        The file is shorter than the recorded data file length. Without
        this test bytes_to_read would never decrease and the loop would
        not terminate.
      */
      error= EIO;
      goto err;
    }

    if (fd >= 0)
    {
      if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP)))
      {
        error= errno ? errno : EPIPE;
        goto err;
      }
    }
    else
    {
      if (my_net_write(net, buf, bytes))
      {
        error= errno ? errno : EPIPE;
        goto err;
      }
    }
    /*
      The last block read may extend past data_file_length; clamp so that
      the unsigned counter cannot wrap around.
    */
    bytes_to_read-= min((my_off_t) bytes, bytes_to_read);
  }

  if (fd < 0)
  {
    /* An empty packet tells the receiver that the dump is complete */
    if (my_net_write(net, (uchar*) "", 0))
      error= errno ? errno : EPIPE;
    net_flush(net);
  }

err:
  my_free((uchar*) buf, MYF(0));
  return error;
}
#endif                                          /* HAVE_REPLICATION */

        /* Name is here without an extension */

int ha_maria::open(const char *name, int mode, uint test_if_locked)
{
  uint i;
877 878 879 880 881 882 883 884 885 886

#ifdef NOT_USED
  /*
    If the user wants to have memory mapped data files, add an
    open_flag. Do not memory map temporary tables because they are
    expected to be inserted and thus extended a lot. Memory mapping is
    efficient for files that keep their size, but very inefficient for
    growing files. Using an open_flag instead of calling ma_extra(...
    HA_EXTRA_MMAP ...) after maxs_open() has the advantage that the
    mapping is not repeated for every open, but just done on the initial
unknown's avatar
unknown committed
887
    open, when the MyISAM share is created. Every time the server
888 889 890 891 892 893 894 895 896 897
    requires to open a new instance of a table it calls this method. We
    will always supply HA_OPEN_MMAP for a permanent table. However, the
    Maria storage engine will ignore this flag if this is a secondary
    open of a table that is in use by other threads already (if the
    Maria share exists already).
  */
  if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
    test_if_locked|= HA_OPEN_MMAP;
#endif

898 899 900 901 902 903
  if (unlikely(maria_recover_options != HA_RECOVER_NONE))
  {
    /* user asked to trigger a repair if table was not properly closed */
    test_if_locked|= HA_OPEN_ABORT_IF_CRASHED;
  }

904 905 906 907 908 909 910 911 912
  if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
    return (my_errno ? my_errno : -1);

  if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
    VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0));

  info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
  if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
    VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0));
913
  if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
914
    int_table_flags |= HA_REC_NOT_IN_SEQ;
unknown's avatar
unknown committed
915 916 917 918 919 920 921 922 923 924 925
  if (!file->s->base.born_transactional)
  {
    /*
      INSERT DELAYED cannot work with transactional tables (because it cannot
      stand up to "when client gets ok the data is safe on disk": the record
      may not even be inserted). In the future, we could enable it back (as a
      client doing INSERT DELAYED knows the specificities; but we then should
      make sure to regularly commit in the delayed_insert thread). 
    */
    int_table_flags|= HA_CAN_INSERT_DELAYED;
  }
926
  if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
927
    int_table_flags |= HA_HAS_NEW_CHECKSUM;
928 929 930

  for (i= 0; i < table->s->keys; i++)
  {
unknown's avatar
unknown committed
931
    plugin_ref parser= table->key_info[i].parser;
932 933
    if (table->key_info[i].flags & HA_USES_PARSER)
      file->s->keyinfo[i].parser=
unknown's avatar
unknown committed
934
        (struct st_mysql_ftparser *)plugin_decl(parser)->info;
935
    table->key_info[i].block_size= file->s->keyinfo[i].block_length;
936
  }
937 938
  my_errno= 0;
  return my_errno;
939 940 941 942 943 944 945 946 947 948 949
}


int ha_maria::close(void)
{
  MARIA_HA *tmp= file;
  file= 0;
  return maria_close(tmp);
}


unknown's avatar
unknown committed
950
/*
  Insert a row.

  SYNOPSIS
    write_row()
    buf   The row to write

  RETURN
    The error from update_auto_increment() if it failed, otherwise the
    result of maria_write().
*/

int ha_maria::write_row(uchar * buf)
{
  ha_statistic_increment(&SSV::ha_write_count);

  /* A timestamp column that is set on insert gets the current time */
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();

  /*
    With an auto_increment column, and when writing a changed or a new
    row, update the auto_increment value in the record first.
  */
  if (table->next_number_field && buf == table->record[0])
  {
    int autoinc_error= update_auto_increment();
    if (autoinc_error)
      return autoinc_error;
  }

  return maria_write(file, buf);
}


/*
  Check the table (CHECK TABLE).

  SYNOPSIS
    check()
    thd        Thread handle
    check_opt  Options; check_opt->flags is merged into the test flags

  DESCRIPTION
    Runs the size, delete-link and key checks and, depending on the flags
    and the row format, the data link check. On success the state info may
    be updated; on failure the table is marked crashed unless it already is
    or the thread was killed.
    The thread's proc info is set to "Checking table" for the duration of
    the call and restored on every return path taken after that.

  RETURN
    HA_ADMIN_INTERNAL_ERROR  the table is not open
    HA_ADMIN_ALREADY_DONE    nothing to check with the given flags
    HA_ADMIN_CORRUPT         a check failed
    HA_ADMIN_OK              ok
*/

int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
{
  if (!file)
    return HA_ADMIN_INTERNAL_ERROR;
  int error;
  HA_CHECK param;
  MARIA_SHARE *share= file->s;
  const char *old_proc_info= thd_proc_info(thd, "Checking table");

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "check";
  param.db_name= table->s->db.str;
  param.table_name= table->alias;
  param.testflag= check_opt->flags | T_CHECK | T_SILENT;
  param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);

  if (!(table->db_stat & HA_READ_ONLY))
    param.testflag |= T_STATISTICS;
  param.using_global_keycache= 1;

  if (!maria_is_crashed(file) &&
      (((param.testflag & T_CHECK_ONLY_CHANGED) &&
        !(share->state.changed & (STATE_CHANGED | STATE_CRASHED |
                                  STATE_CRASHED_ON_REPAIR)) &&
        share->state.open_count == 0) ||
       ((param.testflag & T_FAST) && (share->state.open_count ==
                                      (uint) (share->global_changed ? 1 :
                                              0)))))
  {
    /* Don't leave "Checking table" behind as the thread's proc info */
    thd_proc_info(thd, old_proc_info);
    return HA_ADMIN_ALREADY_DONE;
  }

  maria_chk_init_for_check(&param, file);
  (void) maria_chk_status(&param, file);                // Not fatal
  error= maria_chk_size(&param, file);
  if (!error)
    error|= maria_chk_del(&param, file, param.testflag);
  if (!error)
    error= maria_chk_key(&param, file);
  if (!error)
  {
    if ((!(param.testflag & T_QUICK) &&
         ((share->options &
           (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
          (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
    {
      /* T_MEDIUM is forced only while the data link check runs */
      ulonglong old_testflag= param.testflag;
      param.testflag |= T_MEDIUM;
      if (!(error= init_io_cache(&param.read_cache, file->dfile.file,
                                 my_default_record_cache_size, READ_CACHE,
                                 share->pack.header_length, 1, MYF(MY_WME))))
      {
        error= maria_chk_data_link(&param, file,
                                   test(param.testflag & T_EXTEND));
        end_io_cache(&(param.read_cache));
      }
      param.testflag= old_testflag;
    }
  }
  if (!error)
  {
    if ((share->state.changed & (STATE_CHANGED |
                                 STATE_CRASHED_ON_REPAIR |
                                 STATE_CRASHED | STATE_NOT_ANALYZED)) ||
        (param.testflag & T_STATISTICS) || maria_is_crashed(file))
    {
      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
      pthread_mutex_lock(&share->intern_lock);
      share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
                                STATE_CRASHED_ON_REPAIR);
      if (!(table->db_stat & HA_READ_ONLY))
        error= maria_update_state_info(&param, file,
                                       UPDATE_TIME | UPDATE_OPEN_COUNT |
                                       UPDATE_STAT);
      pthread_mutex_unlock(&share->intern_lock);
      info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
           HA_STATUS_CONST);
    }
  }
  else if (!maria_is_crashed(file) && !thd->killed)
  {
    maria_mark_crashed(file);
    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
  }

  thd_proc_info(thd, old_proc_info);
  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}


/*
  Analyze the key distribution in the table
  As the table may be only locked for read, we have to take into account that
  two threads may do an analyze at the same time!
*/

int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
{
  HA_CHECK param;
  MARIA_SHARE *share= file->s;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "analyze";
  param.db_name= table->s->db.str;
  param.table_name= table->alias;
  param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
                   T_DONT_CHECK_CHECKSUM);
  param.using_global_keycache= 1;
  param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);

  /* Nothing to do unless the table is flagged as not analyzed */
  if (!(share->state.changed & STATE_NOT_ANALYZED))
    return HA_ADMIN_ALREADY_DONE;

  int error= maria_chk_key(&param, file);
  if (error)
  {
    /* Key check failed: mark crashed unless already so or we were killed */
    if (!maria_is_crashed(file) && !thd->killed)
      maria_mark_crashed(file);
    return HA_ADMIN_CORRUPT;
  }

  /* Store the statistics in the state info */
  pthread_mutex_lock(&share->intern_lock);
  error= maria_update_state_info(&param, file, UPDATE_STAT);
  pthread_mutex_unlock(&share->intern_lock);

  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}


/*
  Restore the data file of the table from a backup directory and rebuild
  the table with repair().

  SYNOPSIS
    restore()
    thd        Thread handle; thd->lex->backup_dir names the directory
    check_opt  Not used

  RETURN
    HA_ADMIN_INVALID  the source path could not be formatted
    HA_ADMIN_FAILED   the data file could not be copied
    otherwise         the result of repair()
*/

int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt)
{
  HA_CHECK_OPT tmp_check_opt;
  char *backup_dir= thd->lex->backup_dir;
  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
  char table_name[FN_REFLEN];
  int error;
  const char *errmsg;
  /* Same "ha_maria::<method>" tag style as the other methods in this file */
  DBUG_ENTER("ha_maria::restore");

  VOID(tablename_to_filename(table->s->table_name.str, table_name,
                             sizeof(table_name)));

  if (fn_format_relative_to_data_home(src_path, table_name, backup_dir,
                                      MARIA_NAME_DEXT))
    DBUG_RETURN(HA_ADMIN_INVALID);

  strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
  if (my_copy(src_path, dst_path, MYF(MY_WME)))
  {
    error= HA_ADMIN_FAILED;
    errmsg= "Failed in my_copy (Error %d)";
    goto err;
  }

  /* Rebuild the table from the copied data file */
  tmp_check_opt.init();
  tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK;
  DBUG_RETURN(repair(thd, &tmp_check_opt));

err:
  {
    /* Report the failure through the check/repair message interface */
    HA_CHECK param;
    maria_chk_init(&param);
    param.thd= thd;
    param.op_name= "restore";
    param.db_name= table->s->db.str;
    param.table_name= table->s->table_name.str;
    param.testflag= 0;
    _ma_check_print_error(&param, errmsg, my_errno);
    DBUG_RETURN(error);
  }
}


/*
  Copy the .frm file and the data file of the table to a backup directory.

  SYNOPSIS
    backup()
    thd        Thread handle; thd->lex->backup_dir names the directory
    check_opt  Not used

  DESCRIPTION
    The file with extension reg_ext is copied first. The table's data is
    then flushed with _ma_flush_table_files() before the file with
    extension MARIA_NAME_DEXT is copied. Existing files in the backup
    directory are not overwritten (MY_DONT_OVERWRITE_FILE).

  RETURN
    HA_ADMIN_OK       ok
    HA_ADMIN_INVALID  a destination path could not be formatted
    HA_ADMIN_FAILED   flushing or copying failed
*/

int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt)
{
  char *backup_dir= thd->lex->backup_dir;
  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
  char table_name[FN_REFLEN];
  int error;
  const char *errmsg;
  DBUG_ENTER("ha_maria::backup");

  VOID(tablename_to_filename(table->s->table_name.str, table_name,
                             sizeof(table_name)));

  if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
                                      reg_ext))
  {
    errmsg= "Failed in fn_format() for .frm file (errno: %d)";
    error= HA_ADMIN_INVALID;
    goto err;
  }

  strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS);
  if (my_copy(src_path, dst_path,
              MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
  {
    error= HA_ADMIN_FAILED;
    errmsg= "Failed copying .frm file (errno: %d)";
    goto err;
  }

  /* Same destination, now with the data file extension */
  if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
                                      MARIA_NAME_DEXT))
  {
    /* The messages below used to name MyISAM's .MYD file */
    errmsg= "Failed in fn_format() for data file (errno: %d)";
    error= HA_ADMIN_INVALID;
    goto err;
  }

  strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
  /* Write out the table's data before the file is copied */
  if (_ma_flush_table_files(file, MARIA_FLUSH_DATA, FLUSH_FORCE_WRITE,
                            FLUSH_KEEP))
  {
    error= HA_ADMIN_FAILED;
    errmsg= "Failed in flush (Error %d)";
    goto err;
  }
  if (my_copy(src_path, dst_path,
              MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
  {
    errmsg= "Failed copying data file (errno: %d)";
    error= HA_ADMIN_FAILED;
    goto err;
  }
  DBUG_RETURN(HA_ADMIN_OK);

err:
  {
    /* Report the failure through the check/repair message interface */
    HA_CHECK param;
    maria_chk_init(&param);
    param.thd= thd;
    param.op_name= "backup";
    param.db_name= table->s->db.str;
    param.table_name= table->s->table_name.str;
    param.testflag= 0;
    _ma_check_print_error(&param, errmsg, my_errno);
    DBUG_RETURN(error);
  }
}


/*
  Repair the table (REPAIR TABLE).

  SYNOPSIS
    repair()
    thd        Thread handle
    check_opt  Options; T_EXTEND selects T_REP instead of T_REP_BY_SORT

  DESCRIPTION
    Calls repair(thd, &param, 0) and, as long as it fails with
    param.retry_repair set, retries with weaker options: first without
    T_QUICK (when T_RETRY_WITHOUT_QUICK and T_QUICK were both set), then
    with T_REP instead of T_REP_BY_SORT.

  RETURN
    HA_ADMIN_INTERNAL_ERROR  the table is not open
    otherwise                the result of the last repair attempt
*/

int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
{
  int error;
  HA_CHECK param;
  ha_rows start_records;

  if (!file)
    return HA_ADMIN_INTERNAL_ERROR;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "repair";
  param.testflag= ((check_opt->flags & ~(T_EXTEND)) |
                   T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
                   (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
  param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
  start_records= file->state->records;

  for (;;)
  {
    error= repair(thd, &param, 0);
    if (!error || !param.retry_repair)
      break;                                    // Done, or no retry wanted

    param.retry_repair= 0;
    if (test_all_bits(param.testflag,
                      (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
    {
      param.testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
      /* Ensure we don't lose any rows when retrying without quick */
      param.testflag|= T_SAFE_REPAIR;
      if (thd->vio_ok())
        _ma_check_print_info(&param, "Retrying repair without quick");
      else
        sql_print_information("Retrying repair of: '%s' without quick",
                              table->s->path.str);
    }
    else
    {
      param.testflag&= ~T_QUICK;
      if (!(param.testflag & T_REP_BY_SORT))
        break;                                  // Nothing weaker left to try
      param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP;
      sql_print_information("Retrying repair of: '%s' with keycache",
                            table->s->path.str);
    }
  }

  /* Log a change in the number of rows unless asked to be very silent */
  if (!error && start_records != file->state->records &&
      !(check_opt->flags & T_VERY_SILENT))
  {
    char llbuff[22], llbuff2[22];
    sql_print_information("Found %s of %s rows when repairing '%s'",
                          llstr(file->state->records, llbuff),
                          llstr(start_records, llbuff2),
                          table->s->path.str);
  }
  return error;
}

1268 1269 1270 1271
/*
  Run maria_zerofill() on the table.

  SYNOPSIS
    zerofill()
    thd        Thread handle
    check_opt  Options; check_opt->flags is merged into the test flags

  RETURN
    HA_ADMIN_INTERNAL_ERROR  the table is not open
    otherwise                the result of maria_zerofill()
*/

int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
{
  int error;
  HA_CHECK param;
  MARIA_SHARE *share;

  if (!file)
    return HA_ADMIN_INTERNAL_ERROR;

  /* Only dereference 'file' once it is known to be non-null */
  share= file->s;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "zerofill";
  param.testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
  param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
  error=maria_zerofill(&param, file, share->open_file_name.str);

  if (!error)
  {
    pthread_mutex_lock(&share->intern_lock);
    maria_update_state_info(&param, file, UPDATE_TIME | UPDATE_OPEN_COUNT);
    pthread_mutex_unlock(&share->intern_lock);
  }
  return error;
}

1293 1294 1295
/*
  Optimize the table (OPTIMIZE TABLE).

  SYNOPSIS
    optimize()
    thd        Thread handle
    check_opt  Options; check_opt->flags is merged into the test flags

  DESCRIPTION
    Calls repair(thd, &param, 1). If that fails with param.retry_repair
    set, a warning is logged and the repair is tried once more without
    T_REP_BY_SORT.

  RETURN
    HA_ADMIN_INTERNAL_ERROR  the table is not open
    otherwise                the result of the last repair attempt
*/

int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
{
  HA_CHECK param;

  if (!file)
    return HA_ADMIN_INTERNAL_ERROR;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "optimize";
  param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
                   T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
  param.sort_buffer_length= THDVAR(thd, sort_buffer_size);

  int error= repair(thd, &param, 1);
  if (!error || !param.retry_repair)
    return error;

  sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
                    my_errno, param.db_name, param.table_name);
  param.testflag&= ~T_REP_BY_SORT;
  return repair(thd, &param, 1);
}


1317
/*
  Do the actual work of a repair or optimize.

  SYNOPSIS
    repair()
    thd          Thread handle
    param        Check/repair parameters; param->testflag selects the work
    do_optimize  If set, rows are only rebuilt when the table state says
                 this is needed

  RETURN
    HA_ADMIN_OK            something was done and it succeeded
    HA_ADMIN_ALREADY_DONE  nothing had to be done
    HA_ADMIN_FAILED        the data file is not open, the table could not
                           be locked, the repair failed or the repair log
                           record could not be written
*/

int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
{
  int error= 0;
  ulonglong local_testflag= param->testflag;
  bool optimize_done= !do_optimize;
  bool statistics_done= 0;
  bool rebuild_rows;
  const char *old_proc_info= thd->proc_info;
  char fixed_name[FN_REFLEN];
  MARIA_SHARE *share= file->s;
  ha_rows rows_before= file->state->records;
  DBUG_ENTER("ha_maria::repair");

  /*
    Normally we get here with a properly opened table. A failed repair can
    be repeated with more elaborate options, but under special
    circumstances a repair fails in a way that closes the data file
    without being able to re-open it; file->dfile is then set to -1.
    Another repair must not be tried without an open data file.
    (Bug #25289)
  */
  if (file->dfile.file == -1)
  {
    sql_print_information("Retrying repair of: '%s' failed. "
                          "Please try REPAIR EXTENDED or maria_chk",
                          table->s->path.str);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }

  /*
    If transactions were not enabled for a transactional table then
    file->s->status is not up to date. repair_by_sort needs it to be.
  */
  if (share->base.born_transactional && !share->now_transactional)
    _ma_copy_nontrans_state_information(file);

  param->db_name= table->s->db.str;
  param->table_name= table->alias;
  param->tmpfile_createflag= O_RDWR | O_TRUNC;
  param->using_global_keycache= 1;
  param->thd= thd;
  param->tmpdir= &mysql_tmpdir_list;
  param->out_flag= 0;
  strmov(fixed_name, share->open_file_name.str);

  /* The table is only locked here if LOCK TABLE was not used */
  if (!thd->locked_tables &&
      maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
  {
    _ma_check_print_error(param, ER(ER_CANT_LOCK), my_errno);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }

  /*
    A plain repair always rebuilds the rows. An optimize only does it when
    the table state shows there is something to gain.
  */
  rebuild_rows= (!do_optimize ||
                 (((share->data_file_type == BLOCK_RECORD) ?
                   (share->state.changed & STATE_NOT_OPTIMIZED_ROWS) :
                   (file->state->del ||
                    share->state.split != file->state->records)) &&
                  (!(param->testflag & T_QUICK) ||
                   (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
                                            STATE_NOT_OPTIMIZED_ROWS)))));
  if (rebuild_rows)
  {
    ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
                        maria_get_mask_all_keys_active(share->base.keys) :
                        share->state.key_map);
    ulonglong save_testflag= param->testflag;
    const bool repair_by_sort=
      (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
       (local_testflag & T_REP_BY_SORT));

    if (!repair_by_sort)
    {
      thd_proc_info(thd, "Repair with keycache");
      param->testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL);
      error= maria_repair(param, file, fixed_name,
                          test(param->testflag & T_QUICK));
    }
    else
    {
      local_testflag |= T_STATISTICS;
      param->testflag |= T_STATISTICS;           // We get this for free
      statistics_done= 1;
      /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
      if (THDVAR(thd,repair_threads) > 1 &&
          share->data_file_type != BLOCK_RECORD)
      {
        char buf[40];
        /* TODO: respect maria_repair_threads variable */
        my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
        thd_proc_info(thd, buf);
        param->testflag|= T_REP_PARALLEL;
        error= maria_repair_parallel(param, file, fixed_name,
                                     test(param->testflag & T_QUICK));
        /* proc_info pointed to the local buffer; point it elsewhere */
        thd_proc_info(thd, "Repair done");
      }
      else
      {
        thd_proc_info(thd, "Repair by sorting");
        param->testflag|= T_REP_BY_SORT;
        error= maria_repair_by_sort(param, file, fixed_name,
                                    test(param->testflag & T_QUICK));
      }
    }
    /* Restore the caller's flags, keeping only a retry request */
    param->testflag= save_testflag | (param->testflag & T_RETRY_WITHOUT_QUICK);
    optimize_done= 1;
  }

  if (!error)
  {
    if ((local_testflag & T_SORT_INDEX) &&
        (share->state.changed & STATE_NOT_SORTED_PAGES))
    {
      optimize_done= 1;
      thd_proc_info(thd, "Sorting index");
      error= maria_sort_index(param, file, fixed_name);
    }
    if (!statistics_done && (local_testflag & T_STATISTICS))
    {
      if (!(share->state.changed & STATE_NOT_ANALYZED))
        local_testflag &= ~T_STATISTICS;        // Don't update statistics
      else
      {
        optimize_done= 1;
        thd_proc_info(thd, "Analyzing");
        error= maria_chk_key(param, file);
      }
    }
  }

  thd_proc_info(thd, "Saving state");
  pthread_mutex_lock(&share->intern_lock);
  if (error)
  {
    maria_mark_crashed_on_repair(file);
    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    maria_update_state_info(param, file, 0);
  }
  else
  {
    if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
    {
      share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
                                STATE_CRASHED_ON_REPAIR);
      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    }
    /*
      Repair updates share->state.state; make sure file->state is up to
      date as well.
    */
    if (file->state != &share->state.state)
      *file->state= share->state.state;
    if (share->base.auto_key)
      _ma_update_auto_increment_key(param, file, 1);
    if (optimize_done)
      error= maria_update_state_info(param, file,
                                     UPDATE_TIME | UPDATE_OPEN_COUNT |
                                     ((local_testflag & T_STATISTICS) ?
                                      UPDATE_STAT : 0));
    info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
         HA_STATUS_CONST);
    if (rows_before != file->state->records &&
        !(param->testflag & T_VERY_SILENT))
    {
      char llbuff[22], llbuff2[22];
      _ma_check_print_warning(param, "Number of rows changed from %s to %s",
                              llstr(rows_before, llbuff),
                              llstr(file->state->records, llbuff2));
      /* Abort if the warning was converted to an error */
      if (current_thd->is_error())
        error= 1;
    }
  }
  pthread_mutex_unlock(&share->intern_lock);

  thd_proc_info(thd, old_proc_info);
  if (!thd->locked_tables)
    maria_lock_database(file, F_UNLCK);

  if (error)
    DBUG_RETURN(HA_ADMIN_FAILED);
  if (!optimize_done)
    DBUG_RETURN(HA_ADMIN_ALREADY_DONE);
  DBUG_RETURN(write_log_record_for_repair(param, file) ? HA_ADMIN_FAILED :
              HA_ADMIN_OK);
}


/*
  Assign table indexes to a specific key cache.
*/

int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
{
  /*
    The implementation below is compiled out ("#if 0"), so this method
    currently always returns HA_ADMIN_NOT_IMPLEMENTED.
  */
#if 0 && NOT_IMPLEMENTED
  PAGECACHE *new_pagecache= check_opt->pagecache;
  const char *errmsg= 0;
  /*
    Declared at function scope because errmsg may point into it and is
    used after the block that formats the message has ended.
  */
  char buf[STRING_BUFFER_USUAL_SIZE];
  int error= HA_ADMIN_OK;
  ulonglong map;
  TABLE_LIST *table_list= table->pos_in_table_list;
  DBUG_ENTER("ha_maria::assign_to_keycache");


  table->keys_in_use_for_query.clear_all();

  if (table_list->process_index_hints(table))
    DBUG_RETURN(HA_ADMIN_FAILED);
  /* Use all keys unless the index hints selected some */
  map= ~(ulonglong) 0;
  if (!table->keys_in_use_for_query.is_clear_all())
    map= table->keys_in_use_for_query.to_ulonglong();

  if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
  {
    my_snprintf(buf, sizeof(buf),
                "Failed to flush to index file (errno: %d)", error);
    errmsg= buf;
    error= HA_ADMIN_CORRUPT;
  }

  if (error != HA_ADMIN_OK)
  {
    /* Send error to user */
    HA_CHECK param;
    maria_chk_init(&param);
    param.thd= thd;
    param.op_name= "assign_to_keycache";
    param.db_name= table->s->db.str;
    param.table_name= table->s->table_name.str;
    param.testflag= 0;
    _ma_check_print_error(&param, errmsg);
  }
  DBUG_RETURN(error);
#else
  return  HA_ADMIN_NOT_IMPLEMENTED;
#endif
}


/*
  Preload pages of the index file for a table into the key cache.
*/

int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
{
  ulonglong map;
  TABLE_LIST *table_list= table->pos_in_table_list;

  DBUG_ENTER("ha_maria::preload_keys");

  table->keys_in_use_for_query.clear_all();

  if (table_list->process_index_hints(table))
    DBUG_RETURN(HA_ADMIN_FAILED);

  /* Use all keys unless the index hints selected some */
  if (table->keys_in_use_for_query.is_clear_all())
    map= ~(ulonglong) 0;
  else
    map= table->keys_in_use_for_query.to_ulonglong();

  maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
              (void*) &thd->variables.preload_buff_size);

  int error= maria_preload(file, map, table_list->ignore_leaves);
  if (!error)
    DBUG_RETURN(HA_ADMIN_OK);

  /* Preloading failed: build a message and send it to the user */
  char buf[ERRMSGSIZE+20];
  const char *errmsg;

  if (error == HA_ERR_NON_UNIQUE_BLOCK_SIZE)
    errmsg= "Indexes use different block sizes";
  else if (error == HA_ERR_OUT_OF_MEM)
    errmsg= "Failed to allocate buffer";
  else
  {
    my_snprintf(buf, ERRMSGSIZE,
                "Failed to read from index file (errno: %d)", my_errno);
    errmsg= buf;
  }

  HA_CHECK param;
  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "preload_keys";
  param.db_name= table->s->db.str;
  param.table_name= table->s->table_name.str;
  param.testflag= 0;
  _ma_check_print_error(&param, errmsg);
  DBUG_RETURN(HA_ADMIN_FAILED);
}


/*
  Switch indexes off, optionally in a persistent way.

  SYNOPSIS
    disable_indexes()
    mode        mode of operation:
                HA_KEY_SWITCH_NONUNIQ      disable all non-unique keys
                HA_KEY_SWITCH_ALL          disable all keys
                HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
                HA_KEY_SWITCH_ALL_SAVE     dis. all keys and make persistent

  IMPLEMENTATION
    Only HA_KEY_SWITCH_ALL and HA_KEY_SWITCH_NONUNIQ_SAVE are handled.
    HA_KEY_SWITCH_NONUNIQ and HA_KEY_SWITCH_ALL_SAVE are not implemented.

  RETURN
    0  ok (HA_KEY_SWITCH_NONUNIQ_SAVE always reports 0)
    HA_ERR_WRONG_COMMAND  mode not implemented.
    Otherwise whatever maria_disable_indexes() returned.
*/

int ha_maria::disable_indexes(uint mode)
{
  /* Non-persistent switch of the whole key map, done by the engine. */
  if (mode == HA_KEY_SWITCH_ALL)
    return maria_disable_indexes(file);

  /* Every remaining mode except NONUNIQ_SAVE is unsupported. */
  if (mode != HA_KEY_SWITCH_NONUNIQ_SAVE)
    return HA_ERR_WRONG_COMMAND;

  maria_extra(file, HA_EXTRA_NO_KEYS, 0);
  /* Re-read the constant table info so the new key state is picked up. */
  info(HA_STATUS_CONST);
  return 0;
}


/*
  Switch indexes on, optionally in a persistent way.

  SYNOPSIS
    enable_indexes()
    mode        mode of operation:
                HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
                HA_KEY_SWITCH_ALL          enable all keys
                HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
                HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent

  DESCRIPTION
    Enable indexes, which might have been disabled by disable_indexes()
    before. The modes without _SAVE work only if both data and indexes are
    empty, since the MARIA repair would enable them persistently.
    To be sure in these cases, call handler::delete_all_rows() before.

  IMPLEMENTATION
    Only HA_KEY_SWITCH_ALL and HA_KEY_SWITCH_NONUNIQ_SAVE are handled.
    HA_KEY_SWITCH_NONUNIQ and HA_KEY_SWITCH_ALL_SAVE are not implemented.
    HA_KEY_SWITCH_NONUNIQ_SAVE rebuilds the missing keys through repair().

  RETURN
    0  ok (also when all keys were already active)
    !=0  Error, among others:
    HA_ERR_CRASHED  data or index is non-empty. Delete all rows and retry.
    HA_ERR_WRONG_COMMAND  mode not implemented.
*/

int ha_maria::enable_indexes(uint mode)
{
  int error;
  DBUG_PRINT("info", ("ha_maria::enable_indexes mode: %d", mode));

  /* Nothing to do when every key is already active. */
  if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
    return 0;

  if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
  {
    THD *thd= current_thd;
    HA_CHECK chk;
    const char *prev_proc_info= thd_proc_info(thd, "Creating index");

    maria_chk_init(&chk);
    chk.op_name= "recreating_index";
    chk.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
                   T_CREATE_MISSING_KEYS | T_SAFE_REPAIR);
    if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
    {
      bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
      /*
        create_rename_lsn must stay as it is: UNDO_BULK_INSERT should not
        be skipped if we crash while the repair is running.
      */
      chk.testflag|= T_NO_CREATE_RENAME_LSN;
    }
    chk.myf_rw&= ~MY_WAIT_IF_FULL;
    chk.sort_buffer_length= THDVAR(thd, sort_buffer_size);
    chk.stats_method= (enum_handler_stats_method) THDVAR(thd, stats_method);
    chk.tmpdir= &mysql_tmpdir_list;

    error= (repair(thd, &chk, 0) != HA_ADMIN_OK);
    if (error && chk.retry_repair)
    {
      sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, "
                        "retrying",
                        my_errno, chk.db_name, chk.table_name);
      /* Not expected to happen in normal operation */
      DBUG_ASSERT(0);
      /* Repair by sort did not work; fall back to the standard method. */
      chk.testflag&= ~T_REP_BY_SORT;
      error= (repair(thd, &chk, 0) != HA_ADMIN_OK);
      /*
        When the second attempt succeeds, drop the error state left behind
        by the first one. The messages remain visible via SHOW WARNINGS.
      */
      if (!error)
        thd->clear_error();
    }
    info(HA_STATUS_CONST);
    thd_proc_info(thd, prev_proc_info);
  }
  else if (mode == HA_KEY_SWITCH_ALL)
  {
    /*
       No repair attempt on failure here: a repair could make the enabled
       state persistent, which mode==HA_KEY_SWITCH_ALL forbids.
    */
    error= maria_enable_indexes(file);
  }
  else
  {
    /* mode not implemented */
    error= HA_ERR_WRONG_COMMAND;
  }

  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_enable_index",
                  {
                    DBUG_PRINT("maria_crash_enable_index", ("now"));
                    DBUG_ABORT();
                  });
  return error;
}


/*
  Test if indexes are disabled.

  SYNOPSIS
    indexes_are_disabled()
      no parameters

  DESCRIPTION
    Thin wrapper: the answer comes straight from
    maria_indexes_are_disabled() for this handler's open table.

  RETURN
    0  indexes are not disabled
    1  all indexes are disabled
   [2  non-unique indexes are disabled - NOT YET IMPLEMENTED]
*/

int ha_maria::indexes_are_disabled(void)
{
  return maria_indexes_are_disabled(file);
}


/*
  Prepare for a many-rows insert operation,
  e.g. disable indexes (if they can be recreated fast) or
  activate special bulk-insert optimizations.

  SYNOPSIS
    start_bulk_insert(rows)
    rows        Rows to be inserted
                0 if we don't know

  DESCRIPTION
    - Turns on the row write cache when the row count is unknown or above
      MARIA_MIN_ROWS_TO_USE_WRITE_CACHE. The requested cache size is
      read_buff_size, reduced to avg_row_length * rows when that estimate
      is known (both factors non-zero) and smaller.
    - Records in can_enable_indexes whether all keys were active, and
      resets bulk_insert_single_undo.
    - Unless SPECIAL_SAFE_MODE is set: either disables the non-unique
      indexes of an empty, TL_WRITE-locked table (to be rebuilt by
      end_bulk_insert()), or sets up the bulk-insert key buffer.

  NOTICE
    Do not forget to call end_bulk_insert() later!
*/

void ha_maria::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("ha_maria::start_bulk_insert");
  THD *thd= current_thd;
  const ulong avg_row_length= table->s->avg_row_length;
  /*
    Start from the configured buffer size and only shrink it when we have a
    usable estimate. An unknown row count (rows == 0) or an unknown average
    row length must not turn into a request for a zero-sized cache.
    "rows <= size / avg_row_length" implies avg_row_length * rows <= size,
    so the product below can neither overflow nor be truncated.
  */
  ulong size= thd->variables.read_buff_size;
  if (rows && avg_row_length && rows <= size / avg_row_length)
    size= (ulong) (avg_row_length * rows);
  DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
                      (ulong) rows, size));

  /* don't enable row cache if too few rows */
  if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
    maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);

  can_enable_indexes= (maria_is_all_keys_active(file->s->state.key_map,
                                                file->s->base.keys));
  bulk_insert_single_undo= BULK_INSERT_NONE;

  if (!(specialflag & SPECIAL_SAFE_MODE))
  {
    /*
       Only disable old index if the table was empty and we are inserting
       a lot of rows.
       We should not do this for only a few rows as this is slower and
       we don't want to update the key statistics based on only a few rows.
       Index file rebuild requires an exclusive lock, so if versioning is on
       don't do it (see how ha_maria::store_lock() tries to predict repair).
    */
    if (file->state->records == 0 && can_enable_indexes &&
        (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
        (file->lock.type == TL_WRITE))
    {
      /**
         @todo for a single-row INSERT SELECT, we will go into repair, which
         is more costly (flushes, syncs) than a row write.
      */
      maria_disable_non_unique_index(file, rows);
      if (file->s->now_transactional)
      {
        bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
        write_log_record_for_bulk_insert(file);
        _ma_tmp_disable_logging_for_table(file, TRUE);
        /*
          Pages currently in the page cache have type PAGECACHE_LSN_PAGE, we
          are not allowed to overwrite them with PAGECACHE_PLAIN_PAGE, so
          throw them away. It is not losing data, because we just wrote and
          forced an UNDO which will for sure empty the table if we crash. The
          upcoming unique-key insertions however need a proper index, so we
          cannot leave the corrupted on-disk index file, thus we truncate it.
        */
        maria_delete_all_rows(file);
      }
    }
    else if (!file->bulk_insert &&
             (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
    {
      maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
    }
  }
  DBUG_VOID_RETURN;
}


/*
  end special bulk-insert optimizations,
  which have been activated by start_bulk_insert().

  SYNOPSIS
    end_bulk_insert()
    no arguments

  RETURN
    0     OK
    != 0  Error
*/

1867
int ha_maria::end_bulk_insert(bool table_will_be_deleted)
1868
{
unknown's avatar
unknown committed
1869 1870
  int err;
  DBUG_ENTER("ha_maria::end_bulk_insert");
1871
  maria_end_bulk_insert(file, table_will_be_deleted);
unknown's avatar
unknown committed
1872 1873
  if ((err= maria_extra(file, HA_EXTRA_NO_CACHE, 0)))
    goto end;
1874
  if (can_enable_indexes && !table_will_be_deleted)
unknown's avatar
unknown committed
1875 1876
    err= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
end:
1877
  if (bulk_insert_single_undo != BULK_INSERT_NONE)
unknown's avatar
unknown committed
1878 1879
  {
    DBUG_ASSERT(can_enable_indexes);
1880 1881 1882 1883 1884 1885 1886 1887
    /*
      Table was transactional just before start_bulk_insert().
      No need to flush pages if we did a repair (which already flushed).
    */
    err|=
      _ma_reenable_logging_for_table(file,
                                     bulk_insert_single_undo ==
                                     BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR);
unknown's avatar
unknown committed
1888 1889
  }
  DBUG_RETURN(err);
1890 1891 1892 1893 1894
}


/**
  Zerofill, check and (if needed) repair the table.

  A table flagged STATE_MOVED is zerofilled first; if that succeeds nothing
  else is done. Otherwise, and only when maria_recover_options is set, the
  table is checked (unless it is already marked crashed) and then repaired
  if it is crashed or the check failed. While this runs, thd->query is
  temporarily pointed at the table name; the switch and the restore are
  both done under LOCK_thread_count.

  @param  thd   connection on whose behalf the work is done

  @return false on success; true if the repair failed, or if automatic
          recovery is disabled (in which case the zerofill result, or 1
          when no zerofill was attempted, is what gets returned)
*/

bool ha_maria::check_and_repair(THD *thd)
{
  HA_CHECK_OPT check_opt;
  DBUG_ENTER("ha_maria::check_and_repair");

  check_opt.init();

  int zerofill_error= 1;
  if (file->s->state.changed & STATE_MOVED)
  {
    sql_print_information("Zerofilling table:   '%s'", table->s->path.str);
    zerofill_error= zerofill(thd, &check_opt);
    if (!zerofill_error)
      DBUG_RETURN(0);
  }

  /*
    Getting here means the table is crashed. Without maria_recover_options
    there is no automatic repair, so just report the state.
  */
  if (!maria_recover_options)
    DBUG_RETURN(zerofill_error);

  check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
  /* A quick check is only allowed when there are no deleted rows */
  if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
    check_opt.flags|= T_QUICK;

  char *saved_query= thd->query;
  uint saved_query_length= thd->query_length;
  pthread_mutex_lock(&LOCK_thread_count);
  thd->query= table->s->table_name.str;
  thd->query_length= table->s->table_name.length;
  pthread_mutex_unlock(&LOCK_thread_count);

  int needs_repair= maria_is_crashed(file);
  if (!needs_repair)
  {
    sql_print_warning("Checking table:   '%s'", table->s->path.str);
    needs_repair= check(thd, &check_opt);
  }

  bool repair_failed= false;
  if (needs_repair)
  {
    sql_print_warning("Recovering table: '%s'", table->s->path.str);
    check_opt.flags=
      ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
       (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
       T_AUTO_REPAIR);
    if (repair(thd, &check_opt))
      repair_failed= true;
  }

  pthread_mutex_lock(&LOCK_thread_count);
  thd->query= saved_query;
  thd->query_length= saved_query_length;
  pthread_mutex_unlock(&LOCK_thread_count);
  DBUG_RETURN(repair_failed);
}


bool ha_maria::is_crashed() const
{
1958
  return (file->s->state.changed & (STATE_CRASHED | STATE_MOVED) ||
1959 1960 1961
          (my_disable_locking && file->s->state.open_count));
}

1962 1963 1964 1965 1966 1967 1968 1969
/*
  Guard for operations not supported under a concurrent-insert lock.

  If the handler's lock type is TL_WRITE_CONCURRENT_INSERT, reports
  ER_CHECK_NOT_IMPLEMENTED with 'msg' and makes the *enclosing function*
  return 1. Note the hidden control flow: the macro can only be used inside
  ha_maria methods that return an integer and have 'file' in scope.
*/
#define CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING(msg) \
  do { \
    if (file->lock.type == TL_WRITE_CONCURRENT_INSERT) \
    { \
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), msg); \
      return 1; \
    } \
  } while(0)
1970

unknown's avatar
unknown committed
1971
int ha_maria::update_row(const uchar * old_data, uchar * new_data)
1972
{
1973
  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
1974
  ha_statistic_increment(&SSV::ha_update_count);
1975 1976 1977 1978 1979 1980
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
    table->timestamp_field->set_time();
  return maria_update(file, old_data, new_data);
}


unknown's avatar
unknown committed
1981
int ha_maria::delete_row(const uchar * buf)
1982
{
1983
  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
1984
  ha_statistic_increment(&SSV::ha_delete_count);
1985 1986 1987 1988
  return maria_delete(file, buf);
}


1989 1990 1991
/**
  Read a row through the active index, searching by key.

  Requires an index scan to have been initialised (inited == INDEX).
  Bumps ha_read_key_count and delegates to maria_rkey() on active_index.

  @param  buf          row buffer passed to maria_rkey()
  @param  key          search key
  @param  keypart_map  which key parts 'key' contains
  @param  find_flag    search mode

  @return result of maria_rkey(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::index_read_map(uchar * buf, const uchar * key,
                             key_part_map keypart_map,
                             enum ha_rkey_function find_flag)
{
  DBUG_ASSERT(inited == INDEX);
  ha_statistic_increment(&SSV::ha_read_key_count);

  const int rc= maria_rkey(file, buf, active_index, key, keypart_map,
                           find_flag);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


2001 2002 2003
/**
  Read a row through an explicitly given index, searching by key.

  Same as index_read_map() except that the index number comes from the
  caller instead of active_index, and no initialised index scan is
  asserted.

  @param  buf          row buffer passed to maria_rkey()
  @param  index        number of the index to search
  @param  key          search key
  @param  keypart_map  which key parts 'key' contains
  @param  find_flag    search mode

  @return result of maria_rkey(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
{
  ha_statistic_increment(&SSV::ha_read_key_count);

  const int rc= maria_rkey(file, buf, index, key, keypart_map, find_flag);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


2012 2013
/**
  Read a row through the active index using HA_READ_PREFIX_LAST.

  Requires an index scan to have been initialised (inited == INDEX).
  Bumps ha_read_key_count and delegates to maria_rkey().

  @param  buf          row buffer passed to maria_rkey()
  @param  key          search key (prefix)
  @param  keypart_map  which key parts 'key' contains

  @return result of maria_rkey(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
                                  key_part_map keypart_map)
{
  DBUG_ENTER("ha_maria::index_read_last_map");
  DBUG_ASSERT(inited == INDEX);
  ha_statistic_increment(&SSV::ha_read_key_count);

  const int rc= maria_rkey(file, buf, active_index, key, keypart_map,
                           HA_READ_PREFIX_LAST);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  DBUG_RETURN(rc);
}


unknown's avatar
unknown committed
2025
/**
  Step to the next row of the active index.

  Requires inited == INDEX. Bumps ha_read_next_count and calls
  maria_rnext().

  @param  buf   row buffer passed to maria_rnext()

  @return result of maria_rnext(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::index_next(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  ha_statistic_increment(&SSV::ha_read_next_count);

  const int rc= maria_rnext(file, buf, active_index);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


unknown's avatar
unknown committed
2035
/**
  Step to the previous row of the active index.

  Requires inited == INDEX. Bumps ha_read_prev_count and calls
  maria_rprev().

  @param  buf   row buffer passed to maria_rprev()

  @return result of maria_rprev(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::index_prev(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  ha_statistic_increment(&SSV::ha_read_prev_count);

  const int rc= maria_rprev(file, buf, active_index);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


unknown's avatar
unknown committed
2045
/**
  Position on the first row of the active index.

  Requires inited == INDEX. Bumps ha_read_first_count and calls
  maria_rfirst().

  @param  buf   row buffer passed to maria_rfirst()

  @return result of maria_rfirst(); table->status is set to
          STATUS_NOT_FOUND when it is non-zero and to 0 otherwise
*/

int ha_maria::index_first(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  ha_statistic_increment(&SSV::ha_read_first_count);

  const int rc= maria_rfirst(file, buf, active_index);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


unknown's avatar
unknown committed
2055
/**
  Position on the last row of the active index.

  Requires inited == INDEX. Bumps ha_read_last_count and calls
  maria_rlast().

  @param  buf   row buffer passed to maria_rlast()

  @return result of maria_rlast(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::index_last(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  ha_statistic_increment(&SSV::ha_read_last_count);

  const int rc= maria_rlast(file, buf, active_index);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


unknown's avatar
unknown committed
2065 2066
int ha_maria::index_next_same(uchar * buf,
                              const uchar *key __attribute__ ((unused)),
2067 2068
                              uint length __attribute__ ((unused)))
{
2069
  int error;
2070
  DBUG_ASSERT(inited == INDEX);
2071
  ha_statistic_increment(&SSV::ha_read_next_count);
2072 2073 2074 2075 2076 2077 2078 2079
  /*
    TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
    happens
  */
  do
  {
    error= maria_rnext_same(file,buf);
  } while (error == HA_ERR_RECORD_DELETED);
2080 2081 2082 2083 2084 2085 2086 2087 2088
  table->status= error ? STATUS_NOT_FOUND : 0;
  return error;
}


int ha_maria::rnd_init(bool scan)
{
  if (scan)
    return maria_scan_init(file);
2089
  return maria_reset(file);                        // Free buffers
2090 2091 2092
}


unknown's avatar
unknown committed
2093 2094 2095 2096 2097 2098 2099 2100
/**
  Finish reading rows without an index.

  Calls maria_scan_end() unconditionally.

  @return always 0
*/

int ha_maria::rnd_end()
{
  /* Safe to call even if no scan was ever started */
  maria_scan_end(file);
  return 0;
}


unknown's avatar
unknown committed
2101
/**
  Fetch the next row of a table scan.

  Bumps ha_read_rnd_next_count and calls maria_scan().

  @param  buf   row buffer passed to maria_scan()

  @return result of maria_scan(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::rnd_next(uchar *buf)
{
  ha_statistic_increment(&SSV::ha_read_rnd_next_count);

  const int rc= maria_scan(file, buf);
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


2110
int ha_maria::remember_rnd_pos()
2111
{
2112
  return (*file->s->scan_remember_pos)(file, &remember_pos);
2113 2114 2115
}


2116 2117 2118 2119 2120 2121 2122 2123
/**
  Go back to the scan position saved in remember_pos and read a row.

  Restores the position through the share's scan_restore_pos function
  pointer (its result, if any, is not looked at) and then calls rnd_next().

  @param  buf   row buffer passed to rnd_next()

  @return result of rnd_next()
*/

int ha_maria::restart_rnd_next(uchar *buf)
{
  (*file->s->scan_restore_pos)(file, remember_pos);
  return rnd_next(buf);
}


/**
  Read the row stored at a given position.

  Bumps ha_read_rnd_count, decodes the position from 'pos' (ref_length
  bytes, via my_get_ptr()) and calls maria_rrnd().

  @param  buf   row buffer passed to maria_rrnd()
  @param  pos   encoded row position

  @return result of maria_rrnd(); table->status is set to STATUS_NOT_FOUND
          when it is non-zero and to 0 otherwise
*/

int ha_maria::rnd_pos(uchar *buf, uchar *pos)
{
  ha_statistic_increment(&SSV::ha_read_rnd_count);

  const int rc= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


2132
void ha_maria::position(const uchar *record)
2133
{
2134 2135
  my_off_t row_position= maria_position(file);
  my_store_ptr(ref, ref_length, row_position);
2136 2137 2138
}


2139
/**
  Refresh handler statistics from the engine.

  Calls maria_status() once and copies the parts selected by 'flag':
   - HA_STATUS_VARIABLE: row counts, file lengths, check time, mean row
     length.
   - HA_STATUS_CONST: maximum file lengths, create time, ref_length, block
     size, and the TABLE_SHARE fields (keys in use, record offset,
     rec_per_key). The share fields from keys_in_use onwards are updated
     under share->mutex unless the table is a temporary one. Also sets
     data_file_name / index_file_name when the real file name differs from
     the generated one.
   - HA_STATUS_ERRKEY: errkey and dup_ref.
  update_time and auto_increment_value are refreshed on every call.

  @param  flag  bit mask of HA_STATUS_* values

  @return always 0
*/

int ha_maria::info(uint flag)
{
  MARIA_INFO engine_info;
  char expected_name[FN_REFLEN];

  (void) maria_status(file, &engine_info, flag);

  if (flag & HA_STATUS_VARIABLE)
  {
    stats.records=           engine_info.records;
    stats.deleted=           engine_info.deleted;
    stats.data_file_length=  engine_info.data_file_length;
    stats.index_file_length= engine_info.index_file_length;
    stats.delete_length=     engine_info.delete_length;
    stats.check_time=        engine_info.check_time;
    stats.mean_rec_length=   engine_info.mean_reclength;
  }

  if (flag & HA_STATUS_CONST)
  {
    TABLE_SHARE *share= table->s;
    /* Temporary tables are updated without taking the share mutex */
    const bool lock_share= (share->tmp_table == NO_TMP_TABLE);

    stats.max_data_file_length=  engine_info.max_data_file_length;
    stats.max_index_file_length= engine_info.max_index_file_length;
    stats.create_time= engine_info.create_time;
    ref_length= engine_info.reflength;
    share->db_options_in_use= engine_info.options;
    stats.block_size= maria_block_size;

    /* Update share */
    if (lock_share)
      pthread_mutex_lock(&share->mutex);
    share->keys_in_use.set_prefix(share->keys);
    share->keys_in_use.intersect_extended(engine_info.key_map);
    share->keys_for_keyread.intersect(share->keys_in_use);
    share->db_record_offset= engine_info.record_offset;
    if (share->key_parts)
    {
      /* Round each per-key-part estimate to the nearest integer */
      ulong *dst= table->key_info[0].rec_per_key;
      double *src= engine_info.rec_per_key;
      for (uint part= 0; part < share->key_parts; part++)
        dst[part]= (ulong) (src[part] + 0.5);
    }
    if (lock_share)
      pthread_mutex_unlock(&share->mutex);

    /*
       Point data_file_name and index_file_name at the symlink value when
       the table is symlinked (i.e. the real name differs from the
       generated name); otherwise leave them as 0.
    */
    data_file_name= index_file_name= 0;
    fn_format(expected_name, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
              MY_APPEND_EXT | MY_UNPACK_FILENAME);
    if (strcmp(expected_name, engine_info.data_file_name))
      data_file_name= engine_info.data_file_name;
    fn_format(expected_name, file->s->open_file_name.str, "", MARIA_NAME_IEXT,
              MY_APPEND_EXT | MY_UNPACK_FILENAME);
    if (strcmp(expected_name, engine_info.index_file_name))
      index_file_name= engine_info.index_file_name;
  }

  if (flag & HA_STATUS_ERRKEY)
  {
    errkey= engine_info.errkey;
    my_store_ptr(dup_ref, ref_length, engine_info.dup_key_pos);
  }

  /* Cheaper to refresh these every time than to test the flag */
  stats.update_time= engine_info.update_time;
  stats.auto_increment_value= engine_info.auto_increment;

  return 0;
}


int ha_maria::extra(enum ha_extra_function operation)
{
  if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
    return 0;
2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225

  /*
    We have to set file->trn here because in some cases we call
    extern_lock(F_UNLOCK) (which resets file->trn) followed by maria_close()
    without calling commit/rollback in between.  If file->trn is not set
    we can't remove file->share from the transaction list in the extra() call.
  */

  if (!file->trn &&
      (operation == HA_EXTRA_PREPARE_FOR_DROP ||
       operation == HA_EXTRA_PREPARE_FOR_RENAME))
  {
    THD *thd= table->in_use;
2226 2227
    TRN *trn= THD_TRN;
    _ma_set_trn_for_table(file, trn);
2228
  }
2229 2230 2231
  return maria_extra(file, operation, 0);
}

2232 2233 2234 2235
/**
  Reset the handler's engine state.

  @return result of maria_reset()
*/

int ha_maria::reset(void)
{
  return maria_reset(file);
}
2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248

/*
  Pass a hint that takes a cache size on to the engine.
  To be used with WRITE_CACHE and EXTRA_CACHE.

  HA_EXTRA_WRITE_CACHE is ignored (0 is returned) in SPECIAL_SAFE_MODE.
  Otherwise the result of maria_extra() is returned; it receives a pointer
  to the local copy of cache_size.
*/

int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
    return 0;
  return maria_extra(file, operation, (void*) &cache_size);
}


int ha_maria::delete_all_rows()
{
unknown's avatar
unknown committed
2249 2250 2251 2252 2253 2254 2255 2256 2257 2258
  if (file->s->now_transactional &&
      ((table->in_use->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) ||
       table->in_use->locked_tables))
  {
    /*
      We are not in autocommit mode or user have done LOCK TABLES.
      We must do the delete row by row to be able to rollback the command
    */
    return HA_ERR_WRONG_COMMAND;
  }
2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269
  return maria_delete_all_rows(file);
}


/**
  Drop a table by name.

  @param  name  table name, passed unchanged to maria_delete_table()

  @return result of maria_delete_table()
*/

int ha_maria::delete_table(const char *name)
{
  return maria_delete_table(name);
}

/**
  Lock or unlock the table for a statement, and tie it to the connection's
  Maria transaction.

  For tables created transactional (base.born_transactional):
   - on lock: creates the connection's TRN if there is none, attaches it to
     the table, counts the table as locked in the TRN, sets up the row
     state, and disables logging for the table if thd->transaction.on is
     off;
   - on unlock (F_UNLCK): re-enables logging, detaches the TRN from the
     table, and once the TRN's locked-table count drops to zero ends the
     transaction (a commit when MARIA_CANNOT_ROLLBACK is defined).
  Finally maria_lock_database() is called. Temporary tables get
  F_EXTRA_LCK instead of the requested lock type, except for F_UNLCK.

  @param  thd        connection
  @param  lock_type  F_UNLCK to unlock, any other value to lock

  @return HA_ERR_OUT_OF_MEM or 1 on failure of one of the steps above,
          otherwise the result of maria_lock_database()
*/

int ha_maria::external_lock(THD *thd, int lock_type)
{
  TRN *trn= THD_TRN;
  DBUG_ENTER("ha_maria::external_lock");
  /*
    now_transactional is deliberately not tested: it may differ between
    lock and unlock and would then confuse the reference counting.
    Skipping non-transactional tables is critical: user-visible temporary
    tables get an external_lock() when read/written for the first time, but
    no corresponding unlock (they just stay locked and are later dropped
    while locked); if a tmp table was transactional, "SELECT FROM non_tmp,
    tmp" would never commit as its "locked_tables" count would stay 1.
    When Maria has has_transactions()==TRUE, open_temporary_table()
    (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
    external_lock(F_UNLCK) will happen and we can then allow the user to
    create transactional temporary tables.
  */
  if (file->s->base.born_transactional)
  {
    if (lock_type == F_UNLCK)
    {
      /* End of transaction */

      /*
        Logging is always re-enabled; thd->transaction.on cannot be relied
        on as it is sometimes reset to true after unlocking (see
        mysql_truncate() for a partitioned table based on Maria).
      */
      if (_ma_reenable_logging_for_table(file, TRUE))
        DBUG_RETURN(1);
      /** @todo zero file->trn also in commit and rollback */
      _ma_set_trn_for_table(file, NULL);        // Safety
      /*
        Make file->state point to the current number of rows. Needed if
        someone calls maria_info() without first doing an external lock
        of the table.
      */
      file->state= &file->s->state.state;

      if (trn && trnman_has_locked_tables(trn) &&
          !trnman_decrement_locked_tables(trn))
      {
        /*
          OK should not have been sent to client yet (ACID).
          This is a bit excessive, ACID requires this only if there are some
          changes to commit (rollback shouldn't be tested).
        */
        DBUG_ASSERT(!thd->main_da.is_sent ||
                    thd->killed == THD::KILL_CONNECTION);
        /*
          Last table unlocked: end the transaction. This is a commit while
          Maria cannot roll back, else a rollback in autocommit mode.
        */
#ifdef MARIA_CANNOT_ROLLBACK
        if (ma_commit(trn))
          DBUG_RETURN(1);
        THD_TRN= 0;
#else
        if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
        {
          trnman_rollback_trn(trn);
          DBUG_PRINT("info", ("THD_TRN set to 0x0"));
          THD_TRN= 0;
        }
#endif
      }
    }
    else
    {
      /* Start of new statement */
      if (!trn)
      {
        /* No transaction yet for this connection: open one now */
        trn= trnman_new_trn(& thd->transaction.wt);
        if (unlikely(!trn))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        THD_TRN= trn;
        if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
          trans_register_ha(thd, TRUE, maria_hton);
      }
      _ma_set_trn_for_table(file, trn);
      if (!trnman_increment_locked_tables(trn))
      {
        trans_register_ha(thd, FALSE, maria_hton);
        trnman_new_statement(trn);
      }

      if (!file->s->lock.get_status)
      {
        /*
          We come here in the following cases:
           - The table is a temporary table
           - It's a table which is crash safe but not yet versioned, for
             example a table with fulltext or rtree keys

          Point the current state at state_start so that the block_format
          code doesn't count the same record twice, and refresh it from the
          share: it may be wrong if the same file was used several times in
          the last statement.
        */
        file->state=  file->state_start;
        *file->state= file->s->state.state;
      }
      else if (_ma_setup_live_state(file))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);

      if (!thd->transaction.on)
      {
        /*
          No need to log REDOs/UNDOs. If this is an internal temporary table
          which will be renamed to a permanent table (like in ALTER TABLE),
          the rename happens after unlocking so will be durable (and the table
          will get its create_rename_lsn).
          Note: if we wanted to enable users to have an old backup and apply
          tons of archived logs to roll-forward, we could then not disable
          REDOs/UNDOs in this case.
        */
        DBUG_PRINT("info", ("Disabling logging for table"));
        _ma_tmp_disable_logging_for_table(file, TRUE);
      }
    }
  } /* if transactional table */

  /* Temporary tables never take a real lock: F_EXTRA_LCK or F_UNLCK only */
  int engine_lock_type= lock_type;
  if (table->s->tmp_table)
    engine_lock_type= (lock_type == F_UNLCK) ? F_UNLCK : F_EXTRA_LCK;
  DBUG_RETURN(maria_lock_database(file, engine_lock_type));
}

2396 2397
/**
  Start a new statement on a table that is already locked.

  Does nothing for tables that were not created transactional. Otherwise
  attaches the connection's TRN to the table and calls
  trnman_new_statement().

  @param  thd        connection
  @param  lock_type  only checked by a debug assertion (must not be
                     TL_UNLOCK)

  @return always 0
*/

int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
{
  if (!file->s->base.born_transactional)
    return 0;

  TRN *trn= THD_TRN;
  DBUG_ASSERT(trn); // this may be called only after external_lock()
  DBUG_ASSERT(trnman_has_locked_tables(trn));
  DBUG_ASSERT(lock_type != TL_UNLOCK);
  DBUG_ASSERT(file->trn == trn);

  /*
    If a previous statement under this LOCK TABLES did an implicit commit
    (like a DDL), at least when that statement was about a different
    ha_maria than 'this', this->file->trn is a stale pointer. Re-attach the
    current transaction to fix it.
  */
  _ma_set_trn_for_table(file, trn);
  /*
    external_lock() was already called, so locked_tables is not
    incremented here. The function below may be called several times when
    a statement starts (once per table). That is fine as long as it stays
    cheap; otherwise it will have to be done only on the first call to
    start_stmt().
  */
  trnman_new_statement(trn);
  return 0;
}
2425

unknown's avatar
unknown committed
2426 2427 2428 2429 2430 2431 2432 2433 2434

/**
  Performs an implicit commit of the Maria transaction and creates a new
  one.

  This can be considered a hack. When Maria loses HA_NO_TRANSACTIONS it will
  be a participant in the connection's transaction, and so the implicit
  commits (ha_commit()) (like in end_active_trans()) will do the implicit
  commit without needing to call this function, which can then be removed.
2435 2436 2437 2438 2439

  @param  thd              THD object
  @param  new_trn          if a new transaction should be created; a new
                           transaction is not needed when we know that the
                           tables will be unlocked very soon.
unknown's avatar
unknown committed
2440 2441
*/

2442
int ha_maria::implicit_commit(THD *thd, bool new_trn)
unknown's avatar
unknown committed
2443 2444 2445 2446 2447 2448
{
#ifndef MARIA_CANNOT_ROLLBACK
#error this method should be removed
#endif
  TRN *trn;
  int error= 0;
2449
  TABLE *table;
unknown's avatar
unknown committed
2450
  DBUG_ENTER("ha_maria::implicit_commit");
2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468
  if (!new_trn && thd->locked_tables)
  {
    /*
      "we are under LOCK TABLES" <=> "we shouldn't commit".
      As thd->locked_tables is true, we are either under LOCK TABLES, or in
      prelocking; prelocking can be under LOCK TABLES, or not (and in this
      latter case only we should commit).
      Note that we come here only at the end of the top statement
      (dispatch_command()), we are never committing inside a sub-statement./
    */
    enum prelocked_mode_type prelocked_mode= thd->prelocked_mode;
    if ((prelocked_mode == NON_PRELOCKED) ||
        (prelocked_mode == PRELOCKED_UNDER_LOCK_TABLES))
    {
      DBUG_PRINT("info", ("locked_tables, skipping"));
      DBUG_RETURN(0);
    }
  }
unknown's avatar
unknown committed
2469 2470 2471 2472 2473
  if ((trn= THD_TRN) != NULL)
  {
    uint locked_tables= trnman_has_locked_tables(trn);
    if (unlikely(ma_commit(trn)))
      error= 1;
2474 2475 2476 2477 2478
    if (!new_trn)
    {
      THD_TRN= NULL;
      goto end;
    }
unknown's avatar
unknown committed
2479 2480 2481 2482 2483
    /*
      We need to create a new transaction and put it in THD_TRN. Indeed,
      tables may be under LOCK TABLES, and so they will start the next
      statement assuming they have a trn (see ha_maria::start_stmt()).
    */
2484
    trn= trnman_new_trn(& thd->transaction.wt);
unknown's avatar
unknown committed
2485 2486 2487 2488 2489
    /* This is just a commit, tables stay locked if they were: */
    trnman_reset_locked_tables(trn, locked_tables);
    THD_TRN= trn;
    if (unlikely(trn == NULL))
      error= HA_ERR_OUT_OF_MEM;
2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504

    /*
      Move all locked tables to the new transaction
      We must do it here as otherwise file->thd and file->state may be
      stale pointers. We can't do this in start_stmt() as we don't know
      when we should call _ma_setup_live_state() and in some cases, like
      in check table, we use the table without calling start_stmt().
     */
    for (table=thd->open_tables; table ; table=table->next)
    {
      if (table->db_stat && table->file->ht == maria_hton)
      {
        MARIA_HA *handler= ((ha_maria*) table->file)->file;
        if (handler->s->base.born_transactional)
        {
2505
          _ma_set_trn_for_table(handler, trn);
2506 2507 2508 2509 2510 2511 2512 2513
          if (handler->s->lock.get_status)
          {
            if (_ma_setup_live_state(handler))
              error= HA_ERR_OUT_OF_MEM;
          }
        }
      }
    }
unknown's avatar
unknown committed
2514
  }
2515
end:
unknown's avatar
unknown committed
2516 2517 2518 2519
  DBUG_RETURN(error);
}


2520 2521 2522 2523
/*
  Decide which lock type this table needs for the coming statement and
  append the table's THR_LOCK_DATA to the array being built by the caller.

  The requested lock_type is only stored when it is not TL_IGNORE and the
  table currently has no lock type recorded (TL_UNLOCK); before being
  stored it may be strengthened (see comments in the body).

  Returns the position following the entry written into 'to'.
*/

THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
                                     THR_LOCK_DATA **to,
                                     enum thr_lock_type lock_type)
{
  /* Test if we can fix test below */
  DBUG_ASSERT(lock_type != TL_UNLOCK &&
              (lock_type == TL_IGNORE || file->lock.type == TL_UNLOCK));

  if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
  {
    const enum enum_sql_command command= thd->lex->sql_command;
    /* A read lock requested by something else than SELECT / LOCK TABLES */
    const bool non_select_reader= (lock_type <= TL_READ_HIGH_PRIORITY &&
                                   command != SQLCOM_SELECT &&
                                   command != SQLCOM_LOCK_TABLES);

    /*
      We have to disable concurrent inserts for INSERT ... SELECT or
      INSERT/UPDATE/DELETE with sub queries if we are using statement based
      logging.  We take the safe route here and disable this for all commands
      that only do reading and are not SELECT.
    */
    if (non_select_reader &&
        !thd->current_stmt_binlog_row_based &&
        (thd->options & OPTION_BIN_LOG) &&
        mysql_bin_log.is_open())
    {
      lock_type= TL_READ_NO_INSERT;
    }
    else if (lock_type == TL_WRITE_CONCURRENT_INSERT)
    {
      const enum enum_duplicates dup_mode= thd->lex->duplicates;
      /*
        Explanation for the 3 conditions below, in order:

        - Bulk insert may use repair, which will cause problems if other
        threads try to read/insert to the table: disable versioning.
        Note that our read of file->state->records is incorrect, as such
        variable may have changed when we come to start_bulk_insert() (worse
        case: we see != 0 so allow versioning, start_bulk_insert() sees 0 and
        uses repair). This is prevented because start_bulk_insert() will not
        try repair if we enabled versioning.
        - INSERT SELECT ON DUPLICATE KEY UPDATE comes here with
        TL_WRITE_CONCURRENT_INSERT but shouldn't because it can do
        update/delete of a row and versioning doesn't support that
        - same for LOAD DATA CONCURRENT REPLACE.
      */
      if (file->state->records == 0 ||
          (command == SQLCOM_INSERT_SELECT && dup_mode == DUP_UPDATE) ||
          (command == SQLCOM_LOAD && dup_mode == DUP_REPLACE))
      {
        lock_type= TL_WRITE;
      }
    }
    file->lock.type= lock_type;
  }

  *to= &file->lock;
  return to + 1;
}


/*
  Fill 'create_info' with the current properties of the open table:
  auto_increment value (unless HA_CREATE_USED_AUTO is set in used_fields),
  data/index file names, row type and page checksum setting.
*/

void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
{
  const bool auto_inc_given=
    (create_info->used_fields & HA_CREATE_USED_AUTO) != 0;
  const bool row_format_given=
    (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) != 0;

  /* Refresh 'stats' before reading the auto_increment value from it */
  ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
  if (!auto_inc_given)
    create_info->auto_increment_value= stats.auto_increment_value;

  create_info->data_file_name= data_file_name;
  create_info->index_file_name= index_file_name;

  /* We need to restore the row type as Maria can change it */
  if (!row_format_given && create_info->row_type != ROW_TYPE_DEFAULT)
    create_info->row_type= get_row_type();

  /*
    Show always page checksums, as this can be forced with
    maria_page_checksums variable
  */
  if (create_info->page_checksum == HA_CHOICE_UNDEF)
  {
    if (file->s->options & HA_OPTION_PAGE_CHECKSUM)
      create_info->page_checksum= HA_CHOICE_YES;
    else
      create_info->page_checksum= HA_CHOICE_NO;
  }
}


unknown's avatar
unknown committed
2597 2598 2599 2600 2601
enum row_type ha_maria::get_row_type() const
{
  switch (file->s->data_file_type) {
  case STATIC_RECORD:     return ROW_TYPE_FIXED;
  case DYNAMIC_RECORD:    return ROW_TYPE_DYNAMIC;
2602
  case BLOCK_RECORD:      return ROW_TYPE_PAGE;
unknown's avatar
unknown committed
2603 2604 2605 2606 2607 2608
  case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED;
  default:                return ROW_TYPE_NOT_USED;
  }
}


2609
/*
  Choose the Maria data file type for a table to be created.

  TRANSACTIONAL=1 (HA_CHOICE_YES) always gives BLOCK_RECORD; otherwise
  FIXED and DYNAMIC row types map to STATIC_RECORD and DYNAMIC_RECORD and
  every other row type gives BLOCK_RECORD.
*/

static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
{
  if (info->transactional != HA_CHOICE_YES)
  {
    if (info->row_type == ROW_TYPE_FIXED)
      return STATIC_RECORD;
    if (info->row_type == ROW_TYPE_DYNAMIC)
      return DYNAMIC_RECORD;
  }
  return BLOCK_RECORD;
}


2621
int ha_maria::create(const char *name, register TABLE *table_arg,
2622
                     HA_CREATE_INFO *ha_create_info)
2623 2624
{
  int error;
2625
  uint create_flags= 0, record_count, i;
2626 2627
  char buff[FN_REFLEN];
  MARIA_KEYDEF *keydef;
2628 2629
  MARIA_COLUMNDEF *recinfo;
  MARIA_CREATE_INFO create_info;
2630 2631
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
unknown's avatar
unknown committed
2632
  enum data_file_type row_type;
2633 2634
  DBUG_ENTER("ha_maria::create");

2635
  for (i= 0; i < share->keys; i++)
2636
  {
2637
    if (table_arg->key_info[i].flags & HA_USES_PARSER)
2638
    {
2639
      create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
2640 2641 2642
      break;
    }
  }
2643
  /* Note: BLOCK_RECORD is used if table is transactional */
2644
  row_type= maria_row_type(ha_create_info);
2645 2646 2647 2648 2649 2650 2651 2652
  if (ha_create_info->transactional == HA_CHOICE_YES &&
      ha_create_info->row_type != ROW_TYPE_PAGE &&
      ha_create_info->row_type != ROW_TYPE_NOT_USED &&
      ha_create_info->row_type != ROW_TYPE_DEFAULT)
    push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
                 ER_ILLEGAL_HA_CREATE_OPTION,
                 "Row format set to PAGE because of TRANSACTIONAL=1 option");

2653
  bzero((char*) &create_info, sizeof(create_info));
2654 2655 2656
  if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
                          &record_count, &create_info)))
    DBUG_RETURN(error); /* purecov: inspected */
2657 2658
  create_info.max_rows= share->max_rows;
  create_info.reloc_rows= share->min_rows;
2659 2660 2661 2662
  create_info.with_auto_increment= share->next_number_key_offset == 0;
  create_info.auto_increment= (ha_create_info->auto_increment_value ?
                               ha_create_info->auto_increment_value -1 :
                               (ulonglong) 0);
2663 2664
  create_info.data_file_length= ((ulonglong) share->max_rows *
                                 share->avg_row_length);
2665 2666
  create_info.data_file_name= ha_create_info->data_file_name;
  create_info.index_file_name= ha_create_info->index_file_name;
2667
  create_info.language= share->table_charset->number;
2668 2669 2670 2671 2672 2673

  /*
    Table is transactional:
    - If the user specify that table is transactional (in this case
      row type is forced to BLOCK_RECORD)
    - If they specify BLOCK_RECORD without specifying transactional behaviour
2674

unknown's avatar
unknown committed
2675 2676 2677
    Shouldn't this test be pushed down to maria_create()? Because currently,
    ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
    born_transactional==1, which confuses some recovery-related code.
2678
  */
2679 2680
  create_info.transactional= (row_type == BLOCK_RECORD &&
                              ha_create_info->transactional != HA_CHOICE_NO);
2681

2682 2683
  if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE)
    create_flags|= HA_CREATE_TMP_TABLE;
2684 2685
  if (ha_create_info->options & HA_CREATE_KEEP_FILES)
    create_flags|= HA_CREATE_KEEP_FILES;
2686
  if (options & HA_OPTION_PACK_RECORD)
2687
    create_flags|= HA_PACK_RECORD;
2688
  if (options & HA_OPTION_CHECKSUM)
2689
    create_flags|= HA_CREATE_CHECKSUM;
2690
  if (options & HA_OPTION_DELAY_KEY_WRITE)
2691
    create_flags|= HA_CREATE_DELAY_KEY_WRITE;
2692 2693
  if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF &&
       maria_page_checksums) ||
unknown's avatar
unknown committed
2694
       ha_create_info->page_checksum ==  HA_CHOICE_YES)
2695
    create_flags|= HA_CREATE_PAGE_CHECKSUM;
2696 2697 2698

  /* TODO: Check that the following fn_format is really needed */
  error=
unknown's avatar
unknown committed
2699 2700
    maria_create(fn_format(buff, name, "", "",
                           MY_UNPACK_FILENAME | MY_APPEND_EXT),
2701
                 row_type, share->keys, keydef,
2702
                 record_count,  recinfo,
2703 2704
                 0, (MARIA_UNIQUEDEF *) 0,
                 &create_info, create_flags);
2705

unknown's avatar
unknown committed
2706
  my_free((uchar*) recinfo, MYF(0));
2707 2708 2709 2710 2711 2712 2713 2714 2715 2716
  DBUG_RETURN(error);
}


int ha_maria::rename_table(const char *from, const char *to)
{
  return maria_rename(from, to);
}


2717 2718 2719 2720
/*
  Compute the next auto_increment value for an insert.

  When the auto_increment column starts its key (next_number_key_offset is
  0) the value is taken from the table statistics and an unlimited
  interval is reserved. Otherwise the last row having the same key prefix
  as record[0] is looked up and its auto_increment value + 1 is returned
  (1 if no such row is found), reserving a single value.

  The offset, increment and nb_desired_values arguments are not used.
*/

void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
                                  ulonglong nb_desired_values,
                                  ulonglong *first_value,
                                  ulonglong *nb_reserved_values)
{
  TABLE_SHARE *share= table->s;
  uchar prefix_key[HA_MAX_KEY_LENGTH];
  ulonglong next_value;
  int lookup_error;

  if (share->next_number_key_offset == 0)
  {                                             // Autoincrement at key-start
    ha_maria::info(HA_STATUS_AUTO);
    *first_value= stats.auto_increment_value;
    /* Maria has only table-level lock for now, so reserves to +inf */
    *nb_reserved_values= ULONGLONG_MAX;
    return;
  }

  /* it's safe to call the following if bulk_insert isn't on */
  maria_flush_bulk_insert(file, share->next_number_index);

  (void) extra(HA_EXTRA_KEYREAD);
  /* Build the key prefix that precedes the auto_increment column */
  key_copy(prefix_key, table->record[0],
           table->key_info + share->next_number_index,
           share->next_number_key_offset);
  lookup_error= maria_rkey(file, table->record[1],
                           (int) share->next_number_index, prefix_key,
                           make_prev_keypart_map(share->next_number_keypart),
                           HA_READ_PREFIX_LAST);
  /* On success the value is read from record[1] */
  next_value= lookup_error ? 1 :
    ((ulonglong) table->next_number_field->
     val_int_offset(share->rec_buff_length) + 1);
  extra(HA_EXTRA_NO_KEYREAD);

  *first_value= next_value;
  /*
    MySQL needs to call us for next row: assume we are inserting ("a",null)
    here, we return 3, and next this statement will want to insert ("b",null):
    there is no reason why ("b",3+1) would be the good row to insert: maybe it
    already exists, maybe 3+1 is too large...
  */
  *nb_reserved_values= 1;
}


/*
  Estimate how many rows there are in the given key range

  SYNOPSIS
    records_in_range()
    inx                 Index to use
    min_key             Start of range.  Null pointer if from first key
    max_key             End of range. Null pointer if to last key

  NOTES
    min_key.flag can have one of the following values:
      HA_READ_KEY_EXACT         Include the key in the range
      HA_READ_AFTER_KEY         Don't include key in range

    max_key.flag can have one of the following values:
      HA_READ_BEFORE_KEY        Don't include key in range
      HA_READ_AFTER_KEY         Include all 'end_key' values in the range

    The work is done by maria_records_in_range(); its result is returned
    after a cast to ha_rows.

  RETURN
   HA_POS_ERROR         Something is wrong with the index tree.
   0                    There are no matching keys in the given range
   number > 0           There are approximately 'number' matching rows in
                        the range.
*/

ha_rows ha_maria::records_in_range(uint inx, key_range *min_key,
                                   key_range *max_key)
{
  const ha_rows estimate=
    (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key);
  return estimate;
}


unknown's avatar
unknown committed
2797
/*
  Read the next row through the current ft_handler into 'buf'.

  Returns -1 if there is no ft_handler, otherwise the result of the
  handler's read_next(); table->status is set to STATUS_NOT_FOUND when
  that result is non-zero and to 0 otherwise.
*/

int ha_maria::ft_read(uchar * buf)
{
  if (!ft_handler)
    return -1;

  thread_safe_increment(table->in_use->status_var.ha_read_next_count,
                        &LOCK_status);  // why ?

  const int read_error= ft_handler->please->read_next(ft_handler,
                                                      (char*) buf);
  if (read_error)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return read_error;
}


/* Return the checksum stored in the table state, cast to uint. */

uint ha_maria::checksum() const
{
  const uint table_checksum= (uint) file->state->checksum;
  return table_checksum;
}


2820
bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
2821 2822 2823 2824
                                          uint table_changes)
{
  uint options= table->s->db_options_in_use;

2825 2826 2827
  if (create_info->auto_increment_value != stats.auto_increment_value ||
      create_info->data_file_name != data_file_name ||
      create_info->index_file_name != index_file_name ||
2828
      (maria_row_type(create_info) != data_file_type &&
2829
       create_info->row_type != ROW_TYPE_DEFAULT) ||
2830 2831
      table_changes == IS_EQUAL_NO ||
      table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet
2832 2833
    return COMPATIBLE_DATA_NO;

2834
  if ((options & (HA_OPTION_CHECKSUM |
2835
                  HA_OPTION_DELAY_KEY_WRITE)) !=
2836
      (create_info->table_options & (HA_OPTION_CHECKSUM |
2837 2838 2839 2840
                              HA_OPTION_DELAY_KEY_WRITE)))
    return COMPATIBLE_DATA_NO;
  return COMPATIBLE_DATA_YES;
}
unknown's avatar
unknown committed
2841

2842 2843

/*
  Handlerton panic callback: run a final checkpoint when needed, then call
  maria_panic().

  The two steps are sequenced explicitly: as operands of a single '|'
  expression their evaluation order would be unspecified, and the
  checkpoint has to be executed before maria_panic().

  @param hton   maria handlerton (unused)
  @param flag   panic function, passed on to maria_panic()

  @return bitwise OR of the results of ma_checkpoint_execute() (0 if it
          was not called) and maria_panic(); 0 means both succeeded.
*/

static int maria_hton_panic(handlerton *hton, ha_panic_function flag)
{
  int res= 0;

  /* If no background checkpoints, we need to do one now */
  if (checkpoint_interval == 0)
    res= ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);

  res|= maria_panic(flag);
  return res;
}
2849

2850 2851 2852

/*
  Handlerton commit callback.

  Resets the locked-tables counter of the connection's transaction. At the
  end of a statement inside a multi-statement transaction nothing more is
  done; otherwise THD_TRN is cleared and the transaction is committed with
  ma_commit(), whose result is returned.
*/

static int maria_commit(handlerton *hton __attribute__ ((unused)),
                        THD *thd, bool all)
{
  TRN *current_trn= THD_TRN;
  DBUG_ENTER("maria_commit");

  trnman_reset_locked_tables(current_trn, 0);

  /* statement or transaction ? */
  const bool in_multi_stmt_trans=
    (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) != 0;
  if (in_multi_stmt_trans && !all)
  {
    DBUG_RETURN(0); // end of statement
  }

  DBUG_PRINT("info", ("THD_TRN set to 0x0"));
  THD_TRN= 0;
  DBUG_RETURN(ma_commit(current_trn)); // end of transaction
}
2864

2865 2866 2867 2868 2869 2870

/*
  Handlerton rollback callback.

  Resets the locked-tables counter of the connection's transaction. At the
  end of a statement inside a multi-statement transaction only the
  statement is rolled back and 0 is returned; otherwise THD_TRN is cleared
  and the whole transaction is rolled back, returning HA_ERR_OUT_OF_MEM if
  trnman_rollback_trn() reports a failure and 0 otherwise.
*/

static int maria_rollback(handlerton *hton __attribute__ ((unused)),
                          THD *thd, bool all)
{
  TRN *current_trn= THD_TRN;
  DBUG_ENTER("maria_rollback");

  trnman_reset_locked_tables(current_trn, 0);

  /* statement or transaction ? */
  const bool in_multi_stmt_trans=
    (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) != 0;
  if (in_multi_stmt_trans && !all)
  {
    trnman_rollback_statement(current_trn);
    DBUG_RETURN(0); // end of statement
  }

  DBUG_PRINT("info", ("THD_TRN set to 0x0"));
  THD_TRN= 0;
  if (trnman_rollback_trn(current_trn))
    DBUG_RETURN(HA_ERR_OUT_OF_MEM); // end of transaction
  DBUG_RETURN(0); // end of transaction
}


2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915

/**
  @brief flush log handler

  Calls translog_purge_at_flush() and reduces its result to a boolean.

  @param hton            maria handlerton (unused)

  @retval FALSE OK
  @retval TRUE  Error
*/

bool maria_flush_logs(handlerton *hton)
{
  const bool purge_failed= test(translog_purge_at_flush());
  return purge_failed;
}


#define SHOW_MSG_LEN (FN_REFLEN + 20)
/**
  @brief show status handler

  Only HA_ENGINE_LOGS produces output: one row per transaction log file
  between the first existing file and the file of the current horizon,
  giving its size, its path and whether it is still needed ("in use"),
  no longer needed ("free") or of "unknown" state. Other status types
  print nothing.

  @param hton            maria handlerton
  @param thd             thread handler
  @param print           print function
  @param stat            type of status

  @return always FALSE (0)
*/

bool maria_show_status(handlerton *hton,
                       THD *thd,
                       stat_print_fn *print,
                       enum ha_stat_type stat)
{
  const LEX_STRING *engine_name= hton_name(hton);

  /* HA_ENGINE_STATUS, HA_ENGINE_MUTEX and anything else: nothing to show */
  if (stat != HA_ENGINE_LOGS)
    return FALSE;

  TRANSLOG_ADDRESS horizon= translog_get_horizon();
  uint32 last_file= LSN_FILE_NO(horizon);
  uint32 first_needed= translog_get_first_needed_file();
  uint32 first_file= translog_get_first_file(horizon);
  uint32 file_no;
  const char state_unknown[]= "unknown";
  const char state_needed[]= "in use";
  const char state_unneeded[]= "free";
  char path[FN_REFLEN];

  if (first_file == 0)
  {
    /* The first log file could not be determined: print one error row */
    const char error[]= "error";
    print(thd, engine_name->str, engine_name->length,
          STRING_WITH_LEN(""), error, sizeof(error) - 1);
    return FALSE;
  }

  for (file_no= first_file; file_no <= last_file; file_no++)
  {
    const char stat_failed[]= "can't stat";
    const char *status;
    uint length, status_len;
    MY_STAT stat_buff, *file_stat;
    char object[SHOW_MSG_LEN];
    char *file_name= translog_filename_by_fileno(file_no, path);

    file_stat= my_stat(file_name, &stat_buff, MYF(MY_WME));
    if (!file_stat)
    {
      status= stat_failed;
      status_len= sizeof(stat_failed) - 1;
      length= my_snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s",
                          file_name);
    }
    else
    {
      if (first_needed == 0)
      {
        status= state_unknown;
        status_len= sizeof(state_unknown) - 1;
      }
      else if (file_no < first_needed)
      {
        status= state_unneeded;
        status_len= sizeof(state_unneeded) - 1;
      }
      else
      {
        status= state_needed;
        status_len= sizeof(state_needed) - 1;
      }
      length= my_snprintf(object, SHOW_MSG_LEN, "Size %12lu ; %s",
                          (ulong) file_stat->st_size, file_name);
    }

    print(thd, engine_name->str, engine_name->length,
          object, length, status, status_len);
  }
  return FALSE;
}

2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066

/**
  Callback to delete all logs in directory. This is lower-level than other
  functions in ma_loghandler.c which delete logs, as it does not rely on
  translog_init() having been called first.

  @param  directory        directory where file is
  @param  filename         base name of the file to delete

  @return result of my_delete() on the full path built from both arguments
*/

static my_bool translog_callback_delete_all(const char *directory,
                                            const char *filename)
{
  char full_path[FN_REFLEN];
  my_bool delete_failed;

  fn_format(full_path, filename, directory, "", MYF(MY_UNPACK_FILENAME));
  delete_failed= my_delete(full_path, MYF(MY_WME));
  return delete_failed;
}


/**
  Helper function for option maria-force-start-after-recovery-failures.
  Deletes logs if too many failures. Otherwise, increments the counter of
  failures in the control file.
  Notice how this has to be called _before_ translog_init() (if log is
  corrupted, translog_init() might crash the server, so we need to remove logs
  before).

  @param  log_dir          directory where logs to be deleted are

  @return result of ma_control_file_write_and_force() when the counter is
          incremented, or of translog_walk_filenames() when logs are deleted
*/

static int mark_recovery_start(const char* log_dir)
{
  int res;
  DBUG_ENTER("mark_recovery_start");

  if (unlikely(maria_recover_options == HA_RECOVER_NONE))
    ma_message_no_user(ME_JUST_WARNING, "Please consider using option"
                       " --maria-recover[=...] to automatically check and"
                       " repair tables when logs are removed by option"
                       " --maria-force-start-after-recovery-failures=#");

  if (recovery_failures < force_start_after_recovery_failures)
  {
    /* Limit not reached yet: only record one more recovery attempt */
    res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
                                         max_trid_in_control_file,
                                         recovery_failures + 1);
    DBUG_RETURN(res);
  }

  /*
    Remove logs which cause the problem; keep control file which has
    critical info like uuid, max_trid (removing control file may make
    correct tables look corrupted!).
  */
  char msg[100];
  res= translog_walk_filenames(log_dir, &translog_callback_delete_all);
  my_snprintf(msg, sizeof(msg),
              "%s logs after %u consecutive failures of"
              " recovery from logs",
              (res ? "failed to remove some" : "removed all"),
              recovery_failures);
  /* A failed removal is reported with flags 0, a success as a warning */
  ma_message_no_user((res ? 0 : ME_JUST_WARNING), msg);
  DBUG_RETURN(res);
}


/**
  Helper function for option maria-force-start-after-recovery-failures.
  Records in the control file that recovery was a success, so that it's not
  counted for maria-force-start-after-recovery-failures.

  @return result of ma_control_file_write_and_force()
*/

static int mark_recovery_success(void)
{
  DBUG_ENTER("mark_recovery_success");
  /* success of recovery, reset recovery_failures (last argument is 0): */
  const int write_error=
    ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
                                    max_trid_in_control_file, 0);
  DBUG_RETURN(write_error);
}


3067 3068
static int ha_maria_init(void *p)
{
3069
  int res;
3070
  const char *log_dir= maria_data_root;
3071 3072
  maria_hton= (handlerton *)p;
  maria_hton->state= SHOW_OPTION_YES;
unknown's avatar
unknown committed
3073
  maria_hton->db_type= DB_TYPE_UNKNOWN;
3074
  maria_hton->create= maria_create_handler;
3075 3076 3077
  maria_hton->panic= maria_hton_panic;
  maria_hton->commit= maria_commit;
  maria_hton->rollback= maria_rollback;
3078 3079
  maria_hton->flush_logs= maria_flush_logs;
  maria_hton->show_status= maria_show_status;
3080 3081
  /* TODO: decide if we support Maria being used for log tables */
  maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
3082
  bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
3083
  maria_tmpdir= &mysql_tmpdir_list;             /* For REDO */
unknown's avatar
unknown committed
3084
  res= maria_init() || ma_control_file_open(TRUE, TRUE) ||
3085 3086
    ((force_start_after_recovery_failures != 0) &&
     mark_recovery_start(log_dir)) ||
3087
    !init_pagecache(maria_pagecache,
3088
                    (size_t) pagecache_buffer_size, pagecache_division_limit,
3089
                    pagecache_age_threshold, maria_block_size, 0) ||
3090
    !init_pagecache(maria_log_pagecache,
3091
                    TRANSLOG_PAGECACHE_SIZE, 0, 0,
3092
                    TRANSLOG_PAGE_SIZE, 0) ||
3093
    translog_init(maria_data_root, log_file_size,
3094
                  MYSQL_VERSION_ID, server_id, maria_log_pagecache,
3095
                  TRANSLOG_DEFAULT_FLAGS, 0) ||
3096 3097
    maria_recovery_from_log() ||
    ((force_start_after_recovery_failures != 0) && mark_recovery_success()) ||
unknown's avatar
unknown committed
3098
    ma_checkpoint_init(checkpoint_interval);
3099
  maria_multi_threaded= maria_in_ha_maria= TRUE;
3100 3101 3102 3103 3104

#if defined(HAVE_REALPATH) && !defined(HAVE_purify) && !defined(HAVE_BROKEN_REALPATH)
  /*  We can only test for sub paths if my_symlink.c is using realpath */
  maria_test_invalid_symlink= test_if_data_home_dir;
#endif
3105
  return res ? HA_ERR_INITIALIZATION : 0;
3106 3107
}

3108

3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126
#ifdef HAVE_QUERY_CACHE
/**
  @brief Tell the query cache whether the current statement may be cached.

  @param thd The thread handle (not used here)
  @param table_name A pointer to the table name in the table cache
    (not used here)
  @param table_name_len The length of the table name (not used here)
  @param[out] engine_callback The pointer to the storage engine call back
    function, always set to 0
  @param[out] engine_data Engine data, always set to 0

  @note Despite the name of this function, it is used to check each statement
    before it is cached and not to register a table or callback function.

  @see handler::register_query_cache_table

  @return Whether it is ok to try to cache the current statement.
    @retval TRUE  The statement can be cached
    @retval FALSE The table uses non-transactional concurrent insert and
                  its size as seen by this handler differs from the size
                  in the share
*/

my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name,
                                             uint table_name_len,
                                             qc_engine_callback
                                             *engine_callback,
                                             ulonglong *engine_data)
{
  ulonglong shared_length;
  ulonglong snapshot_length;
  DBUG_ENTER("ha_maria::register_query_cache_table");

  /*
    No call back function is needed to determine if a cached statement
    is valid or not, and no engine data is needed.
  */
  *engine_callback= 0;
  *engine_data= 0;

  /*
    If a concurrent INSERT has happened just before the currently processed
    SELECT statement, the total size of the table is unknown.

    To determine if the table size is known, the current thread's snap shot
    of the table size (file->state) is compared with the actual table size
    (file->s->state.state).

    If the table size is unknown the SELECT statement can't be cached.

    NOTE(review): the original comment, apparently inherited from MyISAM,
    argued that mutex visibility rules guarantee the share value read here
    is at least as recent as the concurrent insert's update, and that seeing
    an even later value is harmless since only "changed or not" matters. No
    mutex is taken in this function - confirm against the callers.
  */
  shared_length= file->s->state.state.data_file_length;
  snapshot_length= file->state->data_file_length;

  /* Return whether is ok to try to cache current statement. */
  DBUG_RETURN(!file->s->non_transactional_concurrent_insert ||
              snapshot_length == shared_length);
}
#endif

3181 3182
static struct st_mysql_sys_var* system_variables[]= {
  MYSQL_SYSVAR(block_size),
unknown's avatar
unknown committed
3183
  MYSQL_SYSVAR(checkpoint_interval),
3184
  MYSQL_SYSVAR(force_start_after_recovery_failures),
unknown's avatar
unknown committed
3185
  MYSQL_SYSVAR(page_checksum),
3186
  MYSQL_SYSVAR(log_dir_path),
3187 3188
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_purge_type),
3189
  MYSQL_SYSVAR(max_sort_file_size),
unknown's avatar
unknown committed
3190 3191 3192
  MYSQL_SYSVAR(pagecache_age_threshold),
  MYSQL_SYSVAR(pagecache_buffer_size),
  MYSQL_SYSVAR(pagecache_division_limit),
3193
  MYSQL_SYSVAR(recover),
3194 3195 3196
  MYSQL_SYSVAR(repair_threads),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(stats_method),
3197
  MYSQL_SYSVAR(sync_log_dir),
3198 3199 3200 3201
  NULL
};


unknown's avatar
unknown committed
3202
/**
unknown's avatar
unknown committed
3203
   @brief Updates the checkpoint interval and restarts the background thread.
unknown's avatar
unknown committed
3204
*/
unknown's avatar
unknown committed
3205

unknown's avatar
unknown committed
3206
static void update_checkpoint_interval(MYSQL_THD thd,
unknown's avatar
unknown committed
3207
                                        struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
3208
                                        void *var_ptr, const void *save)
unknown's avatar
unknown committed
3209
{
unknown's avatar
unknown committed
3210 3211
  ma_checkpoint_end();
  ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save));
unknown's avatar
unknown committed
3212 3213
}

3214 3215 3216 3217 3218 3219
/**
   @brief Updates the transaction log file limit.
*/

static void update_log_file_size(MYSQL_THD thd,
                                 struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
3220
                                 void *var_ptr, const void *save)
3221 3222
{
  uint32 size= (uint32)((ulong)(*(long *)save));
unknown's avatar
unknown committed
3223
  translog_set_file_size(size);
3224 3225 3226
  *(ulong *)var_ptr= size;
}

3227

unknown's avatar
unknown committed
3228
/*
  Status variables exported by the Maria plugin; all of them are counters
  of maria_pagecache_var. The list ends with a NullS entry.
*/
static SHOW_VAR status_variables[]=
{
  /* block counters */
  {"Maria_pagecache_blocks_not_flushed",
   (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG_NOFLUSH},
  {"Maria_pagecache_blocks_unused",
   (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG_NOFLUSH},
  {"Maria_pagecache_blocks_used",
   (char*) &maria_pagecache_var.blocks_used, SHOW_LONG_NOFLUSH},
  /* read/write counters */
  {"Maria_pagecache_read_requests",
   (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
  {"Maria_pagecache_reads",
   (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
  {"Maria_pagecache_write_requests",
   (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
  {"Maria_pagecache_writes",
   (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}                     /* end of list */
};
3238

3239
struct st_mysql_storage_engine maria_storage_engine=
3240
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
3241

unknown's avatar
unknown committed
3242 3243 3244
/* Plugin declaration of the Maria storage engine */
mysql_declare_plugin(maria)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,                /* Plugin type        */
  &maria_storage_engine,                      /* Plugin descriptor  */
  "MARIA",                                    /* Name               */
  "MySQL AB",                                 /* Author             */
  "Crash-safe tables with MyISAM heritage",   /* Description        */
  PLUGIN_LICENSE_GPL,                         /* License            */
  ha_maria_init,                              /* Plugin Init        */
  NULL,                                       /* Plugin Deinit      */
  0x0100,                                     /* 1.0                */
  status_variables,                           /* status variables   */
  system_variables,                           /* system variables   */
  NULL                                        /* last field, unused here */
}
mysql_declare_plugin_end;