ha_maria.cc 122 KB
Newer Older
Sergei Golubchik's avatar
Sergei Golubchik committed
1 2
/* Copyright (C) 2004-2008 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
   Copyright (C) 2008-2009 Sun Microsystems, Inc.
3 4 5

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; version 2 of the License.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */


#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation                          // gcc: Class implementation
#endif

22
#define MYSQL_SERVER 1
23
#include "mysql_priv.h"
24
#include <mysql/plugin.h>
25
#include <m_ctype.h>
26
#include <my_dir.h>
27
#include <myisampack.h>
unknown's avatar
unknown committed
28
#include <my_bit.h>
29
#include "ha_maria.h"
30
#include "trnman_public.h"
31
#include "trnman.h"
32

33
C_MODE_START
34 35
#include "maria_def.h"
#include "ma_rt_index.h"
36
#include "ma_blockrec.h"
unknown's avatar
unknown committed
37 38
#include "ma_checkpoint.h"
#include "ma_recovery.h"
39
C_MODE_END
40

unknown's avatar
unknown committed
41 42 43 44
/*
  Note that in future versions, only *transactional* Maria tables can
  rollback, so this flag should be up or down conditionally.
*/
unknown's avatar
unknown committed
45
#ifdef MARIA_CANNOT_ROLLBACK
Sergei Golubchik's avatar
Sergei Golubchik committed
46
#define CANNOT_ROLLBACK_FLAG HA_NO_TRANSACTIONS
unknown's avatar
unknown committed
47
#define trans_register_ha(A, B, C)  do { /* nothing */ } while(0)
Sergei Golubchik's avatar
Sergei Golubchik committed
48 49
#else
#define CANNOT_ROLLBACK_FLAG 0
unknown's avatar
unknown committed
50
#endif
51
#define THD_TRN (*(TRN **)thd_ha_data(thd, maria_hton))
unknown's avatar
unknown committed
52

unknown's avatar
unknown committed
53 54 55
ulong pagecache_division_limit, pagecache_age_threshold;
ulonglong pagecache_buffer_size;

unknown's avatar
unknown committed
56
/**
57 58 59 60 61
   As the auto-repair is initiated when opened from the SQL layer
   (open_unireg_entry(), check_and_repair()), it does not happen when Maria's
   Recovery internally opens the table to apply log records to it, which is
   good. It would happen only after Recovery, if the table is still
   corrupted.
unknown's avatar
unknown committed
62
*/
63
ulong maria_recover_options= HA_RECOVER_NONE;
unknown's avatar
unknown committed
64
handlerton *maria_hton;
65 66 67 68

/* bits in maria_recover_options */
const char *maria_recover_names[]=
{
69 70 71 72 73 74 75 76
  /*
    Compared to MyISAM, "default" was renamed to "normal" as it collided with
    SET var=default which sets to the var's default i.e. what happens when the
    var is not set i.e. HA_RECOVER_NONE.
    Another change is that OFF is used to disable, not ""; this is to have OFF
    display in SHOW VARIABLES which is better than "".
  */
  "OFF", "NORMAL", "BACKUP", "FORCE", "QUICK", NullS
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
};
/*
  TYPELIB describing maria_recover_names. The element count is the array
  size minus one, so the terminating NullS entry is not counted.
*/
TYPELIB maria_recover_typelib=
{
  array_elements(maria_recover_names) - 1, "",
  maria_recover_names, NULL
};

/*
  Value names for the stats_method variable (its MYSQL_THDVAR_ENUM
  declaration in this file points at maria_stats_method_typelib).
  The list is terminated by NullS.
*/
const char *maria_stats_method_names[]=
{
  "nulls_unequal", "nulls_equal",
  "nulls_ignored", NullS
};
/* The count excludes the terminating NullS entry */
TYPELIB maria_stats_method_typelib=
{
  array_elements(maria_stats_method_names) - 1, "",
  maria_stats_method_names, NULL
};

95 96 97 98 99 100 101 102 103 104
/*
  Transaction log purge mode: value names for the log_purge_type variable
  (its MYSQL_SYSVAR_ENUM declaration in this file points at
  maria_translog_purge_type_typelib). The list is terminated by NullS.
  NOTE(review): the order presumably has to match the TRANSLOG_PURGE_*
  enum values defined elsewhere - confirm before reordering.
*/
const char *maria_translog_purge_type_names[]=
{
  "immediate", "external", "at_flush", NullS
};
/* The count excludes the terminating NullS entry */
TYPELIB maria_translog_purge_type_typelib=
{
  array_elements(maria_translog_purge_type_names) - 1, "",
  maria_translog_purge_type_names, NULL
};
unknown's avatar
unknown committed
105 106

/* transactional log directory sync */
107 108 109 110 111 112 113 114 115 116
/*
  Value names for the sync_log_dir variable (its MYSQL_SYSVAR_ENUM
  declaration in this file points at maria_sync_log_dir_typelib).
  The list is terminated by NullS.
*/
const char *maria_sync_log_dir_names[]=
{
  "NEVER", "NEWFILE", "ALWAYS", NullS
};
/* The count excludes the terminating NullS entry */
TYPELIB maria_sync_log_dir_typelib=
{
  array_elements(maria_sync_log_dir_names) - 1, "",
  maria_sync_log_dir_names, NULL
};

unknown's avatar
unknown committed
117 118 119 120 121 122 123 124 125 126 127
/*
  Transactional log group commit: value names for the group_commit
  variable (its MYSQL_SYSVAR_ENUM declaration in this file points at
  maria_group_commit_typelib). The list is terminated by NullS.
*/
const char *maria_group_commit_names[]=
{
  "none", "hard", "soft", NullS
};
/* The count excludes the terminating NullS entry */
TYPELIB maria_group_commit_typelib=
{
  array_elements(maria_group_commit_names) - 1, "",
  maria_group_commit_names, NULL
};

128
/** Interval between background checkpoints in seconds */
unknown's avatar
unknown committed
129 130 131
static ulong checkpoint_interval;
static void update_checkpoint_interval(MYSQL_THD thd,
                                       struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
132
                                       void *var_ptr, const void *save);
unknown's avatar
unknown committed
133 134 135 136 137 138
static void update_maria_group_commit(MYSQL_THD thd,
                                      struct st_mysql_sys_var *var,
                                      void *var_ptr, const void *save);
static void update_maria_group_commit_interval(MYSQL_THD thd,
                                           struct st_mysql_sys_var *var,
                                           void *var_ptr, const void *save);
139 140
/** After that many consecutive recovery failures, remove logs */
static ulong force_start_after_recovery_failures;
141 142
static void update_log_file_size(MYSQL_THD thd,
                                 struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
143
                                 void *var_ptr, const void *save);
unknown's avatar
unknown committed
144

145 146
static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
Sergei Golubchik's avatar
Sergei Golubchik committed
147
       "Block size to be used for Aria index pages.", 0, 0,
148 149 150
       MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
       MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);

unknown's avatar
unknown committed
151
static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
unknown's avatar
unknown committed
152
       PLUGIN_VAR_RQCMDARG,
unknown's avatar
unknown committed
153 154
       "Interval between automatic checkpoints, in seconds; 0 means"
       " 'no automatic checkpoints' which makes sense only for testing.",
unknown's avatar
unknown committed
155
       NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1);
unknown's avatar
unknown committed
156

157 158 159 160 161 162 163 164 165 166 167
static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures,
       force_start_after_recovery_failures,
       /*
         Read-only because setting it on the fly has no useful effect,
         should be set on command-line.
       */
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Number of consecutive log recovery failures after which logs will be"
       " automatically deleted to cure the problem; 0 (the default) disables"
       " the feature.", NULL, NULL, 0, 0, UINT_MAX8, 1);

unknown's avatar
unknown committed
168 169 170 171
static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
       "Maintain page checksums (can be overridden per table "
       "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1);

172 173
/* This option can only be given on the command line */
static MYSQL_SYSVAR_STR(log_dir_path, maria_data_root,
174
       PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
175 176 177 178
       "Path to the directory where to store transactional log",
       NULL, NULL, mysql_real_data_home);


179 180 181 182 183 184
static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
       PLUGIN_VAR_RQCMDARG,
       "Limit for transaction log size",
       NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
       TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);

unknown's avatar
unknown committed
185 186
static MYSQL_SYSVAR_ENUM(group_commit, maria_group_commit,
       PLUGIN_VAR_RQCMDARG,
Sergei Golubchik's avatar
Sergei Golubchik committed
187
       "Specifies Aria group commit mode. "
unknown's avatar
unknown committed
188 189 190 191 192 193 194 195 196 197 198 199
       "Possible values are \"none\" (no group commit), "
       "\"hard\" (with waiting to actual commit), "
       "\"soft\" (no wait for commit (DANGEROUS!!!))",
       NULL, update_maria_group_commit,
       TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib);

static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval,
       PLUGIN_VAR_RQCMDARG,
       "Interval between commite in microseconds (1/1000000c)."
       " 0 stands for no waiting"
       " for other threads to come and do a commit in \"hard\" mode and no"
       " sync()/commit at all in \"soft\" mode.  Option has only an effect"
Sergei Golubchik's avatar
Sergei Golubchik committed
200
       " if aria_group_commit is used",
unknown's avatar
unknown committed
201 202
       NULL, update_maria_group_commit_interval, 0, 0, UINT_MAX, 1);

203 204
static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
       PLUGIN_VAR_RQCMDARG,
205
       "Specifies how Aria transactional log will be purged. "
206 207 208 209 210
       "Possible values of name are \"immediate\", \"external\" "
       "and \"at_flush\"",
       NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
       &maria_translog_purge_type_typelib);

211 212 213 214
/*
  Upper bound for the temporary file of the sort-based index build,
  stored in maria_max_temp_length. Default is MAX_FILE_SIZE rounded down
  to a multiple of 1 MB; range 0..MAX_FILE_SIZE, step 1 MB.
*/
static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
       maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
       "Don't use the fast sort index method to create an index if the "
       "temporary file would get bigger than this.",
       0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)),
       0, MAX_FILE_SIZE, 1*MB);
217

unknown's avatar
unknown committed
218 219 220 221 222 223 224 225 226 227
static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
       pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
       "This characterizes the number of hits a hot block has to be untouched "
       "until it is considered aged enough to be downgraded to a warm block. "
       "This specifies the percentage ratio of that number of hits to the "
       "total number of blocks in the page cache.", 0, 0,
        300, 100, ~0L, 100);

static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
Sergei Golubchik's avatar
Sergei Golubchik committed
228
       "The size of the buffer used for index blocks for Aria tables. "
unknown's avatar
unknown committed
229 230
       "Increase this to get better index handling (for all reads and "
       "multiple writes) to as much as you can afford.", 0, 0,
Michael Widenius's avatar
Michael Widenius committed
231
       KEY_CACHE_SIZE, 8192*16L, ~(ulong) 0, 1);
unknown's avatar
unknown committed
232 233 234 235 236 237

static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
       PLUGIN_VAR_RQCMDARG,
       "The minimum percentage of warm blocks in key cache", 0, 0,
       100,  1, 100, 1);

238 239 240 241
static MYSQL_SYSVAR_ENUM(recover, maria_recover_options, PLUGIN_VAR_OPCMDARG,
       "Specifies how corrupted tables should be automatically repaired."
       " Possible values are \"NORMAL\" (the default), \"BACKUP\", \"FORCE\","
       " \"QUICK\", or \"OFF\" which is like not using the option.",
242
       NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib);
243

244
static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
Sergei Golubchik's avatar
Sergei Golubchik committed
245
       "Number of threads to use when repairing Aria tables. The value of 1 "
246 247 248 249 250 251
       "disables parallel repair.",
       0, 0, 1, 1, ~0L, 1);

static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
       "The buffer that is allocated when sorting the index when doing a "
       "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.",
252
       0, 0, 128L*1024L*1024L, 4, ~0L, 1);
253 254

static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
Sergei Golubchik's avatar
Sergei Golubchik committed
255
       "Specifies how Aria index statistics collection code should treat "
unknown's avatar
unknown committed
256
       "NULLs. Possible values are \"nulls_unequal\", \"nulls_equal\", "
257
       "and \"nulls_ignored\".", 0, 0, 0, &maria_stats_method_typelib);
258

259 260
static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
       "Controls syncing directory after log file growth and new file "
unknown's avatar
unknown committed
261 262
       "creation. Possible values are \"never\", \"newfile\" and "
       "\"always\").", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
263 264
       &maria_sync_log_dir_typelib);

Sergey Petrunya's avatar
Sergey Petrunya committed
265
#ifdef USE_MARIA_FOR_TMP_TABLES
266
#define USE_MARIA_FOR_TMP_TABLES_VAL 1
Sergey Petrunya's avatar
Sergey Petrunya committed
267
#else
268
#define USE_MARIA_FOR_TMP_TABLES_VAL 0
Sergey Petrunya's avatar
Sergey Petrunya committed
269
#endif
270
my_bool use_maria_for_temp_tables= USE_MARIA_FOR_TMP_TABLES_VAL;
Sergey Petrunya's avatar
Sergey Petrunya committed
271 272 273

static MYSQL_SYSVAR_BOOL(used_for_temp_tables, 
       use_maria_for_temp_tables, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
Sergei Golubchik's avatar
Sergei Golubchik committed
274
       "Whether temporary tables should be MyISAM or Aria", 0, 0,
Sergey Petrunya's avatar
Sergey Petrunya committed
275 276
       1);

277 278 279 280
/*****************************************************************************
** MARIA tables
*****************************************************************************/

281 282 283
/*
  Handler factory: constructs a ha_maria object for the given handlerton
  and table share, using placement new on mem_root, and returns it as a
  generic handler pointer.
*/
static handler *maria_create_handler(handlerton *hton,
                                     TABLE_SHARE * table,
                                     MEM_ROOT *mem_root)
{
  handler *maria_handler= new (mem_root) ha_maria(hton, table);
  return maria_handler;
}


/**
  Collect a message printed by the maria_check routines and deliver it.

  The message is formatted once into a local buffer and then routed:
  - to the server error log when thd->vio_ok() is false,
  - through my_message() with ER_NOT_KEYFILE when param->testflag has any
    of T_CREATE_MISSING_KEYS, T_SAFE_REPAIR or T_AUTO_REPAIR set,
  - otherwise as one row (db.table, operation name, message type, message
    text) written through thd->protocol; if that write fails the text is
    sent to the error log instead.

  @param param     check descriptor; thd, testflag, db_name, table_name and
                   op_name are read from it
  @param msg_type  message class; the callers in this file pass "error",
                   "info" or "warning". Also used as the DBUG keyword.
  @param fmt       printf-style format string
  @param args      arguments for fmt
*/

static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
                                const char *fmt, va_list args)
{
  THD *thd= (THD *) param->thd;
  Protocol *protocol= thd->protocol;
  uint length, msg_length;
  char msgbuf[HA_MAX_MSG_BUF];
  char name[NAME_LEN * 2 + 2];

  msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
  msgbuf[sizeof(msgbuf) - 1]= 0;                // healthy paranoia

  DBUG_PRINT(msg_type, ("message: %s", msgbuf));

  if (!thd->vio_ok())
  {
    /*
      Log the already formatted text. A va_list must not be handed to a
      "..." function, and args has already been consumed by my_vsnprintf()
      above.
    */
    sql_print_error("%s", msgbuf);
    return;
  }

  if (param->testflag &
      (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
  {
    my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME));
    return;
  }
  /* Build "db.table"; strxmov() returns a pointer to the end of the result */
  length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
                          NullS) - name);
  /*
    TODO: switch from protocol to push_warning here. The main reason we didn't
    do it yet is parallel repair. Due to following trace:
    ma_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.

    Also we likely need to lock mutex here (in both cases with protocol and
    push_warning).
  */
  protocol->prepare_for_resend();
  protocol->store(name, length, system_charset_info);
  protocol->store(param->op_name, system_charset_info);
  protocol->store(msg_type, system_charset_info);
  protocol->store(msgbuf, msg_length, system_charset_info);
  if (protocol->write())
    sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
                    msgbuf);
}

338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353

/*
  Convert TABLE object to Maria key and column definition

  SYNOPSIS
    table2maria()
      table_arg   in     TABLE object.
      keydef_out  out    Maria key definition.
      recinfo_out out    Maria column definition.
      records_out out    Number of fields.

  DESCRIPTION
    This function will allocate and initialize Maria key and column
    definition for further use in ma_create or for a check for underlying
    table conformance in merge engine.

354 355 356 357
    The caller needs to free *recinfo_out after use. Since *recinfo_out
    and *keydef_out are allocated with a my_multi_malloc, *keydef_out
    is freed automatically when *recinfo_out is freed.

358 359
  RETURN VALUE
    0  OK
360
    # error code
361 362
*/

363 364 365 366
static int table2maria(TABLE *table_arg, data_file_type row_type,
                       MARIA_KEYDEF **keydef_out,
                       MARIA_COLUMNDEF **recinfo_out, uint *records_out,
                       MARIA_CREATE_INFO *create_info)
367 368 369
{
  uint i, j, recpos, minpos, fieldpos, temp_length, length;
  enum ha_base_keytype type= HA_KEYTYPE_BINARY;
unknown's avatar
unknown committed
370
  uchar *record;
371 372 373 374 375 376 377
  KEY *pos;
  MARIA_KEYDEF *keydef;
  MARIA_COLUMNDEF *recinfo, *recinfo_pos;
  HA_KEYSEG *keyseg;
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
  DBUG_ENTER("table2maria");
378

379 380 381
  if (row_type == BLOCK_RECORD)
    options|= HA_OPTION_PACK_RECORD;

382 383 384 385 386 387 388 389 390 391 392 393
  if (!(my_multi_malloc(MYF(MY_WME),
          recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
          keydef_out, share->keys * sizeof(MARIA_KEYDEF),
          &keyseg,
          (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
          NullS)))
    DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
  keydef= *keydef_out;
  recinfo= *recinfo_out;
  pos= table_arg->key_info;
  for (i= 0; i < share->keys; i++, pos++)
  {
394 395
    keydef[i].flag= (uint16) (pos->flags & (HA_NOSAME | HA_FULLTEXT |
                                            HA_SPATIAL));
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
    keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
      (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
      pos->algorithm;
    keydef[i].block_length= pos->block_size;
    keydef[i].seg= keyseg;
    keydef[i].keysegs= pos->key_parts;
    for (j= 0; j < pos->key_parts; j++)
    {
      Field *field= pos->key_part[j].field;
      type= field->key_type();
      keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;

      if (options & HA_OPTION_PACK_KEYS ||
          (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
                         HA_SPACE_PACK_USED)))
      {
        if (pos->key_part[j].length > 8 &&
            (type == HA_KEYTYPE_TEXT ||
             type == HA_KEYTYPE_NUM ||
             (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
        {
          /* No blobs here */
          if (j == 0)
            keydef[i].flag|= HA_PACK_KEY;
          if (!(field->flags & ZEROFILL_FLAG) &&
              (field->type() == MYSQL_TYPE_STRING ||
               field->type() == MYSQL_TYPE_VAR_STRING ||
               ((int) (pos->key_part[j].length - field->decimals())) >= 4))
            keydef[i].seg[j].flag|= HA_SPACE_PACK;
        }
        else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
          keydef[i].flag|= HA_BINARY_PACK_KEY;
      }
      keydef[i].seg[j].type= (int) type;
      keydef[i].seg[j].start= pos->key_part[j].offset;
      keydef[i].seg[j].length= pos->key_part[j].length;
      keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end=
        keydef[i].seg[j].bit_length= 0;
      keydef[i].seg[j].bit_pos= 0;
      keydef[i].seg[j].language= field->charset()->number;

      if (field->null_ptr)
      {
        keydef[i].seg[j].null_bit= field->null_bit;
        keydef[i].seg[j].null_pos= (uint) (field->null_ptr-
                                           (uchar*) table_arg->record[0]);
      }
      else
      {
        keydef[i].seg[j].null_bit= 0;
        keydef[i].seg[j].null_pos= 0;
      }
      if (field->type() == MYSQL_TYPE_BLOB ||
          field->type() == MYSQL_TYPE_GEOMETRY)
      {
        keydef[i].seg[j].flag|= HA_BLOB_PART;
        /* save number of bytes used to pack length */
        keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
                                            share->blob_ptr_size);
      }
      else if (field->type() == MYSQL_TYPE_BIT)
      {
        keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
        keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
        keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
                                          (uchar*) table_arg->record[0]);
      }
    }
    keyseg+= pos->key_parts;
  }
  if (table_arg->found_next_number_field)
    keydef[share->next_number_index].flag|= HA_AUTO_KEY;
  record= table_arg->record[0];
  recpos= 0;
  recinfo_pos= recinfo;
471 472
  create_info->null_bytes= table_arg->s->null_bytes;

unknown's avatar
unknown committed
473
  while (recpos < (uint) share->stored_rec_length)
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
  {
    Field **field, *found= 0;
    minpos= share->reclength;
    length= 0;

    for (field= table_arg->field; *field; field++)
    {
      if ((fieldpos= (*field)->offset(record)) >= recpos &&
          fieldpos <= minpos)
      {
        /* skip null fields */
        if (!(temp_length= (*field)->pack_length_in_rec()))
          continue; /* Skip null-fields */
        if (! found || fieldpos < minpos ||
            (fieldpos == minpos && temp_length < length))
        {
          minpos= fieldpos;
          found= *field;
          length= temp_length;
        }
      }
    }
    DBUG_PRINT("loop", ("found: 0x%lx  recpos: %d  minpos: %d  length: %d",
                        (long) found, recpos, minpos, length));
    if (!found)
      break;

    if (found->flags & BLOB_FLAG)
      recinfo_pos->type= FIELD_BLOB;
503 504
    else if (found->type() == MYSQL_TYPE_TIMESTAMP)
      recinfo_pos->type= FIELD_NORMAL;
505 506
    else if (found->type() == MYSQL_TYPE_VARCHAR)
      recinfo_pos->type= FIELD_VARCHAR;
507 508
    else if (!(options & HA_OPTION_PACK_RECORD) ||
             (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
      recinfo_pos->type= FIELD_NORMAL;
    else if (found->zero_pack())
      recinfo_pos->type= FIELD_SKIP_ZERO;
    else
      recinfo_pos->type= ((length <= 3 ||
                           (found->flags & ZEROFILL_FLAG)) ?
                          FIELD_NORMAL :
                          found->type() == MYSQL_TYPE_STRING ||
                          found->type() == MYSQL_TYPE_VAR_STRING ?
                          FIELD_SKIP_ENDSPACE :
                          FIELD_SKIP_PRESPACE);
    if (found->null_ptr)
    {
      recinfo_pos->null_bit= found->null_bit;
      recinfo_pos->null_pos= (uint) (found->null_ptr -
                                     (uchar*) table_arg->record[0]);
    }
    else
    {
      recinfo_pos->null_bit= 0;
      recinfo_pos->null_pos= 0;
    }
    (recinfo_pos++)->length= (uint16) length;
    recpos= minpos + length;
    DBUG_PRINT("loop", ("length: %d  type: %d",
                        recinfo_pos[-1].length,recinfo_pos[-1].type));
  }
  *records_out= (uint) (recinfo_pos - recinfo);
  DBUG_RETURN(0);
}


/*
  Check for underlying table conformance

  SYNOPSIS
    maria_check_definition()
      t1_keyinfo       in    First table key definition
      t1_recinfo       in    First table record definition
      t1_keys          in    Number of keys in first table
      t1_recs          in    Number of records in first table
      t2_keyinfo       in    Second table key definition
      t2_recinfo       in    Second table record definition
      t2_keys          in    Number of keys in second table
      t2_recs          in    Number of records in second table
      strict           in    Strict check switch

  DESCRIPTION
    This function compares two Maria definitions. By intention it was done
    to compare merge table definition against underlying table definition.
    It may also be used to compare dot-frm and MAI definitions of Maria
    table as well to compare different Maria table definitions.

    For merge table it is not required that number of keys in merge table
    must exactly match number of keys in underlying table. When calling this
    function for underlying table conformance check, 'strict' flag must be
    set to false, and converted merge definition must be passed as t1_*.

    Otherwise 'strict' flag must be set to 1 and it is not required to pass
    converted dot-frm definition as t1_*.

  RETURN VALUE
    0 - Equal definitions.
    1 - Different definitions.

574 575 576 577
  TODO
    - compare FULLTEXT keys;
    - compare SPATIAL keys;
    - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
578
      (should be correctly detected in table2maria).
579
*/
580

581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
                           MARIA_COLUMNDEF *t1_recinfo,
                           uint t1_keys, uint t1_recs,
                           MARIA_KEYDEF *t2_keyinfo,
                           MARIA_COLUMNDEF *t2_recinfo,
                           uint t2_keys, uint t2_recs, bool strict)
{
  uint i, j;
  DBUG_ENTER("maria_check_definition");
  if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys))
  {
    DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
                         t1_keys, t2_keys));
    DBUG_RETURN(1);
  }
  if (t1_recs != t2_recs)
  {
    DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
                         t1_recs, t2_recs));
    DBUG_RETURN(1);
  }
  for (i= 0; i < t1_keys; i++)
  {
    HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg;
    HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg;
606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
    if (t1_keyinfo[i].flag & HA_FULLTEXT && t2_keyinfo[i].flag & HA_FULLTEXT)
      continue;
    else if (t1_keyinfo[i].flag & HA_FULLTEXT ||
             t2_keyinfo[i].flag & HA_FULLTEXT)
    {
       DBUG_PRINT("error", ("Key %d has different definition", i));
       DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
                            test(t1_keyinfo[i].flag & HA_FULLTEXT),
                            test(t2_keyinfo[i].flag & HA_FULLTEXT)));
       DBUG_RETURN(1);
    }
    if (t1_keyinfo[i].flag & HA_SPATIAL && t2_keyinfo[i].flag & HA_SPATIAL)
      continue;
    else if (t1_keyinfo[i].flag & HA_SPATIAL ||
             t2_keyinfo[i].flag & HA_SPATIAL)
    {
       DBUG_PRINT("error", ("Key %d has different definition", i));
       DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
                            test(t1_keyinfo[i].flag & HA_SPATIAL),
                            test(t2_keyinfo[i].flag & HA_SPATIAL)));
       DBUG_RETURN(1);
    }
628 629 630 631 632 633 634 635 636 637 638 639
    if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs ||
        t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg)
    {
      DBUG_PRINT("error", ("Key %d has different definition", i));
      DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
                           t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg));
      DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
                           t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg));
      DBUG_RETURN(1);
    }
    for (j=  t1_keyinfo[i].keysegs; j--;)
    {
640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
      uint8 t1_keysegs_j__type= t1_keysegs[j].type;
      /*
        Table migration from 4.1 to 5.1. In 5.1 a *TEXT key part is
        always HA_KEYTYPE_VARTEXT2. In 4.1 we had only the equivalent of
        HA_KEYTYPE_VARTEXT1. Since we treat both the same on MyISAM
        level, we can ignore a mismatch between these types.
      */
      if ((t1_keysegs[j].flag & HA_BLOB_PART) &&
          (t2_keysegs[j].flag & HA_BLOB_PART))
      {
        if ((t1_keysegs_j__type == HA_KEYTYPE_VARTEXT2) &&
            (t2_keysegs[j].type == HA_KEYTYPE_VARTEXT1))
          t1_keysegs_j__type= HA_KEYTYPE_VARTEXT1; /* purecov: tested */
        else if ((t1_keysegs_j__type == HA_KEYTYPE_VARBINARY2) &&
                 (t2_keysegs[j].type == HA_KEYTYPE_VARBINARY1))
          t1_keysegs_j__type= HA_KEYTYPE_VARBINARY1; /* purecov: inspected */
      }

      if (t1_keysegs_j__type != t2_keysegs[j].type ||
659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
          t1_keysegs[j].language != t2_keysegs[j].language ||
          t1_keysegs[j].null_bit != t2_keysegs[j].null_bit ||
          t1_keysegs[j].length != t2_keysegs[j].length)
      {
        DBUG_PRINT("error", ("Key segment %d (key %d) has different "
                             "definition", j, i));
        DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
                             "t1_length=%d",
                             t1_keysegs[j].type, t1_keysegs[j].language,
                             t1_keysegs[j].null_bit, t1_keysegs[j].length));
        DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
                             "t2_length=%d",
                             t2_keysegs[j].type, t2_keysegs[j].language,
                             t2_keysegs[j].null_bit, t2_keysegs[j].length));

        DBUG_RETURN(1);
      }
    }
  }
678

679 680 681 682
  for (i= 0; i < t1_recs; i++)
  {
    MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i];
    MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i];
683 684 685 686 687 688 689 690
    /*
      FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
      see NOTE1 in ma_create.c
    */
    if ((t1_rec->type != t2_rec->type &&
         !(t1_rec->type == (int) FIELD_SKIP_ZERO &&
           t1_rec->length == 1 &&
           t2_rec->type == (int) FIELD_NORMAL)) ||
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
        t1_rec->length != t2_rec->length ||
        t1_rec->null_bit != t2_rec->null_bit)
    {
      DBUG_PRINT("error", ("Field %d has different definition", i));
      DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
                           t1_rec->type, t1_rec->length, t1_rec->null_bit));
      DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
                           t2_rec->type, t2_rec->length, t2_rec->null_bit));
      DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}


706 707
extern "C" {

708
/*
  Return thd_killed() for the THD stored in param->thd.
*/
int _ma_killed_ptr(HA_CHECK *param)
{
  THD *thd= (THD*) param->thd;
  return thd_killed(thd);
}


714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
/*
  Report progress to mysqld

  This is somewhat more involved than a plain progress callback.

  It is called by enable_index/repair, which is only one stage of
  ALTER TABLE, so the external stage/max_stage cannot be used here.

  thd_progress_init/thd_progress_next_stage are meant for high level
  commands such as CHECK TABLE or REPAIR TABLE, not for sub commands
  like enable_index().

  ma_check.c works with stages rather than with a total progress, so
  the internal param->stage / param->max_stage are folded into one
  overall figure here:
    reported progress = progress + max_progress * param->stage
    reported maximum  = max_progress * param->max_stage
*/

void _ma_report_progress(HA_CHECK *param, ulonglong progress,
                         ulonglong max_progress)
{
  THD *thd= (THD*) param->thd;
  ulonglong overall_done= progress + max_progress * param->stage;
  ulonglong overall_max= max_progress * param->max_stage;

  thd_progress_report(thd, overall_done, overall_max);
}


742 743
/*
  Print a printf-style message of type "error" through
  _ma_check_print_msg() and flag the error on the check descriptor
  (error_printed and O_DATA_LOST in out_flag).
*/
void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
{
  va_list ap;
  DBUG_ENTER("_ma_check_print_error");

  /* Both flags are set before the message is handed over */
  param->out_flag |= O_DATA_LOST;
  param->error_printed |= 1;

  va_start(ap, fmt);
  _ma_check_print_msg(param, "error", fmt, ap);
  va_end(ap);

  DBUG_VOID_RETURN;
}


/*
  Print a printf-style message of type "info" through
  _ma_check_print_msg(). No flags are changed in param.
*/
void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
{
  va_list ap;
  DBUG_ENTER("_ma_check_print_info");

  va_start(ap, fmt);
  _ma_check_print_msg(param, "info", fmt, ap);
  va_end(ap);

  DBUG_VOID_RETURN;
}


/*
  Print a printf-style message of type "warning" through
  _ma_check_print_msg() and flag the warning on the check descriptor
  (warning_printed and O_DATA_LOST in out_flag).
*/
void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
{
  va_list ap;
  DBUG_ENTER("_ma_check_print_warning");

  /* Both flags are set before the message is handed over */
  param->out_flag |= O_DATA_LOST;
  param->warning_printed= 1;

  va_start(ap, fmt);
  _ma_check_print_msg(param, "warning", fmt, ap);
  va_end(ap);

  DBUG_VOID_RETURN;
}

778 779 780 781 782 783 784 785 786 787 788 789 790
/*
  Create a transaction object

  SYNOPSIS
    maria_create_trn_for_mysql()
    info	Maria handler

  DESCRIPTION
    Uses the TRN already stored in THD_TRN for the connection that owns
    the table, or creates one with trnman_new_trn() if there is none, and
    attaches it to the table with _ma_set_trn_for_table().

  RETURN
    0 		ok
    #		Error number (HA_ERR_OUT_OF_MEM)
*/

static int maria_create_trn_for_mysql(MARIA_HA *info)
{
  /* Note: the local must be called 'thd' as THD_TRN is used below */
  THD *thd= ((TABLE*) info->external_ref)->in_use;
  TRN *trn= THD_TRN;
  DBUG_ENTER("maria_create_trn_for_mysql");

  if (!trn)
  {
    /* No transaction yet - open it now */
    if (unlikely(!(trn= trnman_new_trn(& thd->transaction.wt))))
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    THD_TRN= trn;
    if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      trans_register_ha(thd, TRUE, maria_hton);
  }

  _ma_set_trn_for_table(info, trn);

  /* A zero return from trnman_increment_locked_tables() starts a statement */
  if (!trnman_increment_locked_tables(trn))
  {
    trans_register_ha(thd, FALSE, maria_hton);
    trnman_new_statement(trn);
  }

#ifdef EXTRA_DEBUG
  if (info->lock_type != F_WRLCK ||
      (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
  {
    DBUG_PRINT("info", ("lock_type: %d  trnman_flags: %u",
                        info->lock_type, trnman_get_flags(trn)));
  }
  else
  {
    /* First write lock for this trn: log the query text once */
    trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
                     TRN_STATE_TABLES_CAN_CHANGE);
    (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
                                   (uchar*) thd->query(),
                                   thd->query_length());
  }
#endif
  DBUG_RETURN(0);
}

830 831 832 833 834
/*
  Return non-zero if the 'killed' member of the THD that uses the table
  (reached through info->external_ref) is set.
*/
my_bool ma_killed_in_mariadb(MARIA_HA *info)
{
  TABLE *owner_table= (TABLE*) (info->external_ref);
  return (owner_table->in_use->killed != 0);
}

835 836
} /* extern "C" */

837 838 839 840 841 842 843
/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and with repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_REPAIR    1
/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and without repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR 2
/**
  Neither BULK_INSERT_SINGLE_UNDO_AND_REPAIR nor
  BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR. This is the value that the
  ha_maria constructor gives to bulk_insert_single_undo.
*/
#define BULK_INSERT_NONE      0
852

853 854
/*
  Construct a handler that has no open table yet (file is 0), with the
  initial set of table flags, index enabling allowed and no bulk insert
  mode selected.
*/
ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg)
  :handler(hton, table_arg),
   file(0),
   int_table_flags(HA_NULL_IN_KEY |
                   HA_CAN_FULLTEXT |
                   HA_CAN_SQL_HANDLER |
                   HA_BINLOG_ROW_CAPABLE |
                   HA_BINLOG_STMT_CAPABLE |
                   HA_DUPLICATE_POS |
                   HA_CAN_INDEX_BLOBS |
                   HA_AUTO_PART_KEY |
                   HA_FILE_BASED |
                   HA_CAN_GEOMETRY |
                   CANNOT_ROLLBACK_FLAG |
                   HA_CAN_BIT_FIELD |
                   HA_CAN_RTREEKEYS |
                   HA_CAN_VIRTUAL_COLUMNS |
                   HA_HAS_RECORDS |
                   HA_STATS_RECORDS_IS_EXACT),
   can_enable_indexes(1),
   bulk_insert_single_undo(BULK_INSERT_NONE)
{}


865
/*
  Clone the handler through handler::clone() and let the copy share the
  row state and the transaction of this handler.

  Returns the new handler, or a null pointer if handler::clone() failed.
*/
handler *ha_maria::clone(const char *name, MEM_ROOT *mem_root)
{
  handler *base_copy= handler::clone(name, mem_root);
  ha_maria *new_handler= static_cast <ha_maria *>(base_copy);

  if (!new_handler)
    return new_handler;

  new_handler->file->state= file->state;
  /* maria_create_trn_for_mysql() is never called for clone() tables */
  new_handler->file->trn= file->trn;
  return new_handler;
}


879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903
/* File name extensions of the index and data files; NullS ends the list */
static const char *ha_maria_exts[]=
{
  MARIA_NAME_IEXT, MARIA_NAME_DEXT, NullS
};


/* Return the NullS-terminated list of file name extensions */
const char **ha_maria::bas_ext() const
{
  const char **extensions= ha_maria_exts;
  return extensions;
}


/*
  Return the name of the index type of key 'key_number'.

  The tests are done in this order: FULLTEXT flag, SPATIAL flag,
  RTREE algorithm; anything else is reported as "BTREE".
*/
const char *ha_maria::index_type(uint key_number)
{
  if (table->key_info[key_number].flags & HA_FULLTEXT)
    return "FULLTEXT";
  if (table->key_info[key_number].flags & HA_SPATIAL)
    return "SPATIAL";
  if (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE)
    return "RTREE";
  return "BTREE";
}


unknown's avatar
unknown committed
904 905 906 907 908 909 910
/*
  Estimate the cost of a full table scan.

  Tables that are not of type BLOCK_RECORD use handler::scan_time().
  For BLOCK_RECORD the data file length (minus one block) is divided by
  the larger of half a block and IO_SIZE, and 2 is added.
*/
double ha_maria::scan_time()
{
  if (file->s->data_file_type != BLOCK_RECORD)
    return handler::scan_time();

  return ulonglong2double(stats.data_file_length - file->s->block_size) /
         max(file->s->block_size / 2, IO_SIZE) + 2;
}

911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930
/*
  We need to be able to store at least two keys on an index page as the
  splitting algorithms depend on this. (With only one key on a page
  we also can't use any compression, which may make the index file much
  larger.)
  We use HA_MAX_KEY_BUFF as this is a stack restriction imposed by the
  handler interface.

  We also need to reserve space for a record pointer (8) and 3 bytes
  per key segment to store the length of the segment + possible null bytes.
  These extra bytes are required here so that maria_create() will surely
  accept any key created with the returned key data storage length.
*/

uint ha_maria::max_supported_key_length() const
{
  /* Engine limit with the reserved bytes described above taken off */
  uint engine_limit= (maria_max_key_length() - 8 - HA_MAX_KEY_SEG*3);
  return min(HA_MAX_KEY_BUFF, engine_limit);
}

unknown's avatar
unknown committed
931

932 933 934
#ifdef HAVE_REPLICATION
/*
  Read packets from 'net' and write them to the data file, starting at
  the beginning of the file, until a packet of length 0 arrives.

  Returns 0 on success, -1 if reading a packet failed, or errno if
  writing to the data file failed.
*/
int ha_maria::net_read_dump(NET * net)
{
  int error= 0;
  int data_fd= file->dfile.file;

  my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
  for (;;)
  {
    ulong packet_len= my_net_read(net);

    if (!packet_len)
      break;                                    // end of file

    if (packet_len == packet_error)
    {
      sql_print_error("ha_maria::net_read_dump - read error ");
      error= -1;
      break;
    }

    if (my_write(data_fd, (uchar *) net->read_pos, (uint) packet_len,
                 MYF(MY_WME | MY_FNABP)))
    {
      error= errno;
      break;
    }
  }
  return error;
}


/*
  Copy the data file, block by block, to the file descriptor 'fd' or, if
  fd is negative, to the network connection of 'thd'. When sending over
  the network an empty packet is written after the last block.

  Returns 0 on success, ENOMEM if the copy buffer could not be allocated,
  HA_ERR_CRASHED if the data file ends before data_file_length bytes have
  been read, otherwise errno (or EPIPE) from the failing read/write.
*/
int ha_maria::dump(THD * thd, int fd)
{
  MARIA_SHARE *share= file->s;
  NET *net= &thd->net;
  uint block_size= share->block_size;
  my_off_t bytes_to_read= share->state.state.data_file_length;
  int data_fd= file->dfile.file;
  uchar *buf= (uchar *) my_malloc(block_size, MYF(MY_WME));
  if (!buf)
    return ENOMEM;

  int error= 0;
  my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
  for (; bytes_to_read > 0;)
  {
    size_t bytes= my_read(data_fd, buf, block_size, MYF(MY_WME));
    if (bytes == MY_FILE_ERROR)
    {
      error= errno;
      goto err;
    }
    if (bytes == 0)
    {
      /*
        End of file before data_file_length bytes were read. Without this
        test bytes_to_read would never decrease and the loop would not end.
      */
      error= HA_ERR_CRASHED;
      goto err;
    }

    if (fd >= 0)
    {
      if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP)))
      {
        error= errno ? errno : EPIPE;
        goto err;
      }
    }
    else
    {
      if (my_net_write(net, buf, bytes))
      {
        error= errno ? errno : EPIPE;
        goto err;
      }
    }
    /*
      bytes_to_read is unsigned; a last block that is larger than the
      remainder must end the loop instead of wrapping around.
    */
    bytes_to_read= (bytes < bytes_to_read) ? bytes_to_read - bytes : 0;
  }

  if (fd < 0)
  {
    /* An empty packet tells the receiver that the file is complete */
    if (my_net_write(net, (uchar*) "", 0))
      error= errno ? errno : EPIPE;
    net_flush(net);
  }

err:
  my_free((uchar*) buf, MYF(0));
  return error;
}
#endif                                          /* HAVE_REPLICATION */

        /* Name is here without an extension */

int ha_maria::open(const char *name, int mode, uint test_if_locked)
{
  uint i;
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030

#ifdef NOT_USED
  /*
    If the user wants to have memory mapped data files, add an
    open_flag. Do not memory map temporary tables because they are
    expected to be inserted and thus extended a lot. Memory mapping is
    efficient for files that keep their size, but very inefficient for
    growing files. Using an open_flag instead of calling ma_extra(...
    HA_EXTRA_MMAP ...) after maxs_open() has the advantage that the
    mapping is not repeated for every open, but just done on the initial
unknown's avatar
unknown committed
1031
    open, when the MyISAM share is created. Every time the server
1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
    requires to open a new instance of a table it calls this method. We
    will always supply HA_OPEN_MMAP for a permanent table. However, the
    Maria storage engine will ignore this flag if this is a secondary
    open of a table that is in use by other threads already (if the
    Maria share exists already).
  */
  if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
    test_if_locked|= HA_OPEN_MMAP;
#endif

1042 1043 1044 1045 1046 1047
  if (unlikely(maria_recover_options != HA_RECOVER_NONE))
  {
    /* user asked to trigger a repair if table was not properly closed */
    test_if_locked|= HA_OPEN_ABORT_IF_CRASHED;
  }

1048 1049 1050
  if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
    return (my_errno ? my_errno : -1);

1051
  file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
1052 1053
  /* Set external_ref, mainly for temporary tables */
  file->external_ref= (void*) table;            // For ma_killed()
1054

1055 1056 1057 1058 1059 1060
  if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
    VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0));

  info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
  if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
    VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0));
1061
  if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
1062
    int_table_flags |= HA_REC_NOT_IN_SEQ;
unknown's avatar
unknown committed
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
  if (!file->s->base.born_transactional)
  {
    /*
      INSERT DELAYED cannot work with transactional tables (because it cannot
      stand up to "when client gets ok the data is safe on disk": the record
      may not even be inserted). In the future, we could enable it back (as a
      client doing INSERT DELAYED knows the specificities; but we then should
      make sure to regularly commit in the delayed_insert thread). 
    */
    int_table_flags|= HA_CAN_INSERT_DELAYED;
  }
1074
  if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
1075
    int_table_flags |= HA_HAS_NEW_CHECKSUM;
1076

1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
  /*
    For static size rows, tell MariaDB that we will access all bytes
    in the record when writing it.  This signals MariaDB to initalize
    the full row to ensure we don't get any errors from valgrind and
    that all bytes in the row is properly reset.
  */
  if (file->s->data_file_type == STATIC_RECORD &&
      (file->s->has_varchar_fields | file->s->has_null_fields))
    int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;

1087 1088
  for (i= 0; i < table->s->keys; i++)
  {
unknown's avatar
unknown committed
1089
    plugin_ref parser= table->key_info[i].parser;
1090 1091
    if (table->key_info[i].flags & HA_USES_PARSER)
      file->s->keyinfo[i].parser=
unknown's avatar
unknown committed
1092
        (struct st_mysql_ftparser *)plugin_decl(parser)->info;
1093
    table->key_info[i].block_size= file->s->keyinfo[i].block_length;
1094
  }
1095 1096
  my_errno= 0;
  return my_errno;
1097 1098 1099 1100 1101 1102
}


int ha_maria::close(void)
{
  MARIA_HA *tmp= file;
unknown's avatar
unknown committed
1103 1104
  if (!tmp)
    return 0;
1105 1106 1107 1108 1109
  file= 0;
  return maria_close(tmp);
}


unknown's avatar
unknown committed
1110
/*
  Write the row in 'buf' with maria_write().

  Returns the error from update_auto_increment() if that fails,
  otherwise the result of maria_write().
*/
int ha_maria::write_row(uchar * buf)
{
  /* If we have a timestamp column, update it to the current time */
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();

  /*
     If we have an auto_increment column and we are writing a changed row
     or a new row, then update the auto_increment value in the record.
  */
  if (table->next_number_field && buf == table->record[0])
  {
    int auto_inc_error= update_auto_increment();
    if (auto_inc_error)
      return auto_inc_error;
  }

  return maria_write(file, buf);
}


/*
  Check the table (status, size, deleted links, keys and, depending on
  the flags and table format, the data file).

  Returns
    HA_ADMIN_INTERNAL_ERROR  no open table, or the HA_CHECK descriptor
                             could not be allocated
    HA_ADMIN_ALREADY_DONE    the table is not marked crashed and the
                             T_CHECK_ONLY_CHANGED / T_FAST conditions
                             say there is nothing to do
    HA_ADMIN_CORRUPT         one of the checks returned an error
    HA_ADMIN_OK              otherwise
*/
int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
{
  int error;
  HA_CHECK *param_ptr= (HA_CHECK*) thd->alloc(sizeof(HA_CHECK));
  const char *old_proc_info;

  /*
    Test the pointers before anything is read through them. A test like
    '!&param' on a reference can never detect a failed allocation.
  */
  if (!file || !param_ptr) return HA_ADMIN_INTERNAL_ERROR;

  HA_CHECK &param= *param_ptr;
  MARIA_SHARE *share= file->s;
  TRN *old_trn= file->trn;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "check";
  param.db_name= table->s->db.str;
  param.table_name= table->alias.c_ptr();
  param.testflag= check_opt->flags | T_CHECK | T_SILENT;
  param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);

  if (!(table->db_stat & HA_READ_ONLY))
    param.testflag |= T_STATISTICS;
  param.using_global_keycache= 1;

  if (!maria_is_crashed(file) &&
      (((param.testflag & T_CHECK_ONLY_CHANGED) &&
        !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
                                  STATE_IN_REPAIR)) &&
        share->state.open_count == 0) ||
       ((param.testflag & T_FAST) && (share->state.open_count ==
                                      (uint) (share->global_changed ? 1 :
                                              0)))))
    return HA_ADMIN_ALREADY_DONE;

  maria_chk_init_for_check(&param, file);
  old_proc_info= thd_proc_info(thd, "Checking status");
  thd_progress_init(thd, 3);
  (void) maria_chk_status(&param, file);                // Not fatal
  error= maria_chk_size(&param, file);
  if (!error)
    error|= maria_chk_del(&param, file, param.testflag);
  thd_proc_info(thd, "Checking keys");
  thd_progress_next_stage(thd);
  if (!error)
    error= maria_chk_key(&param, file);
  thd_proc_info(thd, "Checking data");
  thd_progress_next_stage(thd);
  if (!error)
  {
    if ((!(param.testflag & T_QUICK) &&
         ((share->options &
           (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
          (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
    {
      /* T_MEDIUM is only forced on for the data check; restored below */
      ulonglong old_testflag= param.testflag;
      param.testflag |= T_MEDIUM;
      if (!(error= init_io_cache(&param.read_cache, file->dfile.file,
                                 my_default_record_cache_size, READ_CACHE,
                                 share->pack.header_length, 1, MYF(MY_WME))))
      {
        error= maria_chk_data_link(&param, file,
                                   test(param.testflag & T_EXTEND));
        end_io_cache(&(param.read_cache));
      }
      param.testflag= old_testflag;
    }
  }
  if (!error)
  {
    if ((share->state.changed & (STATE_CHANGED |
                                 STATE_CRASHED_FLAGS |
                                 STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
        (param.testflag & T_STATISTICS) || maria_is_crashed(file))
    {
      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
      pthread_mutex_lock(&share->intern_lock);
      DBUG_PRINT("info", ("Reseting crashed state"));
      share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
                               STATE_IN_REPAIR);
      if (!(table->db_stat & HA_READ_ONLY))
        error= maria_update_state_info(&param, file,
                                       UPDATE_TIME | UPDATE_OPEN_COUNT |
                                       UPDATE_STAT);
      pthread_mutex_unlock(&share->intern_lock);
      info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
           HA_STATUS_CONST);
    }
  }
  else if (!maria_is_crashed(file) && !thd->killed)
  {
    /* A check failed and the connection was not killed: mark as crashed */
    maria_mark_crashed(file);
    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
  }

  /* Reset trn, that may have been set by repair */
  _ma_set_trn_for_table(file, old_trn);
  thd_proc_info(thd, old_proc_info);
  thd_progress_end(thd);
  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}


/*
  Analyze the key distribution in the table
  As the table may be only locked for read, we have to take into account that
  two threads may do an analyze at the same time!

  Returns
    HA_ADMIN_INTERNAL_ERROR  the HA_CHECK descriptor could not be allocated
    HA_ADMIN_ALREADY_DONE    STATE_NOT_ANALYZED is not set for the table
    HA_ADMIN_CORRUPT         maria_chk_key() or the state update failed
    HA_ADMIN_OK              otherwise
*/

int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
{
  int error= 0;
  HA_CHECK *param_ptr= (HA_CHECK*) thd->alloc(sizeof(HA_CHECK));
  MARIA_SHARE *share= file->s;
  const char *old_proc_info;

  /*
    Test the pointer itself. A test like '!&param' on a reference can
    never detect a failed allocation.
  */
  if (!param_ptr)
    return HA_ADMIN_INTERNAL_ERROR;

  HA_CHECK &param= *param_ptr;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "analyze";
  param.db_name= table->s->db.str;
  param.table_name= table->alias.c_ptr();
  param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
                   T_DONT_CHECK_CHECKSUM);
  param.using_global_keycache= 1;
  param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);

  if (!(share->state.changed & STATE_NOT_ANALYZED))
    return HA_ADMIN_ALREADY_DONE;

  old_proc_info= thd_proc_info(thd, "Scanning");
  thd_progress_init(thd, 1);
  error= maria_chk_key(&param, file);
  if (!error)
  {
    pthread_mutex_lock(&share->intern_lock);
    error= maria_update_state_info(&param, file, UPDATE_STAT);
    pthread_mutex_unlock(&share->intern_lock);
  }
  else if (!maria_is_crashed(file) && !thd->killed)
    maria_mark_crashed(file);
  thd_proc_info(thd, old_proc_info);
  thd_progress_end(thd);
  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}


/*
  Copy the data file of the table from the backup directory
  (thd->lex->backup_dir) over the current data file and then run
  repair() on the table.

  Returns
    HA_ADMIN_INVALID  the source path could not be formatted
    HA_ADMIN_FAILED   my_copy() failed (an error message is printed)
    otherwise the result of repair()
*/
int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt)
{
  HA_CHECK_OPT repair_opt;
  char *backup_dir= thd->lex->backup_dir;
  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
  char table_name[FN_REFLEN];
  DBUG_ENTER("restore");

  VOID(tablename_to_filename(table->s->table_name.str, table_name,
                             sizeof(table_name)));

  if (fn_format_relative_to_data_home(src_path, table_name, backup_dir,
                                      MARIA_NAME_DEXT))
    DBUG_RETURN(HA_ADMIN_INVALID);

  strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
  if (!my_copy(src_path, dst_path, MYF(MY_WME)))
  {
    /* The data file is in place; rebuild the rest with a quick repair */
    repair_opt.init();
    repair_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK;
    DBUG_RETURN(repair(thd, &repair_opt));
  }

  /*
    The copy failed; report it.
    Don't allocate param on stack here as this may be huge and it's
    also allocated by repair()
  */
  HA_CHECK *param= (HA_CHECK*) my_malloc(sizeof(*param),
                                         MYF(MY_WME | MY_FAE));
  if (!param)
    DBUG_RETURN(HA_ADMIN_FAILED);
  maria_chk_init(param);
  param->thd= thd;
  param->op_name= "restore";
  param->db_name= table->s->db.str;
  param->table_name= table->s->table_name.str;
  param->testflag= 0;
  _ma_check_print_error(param, "Failed in my_copy (Error %d)", my_errno);
  my_free(param, MYF(0));
  DBUG_RETURN(HA_ADMIN_FAILED);
}


/*
  Copy the .frm file and the data file of the table to the backup
  directory (thd->lex->backup_dir). The data file is flushed with
  _ma_flush_table_files() before it is copied.

  Returns
    HA_ADMIN_OK              both files were copied
    HA_ADMIN_INVALID         a destination path could not be formatted
    HA_ADMIN_FAILED          the flush or one of the copies failed
    HA_ADMIN_INTERNAL_ERROR  the HA_CHECK descriptor needed to report an
                             error could not be allocated
*/
int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt)
{
  char *backup_dir= thd->lex->backup_dir;
  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
  char table_name[FN_REFLEN];
  int error;
  const char *errmsg;
  DBUG_ENTER("ha_maria::backup");

  VOID(tablename_to_filename(table->s->table_name.str, table_name,
                             sizeof(table_name)));

  if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
                                      reg_ext))
  {
    errmsg= "Failed in fn_format() for .frm file (errno: %d)";
    error= HA_ADMIN_INVALID;
    goto err;
  }

  strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS);
  if (my_copy(src_path, dst_path,
              MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
  {
    error= HA_ADMIN_FAILED;
    errmsg= "Failed copying .frm file (errno: %d)";
    goto err;
  }

  /* Change extension */
  if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
                                      MARIA_NAME_DEXT))
  {
    errmsg= "Failed in fn_format() for .MYD file (errno: %d)";
    error= HA_ADMIN_INVALID;
    goto err;
  }

  strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
  if (_ma_flush_table_files(file, MARIA_FLUSH_DATA, FLUSH_FORCE_WRITE,
                            FLUSH_KEEP))
  {
    error= HA_ADMIN_FAILED;
    errmsg= "Failed in flush (Error %d)";
    goto err;
  }
  if (my_copy(src_path, dst_path,
              MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
  {
    errmsg= "Failed copying .MYD file (errno: %d)";
    error= HA_ADMIN_FAILED;
    goto err;
  }
  DBUG_RETURN(HA_ADMIN_OK);

err:
  {
    HA_CHECK *param_ptr= (HA_CHECK*) thd->alloc(sizeof(HA_CHECK));
    /*
      Test the pointer itself, and leave through DBUG_RETURN so that the
      DBUG_ENTER above is always matched.
    */
    if (!param_ptr)
      DBUG_RETURN(HA_ADMIN_INTERNAL_ERROR);

    HA_CHECK &param= *param_ptr;

    maria_chk_init(&param);
    param.thd= thd;
    param.op_name= "backup";
    param.db_name= table->s->db.str;
    param.table_name= table->s->table_name.str;
    param.testflag= 0;
    _ma_check_print_error(&param, errmsg, my_errno);
    DBUG_RETURN(error);
  }
}


/*
  Repair the table.

  repair(thd, &param, 0) is run and, as long as it fails with
  param.retry_repair set, retried with less restrictive flags: first
  without T_QUICK (adding T_SAFE_REPAIR), then with T_REP instead of
  T_REP_BY_SORT.

  Returns HA_ADMIN_INTERNAL_ERROR if there is no open table or the
  HA_CHECK descriptor could not be allocated, otherwise the result of the
  last repair attempt.
*/
int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
{
  int error;
  HA_CHECK *param_ptr= (HA_CHECK*) thd->alloc(sizeof(HA_CHECK));
  ha_rows start_records;
  const char *old_proc_info;

  /*
    Test the pointer itself. A test like '!&param' on a reference can
    never detect a failed allocation.
  */
  if (!file || !param_ptr)
    return HA_ADMIN_INTERNAL_ERROR;

  HA_CHECK &param= *param_ptr;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "repair";
  param.testflag= ((check_opt->flags & ~(T_EXTEND)) |
                   T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
                   (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
  param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
  param.backup_time= check_opt->start_time;
  start_records= file->state->records;
  old_proc_info= thd_proc_info(thd, "Checking table");
  thd_progress_init(thd, 1);
  while ((error= repair(thd, &param, 0)) && param.retry_repair)
  {
    param.retry_repair= 0;
    if (test_all_bits(param.testflag,
                      (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
    {
      param.testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
      /* Ensure we don't lose any rows when retrying without quick */
      param.testflag|= T_SAFE_REPAIR;
      if (thd->vio_ok())
        _ma_check_print_info(&param, "Retrying repair without quick");
      else
        sql_print_information("Retrying repair of: '%s' without quick",
                              table->s->path.str);
      continue;
    }
    param.testflag &= ~T_QUICK;
    if ((param.testflag & T_REP_BY_SORT))
    {
      param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP;
      sql_print_information("Retrying repair of: '%s' with keycache",
                            table->s->path.str);
      continue;
    }
    break;
  }
  /* Log when the repair changed the number of rows in the table */
  if (!error && start_records != file->state->records &&
      !(check_opt->flags & T_VERY_SILENT))
  {
    char llbuff[22], llbuff2[22];
    sql_print_information("Found %s of %s rows when repairing '%s'",
                          llstr(file->state->records, llbuff),
                          llstr(start_records, llbuff2),
                          table->s->path.str);
  }
  thd_proc_info(thd, old_proc_info);
  thd_progress_end(thd);
  return error;
}

1461 1462 1463
/*
  Run maria_zerofill() on the table and, if it succeeds, update the state
  info of the table.

  Returns HA_ADMIN_INTERNAL_ERROR if there is no open table or the
  HA_CHECK descriptor could not be allocated, otherwise the result of
  maria_zerofill().
*/
int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
{
  int error;
  HA_CHECK *param_ptr= (HA_CHECK*) thd->alloc(sizeof(HA_CHECK));
  TRN *old_trn;

  /*
    Test the pointers before anything is read through them. A test like
    '!&param' on a reference can never detect a failed allocation.
  */
  if (!file || !param_ptr)
    return HA_ADMIN_INTERNAL_ERROR;

  HA_CHECK &param= *param_ptr;
  MARIA_SHARE *share= file->s;

  old_trn= file->trn;
  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "zerofill";
  param.testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
  param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
  error=maria_zerofill(&param, file, share->open_file_name.str);

  /* Reset trn, that may have been set by repair */
  _ma_set_trn_for_table(file, old_trn);

  if (!error)
  {
    pthread_mutex_lock(&share->intern_lock);
    maria_update_state_info(&param, file, UPDATE_TIME | UPDATE_OPEN_COUNT);
    pthread_mutex_unlock(&share->intern_lock);
  }
  return error;
}

1491 1492 1493
/*
  Optimize the table by running repair(thd, &param, 1) with
  T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX. If that fails with
  param.retry_repair set, a warning is logged and a plain repair without
  T_REP_BY_SORT is run instead.

  Returns HA_ADMIN_INTERNAL_ERROR if there is no open table or the
  HA_CHECK descriptor could not be allocated, otherwise the result of the
  last repair attempt.
*/
int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
{
  int error;
  HA_CHECK *param_ptr= (HA_CHECK*) thd->alloc(sizeof(HA_CHECK));

  /*
    Test the pointer itself. A test like '!&param' on a reference can
    never detect a failed allocation.
  */
  if (!file || !param_ptr)
    return HA_ADMIN_INTERNAL_ERROR;

  HA_CHECK &param= *param_ptr;

  maria_chk_init(&param);
  param.thd= thd;
  param.op_name= "optimize";
  param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
                   T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
  param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
  thd_progress_init(thd, 1);
  if ((error= repair(thd, &param, 1)) && param.retry_repair)
  {
    sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
                      my_errno, param.db_name, param.table_name);
    param.testflag &= ~T_REP_BY_SORT;
    error= repair(thd, &param, 0);
  }
  thd_progress_end(thd);
  return error;
}


1518
/*
  Common worker for repair, optimize and index re-creation.

  SYNOPSIS
    repair()
    thd           Current thread
    param         Initialized check/repair work area; testflag selects what
                  is done
    do_optimize   If set, the rows are only rebuilt when the table state
                  says they are not optimal

  RETURN
    HA_ADMIN_OK             something was done and it succeeded
    HA_ADMIN_ALREADY_DONE   nothing had to be done
    HA_ADMIN_FAILED         error (including failure to log the repair)
*/
int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
{
  int error= 0;
  ulonglong wanted_flags= param->testflag;
  bool optimize_done= !do_optimize;
  bool statistics_done= 0;
  const char *saved_proc_info= thd->proc_info;
  char table_file_name[FN_REFLEN];
  MARIA_SHARE *share= file->s;
  ha_rows rows_before= file->state->records;
  TRN *saved_trn= file->trn;
  DBUG_ENTER("ha_maria::repair");

  /*
    A previous failed repair may have closed the data file without being
    able to re-open it; dfile is then -1 and no further repair can be
    attempted on this handler. (Bug #25289)
  */
  if (file->dfile.file == -1)
  {
    sql_print_information("Retrying repair of: '%s' failed. "
                          "Please try REPAIR EXTENDED or aria_chk",
                          table->s->path.str);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }

  /*
    For a transactional table that currently runs without transactions,
    file->s->status is not up to date; repair_by_sort needs it to be.
  */
  if (share->base.born_transactional && !share->now_transactional)
    _ma_copy_nontrans_state_information(file);

  param->db_name= table->s->db.str;
  param->table_name= table->alias.c_ptr();
  param->tmpfile_createflag= O_RDWR | O_TRUNC;
  param->using_global_keycache= 1;
  param->thd= thd;
  param->tmpdir= &mysql_tmpdir_list;
  param->out_flag= 0;
  strmov(table_file_name, share->open_file_name.str);

  /* Under LOCK TABLES the table is already locked; otherwise lock it here */
  if (!thd->locked_tables &&
      maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
  {
    _ma_check_print_error(param, ER(ER_CANT_LOCK), my_errno);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }

  /* Decide whether the rows have to be rebuilt at all */
  bool rebuild_rows= !do_optimize;
  if (!rebuild_rows)
  {
    bool rows_not_optimal;
    if (share->data_file_type == BLOCK_RECORD)
      rows_not_optimal= (share->state.changed & STATE_NOT_OPTIMIZED_ROWS) != 0;
    else
      rows_not_optimal= (file->state->del ||
                         share->state.split != file->state->records);

    bool quick_permits= (!(param->testflag & T_QUICK) ||
                         (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
                                                  STATE_NOT_OPTIMIZED_ROWS)));
    rebuild_rows= rows_not_optimal && quick_permits;
  }

  if (rebuild_rows)
  {
    ulonglong key_map;
    if (wanted_flags & T_CREATE_MISSING_KEYS)
      key_map= maria_get_mask_all_keys_active(share->base.keys);
    else
      key_map= share->state.key_map;

    ulonglong flags_on_entry= param->testflag;

    if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
        (wanted_flags & T_REP_BY_SORT))
    {
      /* Sort-based repair produces key statistics as a by-product */
      wanted_flags|= T_STATISTICS;
      param->testflag|= T_STATISTICS;
      statistics_done= 1;

      /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
      if (THDVAR(thd,repair_threads) > 1 &&
          share->data_file_type != BLOCK_RECORD)
      {
        char stage[40];
        /* TODO: respect maria_repair_threads variable */
        my_snprintf(stage, sizeof(stage), "Repair with %d threads",
                    my_count_bits(key_map));
        thd_proc_info(thd, stage);
        param->testflag|= T_REP_PARALLEL;
        error= maria_repair_parallel(param, file, table_file_name,
                                     test(param->testflag & T_QUICK));
        /* proc_info must not keep pointing at the local buffer */
        thd_proc_info(thd, "Repair done");
      }
      else
      {
        thd_proc_info(thd, "Repair by sorting");
        param->testflag|= T_REP_BY_SORT;
        error= maria_repair_by_sort(param, file, table_file_name,
                                    test(param->testflag & T_QUICK));
      }
    }
    else
    {
      thd_proc_info(thd, "Repair with keycache");
      param->testflag&= ~(T_REP_BY_SORT | T_REP_PARALLEL);
      error= maria_repair(param, file, table_file_name,
                          test(param->testflag & T_QUICK));
    }
    /* Restore the caller's flags, keeping only a retry request */
    param->testflag= flags_on_entry |
                     (param->testflag & T_RETRY_WITHOUT_QUICK);
    optimize_done= 1;
  }

  if (!error)
  {
    if ((wanted_flags & T_SORT_INDEX) &&
        (share->state.changed & STATE_NOT_SORTED_PAGES))
    {
      optimize_done= 1;
      thd_proc_info(thd, "Sorting index");
      error= maria_sort_index(param, file, table_file_name);
    }
    if (!statistics_done && (wanted_flags & T_STATISTICS))
    {
      if (!(share->state.changed & STATE_NOT_ANALYZED))
        wanted_flags&= ~T_STATISTICS;           // Don't update statistics
      else
      {
        optimize_done= 1;
        thd_proc_info(thd, "Analyzing");
        error= maria_chk_key(param, file);
      }
    }
  }

  thd_proc_info(thd, "Saving state");
  pthread_mutex_lock(&share->intern_lock);
  if (error)
  {
    maria_mark_crashed_on_repair(file);
    file->update|= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    maria_update_state_info(param, file, 0);
  }
  else
  {
    if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
    {
      DBUG_PRINT("info", ("Reseting crashed state"));
      share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
                               STATE_IN_REPAIR);
      file->update|= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    }
    /*
      Repair updates share->state.state; bring file->state in line with it
    */
    if (file->state != &share->state.state)
      *file->state= share->state.state;
    if (share->base.auto_key)
      _ma_update_auto_increment_key(param, file, 1);
    if (optimize_done)
    {
      uint update_what= UPDATE_TIME | UPDATE_OPEN_COUNT;
      if (wanted_flags & T_STATISTICS)
        update_what|= UPDATE_STAT;
      error= maria_update_state_info(param, file, update_what);
    }
    info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
         HA_STATUS_CONST, 0);
    if (rows_before != file->state->records &&
        !(param->testflag & T_VERY_SILENT))
    {
      char old_count[22], new_count[22];
      _ma_check_print_warning(param, "Number of rows changed from %s to %s",
                              llstr(rows_before, old_count),
                              llstr(file->state->records, new_count));
      /* Abort if warning was converted to error */
      if (table->in_use->is_error())
        error= 1;
    }
  }
  pthread_mutex_unlock(&share->intern_lock);

  thd_proc_info(thd, saved_proc_info);
  thd_progress_end(thd);                        // Mark done
  if (!thd->locked_tables)
    maria_lock_database(file, F_UNLCK);

  /* Reset trn, that may have been set by repair */
  _ma_set_trn_for_table(file, saved_trn);

  /* Map the outcome to an admin code; a done repair must also be logged */
  if (error)
    error= HA_ADMIN_FAILED;
  else if (!optimize_done)
    error= HA_ADMIN_ALREADY_DONE;
  else if (write_log_record_for_repair(param, file))
    error= HA_ADMIN_FAILED;
  else
    error= HA_ADMIN_OK;
  DBUG_RETURN(error);
}


/*
  Assign table indexes to a specific key cache.

  NOTES
    The implementation is compiled out (#if 0): as built, this method
    always returns HA_ADMIN_NOT_IMPLEMENTED.

  RETURN (disabled implementation)
    HA_ADMIN_OK              ok
    HA_ADMIN_FAILED          index hints could not be processed
    HA_ADMIN_CORRUPT         maria_assign_to_pagecache() failed
    HA_ADMIN_INTERNAL_ERROR  out of memory while reporting the error
*/

int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
{
#if 0 && NOT_IMPLEMENTED
  PAGECACHE *new_pagecache= check_opt->pagecache;
  /* Function scope: errmsg points into it until the message is printed */
  char buf[STRING_BUFFER_USUAL_SIZE];
  const char *errmsg= 0;
  int error= HA_ADMIN_OK;
  ulonglong map;
  TABLE_LIST *table_list= table->pos_in_table_list;
  DBUG_ENTER("ha_maria::assign_to_keycache");

  table->keys_in_use_for_query.clear_all();

  if (table_list->process_index_hints(table))
    DBUG_RETURN(HA_ADMIN_FAILED);
  /* Default to all keys; narrow down only if the hints named some */
  map= ~(ulonglong) 0;
  if (!table->keys_in_use_for_query.is_clear_all())
    map= table->keys_in_use_for_query.to_ulonglong();

  if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
  {
    my_snprintf(buf, sizeof(buf),
                "Failed to flush to index file (errno: %d)", error);
    errmsg= buf;
    error= HA_ADMIN_CORRUPT;
  }

  if (error != HA_ADMIN_OK)
  {
    /* Send error to user */
    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof(*param));
    if (!param)
      DBUG_RETURN(HA_ADMIN_INTERNAL_ERROR);     // Keep DBUG stack balanced

    maria_chk_init(param);
    param->thd= thd;
    param->op_name= "assign_to_keycache";
    param->db_name= table->s->db.str;
    param->table_name= table->s->table_name.str;
    param->testflag= 0;
    /* Never pass a built message as the format string */
    _ma_check_print_error(param, "%s", errmsg);
  }
  DBUG_RETURN(error);
#else
  return  HA_ADMIN_NOT_IMPLEMENTED;
#endif
}


/*
  Preload pages of the index file for a table into the key cache.

  SYNOPSIS
    preload_keys()
    thd         Current thread (supplies preload_buff_size and memory)
    check_opt   Not used by this implementation

  RETURN
    HA_ADMIN_OK              ok
    HA_ADMIN_FAILED          index hints could not be processed or
                             maria_preload() failed (error is reported)
    HA_ADMIN_INTERNAL_ERROR  out of memory while reporting the error
*/

int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
{
  ulonglong map;
  TABLE_LIST *table_list= table->pos_in_table_list;

  DBUG_ENTER("ha_maria::preload_keys");

  table->keys_in_use_for_query.clear_all();

  if (table_list->process_index_hints(table))
    DBUG_RETURN(HA_ADMIN_FAILED);

  /* Default to all keys; narrow down only if the hints named some */
  map= ~(ulonglong) 0;
  if (!table->keys_in_use_for_query.is_clear_all())
    map= table->keys_in_use_for_query.to_ulonglong();

  maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
              (void*) &thd->variables.preload_buff_size);

  int error;

  if ((error= maria_preload(file, map, table_list->ignore_leaves)))
  {
    char buf[MYSQL_ERRMSG_SIZE+20];
    const char *errmsg;

    switch (error) {
    case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
      errmsg= "Indexes use different block sizes";
      break;
    case HA_ERR_OUT_OF_MEM:
      errmsg= "Failed to allocate buffer";
      break;
    default:
      my_snprintf(buf, sizeof(buf),
                  "Failed to read from index file (errno: %d)", my_errno);
      errmsg= buf;
    }

    /*
      Keep the work area as a pointer: a NULL test on the address of a
      reference may be optimized away by the compiler.
    */
    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof(*param));
    if (!param)
      DBUG_RETURN(HA_ADMIN_INTERNAL_ERROR);     // Keep DBUG stack balanced

    maria_chk_init(param);
    param->thd= thd;
    param->op_name= "preload_keys";
    param->db_name= table->s->db.str;
    param->table_name= table->s->table_name.str;
    param->testflag= 0;
    _ma_check_print_error(param, "%s", errmsg);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }
  DBUG_RETURN(HA_ADMIN_OK);
}


/*
  Disable indexes, optionally in a persistent way.

  SYNOPSIS
    disable_indexes()
    mode        One of:
                HA_KEY_SWITCH_NONUNIQ      disable all non-unique keys
                HA_KEY_SWITCH_ALL          disable all keys
                HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
                HA_KEY_SWITCH_ALL_SAVE     dis. all keys and make persistent

  IMPLEMENTATION
    Only HA_KEY_SWITCH_ALL and HA_KEY_SWITCH_NONUNIQ_SAVE are handled;
    HA_KEY_SWITCH_NONUNIQ and HA_KEY_SWITCH_ALL_SAVE are not implemented.

  RETURN
    0                     ok
    HA_ERR_WRONG_COMMAND  mode not implemented
    other                 error from maria_disable_indexes()
*/

int ha_maria::disable_indexes(uint mode)
{
  int result= HA_ERR_WRONG_COMMAND;             // Mode not implemented

  switch (mode) {
  case HA_KEY_SWITCH_ALL:
    /* Let the storage engine switch the key map */
    result= maria_disable_indexes(file);
    break;
  case HA_KEY_SWITCH_NONUNIQ_SAVE:
    maria_extra(file, HA_EXTRA_NO_KEYS, 0);
    info(HA_STATUS_CONST);                      // Read new key info
    result= 0;
    break;
  default:
    break;
  }
  return result;
}


/*
  Enable indexes, making it persistent if requested.

  SYNOPSIS
    enable_indexes()
    mode        mode of operation:
                HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
                HA_KEY_SWITCH_ALL          enable all keys
                HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
                HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent

  DESCRIPTION
    Enable indexes, which might have been disabled by disable_indexes()
    before. The modes without _SAVE work only if both data and indexes are
    empty, since the MARIA repair would enable them persistently.
    To be sure in these cases, call handler::delete_all_rows() before.

    HA_KEY_SWITCH_NONUNIQ_SAVE re-creates the indexes through repair():
    first by sort, and if that fails and a retry is requested, with the
    standard repair method.

  IMPLEMENTATION
    HA_KEY_SWITCH_NONUNIQ       is not implemented.
    HA_KEY_SWITCH_ALL_SAVE      is not implemented.

  RETURN
    0  ok
    !=0  Error, among others:
    HA_ERR_CRASHED  data or index is non-empty. Delete all rows and retry.
    HA_ERR_WRONG_COMMAND  mode not implemented.
    HA_ADMIN_INTERNAL_ERROR  out of memory for the repair work area.
*/

int ha_maria::enable_indexes(uint mode)
{
  int error;
  DBUG_PRINT("info", ("ha_maria::enable_indexes mode: %d", mode));
  if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
  {
    /* All indexes are enabled already. */
    return 0;
  }

  if (mode == HA_KEY_SWITCH_ALL)
  {
    error= maria_enable_indexes(file);
    /*
       Do not try to repair on error,
       as this could make the enabled state persistent,
       but mode==HA_KEY_SWITCH_ALL forbids it.
    */
  }
  else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
  {
    THD *thd= table->in_use;
    /*
      Keep the work area as a pointer: a NULL test on the address of a
      reference may be optimized away by the compiler.
    */
    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof(*param));
    if (!param)
      return HA_ADMIN_INTERNAL_ERROR;

    const char *save_proc_info= thd_proc_info(thd, "Creating index");

    maria_chk_init(param);
    param->op_name= "recreating_index";
    param->testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
                      T_CREATE_MISSING_KEYS | T_SAFE_REPAIR);
    if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
    {
      bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
      /*
        Don't bump create_rename_lsn, because UNDO_BULK_INSERT
        should not be skipped in case of crash during repair.
      */
      param->testflag|= T_NO_CREATE_RENAME_LSN;
    }
    param->myf_rw &= ~MY_WAIT_IF_FULL;
    param->sort_buffer_length= THDVAR(thd,sort_buffer_size);
    param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
    param->tmpdir= &mysql_tmpdir_list;
    if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param->retry_repair)
    {
      sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, "
                        "retrying",
                        my_errno, param->db_name, param->table_name);
      /* This should never fail normally */
      DBUG_ASSERT(thd->killed != 0);
      /* Repairing by sort failed. Now try standard repair method. */
      param->testflag &= ~T_REP_BY_SORT;
      error= (repair(thd, param, 0) != HA_ADMIN_OK);
      /*
        If the standard repair succeeded, clear all error messages which
        might have been set by the first repair. They can still be seen
        with SHOW WARNINGS then.
      */
      if (!error)
        thd->clear_error();
    }
    info(HA_STATUS_CONST);
    thd_proc_info(thd, save_proc_info);
  }
  else
  {
    /* mode not implemented */
    error= HA_ERR_WRONG_COMMAND;
  }
  /* Debug-build hooks used by tests: flush the log / simulate a crash */
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_enable_index",
                  {
                    DBUG_PRINT("maria_crash_enable_index", ("now"));
                    DBUG_ABORT();
                  });
  return error;
}


/*
  Tell whether the indexes of the table are disabled.

  SYNOPSIS
    indexes_are_disabled()
      no parameters

  RETURN
    0  indexes are not disabled
    1  all indexes are disabled
   [2  non-unique indexes are disabled - NOT YET IMPLEMENTED]
*/

int ha_maria::indexes_are_disabled(void)
{
  /* The answer comes straight from the storage engine */
  const int disabled= maria_indexes_are_disabled(file);
  return disabled;
}


/*
  Prepare for an insert of many rows, e.g. by disabling indexes (if they
  can be re-created fast) or by activating bulk-insert optimizations.

  SYNOPSIS
    start_bulk_insert(rows)
    rows        Rows to be inserted
                0 if we don't know

  NOTICE
    Do not forget to call end_bulk_insert() later!
*/

void ha_maria::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("ha_maria::start_bulk_insert");
  THD *thd= table->in_use;
  MARIA_SHARE *share= file->s;
  DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));

  /* A row cache is only worthwhile for an unknown or large row count */
  if (rows == 0 || rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE)
  {
    ulonglong cache_size= thd->variables.read_buff_size;
    ulonglong cache_arg;

    /* With a known row count, don't use more cache than the rows need */
    if (rows && file->state->records)
    {
      MARIA_INFO maria_info;
      maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
      set_if_smaller(cache_size, maria_info.mean_reclength * rows);
    }
    else if (rows && table->s->avg_row_length)
      set_if_smaller(cache_size, (size_t) (table->s->avg_row_length * rows));

    /*
      NOTE(review): the value is narrowed to ulong but stored in and passed
      as a ulonglong; confirm which width maria_extra() reads for
      HA_EXTRA_WRITE_CACHE.
    */
    cache_arg= (ulong) cache_size;              // Safe because of limits
    maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &cache_arg);
  }

  can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
                                                share->base.keys));
  bulk_insert_single_undo= BULK_INSERT_NONE;

  /* In safe mode no further optimization is attempted */
  if (specialflag & SPECIAL_SAFE_MODE)
    DBUG_VOID_RETURN;

  /*
     Indexes are only disabled if the table was empty and a lot of rows are
     coming: for a few rows it is slower, and key statistics should not be
     based on only a few rows.
     Rebuilding the index file requires an exclusive lock, so it is not
     done when versioning is on (see how ha_maria::store_lock() tries to
     predict repair). The index can be repaired only with an exclusive
     (TL_WRITE) lock or inside an ALTER TABLE, where lock_type == TL_UNLOCK.

     Emptiness must not be judged from the record count at our
     transaction's start alone (rows added since then would be wrongly
     destroyed), hence share->state.state.records is checked. As a safety
     net file->state->records is still tested too, because it is uncertain
     what repair does if the two states disagree.
  */
  const bool table_is_empty= (file->state->records == 0 &&
                              share->state.state.records == 0);
  const bool enough_rows_for_rebuild=
    (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES);
  const bool lock_allows_rebuild= (file->lock.type == TL_WRITE ||
                                   file->lock.type == TL_UNLOCK);

  if (table_is_empty && can_enable_indexes && enough_rows_for_rebuild &&
      lock_allows_rebuild)
  {
    /**
       @todo for a single-row INSERT SELECT, we will go into repair, which
       is more costly (flushes, syncs) than a row write.
    */
    if (!(file->open_flags & HA_OPEN_INTERNAL_TABLE))
      maria_disable_non_unique_index(file, rows);
    else
    {
      /* Internal table; If we get a duplicate something is very wrong */
      file->update|= HA_STATE_CHANGED;
      maria_clear_all_keys_active(file->s->state.key_map);
    }

    if (share->now_transactional)
    {
      bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
      write_log_record_for_bulk_insert(file);
      _ma_tmp_disable_logging_for_table(file, TRUE);
      /*
        Pages now in the page cache have type PAGECACHE_LSN_PAGE and may
        not be overwritten with PAGECACHE_PLAIN_PAGE, so they are thrown
        away. No data is lost: the UNDO just written and forced will empty
        the table if we crash. The coming unique-key insertions need a
        proper index, so the corrupted on-disk index file cannot be left
        as is; it is truncated.
      */
      maria_delete_all_rows(file);
    }
  }
  else if (!file->bulk_insert &&
           (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
  {
    maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
  }
  DBUG_VOID_RETURN;
}


/*
  end special bulk-insert optimizations,
  which have been activated by start_bulk_insert().

  SYNOPSIS
    end_bulk_insert()
    no arguments

  RETURN
    0     OK
    != 0  Error
*/

2116
int ha_maria::end_bulk_insert()
2117
{
unknown's avatar
unknown committed
2118 2119
  int err;
  DBUG_ENTER("ha_maria::end_bulk_insert");
2120
  maria_end_bulk_insert(file);
unknown's avatar
unknown committed
2121 2122
  if ((err= maria_extra(file, HA_EXTRA_NO_CACHE, 0)))
    goto end;
2123
  if (can_enable_indexes && !file->s->deleting)
unknown's avatar
unknown committed
2124 2125
    err= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
end:
2126
  if (bulk_insert_single_undo != BULK_INSERT_NONE)
unknown's avatar
unknown committed
2127 2128
  {
    DBUG_ASSERT(can_enable_indexes);
2129 2130 2131 2132 2133 2134 2135 2136
    /*
      Table was transactional just before start_bulk_insert().
      No need to flush pages if we did a repair (which already flushed).
    */
    err|=
      _ma_reenable_logging_for_table(file,
                                     bulk_insert_single_undo ==
                                     BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR);
unknown's avatar
unknown committed
2137 2138
  }
  DBUG_RETURN(err);
2139 2140 2141 2142 2143
}


/*
  Check the table and repair it automatically if needed.

  A table that is only marked as moved (not crashed) is zerofilled first.
  Otherwise, if maria_recover_options is set, the table is checked (unless
  already marked crashed) and repaired with flags derived from
  maria_recover_options.

  RETURN
    0  ok
    1  error (or recovery is not enabled)
*/
bool ha_maria::check_and_repair(THD *thd)
{
  int result, needs_repair;
  LEX_STRING saved_query;
  HA_CHECK_OPT check_opt;
  DBUG_ENTER("ha_maria::check_and_repair");

  check_opt.init();
  check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;

  result= 1;
  if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
      STATE_MOVED)
  {
    sql_print_information("Zerofilling moved table:  '%s'",
                          table->s->path.str);
    result= zerofill(thd, &check_opt);
    if (!result)
      DBUG_RETURN(0);
  }

  /*
    Getting here means the table is crashed; without
    maria_recover_options there is no automatic repair.
  */
  if (!maria_recover_options)
    DBUG_RETURN(result);

  result= 0;
  /* Quick mode is not used when there are deleted rows */
  if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
    check_opt.flags|= T_QUICK;

  /* Temporarily replace the thread's query string with the table name */
  saved_query= thd->query_string;
  pthread_mutex_lock(&LOCK_thread_count);
  thd->query_string= table->s->table_name;
  pthread_mutex_unlock(&LOCK_thread_count);

  needs_repair= maria_is_crashed(file);
  if (!needs_repair)
  {
    sql_print_warning("Checking table:   '%s'", table->s->path.str);
    needs_repair= check(thd, &check_opt);
  }

  if (needs_repair)
  {
    sql_print_warning("Recovering table: '%s'", table->s->path.str);
    check_opt.flags= T_AUTO_REPAIR;
    if (maria_recover_options & HA_RECOVER_BACKUP)
      check_opt.flags|= T_BACKUP_DATA;
    if (!(maria_recover_options & HA_RECOVER_FORCE))
      check_opt.flags|= T_SAFE_REPAIR;
    if (repair(thd, &check_opt))
      result= 1;
  }

  pthread_mutex_lock(&LOCK_thread_count);
  thd->query_string= saved_query;
  pthread_mutex_unlock(&LOCK_thread_count);
  DBUG_RETURN(result);
}


bool ha_maria::is_crashed() const
{
Michael Widenius's avatar
Michael Widenius committed
2204
  return (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED) ||
2205 2206 2207
          (my_disable_locking && file->s->state.open_count));
}

2208 2209 2210 2211 2212 2213 2214 2215
/*
  Guard for row operations that are not supported under a
  TL_WRITE_CONCURRENT_INSERT lock: raises ER_CHECK_NOT_IMPLEMENTED with
  'msg' and makes the ENCLOSING function return 1.
  Note the hidden 'return': only usable in member functions that have
  'file' in scope and return an integer error code.
*/
#define CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING(msg) \
  do { \
    if (file->lock.type == TL_WRITE_CONCURRENT_INSERT) \
    { \
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), msg); \
      return 1; \
    } \
  } while(0)
2216

unknown's avatar
unknown committed
2217
int ha_maria::update_row(const uchar * old_data, uchar * new_data)
2218
{
2219
  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
2220 2221 2222 2223 2224 2225
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
    table->timestamp_field->set_time();
  return maria_update(file, old_data, new_data);
}


unknown's avatar
unknown committed
2226
int ha_maria::delete_row(const uchar * buf)
2227
{
2228
  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
2229 2230 2231
  return maria_delete(file, buf);
}

2232 2233
C_MODE_START

Sergey Petrunya's avatar
Sergey Petrunya committed
2234
/*
  Index condition pushdown callback registered with the engine.

  arg is the ha_maria handler. Returns ICP_OUT_OF_RANGE when the current
  key is past the handler's end_range, otherwise ICP_MATCH/ICP_NO_MATCH
  according to the pushed index condition.
*/
ICP_RESULT index_cond_func_maria(void *arg)
{
  ha_maria *handler= (ha_maria*) arg;

  /* Past the end of the range: caller should return HA_ERR_END_OF_FILE */
  if (handler->end_range &&
      handler->compare_key2(handler->end_range) > 0)
    return ICP_OUT_OF_RANGE;

  if (handler->pushed_idx_cond->val_int())
    return ICP_MATCH;
  return ICP_NO_MATCH;
}

C_MODE_END
2246

2247 2248 2249
/*
  Read a row through the active index by key; table->status records
  whether a row was found.
*/
int ha_maria::index_read_map(uchar * buf, const uchar * key,
			     key_part_map keypart_map,
			     enum ha_rkey_function find_flag)
{
  DBUG_ASSERT(inited == INDEX);
  const int res= maria_rkey(file, buf, active_index, key, keypart_map,
                            find_flag);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


2258 2259 2260
/*
  Read a row by key through the given index, without a prior index_init().
  The pushed index condition is installed for the duration of the read if
  it belongs to this index, and always removed afterwards.
*/
int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
				 key_part_map keypart_map,
				 enum ha_rkey_function find_flag)
{
  const bool use_pushed_cond= (index == pushed_idx_cond_keyno);
  if (use_pushed_cond)
    ma_set_index_cond_func(file, index_cond_func_maria, this);

  const int res= maria_rkey(file, buf, index, key, keypart_map, find_flag);

  ma_set_index_cond_func(file, NULL, 0);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


2275 2276
/*
  Read the last row matching a key prefix (HA_READ_PREFIX_LAST) through
  the active index.
*/
int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
				  key_part_map keypart_map)
{
  DBUG_ENTER("ha_maria::index_read_last_map");
  DBUG_ASSERT(inited == INDEX);
  const int res= maria_rkey(file, buf, active_index, key, keypart_map,
                            HA_READ_PREFIX_LAST);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  DBUG_RETURN(res);
}


unknown's avatar
unknown committed
2287
/* Read the next row in the order of the active index. */
int ha_maria::index_next(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  const int res= maria_rnext(file, buf, active_index);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


unknown's avatar
unknown committed
2296
/* Read the previous row in the order of the active index. */
int ha_maria::index_prev(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  const int res= maria_rprev(file, buf, active_index);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


unknown's avatar
unknown committed
2305
/* Read the first row in the order of the active index. */
int ha_maria::index_first(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  const int res= maria_rfirst(file, buf, active_index);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


unknown's avatar
unknown committed
2314
/* Read the last row in the order of the active index. */
int ha_maria::index_last(uchar * buf)
{
  DBUG_ASSERT(inited == INDEX);
  const int res= maria_rlast(file, buf, active_index);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


unknown's avatar
unknown committed
2323 2324
int ha_maria::index_next_same(uchar * buf,
                              const uchar *key __attribute__ ((unused)),
2325 2326
                              uint length __attribute__ ((unused)))
{
2327
  int error;
2328
  DBUG_ASSERT(inited == INDEX);
2329 2330 2331 2332 2333 2334 2335 2336
  /*
    TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
    happens
  */
  do
  {
    error= maria_rnext_same(file,buf);
  } while (error == HA_ERR_RECORD_DELETED);
2337 2338 2339 2340 2341
  table->status= error ? STATUS_NOT_FOUND : 0;
  return error;
}


2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360
/*
  Record idx as the active index and, when an index condition was pushed
  for exactly that key, install index_cond_func_maria as the engine's
  index condition callback (with this handler as its argument).

  sorted is not used by this implementation. Always returns 0.
*/
int ha_maria::index_init(uint idx, bool sorted __attribute__ ((unused)))
{
  active_index= idx;
  if (pushed_idx_cond_keyno == idx)
    ma_set_index_cond_func(file, index_cond_func_maria, this);
  return 0;
}


/*
  Leave index mode: reset active_index to MAX_KEY, clear the range-check
  flag, remove the engine's index condition callback and close ds_mrr.
  Always returns 0.
*/
int ha_maria::index_end()
{
  active_index= MAX_KEY;
  in_range_check_pushed_down= FALSE;
  ma_set_index_cond_func(file, NULL, 0);        // No condition callback
  ds_mrr.dsmrr_close();
  return 0;
}


2361 2362 2363 2364
int ha_maria::rnd_init(bool scan)
{
  if (scan)
    return maria_scan_init(file);
2365
  return maria_reset(file);                        // Free buffers
2366 2367 2368
}


unknown's avatar
unknown committed
2369 2370
int ha_maria::rnd_end()
{
Igor Babaev's avatar
Igor Babaev committed
2371
  ds_mrr.dsmrr_close();
unknown's avatar
unknown committed
2372 2373 2374 2375 2376 2377
  /* Safe to call even if we don't have started a scan */
  maria_scan_end(file);
  return 0;
}


unknown's avatar
unknown committed
2378
/*
  Forwards to maria_scan(), passing buf through as the row buffer.

  table->status becomes STATUS_NOT_FOUND on any non-zero result and 0
  otherwise. Returns the value from maria_scan().
*/
int ha_maria::rnd_next(uchar *buf)
{
  const int res= maria_scan(file, buf);
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


2386
int ha_maria::remember_rnd_pos()
2387
{
2388
  return (*file->s->scan_remember_pos)(file, &remember_pos);
2389 2390 2391
}


2392 2393 2394 2395 2396 2397 2398 2399
/*
  Calls the share's scan_restore_pos hook with remember_pos, then returns
  the result of rnd_next(buf).
*/
int ha_maria::restart_rnd_next(uchar *buf)
{
  file->s->scan_restore_pos(file, remember_pos);
  return rnd_next(buf);
}


/*
  Decodes a row position from pos (ref_length bytes, via my_get_ptr())
  and passes it to maria_rrnd() together with buf.

  table->status becomes STATUS_NOT_FOUND on any non-zero result and 0
  otherwise. Returns the value from maria_rrnd().
*/
int ha_maria::rnd_pos(uchar *buf, uchar *pos)
{
  const int res= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
  if (res)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return res;
}


2407
void ha_maria::position(const uchar *record)
2408
{
2409 2410
  my_off_t row_position= maria_position(file);
  my_store_ptr(ref, ref_length, row_position);
2411 2412 2413
}


2414
/*
  Returns 1 when the handler has no table. Otherwise calls the
  two-argument info(), asking it to lock the table share only when the
  table is not a temporary one (tmp_table == NO_TMP_TABLE).
*/
int ha_maria::info(uint flag)
{
  if (!table)
    return 1;
  return info(flag, table->s->tmp_table == NO_TMP_TABLE);
}

/*
  Refresh handler information from the engine.

  maria_status() fills a MARIA_INFO for 'flag'; the parts selected by
  HA_STATUS_VARIABLE, HA_STATUS_CONST and HA_STATUS_ERRKEY are then copied
  into stats, the TABLE_SHARE and the handler members. When
  lock_table_share is set, share->mutex is held while the share's key
  information is updated. update_time and auto_increment_value are always
  refreshed. Always returns 0.
*/
int ha_maria::info(uint flag, my_bool lock_table_share)
{
  MARIA_INFO maria_info;
  char path_buff[FN_REFLEN];

  (void) maria_status(file, &maria_info, flag);

  if (flag & HA_STATUS_VARIABLE)
  {
    stats.records=           maria_info.records;
    stats.deleted=           maria_info.deleted;
    stats.data_file_length=  maria_info.data_file_length;
    stats.index_file_length= maria_info.index_file_length;
    stats.delete_length=     maria_info.delete_length;
    stats.check_time=        maria_info.check_time;
    stats.mean_rec_length=   maria_info.mean_reclength;
  }

  if (flag & HA_STATUS_CONST)
  {
    TABLE_SHARE *share= table->s;

    stats.max_data_file_length=  maria_info.max_data_file_length;
    stats.max_index_file_length= maria_info.max_index_file_length;
    stats.create_time= maria_info.create_time;
    ref_length= maria_info.reflength;
    share->db_options_in_use= maria_info.options;
    stats.block_size= maria_block_size;
    stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = max(sizeof(void *))

    /* Update share (under share->mutex if the caller asked for it) */
    if (lock_table_share)
      pthread_mutex_lock(&share->mutex);
    share->keys_in_use.set_prefix(share->keys);
    share->keys_in_use.intersect_extended(maria_info.key_map);
    share->keys_for_keyread.intersect(share->keys_in_use);
    share->db_record_offset= maria_info.record_offset;
    if (share->key_parts)
    {
      /* Round the engine's per-key-part estimates to the nearest integer */
      ulong *dst= table->key_info[0].rec_per_key;
      double *src= maria_info.rec_per_key;
      for (uint part= 0; part < share->key_parts; part++)
        dst[part]= (ulong) (src[part] + 0.5);
    }
    if (lock_table_share)
      pthread_mutex_unlock(&share->mutex);

    /*
      Let data_file_name and index_file_name point at the symlink value if
      the table is symlinked (i.e. the real name differs from the name
      generated from open_file_name); otherwise they stay 0.
    */
    data_file_name= index_file_name= 0;
    fn_format(path_buff, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
              MY_APPEND_EXT | MY_UNPACK_FILENAME);
    if (strcmp(path_buff, maria_info.data_file_name))
      data_file_name= maria_info.data_file_name;
    fn_format(path_buff, file->s->open_file_name.str, "", MARIA_NAME_IEXT,
              MY_APPEND_EXT | MY_UNPACK_FILENAME);
    if (strcmp(path_buff, maria_info.index_file_name))
      index_file_name= maria_info.index_file_name;
  }

  if (flag & HA_STATUS_ERRKEY)
  {
    errkey= maria_info.errkey;
    my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos);
  }

  /* Faster to always update, than to do it based on flag */
  stats.update_time= maria_info.update_time;
  stats.auto_increment_value= maria_info.auto_increment;

  return 0;
}


int ha_maria::extra(enum ha_extra_function operation)
{
2492 2493
  int tmp;
  TRN *old_trn= file->trn;
2494 2495
  if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
    return 0;
2496 2497 2498 2499
#ifdef NOT_USED
  if (operation == HA_EXTRA_MMAP && !opt_maria_use_mmap)
    return 0;
#endif
2500 2501 2502 2503 2504 2505

  /*
    We have to set file->trn here because in some cases we call
    extern_lock(F_UNLOCK) (which resets file->trn) followed by maria_close()
    without calling commit/rollback in between.  If file->trn is not set
    we can't remove file->share from the transaction list in the extra() call.
2506 2507 2508

    table->in_use is not set in the case this is a done as part of closefrm()
    as part of drop table.
2509 2510
  */

2511
  if (file->s->now_transactional && !file->trn && table->in_use && 
2512 2513 2514 2515
      (operation == HA_EXTRA_PREPARE_FOR_DROP ||
       operation == HA_EXTRA_PREPARE_FOR_RENAME))
  {
    THD *thd= table->in_use;
2516 2517
    TRN *trn= THD_TRN;
    _ma_set_trn_for_table(file, trn);
2518
  }
2519 2520 2521
  tmp= maria_extra(file, operation, 0);
  file->trn= old_trn;                           // Reset trn if was used
  return tmp;
2522 2523
}

2524 2525
int ha_maria::reset(void)
{
2526 2527
  pushed_idx_cond= NULL;
  pushed_idx_cond_keyno= MAX_KEY;
2528
  in_range_check_pushed_down= FALSE;
2529 2530
  ma_set_index_cond_func(file, NULL, 0);
  ds_mrr.dsmrr_close();
2531 2532 2533
  if (file->trn)
  {
    /* Next statement is a new statement. Ensure it's logged */
2534 2535
    trnman_set_flags(file->trn,
                     trnman_get_flags(file->trn) & ~TRN_STATE_INFO_LOGGED);
2536
  }
2537 2538
  return maria_reset(file);
}
2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551

/* To be used with WRITE_CACHE and EXTRA_CACHE */

/*
  Forward an extra() hint together with a cache size to maria_extra().
  HA_EXTRA_WRITE_CACHE is ignored (0 is returned) when SPECIAL_SAFE_MODE
  is set.
*/
int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  const bool write_cache_requested= (operation == HA_EXTRA_WRITE_CACHE);
  if ((specialflag & SPECIAL_SAFE_MODE) && write_cache_requested)
    return 0;
  return maria_extra(file, operation, (void*) &cache_size);
}


int ha_maria::delete_all_rows()
{
2552
  THD *thd= table->in_use;
2553
  (void) translog_log_debug_info(file->trn, LOGREC_DEBUG_INFO_QUERY,
2554
                                 (uchar*) thd->query(), thd->query_length());
unknown's avatar
unknown committed
2555 2556 2557 2558 2559 2560 2561 2562 2563 2564
  if (file->s->now_transactional &&
      ((table->in_use->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) ||
       table->in_use->locked_tables))
  {
    /*
      We are not in autocommit mode or user have done LOCK TABLES.
      We must do the delete row by row to be able to rollback the command
    */
    return HA_ERR_WRONG_COMMAND;
  }
2565 2566 2567 2568 2569 2570
  return maria_delete_all_rows(file);
}


int ha_maria::delete_table(const char *name)
{
2571 2572
  THD *thd= current_thd;
  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
2573
                                 (uchar*) thd->query(), thd->query_length());
2574 2575 2576
  return maria_delete_table(name);
}

2577

2578 2579 2580 2581
/* This is mainly for temporary tables, so no logging necessary */

void ha_maria::drop_table(const char *name)
{
2582
  DBUG_ASSERT(file->s->temporary);
2583
  (void) ha_close();
2584
  (void) maria_delete_table_files(name, 0);
2585 2586 2587
}


2588 2589
/*
  Lock or unlock the table for a statement.

  For tables born transactional this also maintains the per-transaction
  state: on lock it sets up file->state, counts cloned handlers and
  possibly disables logging; on unlock (F_UNLCK) it re-enables logging,
  detaches the table from its transaction and, once the last locked table
  is released, commits or rolls back as configured at build time.
  Returns 1 on failure in that bookkeeping, otherwise the result of
  maria_lock_database().
*/
int ha_maria::external_lock(THD *thd, int lock_type)
{
  DBUG_ENTER("ha_maria::external_lock");
  file->external_ref= (void*) table;            // For ma_killed()
  /*
    The test is on born_transactional, not now_transactional, because the
    latter may vary between lock and unlock and would confuse our
    reference counting.
    It is critical to skip non-transactional tables: user-visible temporary
    tables get an external_lock() when read/written for the first time, but
    no corresponding unlock (they just stay locked and are later dropped
    while locked); if a tmp table was transactional,
    "SELECT FROM non_tmp, tmp" would never commit as its "locked_tables"
    count would stay 1.
    When Maria has has_transactions()==TRUE, open_temporary_table()
    (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
    external_lock(F_UNLCK) will happen and we can then allow the user to
    create transactional temporary tables.
  */
  if (file->s->base.born_transactional)
  {
    /* Transactional table */
    if (lock_type == F_UNLCK)
    {
      /* End of transaction */
      TRN *trn= THD_TRN;

      /*
        We always re-enable, don't rely on thd->transaction.on as it is
        sometimes reset to true after unlocking (see mysql_truncate() for a
        partitioned table based on Maria).
        Note that we can come here without having an exclusive lock on the
        table, for example in this case:
        external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
        abortion; external_lock(F_UNLCK). Fortunately, the re-enabling happens
        only if we were the thread which disabled logging.
      */
      if (_ma_reenable_logging_for_table(file, TRUE))
        DBUG_RETURN(1);
      /** @todo zero file->trn also in commit and rollback */
      _ma_set_trn_for_table(file, NULL);        // Safety
      /*
        Ensure that file->state points to the current number of rows. This
        is needed if someone calls maria_info() without first doing an
        external lock of the table
      */
      file->state= &file->s->state.state;
      if (trn)
      {
        DBUG_PRINT("info",
                   ("locked_tables: %u", trnman_has_locked_tables(trn)));
        DBUG_ASSERT(trnman_has_locked_tables(trn) > 0);
        if (trnman_has_locked_tables(trn) &&
            !trnman_decrement_locked_tables(trn))
        {
          /*
            OK should not have been sent to client yet (ACID).
            This is a bit excessive, ACID requires this only if there are some
            changes to commit (rollback shouldn't be tested).
          */
          DBUG_ASSERT(!thd->main_da.is_sent ||
                      thd->killed == THD::KILL_CONNECTION);
          /* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
          if (ma_commit(trn))
            DBUG_RETURN(1);
          THD_TRN= 0;
#else
          if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
          {
            trnman_rollback_trn(trn);
            DBUG_PRINT("info", ("THD_TRN set to 0x0"));
            THD_TRN= 0;
          }
#endif
        }
        trnman_set_flags(trn, trnman_get_flags(trn) & ~ TRN_STATE_INFO_LOGGED);
      }
    }
    else
    {
      if (!file->s->lock_key_trees)             // If we don't use versioning
      {
        /*
          We come here in the following cases:
           - The table is a temporary table
           - It's a table which is crash safe but not yet versioned, for
             example a table with fulltext or rtree keys

          Set the current state to point to save_state so that the
          block_format code doesn't count the same record twice.
          Copy also the current state. This may have been wrong if the
          same file was used several times in the last statement
        */
        file->state=  file->state_start;
        *file->state= file->s->state.state;
      }

      if (file->trn)
      {
        /* This can only happen with tables created with clone() */
        DBUG_ASSERT(cloned);
        trnman_increment_locked_tables(file->trn);
      }

      if (!thd->transaction.on)
      {
        /*
          No need to log REDOs/UNDOs. If this is an internal temporary table
          which will be renamed to a permanent table (like in ALTER TABLE),
          the rename happens after unlocking so will be durable (and the table
          will get its create_rename_lsn).
          Note: if we wanted to enable users to have an old backup and apply
          tons of archived logs to roll-forward, we could then not disable
          REDOs/UNDOs in this case.
        */
        DBUG_PRINT("info", ("Disabling logging for table"));
        _ma_tmp_disable_logging_for_table(file, TRUE);
      }
    }
  } /* if transactional table */

  /*
    Non-temporary tables pass lock_type through unchanged; temporary tables
    pass F_UNLCK through and use F_EXTRA_LCK for every other lock type.
  */
  int engine_lock= lock_type;
  if (table->s->tmp_table && lock_type != F_UNLCK)
    engine_lock= F_EXTRA_LCK;
  DBUG_RETURN(maria_lock_database(file, engine_lock));
}

2712 2713
/*
  For a table born transactional: attach the connection's transaction
  (THD_TRN) to the table and tell the transaction manager that a new
  statement starts. Does nothing for other tables. Always returns 0.
*/
int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
{
  if (!file->s->base.born_transactional)
    return 0;

  TRN *trn= THD_TRN;
  DBUG_ASSERT(trn); // this may be called only after external_lock()
  DBUG_ASSERT(trnman_has_locked_tables(trn));
  DBUG_ASSERT(lock_type != TL_UNLOCK);
  DBUG_ASSERT(file->trn == trn);

  /*
    If there was an implicit commit under this LOCK TABLES by a previous
    statement (like a DDL), at least if that previous statement was about a
    different ha_maria than 'this' then this->file->trn is a stale
    pointer. We fix it:
  */
  _ma_set_trn_for_table(file, trn);
  /*
    As external_lock() was already called, don't increment locked_tables.
    Note that we call the function below possibly several times when
    statement starts (once per table). This is ok as long as that function
    does cheap operations. Otherwise, we will need to do it only on first
    call to start_stmt().
  */
  trnman_new_statement(trn);

#ifdef EXTRA_DEBUG
  if (!(trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED) &&
      (trnman_get_flags(trn) & TRN_STATE_TABLES_CAN_CHANGE))
  {
    trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED);
    (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
                                   (uchar*) thd->query(),
                                   thd->query_length());
  }
#endif
  return 0;
}
2752

unknown's avatar
unknown committed
2753 2754 2755 2756 2757 2758 2759 2760 2761

/**
  Performs an implicit commit of the Maria transaction and creates a new
  one.

  This can be considered a hack. When Maria loses HA_NO_TRANSACTIONS it will
  be a participant in the connection's transaction and so the implicit
  commits (ha_commit()) (like in end_active_trans()) will do the implicit
  commit without need to call this function which can then be removed.

  @param  thd              THD object
  @param  new_trn          if a new transaction should be created; a new
                           transaction is not needed when we know that the
                           tables will be unlocked very soon.
*/

2769
int ha_maria::implicit_commit(THD *thd, bool new_trn)
{
#ifndef MARIA_CANNOT_ROLLBACK
#error this method should be removed
#endif
  TRN *trn;
  int error= 0;
  DBUG_ENTER("ha_maria::implicit_commit");

  if (!new_trn && thd->locked_tables)
  {
    /*
      "we are under LOCK TABLES" <=> "we shouldn't commit".
      As thd->locked_tables is true, we are either under LOCK TABLES, or in
      prelocking; prelocking can be under LOCK TABLES, or not (and in this
      latter case only we should commit).
      Note that we come here only at the end of the top statement
      (dispatch_command()), we are never committing inside a sub-statement.
    */
    const enum prelocked_mode_type mode= thd->prelocked_mode;
    if (mode == NON_PRELOCKED || mode == PRELOCKED_UNDER_LOCK_TABLES)
    {
      DBUG_PRINT("info", ("locked_tables, skipping"));
      DBUG_RETURN(0);
    }
  }

  trn= THD_TRN;
  if (trn == NULL)
    DBUG_RETURN(error);                         // Nothing to commit

  /* Remember the lock count so it can be given to the new transaction */
  const uint locked_tables= trnman_has_locked_tables(trn);
  if (unlikely(ma_commit(trn)))
    error= 1;
  if (!new_trn)
  {
    THD_TRN= NULL;
    DBUG_RETURN(error);
  }

  /*
    We need to create a new transaction and put it in THD_TRN. Indeed,
    tables may be under LOCK TABLES, and so they will start the next
    statement assuming they have a trn (see ha_maria::start_stmt()).
  */
  trn= trnman_new_trn(& thd->transaction.wt);
  THD_TRN= trn;
  if (unlikely(trn == NULL))
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Move all locked tables to the new transaction
    We must do it here as otherwise file->thd and file->state may be
    stale pointers. We can't do this in start_stmt() as we don't know
    when we should call _ma_setup_live_state() and in some cases, like
    in check table, we use the table without calling start_stmt().
  */
  for (TABLE *tab= thd->open_tables; tab; tab= tab->next)
  {
    if (!tab->db_stat || tab->file->ht != maria_hton)
      continue;                                 // Not an open Maria table

    MARIA_HA *handler= ((ha_maria*) tab->file)->file;
    if (!handler->s->base.born_transactional)
      continue;

    _ma_set_trn_for_table(handler, trn);
    /* If handler uses versioning */
    if (handler->s->lock_key_trees && _ma_setup_live_state(handler))
      error= HA_ERR_OUT_OF_MEM;
  }
  /* This is just a commit, tables stay locked if they were: */
  trnman_reset_locked_tables(trn, locked_tables);
  DBUG_RETURN(error);
}


2850 2851 2852 2853
/*
  Decide the lock type for this table and append the table's lock to the
  'to' array.

  The requested lock_type is only applied when it is not TL_IGNORE and the
  table's lock is currently TL_UNLOCK; in that case it may first be
  changed to TL_READ_NO_INSERT or TL_WRITE as explained below.
  Returns the position after the stored element.
*/
THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
                                     THR_LOCK_DATA **to,
                                     enum thr_lock_type lock_type)
{
  /* Test if we can fix test below */
  DBUG_ASSERT(lock_type != TL_UNLOCK &&
              (lock_type == TL_IGNORE || file->lock.type == TL_UNLOCK));
  if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
  {
    const enum enum_sql_command sql_command= thd->lex->sql_command;
    const bool select_or_lock_tables= (sql_command == SQLCOM_SELECT ||
                                       sql_command == SQLCOM_LOCK_TABLES);
    /*
      We have to disable concurrent inserts for INSERT ... SELECT or
      INSERT/UPDATE/DELETE with sub queries if we are using statement based
      logging.  We take the safe route here and disable this for all
      commands that only do reading and are not SELECT.
    */
    if (lock_type <= TL_READ_HIGH_PRIORITY &&
        !thd->current_stmt_binlog_row_based &&
        !select_or_lock_tables &&
        (thd->options & OPTION_BIN_LOG) &&
        mysql_bin_log.is_open())
    {
      lock_type= TL_READ_NO_INSERT;
    }
    else if (lock_type == TL_WRITE_CONCURRENT_INSERT)
    {
      const enum enum_duplicates duplicates= thd->lex->duplicates;
      /*
        Explanation for the 3 conditions below, in order:

        - Bulk insert may use repair, which will cause problems if other
        threads try to read/insert to the table: disable versioning.
        Note that our read of file->state->records is incorrect, as such
        variable may have changed when we come to start_bulk_insert() (worse
        case: we see != 0 so allow versioning, start_bulk_insert() sees 0 and
        uses repair). This is prevented because start_bulk_insert() will not
        try repair if we enabled versioning.
        - INSERT SELECT ON DUPLICATE KEY UPDATE comes here with
        TL_WRITE_CONCURRENT_INSERT but shouldn't because it can do
        update/delete of a row and versioning doesn't support that
        - same for LOAD DATA CONCURRENT REPLACE.
      */
      const bool table_is_empty= (file->state->records == 0);
      const bool insert_select_update=
        (sql_command == SQLCOM_INSERT_SELECT && duplicates == DUP_UPDATE);
      const bool load_replace=
        (sql_command == SQLCOM_LOAD && duplicates == DUP_REPLACE);
      if (table_is_empty || insert_select_update || load_replace)
        lock_type= TL_WRITE;
    }
    file->lock.type= lock_type;
  }
  *to++= &file->lock;
  return to;
}


/*
  Fill create_info from the handler's current state: refreshes the
  handler information first, then copies the auto-increment value (unless
  HA_CREATE_USED_AUTO is set), the data/index file names, the row type
  and the page checksum choice.
*/
void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
{
  ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);

  const bool auto_inc_given=
    (create_info->used_fields & HA_CREATE_USED_AUTO) != 0;
  if (!auto_inc_given)
    create_info->auto_increment_value= stats.auto_increment_value;

  create_info->data_file_name= data_file_name;
  create_info->index_file_name= index_file_name;

  /* We need to restore the row type as Maria can change it */
  const bool row_format_given=
    (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) != 0;
  if (create_info->row_type != ROW_TYPE_DEFAULT && !row_format_given)
    create_info->row_type= get_row_type();

  /*
    Show always page checksums, as this can be forced with
    maria_page_checksums variable
  */
  if (create_info->page_checksum == HA_CHOICE_UNDEF)
  {
    if (file->s->options & HA_OPTION_PAGE_CHECKSUM)
      create_info->page_checksum= HA_CHOICE_YES;
    else
      create_info->page_checksum= HA_CHOICE_NO;
  }
}


unknown's avatar
unknown committed
2927 2928 2929 2930 2931
enum row_type ha_maria::get_row_type() const
{
  switch (file->s->data_file_type) {
  case STATIC_RECORD:     return ROW_TYPE_FIXED;
  case DYNAMIC_RECORD:    return ROW_TYPE_DYNAMIC;
2932
  case BLOCK_RECORD:      return ROW_TYPE_PAGE;
unknown's avatar
unknown committed
2933 2934 2935 2936 2937 2938
  case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED;
  default:                return ROW_TYPE_NOT_USED;
  }
}


2939
/*
  Choose the data file type for the given create options.

  TRANSACTIONAL=YES always gives BLOCK_RECORD. Otherwise ROW_TYPE_FIXED
  gives STATIC_RECORD, ROW_TYPE_DYNAMIC gives DYNAMIC_RECORD and every
  other row type gives BLOCK_RECORD.
*/
static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
{
  if (info->transactional != HA_CHOICE_YES)
  {
    if (info->row_type == ROW_TYPE_FIXED)
      return STATIC_RECORD;
    if (info->row_type == ROW_TYPE_DYNAMIC)
      return DYNAMIC_RECORD;
  }
  return BLOCK_RECORD;
}


2951
int ha_maria::create(const char *name, register TABLE *table_arg,
2952
                     HA_CREATE_INFO *ha_create_info)
2953 2954
{
  int error;
2955
  uint create_flags= 0, record_count, i;
2956 2957
  char buff[FN_REFLEN];
  MARIA_KEYDEF *keydef;
2958 2959
  MARIA_COLUMNDEF *recinfo;
  MARIA_CREATE_INFO create_info;
2960 2961
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
unknown's avatar
unknown committed
2962
  enum data_file_type row_type;
2963
  THD *thd= current_thd;
2964 2965
  DBUG_ENTER("ha_maria::create");

2966
  for (i= 0; i < share->keys; i++)
2967
  {
2968
    if (table_arg->key_info[i].flags & HA_USES_PARSER)
2969
    {
2970
      create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
2971 2972 2973
      break;
    }
  }
2974
  /* Note: BLOCK_RECORD is used if table is transactional */
2975
  row_type= maria_row_type(ha_create_info);
2976 2977 2978 2979
  if (ha_create_info->transactional == HA_CHOICE_YES &&
      ha_create_info->row_type != ROW_TYPE_PAGE &&
      ha_create_info->row_type != ROW_TYPE_NOT_USED &&
      ha_create_info->row_type != ROW_TYPE_DEFAULT)
2980
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
2981 2982 2983
                 ER_ILLEGAL_HA_CREATE_OPTION,
                 "Row format set to PAGE because of TRANSACTIONAL=1 option");

2984
  bzero((char*) &create_info, sizeof(create_info));
2985 2986 2987
  if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
                          &record_count, &create_info)))
    DBUG_RETURN(error); /* purecov: inspected */
2988 2989
  create_info.max_rows= share->max_rows;
  create_info.reloc_rows= share->min_rows;
2990 2991 2992 2993
  create_info.with_auto_increment= share->next_number_key_offset == 0;
  create_info.auto_increment= (ha_create_info->auto_increment_value ?
                               ha_create_info->auto_increment_value -1 :
                               (ulonglong) 0);
2994 2995
  create_info.data_file_length= ((ulonglong) share->max_rows *
                                 share->avg_row_length);
2996 2997
  create_info.data_file_name= ha_create_info->data_file_name;
  create_info.index_file_name= ha_create_info->index_file_name;
2998
  create_info.language= share->table_charset->number;
2999 3000 3001 3002 3003 3004

  /*
    Table is transactional:
    - If the user specify that table is transactional (in this case
      row type is forced to BLOCK_RECORD)
    - If they specify BLOCK_RECORD without specifying transactional behaviour
3005

unknown's avatar
unknown committed
3006 3007 3008
    Shouldn't this test be pushed down to maria_create()? Because currently,
    ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
    born_transactional==1, which confuses some recovery-related code.
3009
  */
3010 3011
  create_info.transactional= (row_type == BLOCK_RECORD &&
                              ha_create_info->transactional != HA_CHOICE_NO);
3012

3013 3014
  if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE)
    create_flags|= HA_CREATE_TMP_TABLE;
3015 3016
  if (ha_create_info->options & HA_CREATE_KEEP_FILES)
    create_flags|= HA_CREATE_KEEP_FILES;
3017
  if (options & HA_OPTION_PACK_RECORD)
3018
    create_flags|= HA_PACK_RECORD;
3019
  if (options & HA_OPTION_CHECKSUM)
3020
    create_flags|= HA_CREATE_CHECKSUM;
3021
  if (options & HA_OPTION_DELAY_KEY_WRITE)
3022
    create_flags|= HA_CREATE_DELAY_KEY_WRITE;
3023 3024
  if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF &&
       maria_page_checksums) ||
unknown's avatar
unknown committed
3025
       ha_create_info->page_checksum ==  HA_CHOICE_YES)
3026
    create_flags|= HA_CREATE_PAGE_CHECKSUM;
3027

3028
  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
3029
                                 (uchar*) thd->query(), thd->query_length());
3030

3031 3032
  /* TODO: Check that the following fn_format is really needed */
  error=
unknown's avatar
unknown committed
3033 3034
    maria_create(fn_format(buff, name, "", "",
                           MY_UNPACK_FILENAME | MY_APPEND_EXT),
3035
                 row_type, share->keys, keydef,
3036
                 record_count,  recinfo,
3037 3038
                 0, (MARIA_UNIQUEDEF *) 0,
                 &create_info, create_flags);
3039

unknown's avatar
unknown committed
3040
  my_free((uchar*) recinfo, MYF(0));
3041 3042 3043 3044 3045 3046
  DBUG_RETURN(error);
}


int ha_maria::rename_table(const char *from, const char *to)
{
3047 3048
  THD *thd= current_thd;
  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
3049
                                 (uchar*) thd->query(), thd->query_length());
3050 3051 3052 3053
  return maria_rename(from, to);
}


3054 3055 3056 3057
/*
  Compute the next auto-increment value.

  When the auto-increment column is at the start of its key
  (next_number_key_offset == 0) the value comes from the refreshed handler
  statistics and all following values are reserved. Otherwise the last
  row with the current key prefix is looked up with maria_rkey() and the
  result is that row's value + 1 (or 1 if the lookup fails), reserving a
  single value.

  offset, increment and nb_desired_values are not read here.
*/
void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
                                  ulonglong nb_desired_values,
                                  ulonglong *first_value,
                                  ulonglong *nb_reserved_values)
{
  uchar key[HA_MAX_KEY_LENGTH];

  if (!table->s->next_number_key_offset)
  {                                             // Autoincrement at key-start
    ha_maria::info(HA_STATUS_AUTO);
    *first_value= stats.auto_increment_value;
    /* Maria has only table-level lock for now, so reserves to +inf */
    *nb_reserved_values= ULONGLONG_MAX;
    return;
  }

  /* it's safe to call the following if bulk_insert isn't on */
  maria_flush_bulk_insert(file, table->s->next_number_index);

  (void) extra(HA_EXTRA_KEYREAD);
  key_copy(key, table->record[0],
           table->key_info + table->s->next_number_index,
           table->s->next_number_key_offset);
  const int error= maria_rkey(file, table->record[1],
                              (int) table->s->next_number_index, key,
                              make_prev_keypart_map(table->s->
                                                    next_number_keypart),
                              HA_READ_PREFIX_LAST);
  ulonglong nr= 1;
  if (!error)
  {
    /* Get data from record[1] */
    nr= ((ulonglong) table->next_number_field->
         val_int_offset(table->s->rec_buff_length) + 1);
  }
  (void) extra(HA_EXTRA_NO_KEYREAD);

  *first_value= nr;
  /*
    MySQL needs to call us for next row: assume we are inserting ("a",null)
    here, we return 3, and next this statement will want to insert ("b",null):
    there is no reason why ("b",3+1) would be the good row to insert: maybe it
    already exists, maybe 3+1 is too large...
  */
  *nb_reserved_values= 1;
}


/*
  Find out how many rows there are in the given range

  SYNOPSIS
    records_in_range()
    inx                 Index to use
    min_key             Start of range.  Null pointer if from first key
    max_key             End of range. Null pointer if to last key

  NOTES
    min_key.flag can have one of the following values:
      HA_READ_KEY_EXACT         Include the key in the range
      HA_READ_AFTER_KEY         Don't include key in range

    max_key.flag can have one of the following values:
      HA_READ_BEFORE_KEY        Don't include key in range
      HA_READ_AFTER_KEY         Include all 'end_key' values in the range

  RETURN
   HA_POS_ERROR         Something is wrong with the index tree.
   0                    There are no matching keys in the given range
   number > 0           There are approximately 'number' matching rows in
                        the range.
*/

ha_rows ha_maria::records_in_range(uint inx, key_range *min_key,
                                   key_range *max_key)
{
  /* The engine does the estimate; only the result type is converted here */
  const ha_rows estimate=
    (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key);
  return estimate;
}


unknown's avatar
unknown committed
3134
int ha_maria::ft_read(uchar * buf)
{
  /* Without a full-text handler there is nothing to read from. */
  if (ft_handler == NULL)
    return -1;

  /* The fetch is accounted in the session's ha_read_next_count counter. */
  thread_safe_increment(table->in_use->status_var.ha_read_next_count,
                        &LOCK_status);  // why ?

  const int rc= ft_handler->please->read_next(ft_handler, (char*) buf);

  /* Mirror the outcome in table->status before reporting it. */
  if (rc)
    table->status= STATUS_NOT_FOUND;
  else
    table->status= 0;
  return rc;
}


uint ha_maria::checksum() const
{
  /* The value is read from the table state and cast to uint. */
  const uint table_checksum= (uint) file->state->checksum;
  return table_checksum;
}


3157
bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
3158 3159
                                          uint table_changes)
{
3160
  DBUG_ENTER("check_if_incompatible_data");
3161 3162
  uint options= table->s->db_options_in_use;

3163 3164 3165
  if (create_info->auto_increment_value != stats.auto_increment_value ||
      create_info->data_file_name != data_file_name ||
      create_info->index_file_name != index_file_name ||
3166
      (maria_row_type(create_info) != data_file_type &&
3167
       create_info->row_type != ROW_TYPE_DEFAULT) ||
3168
      table_changes == IS_EQUAL_NO ||
3169 3170
      (table_changes & IS_EQUAL_PACK_LENGTH)) // Not implemented yet
    DBUG_RETURN(COMPATIBLE_DATA_NO);
3171

3172
  if ((options & (HA_OPTION_CHECKSUM |
3173
                  HA_OPTION_DELAY_KEY_WRITE)) !=
3174
      (create_info->table_options & (HA_OPTION_CHECKSUM |
3175
                              HA_OPTION_DELAY_KEY_WRITE)))
3176 3177
    DBUG_RETURN(COMPATIBLE_DATA_NO);
  DBUG_RETURN(COMPATIBLE_DATA_YES);
3178
}
unknown's avatar
unknown committed
3179

3180 3181

/**
  @brief Handlerton panic callback.

  When checkpoint_interval is 0 (no background checkpoints) a full
  checkpoint is executed first; maria_panic() is then always called.

  @param hton   maria handlerton (unused)
  @param flag   panic action, passed on to maria_panic()

  @return bitwise OR of the checkpoint result (0 if none was run) and the
          result of maria_panic()
*/

static int maria_hton_panic(handlerton *hton __attribute__ ((unused)),
                            ha_panic_function flag)
{
  int ret= 0;

  /*
    The checkpoint is a statement of its own so that it is guaranteed to
    finish before maria_panic() starts: the operands of a single '|'
    expression have no defined evaluation order.
  */
  if (checkpoint_interval == 0)
    ret= ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);

  ret|= maria_panic(flag);
  return ret;
}
3187

3188 3189 3190

static int maria_commit(handlerton *hton __attribute__ ((unused)),
                        THD *thd, bool all)
{
  TRN *trn= THD_TRN;
  DBUG_ENTER("maria_commit");

  trnman_reset_locked_tables(trn, 0);
  trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);

  /*
    Inside a multi-statement transaction a call with all == false only
    marks the end of a statement: nothing is committed yet.
  */
  const bool in_multi_stmt_trans=
    (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) != 0;
  if (in_multi_stmt_trans && !all)
    DBUG_RETURN(0);

  /* End of transaction: detach the TRN from the THD, then commit it. */
  DBUG_PRINT("info", ("THD_TRN set to 0x0"));
  THD_TRN= 0;
  DBUG_RETURN(ma_commit(trn));
}
3204

3205 3206 3207 3208 3209 3210

static int maria_rollback(handlerton *hton __attribute__ ((unused)),
                          THD *thd, bool all)
{
  TRN *trn= THD_TRN;
  DBUG_ENTER("maria_rollback");

  trnman_reset_locked_tables(trn, 0);

  /*
    Inside a multi-statement transaction a call with all == false rolls
    back the current statement only.
  */
  const bool in_multi_stmt_trans=
    (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) != 0;
  if (in_multi_stmt_trans && !all)
  {
    trnman_rollback_statement(trn);
    DBUG_RETURN(0);
  }

  /* End of transaction: detach the TRN from the THD, then roll it back. */
  DBUG_PRINT("info", ("THD_TRN set to 0x0"));
  THD_TRN= 0;
  DBUG_RETURN(trnman_rollback_trn(trn) ?
              HA_ERR_OUT_OF_MEM : 0);
}


3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240

/**
  @brief Handlerton callback for flushing logs.

  Calls translog_purge_at_flush() and reports its result as a boolean.

  @param hton            maria handlerton (unused)

  @retval FALSE OK
  @retval TRUE  Error
*/

bool maria_flush_logs(handlerton *hton)
{
  const bool purge_failed= test(translog_purge_at_flush());
  return purge_failed;
}


3241 3242 3243 3244 3245 3246 3247 3248
int maria_checkpoint_state(handlerton *hton, bool disabled)
{
  /* Store the requested state in maria_checkpoint_disabled; returns 0. */
  const my_bool new_state= (my_bool) disabled;
  maria_checkpoint_disabled= new_state;
  return 0;
}



3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263
#define SHOW_MSG_LEN (FN_REFLEN + 20)
/**
  @brief show status handler

  @param hton            maria handlerton
  @param thd             thread handler
  @param print           print function
  @param stat            type of status
*/

bool maria_show_status(handlerton *hton,
                       THD *thd,
                       stat_print_fn *print,
                       enum ha_stat_type stat)
{
  const LEX_STRING *engine_name= hton_name(hton);

  /* Only HA_ENGINE_LOGS produces rows; every other type prints nothing. */
  if (stat != HA_ENGINE_LOGS)
    return 0;

  const char state_unknown[]= "unknown";
  const char state_in_use[]= "in use";
  const char state_free[]= "free";
  char path[FN_REFLEN];
  TRANSLOG_ADDRESS horizon= translog_get_horizon();
  const uint32 last_file= LSN_FILE_NO(horizon);
  const uint32 first_needed= translog_get_first_needed_file();
  const uint32 first_file= translog_get_first_file(horizon);

  if (first_file == 0)
  {
    /* translog_get_first_file() returned 0: print one "error" row. */
    const char error_text[]= "error";
    print(thd, engine_name->str, engine_name->length,
          STRING_WITH_LEN(""), error_text, sizeof(error_text) - 1);
    return 0;
  }

  /* One row per log file in the range first_file .. last_file. */
  for (uint32 fileno= first_file; fileno <= last_file; fileno++)
  {
    const char stat_failed[]= "can't stat";
    char object[SHOW_MSG_LEN];
    const char *status;
    uint object_len, status_len;
    MY_STAT stat_buff;
    char *file_name= translog_filename_by_fileno(fileno, path);
    MY_STAT *file_stat= my_stat(file_name, &stat_buff, MYF(0));

    if (file_stat == NULL)
    {
      status= stat_failed;
      status_len= sizeof(stat_failed) - 1;
      object_len= my_snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s",
                              file_name);
    }
    else
    {
      /* Files numbered below first_needed are shown as "free". */
      if (first_needed == 0)
      {
        status= state_unknown;
        status_len= sizeof(state_unknown) - 1;
      }
      else if (fileno < first_needed)
      {
        status= state_free;
        status_len= sizeof(state_free) - 1;
      }
      else
      {
        status= state_in_use;
        status_len= sizeof(state_in_use) - 1;
      }
      object_len= my_snprintf(object, SHOW_MSG_LEN, "Size %12lu ; %s",
                              (ulong) file_stat->st_size, file_name);
    }

    print(thd, engine_name->str, engine_name->length,
          object, object_len, status, status_len);
  }
  return 0;
}

3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354

/**
  Callback which deletes one log file of a directory.

  It works at a lower level than the log-deleting functions of
  ma_loghandler.c: translog_init() does not have to be called first.

  @param  directory        directory where file is
  @param  filename         base name of the file to delete

  @return the result of my_delete() for the complete file name
*/

static my_bool translog_callback_delete_all(const char *directory,
                                            const char *filename)
{
  char full_path[FN_REFLEN];

  /* Build the complete file name from the directory and the base name. */
  fn_format(full_path, filename, directory, "", MYF(MY_UNPACK_FILENAME));
  return my_delete(full_path, MYF(MY_WME));
}


/**
Sergei Golubchik's avatar
Sergei Golubchik committed
3355
  Helper function for option aria-force-start-after-recovery-failures.
3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370
  Deletes logs if too many failures. Otherwise, increments the counter of
  failures in the control file.
  Notice how this has to be called _before_ translog_init() (if log is
  corrupted, translog_init() might crash the server, so we need to remove logs
  before).

  @param  log_dir          directory where logs to be deleted are
*/

static int mark_recovery_start(const char* log_dir)
{
  int res;
  DBUG_ENTER("mark_recovery_start");
  if (unlikely(maria_recover_options == HA_RECOVER_NONE))
    ma_message_no_user(ME_JUST_WARNING, "Please consider using option"
Sergei Golubchik's avatar
Sergei Golubchik committed
3371
                       " --aria-recover[=...] to automatically check and"
3372
                       " repair tables when logs are removed by option"
Sergei Golubchik's avatar
Sergei Golubchik committed
3373
                       " --aria-force-start-after-recovery-failures=#");
3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398
  if (recovery_failures >= force_start_after_recovery_failures)
  {
    /*
      Remove logs which cause the problem; keep control file which has
      critical info like uuid, max_trid (removing control file may make
      correct tables look corrupted!).
    */
    char msg[100];
    res= translog_walk_filenames(log_dir, &translog_callback_delete_all);
    my_snprintf(msg, sizeof(msg),
                "%s logs after %u consecutive failures of"
                " recovery from logs",
                (res ? "failed to remove some" : "removed all"),
                recovery_failures);
    ma_message_no_user((res ? 0 : ME_JUST_WARNING), msg);
  }
  else
    res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
                                         max_trid_in_control_file,
                                         recovery_failures + 1);
  DBUG_RETURN(res);
}


/**
Sergei Golubchik's avatar
Sergei Golubchik committed
3399
  Helper function for option aria-force-start-after-recovery-failures.
3400
  Records in the control file that recovery was a success, so that it's not
Sergei Golubchik's avatar
Sergei Golubchik committed
3401
  counted for aria-force-start-after-recovery-failures.
3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414
*/

static int mark_recovery_success(void)
{
  DBUG_ENTER("mark_recovery_success");
  /* Recovery succeeded: write a failure count of 0 to the control file. */
  const int write_error=
    ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
                                    max_trid_in_control_file, 0);
  DBUG_RETURN(write_error);
}


3415 3416 3417 3418 3419 3420 3421 3422 3423 3424
/*
  Tell whether the table has changed during the current transaction.
  Returns the 'changed' flag of the table state as a bool.
*/

bool ha_maria::is_changed() const
{
  return file->state->changed != 0;
}


3425 3426
/*
  Plugin init function of the engine.

  Fills in the handlerton passed as 'p' and then runs the start-up steps
  (upgrade, engine init, control file, page caches, transaction log,
  recovery from log, checkpoint init) as one '||' chain.

  Returns 0 on success and HA_ERR_INITIALIZATION if any step failed; in
  that case maria_hton is reset to 0.
*/
static int ha_maria_init(void *p)
{
3427
  int res;
3428
  const char *log_dir= maria_data_root;
3429 3430
  /* Publish the handlerton and install the engine callbacks. */
  maria_hton= (handlerton *)p;
  maria_hton->state= SHOW_OPTION_YES;
unknown's avatar
unknown committed
3431
  maria_hton->db_type= DB_TYPE_UNKNOWN;
3432
  maria_hton->create= maria_create_handler;
3433 3434 3435
  maria_hton->panic= maria_hton_panic;
  maria_hton->commit= maria_commit;
  maria_hton->rollback= maria_rollback;
3436
  maria_hton->checkpoint_state= maria_checkpoint_state;
3437 3438 3439
#ifdef MARIA_CANNOT_ROLLBACK
  /* In this configuration the commit callback is removed again. */
  maria_hton->commit= 0;
#endif
3440 3441
  maria_hton->flush_logs= maria_flush_logs;
  maria_hton->show_status= maria_show_status;
3442 3443
  /* TODO: decide if we support Maria being used for log tables */
  maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
3444
  bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
3445
  maria_tmpdir= &mysql_tmpdir_list;             /* For REDO */
3446
  /*
    Start-up chain: evaluation stops at the first step that reports a
    failure (non-zero, or zero for init_pagecache()), leaving res true.
    mark_recovery_start() is only called when
    force_start_after_recovery_failures is non-zero.
  */
  res= maria_upgrade() || maria_init() || ma_control_file_open(TRUE, TRUE) ||
3447 3448
    ((force_start_after_recovery_failures != 0) &&
     mark_recovery_start(log_dir)) ||
3449
    !init_pagecache(maria_pagecache,
3450
                    (size_t) pagecache_buffer_size, pagecache_division_limit,
3451
                    pagecache_age_threshold, maria_block_size, 0) ||
3452
    !init_pagecache(maria_log_pagecache,
3453
                    TRANSLOG_PAGECACHE_SIZE, 0, 0,
3454
                    TRANSLOG_PAGE_SIZE, 0) ||
3455
    translog_init(maria_data_root, log_file_size,
3456
                  MYSQL_VERSION_ID, server_id, maria_log_pagecache,
3457
                  TRANSLOG_DEFAULT_FLAGS, 0) ||
3458
    maria_recovery_from_log() ||
3459 3460
    ((force_start_after_recovery_failures != 0 ||
      maria_recovery_changed_data) && mark_recovery_success()) ||
unknown's avatar
unknown committed
3461
    ma_checkpoint_init(checkpoint_interval);
3462
  /* The settings below are applied whether or not the chain succeeded. */
  maria_multi_threaded= maria_in_ha_maria= TRUE;
3463
  maria_create_trn_hook= maria_create_trn_for_mysql;
Michael Widenius's avatar
Michael Widenius committed
3464 3465
  maria_pagecache->extra_debug= 1;
  maria_assert_if_crashed_table= debug_assert_if_crashed_table;
3466

3467
#if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH)
3468 3469 3470
  /*  We can only test for sub paths if my_symlink.c is using realpath */
  maria_test_invalid_symlink= test_if_data_home_dir;
#endif
3471 3472
  /* On failure the engine's handlerton pointer is cleared. */
  if (res)
    maria_hton= 0;
3473 3474 3475

  ma_killed= ma_killed_in_mariadb;

3476
  return res ? HA_ERR_INITIALIZATION : 0;
3477 3478
}

3479

3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497
#ifdef HAVE_QUERY_CACHE
/**
  @brief Decide whether the current statement on this table may be cached
    by the query cache.

  @param thd The thread handle
  @param table_name A pointer to the table name in the table cache
  @param table_name_len The length of the table name
  @param[out] engine_callback The pointer to the storage engine call back
    function, currently 0
  @param[out] engine_data Engine data will be set to 0.

  @note Despite the name of this function, it is used to check each statement
    before it is cached and not to register a table or callback function.

  @see handler::register_query_cache_table

  @return Whether it is ok to try to cache the current statement. The
    engine_data and engine_callback will be set to 0.
    @retval TRUE  The statement may be cached
    @retval FALSE The statement must not be cached
*/

my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name,
                                             uint table_name_len,
                                             qc_engine_callback
                                             *engine_callback,
                                             ulonglong *engine_data)
{
  ulonglong actual_data_file_length;
  ulonglong current_data_file_length;
  DBUG_ENTER("ha_maria::register_query_cache_table");

  /*
    No call back function is needed to determine if a cached statement
    is valid or not.
  */
  *engine_callback= 0;

  /*
    No engine data is needed.
  */
  *engine_data= 0;

  /*
    Transactional tables with versioning: the answer depends on how this
    transaction's id compares with the id of the last change of the table.
    DBUG_RETURN (not a plain return) is required here, because every exit
    after DBUG_ENTER has to leave the DBUG call stack balanced.
  */
  if (file->s->now_transactional && file->s->have_versioning)
    DBUG_RETURN(file->trn->trid >= file->s->state.last_change_trn);

  /*
    If a concurrent INSERT has happened just before the currently processed
    SELECT statement, the total size of the table is unknown.

    To determine if the table size is known, the current thread's snap shot of
    the table size with the actual table size are compared.

    If the table size is unknown the SELECT statement can't be cached.

    We can see a later value of the shared length (modified by some other
    thread), but it's ok, as we only want to know if the variable was
    changed, the actual new value doesn't matter.
    NOTE(review): no mutex is taken in this function; confirm which lock,
    if any, makes the shared value visible here.
  */
  actual_data_file_length= file->s->state.state.data_file_length;
  current_data_file_length= file->state->data_file_length;

  /* Return whether is ok to try to cache current statement. */
  DBUG_RETURN(!(file->s->non_transactional_concurrent_insert &&
                current_data_file_length != actual_data_file_length));
}
#endif

3555
/*
  NULL-terminated list of the engine's system variables. It is passed to
  the plugin declaration at the end of the file.
*/
struct st_mysql_sys_var* system_variables[]= {
3556
  MYSQL_SYSVAR(block_size),
unknown's avatar
unknown committed
3557
  MYSQL_SYSVAR(checkpoint_interval),
3558
  MYSQL_SYSVAR(force_start_after_recovery_failures),
unknown's avatar
unknown committed
3559 3560
  MYSQL_SYSVAR(group_commit),
  MYSQL_SYSVAR(group_commit_interval),
3561
  MYSQL_SYSVAR(log_dir_path),
3562 3563
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_purge_type),
3564
  MYSQL_SYSVAR(max_sort_file_size),
3565
  MYSQL_SYSVAR(page_checksum),
unknown's avatar
unknown committed
3566 3567 3568
  MYSQL_SYSVAR(pagecache_age_threshold),
  MYSQL_SYSVAR(pagecache_buffer_size),
  MYSQL_SYSVAR(pagecache_division_limit),
3569
  MYSQL_SYSVAR(recover),
3570 3571 3572
  MYSQL_SYSVAR(repair_threads),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(stats_method),
3573
  MYSQL_SYSVAR(sync_log_dir),
Sergey Petrunya's avatar
Sergey Petrunya committed
3574
  MYSQL_SYSVAR(used_for_temp_tables),
3575 3576 3577 3578
  NULL
};


unknown's avatar
unknown committed
3579
/**
unknown's avatar
unknown committed
3580
   @brief Updates the checkpoint interval and restarts the background thread.
unknown's avatar
unknown committed
3581
*/
unknown's avatar
unknown committed
3582

unknown's avatar
unknown committed
3583
static void update_checkpoint_interval(MYSQL_THD thd,
unknown's avatar
unknown committed
3584
                                        struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
3585
                                        void *var_ptr, const void *save)
unknown's avatar
unknown committed
3586
{
unknown's avatar
unknown committed
3587 3588
  ma_checkpoint_end();
  ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save));
unknown's avatar
unknown committed
3589 3590
}

unknown's avatar
unknown committed
3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676
/**
   @brief Switches the group commit mode

   The mode being left is shut down first; then the new mode is stored in
   the variable, translog_sync() is called and the new mode is started.
*/

static void update_maria_group_commit(MYSQL_THD thd,
                                      struct st_mysql_sys_var *var,
                                      void *var_ptr, const void *save)
{
  const ulong old_mode= (ulong)*((long *)var_ptr);
  ulong new_mode;
  DBUG_ENTER("update_maria_group_commit");
  DBUG_PRINT("enter", ("old value: %lu  new value %lu  rate %lu",
                       old_mode, (ulong)(*(long *)save),
                       maria_group_commit_interval));

  /* Leave the old mode */
  if (old_mode == TRANSLOG_GCOMMIT_HARD)
    translog_hard_group_commit(FALSE);
  else if (old_mode == TRANSLOG_GCOMMIT_SOFT)
  {
    translog_soft_sync(FALSE);
    if (maria_group_commit_interval)
      translog_soft_sync_end();
  }
  else
    DBUG_ASSERT(old_mode == TRANSLOG_GCOMMIT_NONE); /* no other mode */

  new_mode= (ulong)(*(long *)save);
  *(ulong *)var_ptr= new_mode;
  translog_sync();

  /* Enter the new mode */
  if (new_mode == TRANSLOG_GCOMMIT_HARD)
    translog_hard_group_commit(TRUE);
  else if (new_mode == TRANSLOG_GCOMMIT_SOFT)
  {
    translog_soft_sync(TRUE);
    /* variable change made under global lock so we can just read it */
    if (maria_group_commit_interval)
      translog_soft_sync_start();
  }
  else
    DBUG_ASSERT(new_mode == TRANSLOG_GCOMMIT_NONE); /* no other mode */

  DBUG_VOID_RETURN;
}

/**
   @brief Changes the group commit interval

   In soft group commit mode soft sync is ended before the change (if the
   old interval was non-zero) and started after it (if the new one is).
*/

static void update_maria_group_commit_interval(MYSQL_THD thd,
                                               struct st_mysql_sys_var *var,
                                               void *var_ptr, const void *save)
{
  ulong *interval_ptr= (ulong*) var_ptr;
  const ulong new_interval= (ulong)*((long *)save);
  DBUG_ENTER("update_maria_group_commit_interval");
  DBUG_PRINT("enter", ("old value: %lu  new value %lu  group commit %lu",
                        *interval_ptr, new_interval, maria_group_commit));

  /* variable change made under global lock so we can just read it */
  switch (maria_group_commit) {
    case TRANSLOG_GCOMMIT_NONE:
    case TRANSLOG_GCOMMIT_HARD:
      /* Nothing to stop or start in these modes: just store the value. */
      *interval_ptr= new_interval;
      translog_set_group_commit_interval(new_interval);
      break;
    case TRANSLOG_GCOMMIT_SOFT:
      if (*interval_ptr)
        translog_soft_sync_end();
      translog_set_group_commit_interval(new_interval);
      *interval_ptr= new_interval;
      if (new_interval)
        translog_soft_sync_start();
      break;
    default:
      DBUG_ASSERT(0); /* impossible */
  }
  DBUG_VOID_RETURN;
}

3677 3678 3679 3680 3681 3682
/**
   @brief Updates the transaction log file limit.
*/

static void update_log_file_size(MYSQL_THD thd,
                                 struct st_mysql_sys_var *var,
unknown's avatar
unknown committed
3683
                                 void *var_ptr, const void *save)
3684 3685
{
  uint32 size= (uint32)((ulong)(*(long *)save));
unknown's avatar
unknown committed
3686
  translog_set_file_size(size);
3687 3688 3689
  *(ulong *)var_ptr= size;
}

3690

3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704
/*
  Status variables of the engine: page cache counters taken from
  maria_pagecache_var plus the number of transaction log syncs. The list
  ends with a NullS entry.
*/
SHOW_VAR status_variables[]= {
  {"pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG_NOFLUSH},
  {"pagecache_blocks_unused",      (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG_NOFLUSH},
  {"pagecache_blocks_used",        (char*) &maria_pagecache_var.blocks_used, SHOW_LONG_NOFLUSH},
  {"pagecache_read_requests",      (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
  {"pagecache_reads",              (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
  {"pagecache_write_requests",     (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
  {"pagecache_writes",             (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
  {"transaction_log_syncs",        (char*) &translog_syncs, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};

/*
  Top-level status variable list given to the plugin declaration: a single
  SHOW_ARRAY entry named "Aria" which points to status_variables.
*/
static struct st_mysql_show_var aria_status_variables[]= {
  {"Aria", (char*) &status_variables, SHOW_ARRAY},
unknown's avatar
unknown committed
3705 3706
  {NullS, NullS, SHOW_LONG}
};
3707

3708 3709 3710 3711 3712
/****************************************************************************
 * Maria MRR implementation: use DS-MRR
 ***************************************************************************/

int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
                                    uint n_ranges, uint mode,
                                    HANDLER_BUFFER *buf)
{
  /* The scan is set up by the DS-MRR helper object of this handler. */
  const int rc= ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode,
                                  buf);
  return rc;
}

3719
int ha_maria::multi_range_read_next(range_id_t *range_info)
{
  /* Fetching the next row is left to the DS-MRR helper. */
  const int rc= ds_mrr.dsmrr_next(range_info);
  return rc;
}

ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                               void *seq_init_param,
                                               uint n_ranges, uint *bufsz,
                                               uint *flags, COST_VECT *cost)
{
  /*
    The helper is initialized here because no earlier place exists where
    this->table is already known.
    TODO: consider moving it into some per-query initialization call.
  */
  ds_mrr.init(this, table);

  const ha_rows rows= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param,
                                              n_ranges, bufsz, flags, cost);
  return rows;
}

ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
                                       uint key_parts, uint *bufsz,
                                       uint *flags, COST_VECT *cost)
{
  /* The helper has to know the handler and its table before it is used. */
  ds_mrr.init(this, table);

  const ha_rows rows= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts,
                                        bufsz, flags, cost);
  return rows;
}

3747 3748 3749 3750 3751
int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
                                            size_t size)
{
  /* The explain text is produced by the DS-MRR helper. */
  const int rc= ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
  return rc;
}
3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770
/* Maria MRR implementation ends */


/* Index condition pushdown implementation*/


Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
{
  /* Remember the pushed condition and the index it belongs to. */
  pushed_idx_cond= idx_cond_arg;
  pushed_idx_cond_keyno= keyno_arg;
  in_range_check_pushed_down= TRUE;

  /* If that index is the active one, install the check function now. */
  if (active_index == pushed_idx_cond_keyno)
    ma_set_index_cond_func(file, index_cond_func_maria, this);

  /*
    NOTE(review): NULL presumably means that no part of the condition is
    handed back to the caller - confirm against handler::idx_cond_push.
  */
  return NULL;
}




3771
/*
  Storage engine descriptor referenced by the plugin declaration; it only
  carries the handlerton interface version.
*/
struct st_mysql_storage_engine maria_storage_engine=
3772
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
3773

Sergei Golubchik's avatar
Sergei Golubchik committed
3774
/*
  Plugin declaration of the engine: ties together the storage engine
  descriptor, the init function and the status and system variable lists
  defined in this file. No deinit function is registered.
*/
maria_declare_plugin(aria)
unknown's avatar
unknown committed
3775 3776
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
3777
  &maria_storage_engine,
Sergei Golubchik's avatar
Sergei Golubchik committed
3778
  "Aria",
3779
  "Monty Program Ab",
3780
  "Crash-safe tables with MyISAM heritage",
3781
  PLUGIN_LICENSE_GPL,
3782 3783 3784 3785 3786 3787 3788
  ha_maria_init,                /* Plugin Init      */
  NULL,                         /* Plugin Deinit    */
  0x0105,                       /* 1.5              */
  aria_status_variables,        /* status variables */
  system_variables,             /* system variables */
  "1.5",                        /* string version   */
  MariaDB_PLUGIN_MATURITY_GAMMA /* maturity         */
unknown's avatar
unknown committed
3789 3790
}
maria_declare_plugin_end;