handler.h 160 KB
Newer Older
1 2
#ifndef HANDLER_INCLUDED
#define HANDLER_INCLUDED
3
/*
4
   Copyright (c) 2000, 2016, Oracle and/or its affiliates.
5
   Copyright (c) 2009, 2017, MariaDB Corporation.
6

7 8 9 10
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.
11

unknown's avatar
unknown committed
12 13
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
unknown's avatar
unknown committed
15
   GNU General Public License for more details.
16

unknown's avatar
unknown committed
17 18
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
19 20
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
*/
unknown's avatar
unknown committed
21 22 23

/* Definitions for parameters to do with handler-routines */

24
#ifdef USE_PRAGMA_INTERFACE
unknown's avatar
unknown committed
25 26 27
#pragma interface			/* gcc class implementation */
#endif

Monty's avatar
Monty committed
28
#include <my_global.h>                          /* For handlers */
29
#include "sql_const.h"
Monty's avatar
Monty committed
30
#include "sql_basic_types.h"
31 32 33 34 35
#include "mysqld.h"                             /* server_id */
#include "sql_plugin.h"        /* plugin_ref, st_plugin_int, plugin */
#include "thr_lock.h"          /* thr_lock_type, THR_LOCK_DATA */
#include "sql_cache.h"
#include "structs.h"                            /* SHOW_COMP_OPTION */
36
#include "sql_array.h"          /* Dynamic_array<> */
37
#include "mdl.h"
Aleksey Midenkov's avatar
Aleksey Midenkov committed
38
#include "vtq.h"
39

40 41
#include "sql_analyze_stmt.h" // for Exec_time_tracker 

42
#include <my_compare.h>
unknown's avatar
unknown committed
43
#include <ft_global.h>
unknown's avatar
unknown committed
44
#include <keycache.h>
45
#include <mysql/psi/mysql_table.h>
46
#include "sql_sequence.h"
unknown's avatar
unknown committed
47

48
class Alter_info;
49
class Virtual_column_info;
50
class sequence_definition;
51

unknown's avatar
unknown committed
52 53
// the following is for checking tables

54 55 56 57 58 59
#define HA_ADMIN_ALREADY_DONE	  1
#define HA_ADMIN_OK               0
#define HA_ADMIN_NOT_IMPLEMENTED -1
#define HA_ADMIN_FAILED		 -2
#define HA_ADMIN_CORRUPT         -3
#define HA_ADMIN_INTERNAL_ERROR  -4
unknown's avatar
unknown committed
60
#define HA_ADMIN_INVALID         -5
unknown's avatar
unknown committed
61
#define HA_ADMIN_REJECT          -6
62
#define HA_ADMIN_TRY_ALTER       -7
63
#define HA_ADMIN_WRONG_CHECKSUM  -8
unknown's avatar
unknown committed
64
#define HA_ADMIN_NOT_BASE_TABLE  -9
unknown's avatar
unknown committed
65 66 67
#define HA_ADMIN_NEEDS_UPGRADE  -10
#define HA_ADMIN_NEEDS_ALTER    -11
#define HA_ADMIN_NEEDS_CHECK    -12
unknown's avatar
unknown committed
68

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
/**
   Return values for check_if_supported_inplace_alter().

   @see check_if_supported_inplace_alter() for description of
   the individual values.
*/
enum enum_alter_inplace_result
{
  /*
    The values are spelled out explicitly; they are the same ones the
    compiler assigns implicitly (consecutive, starting at 0).
  */
  HA_ALTER_ERROR=                             0,
  HA_ALTER_INPLACE_NOT_SUPPORTED=             1,
  HA_ALTER_INPLACE_EXCLUSIVE_LOCK=            2,
  HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE= 3,
  HA_ALTER_INPLACE_SHARED_LOCK=               4,
  HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE=     5,
  HA_ALTER_INPLACE_NO_LOCK=                   6
};

85
/* Bits in table_flags() to show what database can do */
86

87 88 89
#define HA_NO_TRANSACTIONS     (1ULL << 0) /* Doesn't support transactions */
#define HA_PARTIAL_COLUMN_READ (1ULL << 1) /* read may not return all columns */
#define HA_TABLE_SCAN_ON_INDEX (1ULL << 2) /* No separate data/index file */
90
/*
91 92 93 94
  The following should be set if the following is not true when scanning
  a table with rnd_next()
  - We will see all rows (including deleted ones)
  - Row positions are 'table->s->db_record_offset' apart
95
  If this flag is not set, filesort will do a position() call for each matched
96
  row to be able to find the row later.
97
*/
98 99
#define HA_REC_NOT_IN_SEQ      (1ULL << 3)
#define HA_CAN_GEOMETRY        (1ULL << 4)
100 101 102 103 104
/*
  Reading keys in random order is as fast as reading keys in sort order
  (Used in records.cc to decide if we should use a record cache and by
  filesort to decide if we should sort key + data or key + pointer-to-row)
*/
105
#define HA_FAST_KEY_READ       (1ULL << 5)
106 107 108 109
/*
  Set the following flag if, on delete, we should force all keys to be read
  and, on update, read all keys that change
*/
110 111 112 113 114 115 116 117
#define HA_REQUIRES_KEY_COLUMNS_FOR_DELETE (1ULL << 6)
#define HA_NULL_IN_KEY         (1ULL << 7) /* One can have keys with NULL */
#define HA_DUPLICATE_POS       (1ULL << 8)    /* ha_position() gives dup row */
#define HA_NO_BLOBS            (1ULL << 9) /* Doesn't support blobs */
#define HA_CAN_INDEX_BLOBS     (1ULL << 10)
#define HA_AUTO_PART_KEY       (1ULL << 11) /* auto-increment in multi-part key */
#define HA_REQUIRE_PRIMARY_KEY (1ULL << 12) /* .. and can't create a hidden one */
#define HA_STATS_RECORDS_IS_EXACT (1ULL << 13) /* stats.records is exact */
118 119 120 121
/*
  INSERT_DELAYED only works with handlers that uses MySQL internal table
  level locks
*/
122
#define HA_CAN_INSERT_DELAYED  (1ULL << 14)
123 124
/*
  If we get the primary key columns for free when we do an index read
125 126
  (usually, it also implies that HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
  flag is set).
127
*/
128
#define HA_PRIMARY_KEY_IN_READ_INDEX (1ULL << 15)
129
/*
130
  If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, it means that position()
  uses a primary key given by the record argument.
  Without primary key, we can't call position().
  If not set, the position is returned as the current row's position
  regardless of what argument is given.
135
*/ 
136 137
#define HA_PRIMARY_KEY_REQUIRED_FOR_POSITION (1ULL << 16) 
#define HA_CAN_RTREEKEYS       (1ULL << 17)
Sergei Golubchik's avatar
Sergei Golubchik committed
138
#define HA_NOT_DELETE_WITH_CACHE (1ULL << 18) /* unused */
139 140 141 142
/*
  The following is set if we need a primary key to delete (and update) a row.
  If there is no primary key, all columns need to be read on update and delete
*/
143 144 145 146 147
#define HA_PRIMARY_KEY_REQUIRED_FOR_DELETE (1ULL << 19)
#define HA_NO_PREFIX_CHAR_KEYS (1ULL << 20)
#define HA_CAN_FULLTEXT        (1ULL << 21)
#define HA_CAN_SQL_HANDLER     (1ULL << 22)
#define HA_NO_AUTO_INCREMENT   (1ULL << 23)
148
/* Has automatic checksums and uses the old checksum format */
149
#define HA_HAS_OLD_CHECKSUM    (1ULL << 24)
unknown's avatar
unknown committed
150
/* Table data are stored in separate files (for lower_case_table_names) */
151
#define HA_FILE_BASED	       (1ULL << 26)
Sergei Golubchik's avatar
Sergei Golubchik committed
152
#define HA_NO_VARCHAR	       (1ULL << 27) /* unused */
153 154 155 156 157
#define HA_CAN_BIT_FIELD       (1ULL << 28) /* supports bit fields */
#define HA_NEED_READ_RANGE_BUFFER (1ULL << 29) /* for read_multi_range */
#define HA_ANY_INDEX_MAY_BE_UNIQUE (1ULL << 30)
#define HA_NO_COPY_ON_ALTER    (1ULL << 31)
#define HA_HAS_RECORDS	       (1ULL << 32) /* records() gives exact count*/
158
/* Has its own method of binlog logging */
159
#define HA_HAS_OWN_BINLOGGING  (1ULL << 33)
160 161 162 163
/*
  Engine is capable of row-format and statement-format logging,
  respectively
*/
164 165
#define HA_BINLOG_ROW_CAPABLE  (1ULL << 34)
#define HA_BINLOG_STMT_CAPABLE (1ULL << 35)
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
/*
    When a multiple key conflict happens in a REPLACE command mysql
    expects the conflicts to be reported in the ascending order of
    key names.

    For e.g.

    CREATE TABLE t1 (a INT, UNIQUE (a), b INT NOT NULL, UNIQUE (b), c INT NOT
                     NULL, INDEX(c));

    REPLACE INTO t1 VALUES (1,1,1),(2,2,2),(2,1,3);

    MySQL expects the conflict with 'a' to be reported before the conflict with
    'b'.

    If the underlying storage engine does not report the conflicting keys in
    ascending order, it causes unexpected errors when the REPLACE command is
    executed.

    This flag helps the underlying SE to inform the server that the keys are not
    ordered.
*/
188
#define HA_DUPLICATE_KEY_NOT_IN_ORDER    (1ULL << 36)
Sergei Golubchik's avatar
Sergei Golubchik committed
189

190 191 192 193 194
/*
  Engine supports REPAIR TABLE. Used by CHECK TABLE FOR UPGRADE if an
  incompatible table is detected. If this flag is set, CHECK TABLE FOR UPGRADE
  will report ER_TABLE_NEEDS_UPGRADE, otherwise ER_TABLE_NEED_REBUILD.
*/
195
#define HA_CAN_REPAIR                    (1ULL << 37)
196

197
/* Has automatic checksums and uses the new checksum format */
198 199 200
#define HA_HAS_NEW_CHECKSUM    (1ULL << 38)
#define HA_CAN_VIRTUAL_COLUMNS (1ULL << 39)
#define HA_MRR_CANT_SORT       (1ULL << 40)
201
#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (1ULL << 41) /* unused */
202

203
/*
204
  This storage engine supports condition pushdown
205
*/
206 207 208
#define HA_CAN_TABLE_CONDITION_PUSHDOWN (1ULL << 42)
/* old name for the same flag */
#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN HA_CAN_TABLE_CONDITION_PUSHDOWN
209

Sergei Golubchik's avatar
Sergei Golubchik committed
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
/**
  The handler supports read before write removal optimization

  Read before write removal may be used for storage engines which support
  write without previous read of the row to be updated. Handler returning
  this flag must implement start_read_removal() and end_read_removal().
  The handler may return "fake" rows constructed from the key of the row
  asked for. This is used to optimize UPDATE and DELETE by reducing the
  number of roundtrips between handler and storage engine.
  
  Example:
  UPDATE a=1 WHERE pk IN (<keys>)

  mysql_update()
  {
    if (<conditions for starting read removal>)
      start_read_removal()
      -> handler returns true if read removal supported for this table/query

    while(read_record("pk=<key>"))
      -> handler returns fake row with column "pk" set to <key>

      ha_update_row()
      -> handler sends write "a=1" for row with "pk=<key>"

    end_read_removal()
    -> handler returns the number of rows actually written
  }

  @note This optimization in combination with batching may be used to
        remove even more roundtrips.
*/
242
#define HA_READ_BEFORE_WRITE_REMOVAL  (1ULL << 43)
Sergei Golubchik's avatar
Sergei Golubchik committed
243 244 245 246

/*
  Engine supports extended fulltext API
 */
247
#define HA_CAN_FULLTEXT_EXT              (1ULL << 44)
248

249 250
/*
  Storage engine supports table export using the
251 252 253
  FLUSH TABLE <table_list> FOR EXPORT statement
  (meaning, after this statement one can copy table files out of the
  datadir and later "import" (somehow) in another MariaDB instance)
254
 */
255
#define HA_CAN_EXPORT                 (1ULL << 45)
256

257 258 259 260 261 262 263 264
/*
  Storage engine does not require an exclusive metadata lock
  on the table during optimize. (TODO and repair?).
  It can allow other connections to open the table.
  (it does not necessarily mean that other connections can
  read or modify the table - this is defined by THR locks and the
  ::store_lock() method).
*/
265
#define HA_CONCURRENT_OPTIMIZE          (1ULL << 46)
266

267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
/*
  If the storage engine support tables that will not roll back on commit
  In addition the table should not lock rows and support READ and WRITE
  UNCOMMITTED.
  This is useful for implementing things like SEQUENCE but can also in
  the future be useful to do logging that should never roll back.
*/
#define HA_CAN_TABLES_WITHOUT_ROLLBACK  (1ULL << 47)

/*
  Mainly for usage by SEQUENCE engine. Setting this flag means
  that the table will never roll back and that all operations
  for this table should be stored in the non transactional log
  space that will always be written, even on rollback.
*/

#define HA_PERSISTENT_TABLE              (1ULL << 48)

285 286 287 288 289
/*
  Set of all binlog flags. Currently only contain the capabilities
  flags.
 */
#define HA_BINLOG_FLAGS (HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE)
unknown's avatar
unknown committed
290

291
/* bits in index_flags(index_number) for what you can do with index */
unknown's avatar
unknown committed
292 293 294 295
#define HA_READ_NEXT            1       /* TODO really use this flag */
#define HA_READ_PREV            2       /* supports ::index_prev */
#define HA_READ_ORDER           4       /* index_next/prev follow sort order */
#define HA_READ_RANGE           8       /* can find all records in a range */
296
#define HA_ONLY_WHOLE_INDEX	16	/* Can't use part key searches */
unknown's avatar
unknown committed
297
#define HA_KEYREAD_ONLY         64	/* Support HA_EXTRA_KEYREAD */
298

299 300 301 302 303 304
/*
  Index scan will not return records in rowid order. Not guaranteed to be
  set for unordered (e.g. HASH) indexes.
*/
#define HA_KEY_SCAN_NOT_ROR     128 
#define HA_DO_INDEX_COND_PUSHDOWN  256 /* Supports Index Condition Pushdown */
305 306
/*
  Data is clustered on this key. This means that when you read the key
307
  you also get the row data without any additional disk reads.
308 309
*/
#define HA_CLUSTERED_INDEX      512
310

unknown's avatar
unknown committed
311 312 313
/*
  bits in alter_table_flags:
*/
314
/*
315 316 317 318
  These bits are set if different kinds of indexes can be created or dropped
  in-place without re-creating the table using a temporary table.
  NO_READ_WRITE indicates that the handler needs concurrent reads and writes
  of table data to be blocked.
319 320
  Partitioning needs both ADD and DROP to be supported by its underlying
  handlers, due to error handling, see bug#57778.
321
*/
322 323 324 325 326 327
#define HA_INPLACE_ADD_INDEX_NO_READ_WRITE         (1UL << 0)
#define HA_INPLACE_DROP_INDEX_NO_READ_WRITE        (1UL << 1)
#define HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE  (1UL << 2)
#define HA_INPLACE_DROP_UNIQUE_INDEX_NO_READ_WRITE (1UL << 3)
#define HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE      (1UL << 4)
#define HA_INPLACE_DROP_PK_INDEX_NO_READ_WRITE     (1UL << 5)
328
/*
329 330 331 332
  These are set if different kinds of indexes can be created or dropped
  in-place while still allowing concurrent reads (but not writes) of table
  data. If a handler is capable of one or more of these, it should also set
  the corresponding *_NO_READ_WRITE bit(s).
333
*/
334 335 336 337 338 339
#define HA_INPLACE_ADD_INDEX_NO_WRITE              (1UL << 6)
#define HA_INPLACE_DROP_INDEX_NO_WRITE             (1UL << 7)
#define HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE       (1UL << 8)
#define HA_INPLACE_DROP_UNIQUE_INDEX_NO_WRITE      (1UL << 9)
#define HA_INPLACE_ADD_PK_INDEX_NO_WRITE           (1UL << 10)
#define HA_INPLACE_DROP_PK_INDEX_NO_WRITE          (1UL << 11)
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
/*
  HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is
  supported at all.
  HA_FAST_CHANGE_PARTITION means that optimised variants of the changes
  exists but they are not necessarily done online.

  HA_ONLINE_DOUBLE_WRITE means that the handler supports writing to both
  the new partition and to the old partitions when updating through the
  old partitioning schema while performing a change of the partitioning.
  This means that we can support updating of the table while performing
  the copy phase of the change. For no lock at all also a double write
  from new to old must exist and this is not required when this flag is
  set.
  This is actually removed even before it was introduced the first time.
  The new idea is that handlers will handle the lock level already in
  store_lock for ALTER TABLE partitions.

  HA_PARTITION_ONE_PHASE is a flag that can be set by handlers that take
  care of changing the partitions online and in one phase. Thus all phases
  needed to handle the change are implemented inside the storage engine.
  The storage engine must also support auto-discovery since the frm file
  is changed as part of the change and this change must be controlled by
  the storage engine. A typical engine to support this is NDB (through
  WL #2498).
*/
365 366 367
#define HA_PARTITION_FUNCTION_SUPPORTED         (1UL << 12)
#define HA_FAST_CHANGE_PARTITION                (1UL << 13)
#define HA_PARTITION_ONE_PHASE                  (1UL << 14)
unknown's avatar
unknown committed
368

369 370 371 372 373 374
/* operations for disable/enable indexes */
#define HA_KEY_SWITCH_NONUNIQ      0
#define HA_KEY_SWITCH_ALL          1
#define HA_KEY_SWITCH_NONUNIQ_SAVE 2
#define HA_KEY_SWITCH_ALL_SAVE     3

375 376
/*
  Note: the following includes binlog and closing 0.
unknown's avatar
unknown committed
377
  TODO remove the limit, use dynarrays
378
*/
379
#define MAX_HA 64
380

381 382 383 384 385 386 387
/*
  Use this instead of 0 as the initial value for the slot number of
  handlerton, so that we can distinguish uninitialized slot number
  from slot 0.
*/
#define HA_SLOT_UNDEF ((uint)-1)

388 389 390 391
/*
  Parameters for open() (in register form->filestat)
  HA_GET_INFO does an implicit HA_ABORT_IF_LOCKED
*/
unknown's avatar
unknown committed
392

393 394
#define HA_OPEN_KEYFILE		1U
#define HA_READ_ONLY		16U	/* File opened as readonly */
395
/* Try readonly if can't open with read and write */
396
#define HA_TRY_READ_ONLY	32U
unknown's avatar
unknown committed
397 398 399 400 401

	/* Some key definitions */
#define HA_KEY_NULL_LENGTH	1
#define HA_KEY_BLOB_LENGTH	2

402 403
#define HA_LEX_CREATE_TMP_TABLE	1U
#define HA_CREATE_TMP_ALTER     8U
404
#define HA_LEX_CREATE_SEQUENCE  16U
Aleksey Midenkov's avatar
Aleksey Midenkov committed
405
#define HA_VERSIONED_TABLE      32U
406

unknown's avatar
unknown committed
407 408
#define HA_MAX_REC_LENGTH	65535

409 410
/* Table caching type */
#define HA_CACHE_TBL_NONTRANSACT 0
411 412 413
#define HA_CACHE_TBL_NOCACHE     1U
#define HA_CACHE_TBL_ASKTRANSACT 2U
#define HA_CACHE_TBL_TRANSACT    4U
414

415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
/**
  Options for the START TRANSACTION statement.

  Note that READ ONLY and READ WRITE are logically mutually exclusive.
  This is enforced by the parser and depended upon by trans_begin().

  We need two flags instead of one in order to differentiate between
  situation when no READ WRITE/ONLY clause were given and thus transaction
  is implicitly READ WRITE and the case when READ WRITE clause was used
  explicitly.
*/

// WITH CONSISTENT SNAPSHOT option
static const uint MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT = 1;
// READ ONLY option
static const uint MYSQL_START_TRANS_OPT_READ_ONLY          = 2;
// READ WRITE option
static const uint MYSQL_START_TRANS_OPT_READ_WRITE         = 4;
433

434
/* Flags for method is_fatal_error */
435 436 437
#define HA_CHECK_DUP_KEY 1U
#define HA_CHECK_DUP_UNIQUE 2U
#define HA_CHECK_FK_ERROR 4U
438
#define HA_CHECK_DUP (HA_CHECK_DUP_KEY + HA_CHECK_DUP_UNIQUE)
439
#define HA_CHECK_ALL (~0U)
440

unknown's avatar
unknown committed
441
/*
  Numeric storage engine type codes.
  Every enumerator carries an explicit value; the gaps in the numbering
  are intentional and must be preserved.
*/
enum legacy_db_type
{
  /* note these numerical values are fixed and can *not* be changed */
  DB_TYPE_UNKNOWN=0,
  DB_TYPE_HEAP=6,
  DB_TYPE_MYISAM=9,
  DB_TYPE_MRG_MYISAM=10,
  DB_TYPE_INNODB=12,
  DB_TYPE_EXAMPLE_DB=15,
  DB_TYPE_ARCHIVE_DB=16,
  DB_TYPE_CSV_DB=17,
  DB_TYPE_FEDERATED_DB=18,
  DB_TYPE_BLACKHOLE_DB=19,
  DB_TYPE_PARTITION_DB=20,
  DB_TYPE_BINLOG=21,
  DB_TYPE_PBXT=23,
  DB_TYPE_PERFORMANCE_SCHEMA=28,
  DB_TYPE_ARIA=42,
  DB_TYPE_TOKUDB=43,
  DB_TYPE_SEQUENCE=44,
  DB_TYPE_FIRST_DYNAMIC=45,
  DB_TYPE_DEFAULT=127 // Must be last
};
464 465 466 467 468
/*
  Better name for DB_TYPE_UNKNOWN. Should be used for engines that do not have
  a hard-coded type value here.
 */
#define DB_TYPE_AUTOASSIGN DB_TYPE_UNKNOWN
unknown's avatar
unknown committed
469

470
/*
  Row storage formats.
  Numbering starts at -1 (ROW_TYPE_NOT_USED) and increases by one per
  enumerator. ROW_TYPE_PAGE must stay the last enumerator because
  ROW_TYPE_MAX is computed from it.
*/
enum row_type
{
  ROW_TYPE_NOT_USED= -1,
  ROW_TYPE_DEFAULT,
  ROW_TYPE_FIXED,
  ROW_TYPE_DYNAMIC,
  ROW_TYPE_COMPRESSED,
  ROW_TYPE_REDUNDANT,
  ROW_TYPE_COMPACT,
  ROW_TYPE_PAGE
};
unknown's avatar
unknown committed
473

474 475
/* not part of the enum, so that it shouldn't be in switch(row_type) */
#define ROW_TYPE_MAX ((uint)ROW_TYPE_PAGE + 1)
unknown's avatar
unknown committed
476

477 478 479 480 481 482 483
/* Specifies data storage format for individual columns */
enum column_format_type
{
  /* Not specified (use engine default) */
  COLUMN_FORMAT_TYPE_DEFAULT = 0,
  /* FIXED format */
  COLUMN_FORMAT_TYPE_FIXED   = 1,
  /* DYNAMIC format */
  COLUMN_FORMAT_TYPE_DYNAMIC = 2
};

unknown's avatar
unknown committed
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
/* Binlog function codes; numbering starts at 1, not 0. */
enum enum_binlog_func
{
  BFN_RESET_LOGS        = 1,
  BFN_RESET_SLAVE       = 2,
  BFN_BINLOG_WAIT       = 3,
  BFN_BINLOG_END        = 4,
  BFN_BINLOG_PURGE_FILE = 5
};

/*
  Binlog command codes. The explicit values match the implicit
  numbering (consecutive, starting at 0).
*/
enum enum_binlog_command
{
  LOGCOM_CREATE_TABLE = 0,
  LOGCOM_ALTER_TABLE  = 1,
  LOGCOM_RENAME_TABLE = 2,
  LOGCOM_DROP_TABLE   = 3,
  LOGCOM_CREATE_DB    = 4,
  LOGCOM_ALTER_DB     = 5,
  LOGCOM_DROP_DB      = 6
};

unknown's avatar
unknown committed
502 503 504
/* struct to hold information about the table that should be created */

/* Bits in used_fields */
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
#define HA_CREATE_USED_AUTO             (1UL << 0)
#define HA_CREATE_USED_RAID             (1UL << 1) //RAID is no longer availble
#define HA_CREATE_USED_UNION            (1UL << 2)
#define HA_CREATE_USED_INSERT_METHOD    (1UL << 3)
#define HA_CREATE_USED_MIN_ROWS         (1UL << 4)
#define HA_CREATE_USED_MAX_ROWS         (1UL << 5)
#define HA_CREATE_USED_AVG_ROW_LENGTH   (1UL << 6)
#define HA_CREATE_USED_PACK_KEYS        (1UL << 7)
#define HA_CREATE_USED_CHARSET          (1UL << 8)
#define HA_CREATE_USED_DEFAULT_CHARSET  (1UL << 9)
#define HA_CREATE_USED_DATADIR          (1UL << 10)
#define HA_CREATE_USED_INDEXDIR         (1UL << 11)
#define HA_CREATE_USED_ENGINE           (1UL << 12)
#define HA_CREATE_USED_CHECKSUM         (1UL << 13)
#define HA_CREATE_USED_DELAY_KEY_WRITE  (1UL << 14)
#define HA_CREATE_USED_ROW_FORMAT       (1UL << 15)
#define HA_CREATE_USED_COMMENT          (1UL << 16)
#define HA_CREATE_USED_PASSWORD         (1UL << 17)
#define HA_CREATE_USED_CONNECTION       (1UL << 18)
#define HA_CREATE_USED_KEY_BLOCK_SIZE   (1UL << 19)
Sergey Petrunya's avatar
Sergey Petrunya committed
525
/* The following two are used by Maria engine: */
526 527
#define HA_CREATE_USED_TRANSACTIONAL    (1UL << 20)
#define HA_CREATE_USED_PAGE_CHECKSUM    (1UL << 21)
528 529 530 531 532
/** This is set whenever STATS_PERSISTENT=0|1|default has been
specified in CREATE/ALTER TABLE. See also HA_OPTION_STATS_PERSISTENT in
include/my_base.h. It is possible to distinguish whether
STATS_PERSISTENT=default has been specified or no STATS_PERSISTENT= is
given at all. */
533
#define HA_CREATE_USED_STATS_PERSISTENT (1UL << 22)
534 535 536 537 538 539
/**
   This is set whenever STATS_AUTO_RECALC=0|1|default has been
   specified in CREATE/ALTER TABLE. See enum_stats_auto_recalc.
   It is possible to distinguish whether STATS_AUTO_RECALC=default
   has been specified or no STATS_AUTO_RECALC= is given at all.
*/
540
#define HA_CREATE_USED_STATS_AUTO_RECALC (1UL << 23)
541 542 543 544 545 546
/**
   This is set whenever STATS_SAMPLE_PAGES=N|default has been
   specified in CREATE/ALTER TABLE. It is possible to distinguish whether
   STATS_SAMPLE_PAGES=default has been specified or no STATS_SAMPLE_PAGES= is
   given at all.
*/
547
#define HA_CREATE_USED_STATS_SAMPLE_PAGES (1UL << 24)
548

549 550
/* Create a sequence */
#define HA_CREATE_USED_SEQUENCE           (1UL << 25)
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568

/*
  This is master database for most of system tables. However there
  can be other databases which can hold system tables. Respective
  storage engines define their own system database names.
*/
extern const char *mysqld_system_database;

/*
  Structure to hold list of system_database.system_table.
  This is used at both mysqld and storage engine layer.
*/
/* One (database, table) name pair; a plain aggregate with no constructor. */
struct st_system_tablename
{
  const char *db;          /* name of the database holding the table */
  const char *tablename;   /* name of the table inside that database */
};

unknown's avatar
unknown committed
569

570
typedef ulonglong my_xid; // this line is the same as in log_event.h
571 572 573 574
#define MYSQL_XID_PREFIX "MySQLXid"
#define MYSQL_XID_PREFIX_LEN 8 // must be a multiple of 8
#define MYSQL_XID_OFFSET (MYSQL_XID_PREFIX_LEN+sizeof(server_id))
#define MYSQL_XID_GTRID_LEN (MYSQL_XID_OFFSET+sizeof(my_xid))
575

576
#define XIDDATASIZE MYSQL_XIDDATASIZE
577 578 579
#define MAXGTRIDSIZE 64
#define MAXBQUALSIZE 64

unknown's avatar
unknown committed
580 581 582
#define COMPATIBLE_DATA_YES 0
#define COMPATIBLE_DATA_NO  1

583 584 585 586 587 588 589 590
/**
  struct xid_t is binary compatible with the XID structure as
  in the X/Open CAE Specification, Distributed Transaction Processing:
  The XA Specification, X/Open Company Ltd., 1991.
  http://www.opengroup.org/bookstore/catalog/c193.htm

  @see MYSQL_XID in mysql/plugin.h
*/
591 592 593 594
struct xid_t {
  long formatID;
  long gtrid_length;
  long bqual_length;
595
  char data[XIDDATASIZE];  // not \0-terminated !
596

597
  xid_t() {}                                /* Remove gcc warning */
unknown's avatar
unknown committed
598
  bool eq(struct xid_t *xid)
Sergei Golubchik's avatar
Sergei Golubchik committed
599
  { return !xid->is_null() && eq(xid->gtrid_length, xid->bqual_length, xid->data); }
600
  bool eq(long g, long b, const char *d)
Sergei Golubchik's avatar
Sergei Golubchik committed
601
  { return !is_null() && g == gtrid_length && b == bqual_length && !memcmp(d, data, g+b); }
unknown's avatar
unknown committed
602
  void set(struct xid_t *xid)
603
  { memcpy(this, xid, xid->length()); }
unknown's avatar
unknown committed
604 605 606 607 608 609
  void set(long f, const char *g, long gl, const char *b, long bl)
  {
    formatID= f;
    memcpy(data, g, gtrid_length= gl);
    memcpy(data+gl, b, bqual_length= bl);
  }
610
  void set(ulonglong xid)
611
  {
612
    my_xid tmp;
unknown's avatar
unknown committed
613
    formatID= 1;
614
    set(MYSQL_XID_PREFIX_LEN, 0, MYSQL_XID_PREFIX);
615 616 617
    memcpy(data+MYSQL_XID_PREFIX_LEN, &server_id, sizeof(server_id));
    tmp= xid;
    memcpy(data+MYSQL_XID_OFFSET, &tmp, sizeof(tmp));
618 619 620 621
    gtrid_length=MYSQL_XID_GTRID_LEN;
  }
  void set(long g, long b, const char *d)
  {
unknown's avatar
unknown committed
622
    formatID= 1;
623 624 625 626 627 628 629 630
    gtrid_length= g;
    bqual_length= b;
    memcpy(data, d, g+b);
  }
  bool is_null() { return formatID == -1; }
  void null() { formatID= -1; }
  my_xid quick_get_my_xid()
  {
631 632 633
    my_xid tmp;
    memcpy(&tmp, data+MYSQL_XID_OFFSET, sizeof(tmp));
    return tmp;
634 635 636 637 638 639 640
  }
  my_xid get_my_xid()
  {
    return gtrid_length == MYSQL_XID_GTRID_LEN && bqual_length == 0 &&
           !memcmp(data, MYSQL_XID_PREFIX, MYSQL_XID_PREFIX_LEN) ?
           quick_get_my_xid() : 0;
  }
641 642
  uint length()
  {
643
    return static_cast<uint>(sizeof(formatID)) + key_length();
644
  }
645
  uchar *key() const
646
  {
647
    return (uchar *)&gtrid_length;
648
  }
649
  uint key_length() const
650
  {
651 652
    return static_cast<uint>(sizeof(gtrid_length)+sizeof(bqual_length)+
                             gtrid_length+bqual_length);
653
  }
654
};
655 656
typedef struct xid_t XID;

657 658 659
/* for recover() handlerton call */
#define MIN_XID_LIST_SIZE  128
#define MAX_XID_LIST_SIZE  (1024*128)
660

unknown's avatar
unknown committed
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
/*
  These structures are used to pass information from a set of SQL commands
  on add/drop/change tablespace definitions to the proper hton.
*/
#define UNDEF_NODEGROUP 65535
/* Tablespace / logfile group command codes; -1 means "not defined". */
enum ts_command_type
{
  TS_CMD_NOT_DEFINED=           -1,
  CREATE_TABLESPACE=            0,
  ALTER_TABLESPACE=             1,
  CREATE_LOGFILE_GROUP=         2,
  ALTER_LOGFILE_GROUP=          3,
  DROP_TABLESPACE=              4,
  DROP_LOGFILE_GROUP=           5,
  CHANGE_FILE_TABLESPACE=       6,
  ALTER_ACCESS_MODE_TABLESPACE= 7
};

/* ALTER TABLESPACE sub-operations; note that the value 0 is not used. */
enum ts_alter_tablespace_type
{
  TS_ALTER_TABLESPACE_TYPE_NOT_DEFINED= -1,
  ALTER_TABLESPACE_ADD_FILE=            1,
  ALTER_TABLESPACE_DROP_FILE=           2
};

/* Tablespace access modes; -1 means "not defined". */
enum tablespace_access_mode
{
  TS_NOT_DEFINED=    -1,
  TS_READ_ONLY=      0,
  TS_READ_WRITE=     1,
  TS_NOT_ACCESSIBLE= 2
};

694
struct handlerton;
unknown's avatar
unknown committed
695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
class st_alter_tablespace : public Sql_alloc
{
  public:
  const char *tablespace_name;
  const char *logfile_group_name;
  enum ts_command_type ts_cmd_type;
  enum ts_alter_tablespace_type ts_alter_tablespace_type;
  const char *data_file_name;
  const char *undo_file_name;
  const char *redo_file_name;
  ulonglong extent_size;
  ulonglong undo_buffer_size;
  ulonglong redo_buffer_size;
  ulonglong initial_size;
  ulonglong autoextend_size;
  ulonglong max_size;
  uint nodegroup_id;
712
  handlerton *storage_engine;
unknown's avatar
unknown committed
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729
  bool wait_until_completed;
  const char *ts_comment;
  enum tablespace_access_mode ts_access_mode;
  st_alter_tablespace()
  {
    tablespace_name= NULL;
    logfile_group_name= "DEFAULT_LG"; //Default log file group
    ts_cmd_type= TS_CMD_NOT_DEFINED;
    data_file_name= NULL;
    undo_file_name= NULL;
    redo_file_name= NULL;
    extent_size= 1024*1024;        //Default 1 MByte
    undo_buffer_size= 8*1024*1024; //Default 8 MByte
    redo_buffer_size= 8*1024*1024; //Default 8 MByte
    initial_size= 128*1024*1024;   //Default 128 MByte
    autoextend_size= 0;            //No autoextension as default
    max_size= 0;                   //Max size == initial size => no extension
730
    storage_engine= NULL;
unknown's avatar
unknown committed
731 732 733 734 735 736 737
    nodegroup_id= UNDEF_NODEGROUP;
    wait_until_completed= TRUE;
    ts_comment= NULL;
    ts_access_mode= TS_NOT_DEFINED;
  }
};

738 739
/* The handler for a table type.  Will be included in the TABLE structure */

Konstantin Osipov's avatar
Konstantin Osipov committed
740
struct TABLE;
741 742 743 744 745 746

/*
  Make sure that the order of schema_tables and enum_schema_tables are the same.
*/
/*
  Schema table identifiers, numbered implicitly from 0 in declaration
  order. The two spatial entries exist only when HAVE_SPATIAL is defined.
*/
enum enum_schema_tables
{
  SCH_ALL_PLUGINS,
  SCH_APPLICABLE_ROLES,
  SCH_CHARSETS,
  SCH_COLLATIONS,
  SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
  SCH_COLUMNS,
  SCH_COLUMN_PRIVILEGES,
  SCH_ENABLED_ROLES,
  SCH_ENGINES,
  SCH_EVENTS,
  SCH_EXPLAIN,
  SCH_FILES,
  SCH_GLOBAL_STATUS,
  SCH_GLOBAL_VARIABLES,
  SCH_KEY_CACHES,
  SCH_KEY_COLUMN_USAGE,
  SCH_OPEN_TABLES,
  SCH_PARAMETERS,
  SCH_PARTITIONS,
  SCH_PLUGINS,
  SCH_PROCESSLIST,
  SCH_PROFILES,
  SCH_REFERENTIAL_CONSTRAINTS,
  SCH_PROCEDURES,
  SCH_SCHEMATA,
  SCH_SCHEMA_PRIVILEGES,
  SCH_SESSION_STATUS,
  SCH_SESSION_VARIABLES,
  SCH_STATISTICS,
  SCH_SYSTEM_VARIABLES,
  SCH_TABLES,
  SCH_TABLESPACES,
  SCH_TABLE_CONSTRAINTS,
  SCH_TABLE_NAMES,
  SCH_TABLE_PRIVILEGES,
  SCH_TRIGGERS,
  SCH_USER_PRIVILEGES,
  SCH_VIEWS,
#ifdef HAVE_SPATIAL
  SCH_GEOMETRY_COLUMNS,
  SCH_SPATIAL_REF_SYS,
#endif /*HAVE_SPATIAL*/
};

Konstantin Osipov's avatar
Konstantin Osipov committed
791
struct TABLE_SHARE;
Sergei Golubchik's avatar
Sergei Golubchik committed
792
struct HA_CREATE_INFO;
793 794
struct st_foreign_key_info;
typedef struct st_foreign_key_info FOREIGN_KEY_INFO;
unknown's avatar
unknown committed
795 796 797
/*
  Function type of the printing callback that is passed (as a pointer) to
  handlerton::show_status().

  Each of the three strings - type, file and status - is passed as a
  pointer together with its length.
  NOTE(review): the meaning of the bool return value is not visible in
  this header; confirm against the implementation before relying on it.
*/
typedef bool (stat_print_fn)(THD *thd, const char *type, uint type_len,
                             const char *file, uint file_len,
                             const char *status, uint status_len);
798
/* Selector passed as the last argument of handlerton::show_status() */
enum ha_stat_type
{
  HA_ENGINE_STATUS,
  HA_ENGINE_LOGS,
  HA_ENGINE_MUTEX
};
unknown's avatar
unknown committed
799
extern st_plugin_int *hton2plugin[MAX_HA];
800

801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
/* Type definitions for transaction log maintenance */
enum log_status
{
  /* log is free and can be deleted */
  HA_LOG_STATUS_FREE= 0,
  /* log can't be deleted because it is in use */
  HA_LOG_STATUS_INUSE= 1,
  /* no such log (can't be returned by the log iterator status) */
  HA_LOG_STATUS_NOSUCHLOG= 2
};
/*
  Function for signaling that the log file changed its state from
  HA_LOG_STATUS_INUSE to HA_LOG_STATUS_FREE.

  Currently it does nothing; it will be implemented as part of the new
  transaction log management for engines.
  TODO: implement the function.

  NOTE(review): the first parameter is declared as an unnamed handlerton
  passed by value, unlike the handlerton* taken by the handlerton methods.
  Confirm whether a pointer was intended before implementing this.
*/
void signal_log_not_needed(struct handlerton, char *log_file);
/*
  Data of transaction log iterator.

  Describes one log file: its name and its current log_status.
*/
struct handler_log_file_data {
  LEX_STRING filename;      /* name of the log file */
  enum log_status status;   /* one of the HA_LOG_STATUS_* values */
};

826 827 828
/*
  Definitions for engine-specific table/field/index options in the CREATE TABLE.

Sergei Golubchik's avatar
Sergei Golubchik committed
829 830
  Options are declared with HA_*OPTION_* macros (HA_TOPTION_NUMBER,
  HA_FOPTION_ENUM, HA_IOPTION_STRING, etc).
831 832 833 834 835

  Every macro takes the option name, and the name of the underlying field of
  the appropriate C structure. The "appropriate C structure" is
  ha_table_option_struct for table level options,
  ha_field_option_struct for field level options,
Sergei Golubchik's avatar
Sergei Golubchik committed
836
  ha_index_option_struct for key level options. The engine either
837 838 839 840
  defines a structure of this name, or uses #define's to map
  these "appropriate" names to the actual structure type name.

  ULL options use a ulonglong as the backing store.
Sergei Golubchik's avatar
Sergei Golubchik committed
841
  HA_*OPTION_NUMBER() takes the option name, the structure field name,
842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863
  the default value for the option, min, max, and blk_siz values.

  STRING options use a char* as a backing store.
  HA_*OPTION_STRING takes the option name and the structure field name.
  The default value will be 0.

  ENUM options use a uint as a backing store (not enum!!!).
  HA_*OPTION_ENUM takes the option name, the structure field name,
  the default value for the option as a number, and a string with the
  permitted values for this enum - one string with comma separated values,
  for example: "gzip,bzip2,lzma"

  BOOL options use a bool as a backing store.
  HA_*OPTION_BOOL takes the option name, the structure field name,
  and the default value for the option.
  From the SQL, BOOL options accept YES/NO, ON/OFF, and 1/0.

  The name of the option is limited to 255 bytes,
  the value (for string options) - to the 32767 bytes.

  See ha_example.cc for an example.
*/
864 865 866 867 868

struct ha_table_option_struct;
struct ha_field_option_struct;
struct ha_index_option_struct;

869 870 871
/*
  Kind of an engine-defined option; the comment next to each value names
  the C type used as the backing store for that kind of option.
*/
enum ha_option_type { HA_OPTION_TYPE_ULL,    /* unsigned long long */
                      HA_OPTION_TYPE_STRING, /* char * */
                      HA_OPTION_TYPE_ENUM,   /* uint */
                      HA_OPTION_TYPE_BOOL,   /* bool */
                      HA_OPTION_TYPE_SYSVAR};/* type of the sysvar */
874

Sergei Golubchik's avatar
Sergei Golubchik committed
875
/*
  Generic initializers for one ha_create_table_option element.

  Each macro expands to a brace-enclosed initializer whose ten values are
  given in the declaration order of st_ha_create_table_option:
    type, name, name_length, offset, def_value, min_value, max_value,
    block_size, values, var.

  'name' must be a string literal (its length is taken with sizeof), and
  'struc'/'field' identify the member that backs the option (via offsetof).
  For ENUM options max_value is set to the length of the 'values' string.
  HA_xOPTION_END is the all-zero element that terminates an option array.
*/
#define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz)   \
  { HA_OPTION_TYPE_ULL, name, sizeof(name)-1,                        \
    offsetof(struc, field), def, min, max, blk_siz, 0, 0 }
#define HA_xOPTION_STRING(name, struc, field)                        \
  { HA_OPTION_TYPE_STRING, name, sizeof(name)-1,                     \
    offsetof(struc, field), 0, 0, 0, 0, 0, 0}
#define HA_xOPTION_ENUM(name, struc, field, values, def)             \
  { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1,                       \
    offsetof(struc, field), def, 0,                                  \
    sizeof(values)-1, 0, values, 0 }
#define HA_xOPTION_BOOL(name, struc, field, def)                     \
  { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1,                       \
    offsetof(struc, field), def, 0, 1, 0, 0, 0 }
#define HA_xOPTION_SYSVAR(name, struc, field, sysvar)                \
  { HA_OPTION_TYPE_SYSVAR, name, sizeof(name)-1,                     \
    offsetof(struc, field), 0, 0, 0, 0, 0, MYSQL_SYSVAR(sysvar) }
#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
892

Sergei Golubchik's avatar
Sergei Golubchik committed
893 894
/*
  Table level options: the HA_xOPTION_* initializers bound to
  ha_table_option_struct.
*/
#define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
  HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz)
#define HA_TOPTION_STRING(name, field)                               \
  HA_xOPTION_STRING(name, ha_table_option_struct, field)
#define HA_TOPTION_ENUM(name, field, values, def)                    \
  HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def)
#define HA_TOPTION_BOOL(name, field, def)                            \
  HA_xOPTION_BOOL(name, ha_table_option_struct, field, def)
#define HA_TOPTION_SYSVAR(name, field, sysvar)                       \
  HA_xOPTION_SYSVAR(name, ha_table_option_struct, field, sysvar)
#define HA_TOPTION_END HA_xOPTION_END

Sergei Golubchik's avatar
Sergei Golubchik committed
905 906
/*
  Field level options: the HA_xOPTION_* initializers bound to
  ha_field_option_struct.
*/
#define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
  HA_xOPTION_NUMBER(name, ha_field_option_struct, field, def, min, max, blk_siz)
#define HA_FOPTION_STRING(name, field)                               \
  HA_xOPTION_STRING(name, ha_field_option_struct, field)
#define HA_FOPTION_ENUM(name, field, values, def)                    \
  HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def)
#define HA_FOPTION_BOOL(name, field, def)                            \
  HA_xOPTION_BOOL(name, ha_field_option_struct, field, def)
#define HA_FOPTION_SYSVAR(name, field, sysvar)                       \
  HA_xOPTION_SYSVAR(name, ha_field_option_struct, field, sysvar)
#define HA_FOPTION_END HA_xOPTION_END

Sergei Golubchik's avatar
Sergei Golubchik committed
917 918 919 920 921 922
/*
  Index (key) level options: the HA_xOPTION_* initializers bound to
  ha_index_option_struct.
*/
#define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
  HA_xOPTION_NUMBER(name, ha_index_option_struct, field, def, min, max, blk_siz)
#define HA_IOPTION_STRING(name, field)                               \
  HA_xOPTION_STRING(name, ha_index_option_struct, field)
#define HA_IOPTION_ENUM(name, field, values, def)                    \
  HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def)
#define HA_IOPTION_BOOL(name, field, def)                            \
  HA_xOPTION_BOOL(name, ha_index_option_struct, field, def)
#define HA_IOPTION_SYSVAR(name, field, sysvar)                       \
  HA_xOPTION_SYSVAR(name, ha_index_option_struct, field, sysvar)
#define HA_IOPTION_END HA_xOPTION_END
928 929 930 931 932 933 934 935 936

/*
  Description of one engine-defined table/field/index option.

  Elements are normally written with the HA_*OPTION_* macros, which
  initialize the members positionally - so the declaration order of the
  members below must stay in sync with those macros.
*/
typedef struct st_ha_create_table_option {
  enum ha_option_type type;     /* kind of the option (HA_OPTION_TYPE_*) */
  const char *name;             /* option name */
  size_t name_length;           /* length of name, without the trailing 0 */
  ptrdiff_t offset;             /* offsetof() the backing structure member */
  ulonglong def_value;          /* default value */
  /* limits for numeric options; the macros also reuse max_value for
     ENUM (length of the values string) and BOOL (1) options */
  ulonglong min_value, max_value, block_size;
  const char *values;           /* ENUM: comma separated permitted values */
  struct st_mysql_sys_var *var; /* SYSVAR: the underlying system variable */
} ha_create_table_option;
939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979

/* Kinds of iterator that can be requested from handlerton::create_iterator() */
enum handler_iterator_type
{
  HA_TRANSACTLOG_ITERATOR= 1   /* request of transaction log iterator */
};
/* Outcome of an attempt to create an iterator */
enum handler_create_iterator_result
{
  /* iterator created */
  HA_ITERATOR_OK,
  /* such type of iterator is not supported */
  HA_ITERATOR_UNSUPPORTED,
  /* error during iterator creation */
  HA_ITERATOR_ERROR
};

/*
  Iterator structure. Can be used by handler/handlerton for different purposes.

  An iterator should be created so that it points "before" the first object
  it iterates over; the first next() call then moves it to the first object,
  or returns !=0 if there is nothing to iterate through.
*/
struct handler_iterator {
  /*
    Moves the iterator to the next record and returns 0, or returns !=0
    if there are no more records.
    iterator_object is filled in by this function if next() returns 0.
    The content of iterator_object depends on the iterator type.
  */
  int (*next)(struct handler_iterator *, void *iterator_object);
  /*
    Frees the resources allocated by the iterator; after this call the
    iterator is not usable.
  */
  void (*destroy)(struct handler_iterator *);
  /*
    Pointer to a buffer for the iterator to use.
    Should be allocated by the function which created the iterator and
    freed by the above "destroy" call.
  */
  void *buffer;
};

Konstantin Osipov's avatar
Konstantin Osipov committed
980
class handler;
981
class group_by_handler;
982
struct Query;
983 984 985
typedef class st_select_lex SELECT_LEX;
typedef struct st_order ORDER;

unknown's avatar
unknown committed
986 987
/*
  handlerton is a singleton structure - one instance per storage engine -
unknown's avatar
unknown committed
988 989
  to provide access to storage engine functionality that works on the
  "global" level (unlike handler class that works on a per-table basis)
unknown's avatar
unknown committed
990 991

  usually handlerton instance is defined statically in ha_xxx.cc as
992

unknown's avatar
unknown committed
993 994 995 996
  static handlerton { ... } xxx_hton;

  savepoint_*, prepare, recover, and *_by_xid pointers can be 0.
*/
997
struct handlerton
998
{
unknown's avatar
unknown committed
999
  /*
unknown's avatar
unknown committed
1000
    Historical marker for whether the engine is available or not
1001 1002 1003 1004
  */
  SHOW_COMP_OPTION state;

  /*
1005 1006 1007
    Historical number used for frm file to determine the correct
    storage engine.  This is going away and new engines will just use
    "name" for this.
1008
  */
unknown's avatar
unknown committed
1009
  enum legacy_db_type db_type;
unknown's avatar
unknown committed
1010 1011 1012 1013 1014 1015 1016 1017 1018
  /*
    each storage engine has its own memory area (actually a pointer)
    in the thd, for storing per-connection information.
    It is accessed as

      thd->ha_data[xxx_hton.slot]

   slot number is initialized by MySQL after xxx_init() is called.
   */
1019
   uint slot;
unknown's avatar
unknown committed
1020 1021 1022 1023 1024 1025 1026 1027 1028
   /*
     to store per-savepoint data storage engine is provided with an area
     of a requested size (0 is ok here).
     savepoint_offset must be initialized statically to the size of
     the needed memory to store per-savepoint information.
     After xxx_init it is changed to be an offset to savepoint storage
     area and need not be used by storage engine.
     see binlog_hton and binlog_savepoint_set/rollback for an example.
   */
1029
   uint savepoint_offset;
unknown's avatar
unknown committed
1030 1031 1032 1033 1034 1035 1036 1037
   /*
     handlerton methods:

     close_connection is only called if
     thd->ha_data[xxx_hton.slot] is non-zero, so even if you don't need
     this storage area - set it to something, so that MySQL would know
     this storage engine was accessed in this connection
   */
1038
   int  (*close_connection)(handlerton *hton, THD *thd);
1039 1040 1041
   /*
     Tell handler that query has been killed.
   */
1042
   void (*kill_query)(handlerton *hton, THD *thd, enum thd_kill_levels level);
unknown's avatar
unknown committed
1043 1044 1045 1046
   /*
     sv points to an uninitialized storage area of requested size
     (see savepoint_offset description)
   */
1047
   int  (*savepoint_set)(handlerton *hton, THD *thd, void *sv);
unknown's avatar
unknown committed
1048 1049 1050 1051
   /*
     sv points to a storage area, that was earlier passed
     to the savepoint_set call
   */
1052
   int  (*savepoint_rollback)(handlerton *hton, THD *thd, void *sv);
1053 1054 1055 1056 1057 1058 1059
   /**
     Check if storage engine allows to release metadata locks which were
     acquired after the savepoint if rollback to savepoint is done.
     @return true  - If it is safe to release MDL locks.
             false - If it is not.
   */
   bool (*savepoint_rollback_can_release_mdl)(handlerton *hton, THD *thd);
1060
   int  (*savepoint_release)(handlerton *hton, THD *thd, void *sv);
unknown's avatar
unknown committed
1061 1062 1063 1064 1065 1066 1067
   /*
     'all' is true if it's a real commit, that makes persistent changes
     'all' is false if it's not in fact a commit but an end of the
     statement that is part of the transaction.
     NOTE 'all' is also false in auto-commit mode where 'end of statement'
     and 'real commit' mean the same event.
   */
1068 1069 1070 1071 1072 1073 1074 1075
   int (*commit)(handlerton *hton, THD *thd, bool all);
   /*
     The commit_ordered() method is called prior to the commit() method, after
     the transaction manager has decided to commit (not rollback) the
     transaction. Unlike commit(), commit_ordered() is called only when the
     full transaction is committed, not for each commit of statement
     transaction in a multi-statement transaction.

unknown's avatar
unknown committed
1076 1077 1078
     Note that, like prepare(), commit_ordered() is only called when 2-phase
     commit takes place. Ie. when no binary log and only a single engine
     participates in a transaction, one commit() is called, no
unknown's avatar
unknown committed
1079
     commit_ordered(). So engines must be prepared for this.
unknown's avatar
unknown committed
1080

1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
     The calls to commit_ordered() in multiple parallel transactions are
     guaranteed to happen in the same order in every participating
     handler. This can be used to ensure the same commit order among multiple
     handlers (eg. in table handler and binlog). So if transaction T1 calls
     into commit_ordered() of handler A before T2, then T1 will also call
     commit_ordered() of handler B before T2.

     Engines that implement this method should during this call make the
     transaction visible to other transactions, thereby making the order of
     transaction commits be defined by the order of commit_ordered() calls.

unknown's avatar
unknown committed
1092
     The intention is that commit_ordered() should do the minimal amount of
1093 1094 1095 1096 1097
     work that needs to happen in consistent commit order among handlers. To
     preserve ordering, calls need to be serialised on a global mutex, so
     doing any time-consuming or blocking operations in commit_ordered() will
     limit scalability.

unknown's avatar
unknown committed
1098 1099 1100
     Handlers can rely on commit_ordered() calls to be serialised (no two
     calls can run in parallel, so no extra locking on the handler part is
     required to ensure this).
1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111

     Note that commit_ordered() can be called from a different thread than the
     one handling the transaction! So it can not do anything that depends on
     thread local storage, in particular it can not call my_error() and
     friends (instead it can store the error code and delay the call of
     my_error() to the commit() method).

     Similarly, since commit_ordered() returns void, any return error code
     must be saved and returned from the commit() method instead.

     The commit_ordered method is optional, and can be left unset if not
unknown's avatar
unknown committed
1112 1113
     needed in a particular handler (then there will be no ordering guarantees
     wrt. other engines and binary log).
1114 1115
   */
   void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
1116 1117
   int  (*rollback)(handlerton *hton, THD *thd, bool all);
   int  (*prepare)(handlerton *hton, THD *thd, bool all);
1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135
   /*
     The prepare_ordered method is optional. If set, it will be called after
     successful prepare() in all handlers participating in 2-phase
     commit. Like commit_ordered(), it is called only when the full
     transaction is committed, not for each commit of statement transaction.

     The calls to prepare_ordered() among multiple parallel transactions are
     ordered consistently with calls to commit_ordered(). This means that
     calls to prepare_ordered() effectively define the commit order, and that
     each handler will see the same sequence of transactions calling into
     prepare_ordered() and commit_ordered().

     Thus, prepare_ordered() can be used to define commit order for handlers
     that need to do this in the prepare step (like binlog). It can also be
     used to release transaction's locks early in an order consistent with the
     order transactions will be eventually committed.

     Like commit_ordered(), prepare_ordered() calls are serialised to maintain
unknown's avatar
unknown committed
1136
     ordering, so the intention is that they should execute fast, with only
1137 1138 1139 1140 1141 1142 1143 1144 1145
     the minimal amount of work needed to define commit order. Handlers can
     rely on this serialisation, and do not need to do any extra locking to
     avoid two prepare_ordered() calls running in parallel.

     Like commit_ordered(), prepare_ordered() is not guaranteed to be called
     in the context of the thread handling the rest of the transaction. So it
     cannot invoke code that relies on thread local storage, in particular it
     cannot call my_error().

unknown's avatar
unknown committed
1146 1147 1148 1149
     prepare_ordered() cannot cause a rollback by returning an error, all
     possible errors must be handled in prepare() (the prepare_ordered()
     method returns void). In case of some fatal error, a record of the error
     must be made internally by the engine and returned from commit() later.
1150 1151 1152 1153 1154 1155

     Note that for user-level XA SQL commands, no consistent ordering among
     prepare_ordered() and commit_ordered() is guaranteed (as that would
     require blocking all other commits for an indefinite time).

     When 2-phase commit is not used (eg. only one engine (and no binlog) in
unknown's avatar
unknown committed
1156
     transaction), neither prepare() nor prepare_ordered() is called.
1157 1158
   */
   void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
1159 1160 1161
   int  (*recover)(handlerton *hton, XID *xid_list, uint len);
   int  (*commit_by_xid)(handlerton *hton, XID *xid);
   int  (*rollback_by_xid)(handlerton *hton, XID *xid);
1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
   /*
     The commit_checkpoint_request() handlerton method is used to checkpoint
     the XA recovery process for storage engines that support two-phase
     commit.

     The method is optional - an engine that does not implement it is expected
     to work the traditional way, where every commit() durably flushes the
     transaction to disk in the engine before completion, so XA recovery will
     no longer be needed for that transaction.

     An engine that does implement commit_checkpoint_request() is also
     expected to implement commit_ordered(), so that ordering of commits is
     consistent between 2pc participants. Such engine is no longer required to
     durably flush to disk transactions in commit(), provided that the
     transaction has been successfully prepare()d and commit_ordered(); thus
     potentially saving one fsync() call. (Engine must still durably flush
     to disk in commit() when no prepare()/commit_ordered() steps took place,
     at least if durable commits are wanted; this happens eg. if binlog is
     disabled).

     The TC will periodically (eg. once per binlog rotation) call
     commit_checkpoint_request(). When this happens, the engine must arrange
     for all transaction that have completed commit_ordered() to be durably
     flushed to disk (this does not include transactions that might be in the
     middle of executing commit_ordered()). When such flush has completed, the
     engine must call commit_checkpoint_notify_ha(), passing back the opaque
     "cookie".

     The flush and call of commit_checkpoint_notify_ha() need not happen
     immediately - it can be scheduled and performed asynchronously (ie. as
     part of next prepare(), or sync every second, or whatever), but should
     not be postponed indefinitely. It is however also permissible to do it
     immediately, before returning from commit_checkpoint_request().

     When commit_checkpoint_notify_ha() is called, the TC will know that the
     transactions are durably committed, and thus no longer require XA
     recovery. It uses that to reduce the work needed for any subsequent XA
     recovery process.
   */
   void (*commit_checkpoint_request)(handlerton *hton, void *cookie);
1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214
  /*
    Disable or enable checkpointing internal to the storage engine. This is
    used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
    the engine will never start any recovery from a time between
    FLUSH TABLES ... ; UNLOCK TABLES.

    While checkpointing is disabled, the engine should pause any background
    write activity (such as tablespace checkpointing) that require consistency
    between different files (such as transaction log and tablespace files) for
    crash recovery to succeed. The idea is to use this to make safe
    multi-volume LVM snapshot backups.
  */
   int  (*checkpoint_state)(handlerton *hton, bool disabled);
1215 1216 1217
   void *(*create_cursor_read_view)(handlerton *hton, THD *thd);
   void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
   void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
1218
   handler *(*create)(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root);
1219 1220 1221 1222 1223
   void (*drop_database)(handlerton *hton, char* path);
   int (*panic)(handlerton *hton, enum ha_panic_function flag);
   int (*start_consistent_snapshot)(handlerton *hton, THD *thd);
   bool (*flush_logs)(handlerton *hton);
   bool (*show_status)(handlerton *hton, THD *thd, stat_print_fn *print, enum ha_stat_type stat);
unknown's avatar
unknown committed
1224 1225
   uint (*partition_flags)();
   uint (*alter_table_flags)(uint flags);
1226
   int (*alter_tablespace)(handlerton *hton, THD *thd, st_alter_tablespace *ts_info);
1227 1228 1229
   int (*fill_is_table)(handlerton *hton, THD *thd, TABLE_LIST *tables, 
                        class Item *cond, 
                        enum enum_schema_tables);
1230
   uint32 flags;                                /* global handler flags */
1231 1232 1233
   /*
      Those handlerton functions below are properly initialized at handler
      init.
unknown's avatar
unknown committed
1234
   */
1235 1236 1237
   int (*binlog_func)(handlerton *hton, THD *thd, enum_binlog_func fn, void *arg);
   void (*binlog_log_query)(handlerton *hton, THD *thd, 
                            enum_binlog_command binlog_command,
unknown's avatar
unknown committed
1238 1239
                            const char *query, uint query_length,
                            const char *db, const char *table_name);
1240
   int (*release_temporary_latches)(handlerton *hton, THD *thd);
1241 1242 1243 1244 1245 1246 1247 1248

   /*
     Get log status.
     If log_status is null then the handler does not support transaction
     log information (i.e. log iterator can't be created).
     (see example of implementation in handler.cc, TRANS_LOG_MGM_EXAMPLE_CODE)

   */
1249
   enum log_status (*get_log_status)(handlerton *hton, char *log);
1250 1251 1252 1253 1254 1255

   /*
     Iterators creator.
     Presence of the pointer should be checked before using
   */
   enum handler_create_iterator_result
1256
     (*create_iterator)(handlerton *hton, enum handler_iterator_type type,
1257
                        struct handler_iterator *fill_this_in);
1258 1259 1260 1261 1262
   int (*abort_transaction)(handlerton *hton, THD *bf_thd,
			    THD *victim_thd, my_bool signal);
   int (*set_checkpoint)(handlerton *hton, const XID* xid);
   int (*get_checkpoint)(handlerton *hton, XID* xid);
   void (*fake_trx_id)(handlerton *hton, THD *thd);
1263 1264 1265 1266 1267 1268 1269
   /*
     Optional clauses in the CREATE/ALTER TABLE
   */
   ha_create_table_option *table_options; // table level options
   ha_create_table_option *field_options; // these are specified per field
   ha_create_table_option *index_options; // these are specified per index

1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
   /**
     The list of extensions of files created for a single table in the
     database directory (datadir/db_name/).

     Used by open_table_error(), by the default rename_table and delete_table
     handler methods, and by the default discovery implementation.
  
     For engines that have more than one file name extension (separate
     metadata, index, and/or data files), the order of elements is relevant.
     The first element of the engine's file name extensions array should be
     the metadata file extension. This is assumed by open_table_error()
     and the default discovery implementation.

     Second element - data file extension. This is
     assumed by the REPAIR TABLE ... USE_FRM implementation.
   */
   const char **tablefile_extensions; // by default - empty list

1288 1289 1290 1291 1292 1293 1294 1295 1296
  /**********************************************************************
   Functions to intercept queries
  **********************************************************************/

  /*
    Create and return a group_by_handler, if the storage engine can execute
    the summary / group by query.
    If the storage engine can't do that, return NULL.

1297
    The server guarantees that all tables in the list belong to this
1298 1299
    storage engine.
  */
1300
  group_by_handler *(*create_group_by)(THD *thd, Query *query);
1301

1302 1303 1304 1305 1306 1307
   /*********************************************************************
     Table discovery API.
     It allows the server to "discover" tables that exist in the storage
     engine, without user issuing an explicit CREATE TABLE statement.
   **********************************************************************/

1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321
   /*
     This method is required for any engine that supports automatic table
     discovery, there is no default implementation.

     Given a TABLE_SHARE discover_table() fills it in with a correct table
     structure using one of the TABLE_SHARE::init_from_* methods.

     Returns HA_ERR_NO_SUCH_TABLE if the table did not exist in the engine,
     zero if the table was discovered successfully, or any other
     HA_ERR_* error code as appropriate if the table existed, but the
     discovery failed.
   */
   int (*discover_table)(handlerton *hton, THD* thd, TABLE_SHARE *share);

1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337
   /*
     The discover_table_names method tells the server
     about all tables in the specified database that the engine
     knows about. Tables (or file names of tables) are added to
     the provided discovered_list collector object using
     add_table() or add_file() methods.
   */
   class discovered_list
   {
     public:
     virtual bool add_table(const char *tname, size_t tlen) = 0;
     virtual bool add_file(const char *fname) = 0;
     protected: virtual ~discovered_list() {}
   };

   /*
Sergei Golubchik's avatar
Sergei Golubchik committed
1338
     By default (if not implemented by the engine, but the discover_table() is
1339 1340 1341 1342 1343 1344 1345 1346 1347
     implemented) it will perform a file-based discovery:

     - if tablefile_extensions[0] is not null, this will discover all tables
       with the tablefile_extensions[0] extension.

     Returns 0 on success and 1 on error.
   */
   int (*discover_table_names)(handlerton *hton, LEX_STRING *db, MY_DIR *dir,
                               discovered_list *result);
1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368

   /*
     This is a method that allows the server to check if a table exists
     without the overhead of a complete discovery.

     By default (if not implemented by the engine, but the discover_table() is
     implemented) it will try to perform a file-based discovery:

     - if tablefile_extensions[0] is not null this will look for a file name
       with the tablefile_extensions[0] extension.

     - if tablefile_extensions[0] is null, this will resort to discover_table().

     Note that resorting to discover_table() is slow and the engine
     should probably implement its own discover_table_existence() method,
     if its tablefile_extensions[0] is null.

     Returns 1 if the table exists and 0 if it does not.
   */
   int (*discover_table_existence)(handlerton *hton, const char *db,
                                   const char *table_name);
1369

Sergei Golubchik's avatar
Sergei Golubchik committed
1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388
   /*
     This is the assisted table discovery method. Unlike the fully
     automatic discovery as above, here a user is expected to issue an
     explicit CREATE TABLE with the appropriate table attributes to
     "assist" the discovery of a table. But this "discovering" CREATE TABLE
     statement will not specify the table structure - the engine discovers
     it using this method. For example, FederatedX uses it in

      CREATE TABLE t1 ENGINE=FEDERATED CONNECTION="mysql://foo/bar/t1";

     Given a TABLE_SHARE discover_table_structure() fills it in with a correct
     table structure using one of the TABLE_SHARE::init_from_* methods.

     Assisted discovery works independently from the automatic discovery.
     An engine is allowed to support only assisted discovery and not
     support automatic one. Or vice versa.
   */
   int (*discover_table_structure)(handlerton *hton, THD* thd,
                                   TABLE_SHARE *share, HA_CREATE_INFO *info);
Aleksey Midenkov's avatar
Aleksey Midenkov committed
1389 1390

   /*
Aleksey Midenkov's avatar
Aleksey Midenkov committed
1391
     System Versioning
Aleksey Midenkov's avatar
Aleksey Midenkov committed
1392
   */
Aleksey Midenkov's avatar
Aleksey Midenkov committed
1393 1394
   bool versioned() const;
   bool (*vers_get_vtq_ts)(THD* thd, MYSQL_TIME *out, ulonglong trx_id, vtq_field_t field);
1395
};
1396

1397

1398
/**
  Return the name of the plugin registered for @a hton.

  The plugin is looked up in hton2plugin[] by hton->slot; the returned
  pointer refers to the name stored inside that plugin entry.
*/
static inline LEX_STRING *hton_name(const handlerton *hton)
{
  return &(hton2plugin[hton->slot]->name);
}

Sergei Golubchik's avatar
Sergei Golubchik committed
1403 1404 1405 1406 1407
/** Extract the handlerton kept as the plugin data of @a plugin. */
static inline handlerton *plugin_hton(plugin_ref plugin)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  return hton;
}

1408 1409 1410 1411 1412
/**
  Find the sys_var that corresponds to @a var in the plugin owning @a hton.

  The owning plugin is hton2plugin[hton->slot]; the lookup itself is
  delegated to find_plugin_sysvar().
*/
static inline sys_var *find_hton_sysvar(handlerton *hton, st_mysql_sys_var *var)
{
  return find_plugin_sysvar(hton2plugin[hton->slot], var);
}

1413 1414
/*
  Default storage engine lookups for a session.
  Table_scope_and_contents_source_st::use_default_db_type() uses the "tmp"
  variant when the table being created is temporary and the plain variant
  otherwise.
*/
handlerton *ha_default_handlerton(THD *thd);
handlerton *ha_default_tmp_handlerton(THD *thd);
1415

1416
/* Possible flags of a handlerton (there can be 32 of them) */
#define HTON_NO_FLAGS                 0
#define HTON_CLOSE_CURSORS_AT_COMMIT (1 << 0)
#define HTON_ALTER_NOT_SUPPORTED     (1 << 1) //Engine does not support alter
#define HTON_CAN_RECREATE            (1 << 2) //Delete all is used for truncate
#define HTON_HIDDEN                  (1 << 3) //Engine does not appear in lists
/* Bit 4 is not assigned in this list. */
#define HTON_NOT_USER_SELECTABLE     (1 << 5)
#define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported
#define HTON_SUPPORT_LOG_TABLES      (1 << 7) //Engine supports log tables
#define HTON_NO_PARTITION            (1 << 8) //No partitioning of these tables

/*
  This flag should be set when deciding that the engine does not allow
  row based binary logging (RBL) optimizations.

  Currently, setting this flag, means that table's read/write_set will
  be left untouched when logging changes to tables in this engine. In
  practice this means that the server will not mess around with
  table->write_set and/or table->read_set when using RBL and deciding
  whether to log full or minimal rows.

  It's valuable for instance for virtual tables, eg: Performance
  Schema which have no meaning for replication.
*/
#define HTON_NO_BINLOG_ROW_OPT       (1 << 9)
#define HTON_SUPPORTS_EXTENDED_KEYS  (1 <<10) //supports extended keys
#define HTON_SUPPORTS_SYS_VERSIONING (1 << 11) //Engine supports System Versioning

// MySQL compatibility. Unused.
// NOTE(review): this has the same value as HTON_CLOSE_CURSORS_AT_COMMIT
// (bit 0), so the two cannot be told apart in a flags word. That is only
// harmless while this macro really stays unused - confirm before using it.
#define HTON_SUPPORTS_FOREIGN_KEYS   (1 << 0) //Foreign key constraint supported.
1446

1447 1448 1449
class Ha_trx_info;

struct THD_TRANS
1450
{
unknown's avatar
unknown committed
1451
  /* true is not all entries in the ht[] support 2pc */
1452
  bool        no_2pc;
1453 1454
  /* storage engines that registered in this transaction */
  Ha_trx_info *ha_list;
unknown's avatar
unknown committed
1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
  /* 
    The purpose of this flag is to keep track of non-transactional
    tables that were modified in scope of:
    - transaction, when the variable is a member of
    THD::transaction.all
    - top-level statement or sub-statement, when the variable is a
    member of THD::transaction.stmt
    This member has the following life cycle:
    * stmt.modified_non_trans_table is used to keep track of
    modified non-transactional tables of top-level statements. At
    the end of the previous statement and at the beginning of the session,
    it is reset to FALSE.  If such functions
    as mysql_insert, mysql_update, mysql_delete etc modify a
    non-transactional table, they set this flag to TRUE.  At the
    end of the statement, the value of stmt.modified_non_trans_table 
    is merged with all.modified_non_trans_table and gets reset.
    * all.modified_non_trans_table is reset at the end of transaction
    
    * Since we do not have a dedicated context for execution of a
    sub-statement, to keep track of non-transactional changes in a
    sub-statement, we re-use stmt.modified_non_trans_table. 
    At entrance into a sub-statement, a copy of the value of
    stmt.modified_non_trans_table (containing the changes of the
    outer statement) is saved on stack. Then 
    stmt.modified_non_trans_table is reset to FALSE and the
    substatement is executed. Then the new value is merged with the
    saved value.
  */
  bool modified_non_trans_table;
1484

1485 1486 1487 1488 1489
  void reset() {
    no_2pc= FALSE;
    modified_non_trans_table= FALSE;
    m_unsafe_rollback_flags= 0;
  }
1490
  bool is_empty() const { return ha_list == NULL; }
unknown's avatar
unknown committed
1491
  THD_TRANS() {}                        /* Remove gcc warning */
1492 1493 1494 1495 1496 1497 1498 1499 1500

  unsigned int m_unsafe_rollback_flags;
 /*
    Define the type of statemens which cannot be rolled back safely.
    Each type occupies one bit in m_unsafe_rollback_flags.
  */
  static unsigned int const MODIFIED_NON_TRANS_TABLE= 0x01;
  static unsigned int const CREATED_TEMP_TABLE= 0x02;
  static unsigned int const DROPPED_TEMP_TABLE= 0x04;
1501
  static unsigned int const DID_WAIT= 0x08;
1502 1503 1504 1505 1506 1507

  void mark_created_temp_table()
  {
    DBUG_PRINT("debug", ("mark_created_temp_table"));
    m_unsafe_rollback_flags|= CREATED_TEMP_TABLE;
  }
1508 1509 1510 1511
  void mark_trans_did_wait() { m_unsafe_rollback_flags|= DID_WAIT; }
  bool trans_did_wait() const {
    return (m_unsafe_rollback_flags & DID_WAIT) != 0;
  }
1512
  bool is_trx_read_write() const;
1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610
};


/**
  Either statement transaction or normal transaction - related
  thread-specific storage engine data.

  If a storage engine participates in a statement/transaction,
  an instance of this class is present in
  thd->transaction.{stmt|all}.ha_list. The addition to
  {stmt|all}.ha_list is made by trans_register_ha().

  When it's time to commit or rollback, each element of ha_list
  is used to access storage engine's prepare()/commit()/rollback()
  methods, and also to evaluate if a full two phase commit is
  necessary.

  @sa General description of transaction handling in handler.cc.
*/

class Ha_trx_info
{
public:
  /**
    Attach this (currently unused) instance to @a trans on behalf of the
    engine @a ht_arg. The instance becomes the new head of trans->ha_list
    and starts out flagged as read-only.
  */
  void register_ha(THD_TRANS *trans, handlerton *ht_arg)
  {
    DBUG_ASSERT(m_flags == 0);
    DBUG_ASSERT(m_ht == NULL);
    DBUG_ASSERT(m_next == NULL);

    /* Push onto the front of the engine list of the transaction. */
    m_next= trans->ha_list;
    trans->ha_list= this;

    m_flags= (int) TRX_READ_ONLY; /* Read-only until told otherwise. */
    m_ht= ht_arg;
  }

  /** Return to the unused state so the instance can be registered again. */
  void reset()
  {
    m_flags= 0;
    m_ht= NULL;
    m_next= NULL;
  }

  Ha_trx_info() { reset(); }

  /** Flag the engine as having done writes. Requires a started instance. */
  void set_trx_read_write()
  {
    DBUG_ASSERT(is_started());
    m_flags|= (int) TRX_READ_WRITE;
  }
  /** True if the read-write flag is set. Requires a started instance. */
  bool is_trx_read_write() const
  {
    DBUG_ASSERT(is_started());
    return (m_flags & (int) TRX_READ_WRITE) != 0;
  }
  /** An instance counts as started while it has an engine attached. */
  bool is_started() const { return m_ht != NULL; }
  /** Become read-write if @a stmt_trx is read-write; otherwise no change. */
  void coalesce_trx_with(const Ha_trx_info *stmt_trx)
  {
    /*
      Only valid once the transaction has been started.
      May be invoked repeatedly, e.g. for every read-write
      statement that runs inside one transaction.
    */
    DBUG_ASSERT(is_started());
    if (!stmt_trx->is_trx_read_write())
      return;
    set_trx_read_write();
  }
  /** Next element of the engine list. Requires a started instance. */
  Ha_trx_info *next() const
  {
    DBUG_ASSERT(is_started());
    return m_next;
  }
  /** Engine this instance is registered for. Requires a started instance. */
  handlerton *ht() const
  {
    DBUG_ASSERT(is_started());
    return m_ht;
  }
private:
  enum { TRX_READ_ONLY= 0, TRX_READ_WRITE= 1 };
  /** Link to the following ha_list element. */
  Ha_trx_info *m_next;
  /**
    A given Ha_trx_info instance is currently always used for the same
    storage engine, yet 'ht' is non-NULL only while that engine takes
    part in a transaction.
  */
  handlerton *m_ht;
  /**
    Per-engine transaction flags.
    Non-zero only while this instance is part of a transaction.
    Holds a combination of the enum values above.
  */
  uchar       m_flags;
};

1611

1612 1613 1614 1615 1616 1617 1618 1619 1620 1621
inline bool THD_TRANS::is_trx_read_write() const
{
  Ha_trx_info *ha_info;
  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
    if (ha_info->is_trx_read_write())
      return TRUE;
  return FALSE;
}


unknown's avatar
unknown committed
1622 1623 1624
/** Transaction isolation levels; the values are 0..3 in declaration order. */
enum enum_tx_isolation
{
  ISO_READ_UNCOMMITTED,
  ISO_READ_COMMITTED,
  ISO_REPEATABLE_READ,
  ISO_SERIALIZABLE
};

1625

1626 1627 1628 1629 1630 1631 1632
/*
  Plain record of file sizes, row figures, timestamps and a checksum.
  By its name it describes a single partition; none of the fields is
  initialized by the struct itself.
*/
typedef struct {
  ulonglong data_file_length;
  ulonglong max_data_file_length;
  ulonglong index_file_length;
  ulonglong delete_length;
  ha_rows records;
  ulong mean_rec_length;
  time_t create_time;
  time_t check_time;
  time_t update_time;
  ulonglong check_sum;
} PARTITION_STATS;
1638

1639 1640
#define UNDEF_NODEGROUP 65535
class Item;
unknown's avatar
unknown committed
1641
struct st_table_log_memory_entry;
1642 1643 1644

class partition_info;

unknown's avatar
unknown committed
1645 1646
struct st_partition_iter;

1647
/** Tri-state option value; HA_CHOICE_MAX is the number of real values. */
enum ha_choice
{
  HA_CHOICE_UNDEF,
  HA_CHOICE_NO,
  HA_CHOICE_YES,
  HA_CHOICE_MAX
};
1648

1649 1650 1651 1652
/** Values of the stats_auto_recalc table option; DEFAULT is zero. */
enum enum_stats_auto_recalc
{
  HA_STATS_AUTO_RECALC_DEFAULT= 0,
  HA_STATS_AUTO_RECALC_ON,
  HA_STATS_AUTO_RECALC_OFF
};

1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671
/**
  A helper struct for schema DDL statements:
    CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ]
    ALTER SCHEMA name [ schema_specification... ]

  It stores the "schema_specification" part of the CREATE/ALTER statements and
  is passed to mysql_create_db() and  mysql_alter_db().
  Currently consists only of the schema default character set and collation.
*/
struct Schema_specification_st
{
  /* Schema default character set and collation. */
  CHARSET_INFO *default_table_charset;
  /* Zero-fill the whole struct, which leaves default_table_charset zeroed. */
  void init()
  {
    bzero(this, sizeof(*this));
  }
};


1672
struct Vers_parse_info
1673
{
1674
  Vers_parse_info() :
1675 1676 1677 1678 1679
    declared_system_versioning(false),
    has_versioned_fields(false),
    has_unversioned_fields(false)
  {}

1680
  struct start_end_t
1681
  {
1682 1683 1684
    start_end_t() :
      start(NULL),
      end(NULL) {}
1685 1686
    String *start;
    String *end;
1687 1688 1689 1690
  };

  start_end_t period_for_system_time;
  start_end_t generated_as_row;
1691 1692 1693 1694 1695 1696

  void set_period_for_system_time(String *start, String *end)
  {
    period_for_system_time.start = start;
    period_for_system_time.end = end;
  }
1697

1698 1699
  bool add_versioning_info(THD *thd, Alter_info *alter_info, bool integer_fields);
  bool check(THD *thd, Alter_info *alter_info, bool integer_fields);
1700

1701
  /** User has added 'WITH SYSTEM VERSIONING' to table definition */
1702
  bool declared_system_versioning : 1;
1703

1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714
  /**
     At least one field was specified 'WITH SYSTEM VERSIONING'. Useful for
     error handling.
  */
  bool has_versioned_fields : 1;

  /**
     At least one field was specified 'WITHOUT SYSTEM VERSIONING'. Useful for
     error handling.
  */
  bool has_unversioned_fields : 1;
1715 1716
};

1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729
/**
  A helper struct for table DDL statements, e.g.:
  CREATE [OR REPLACE] [TEMPORARY]
    TABLE [IF NOT EXISTS] tbl_name table_contents_source;

  Represents a combinations of:
  1. The scope, i.e. TEMPORARY or not TEMPORARY
  2. The "table_contents_source" part of the table DDL statements,
     which can be initialized from either of these:
     - table_element_list ...      // Explicit definition (column and key list)
     - LIKE another_table_name ... // Copy structure from another table
     - [AS] SELECT ...             // Copy structure from a subquery
*/
1730

1731
struct Table_scope_and_contents_source_st
unknown's avatar
unknown committed
1732
{
1733
  CHARSET_INFO *table_charset;
Sergei Golubchik's avatar
Sergei Golubchik committed
1734
  LEX_CUSTRING tabledef_version;
1735
  LEX_STRING connect_string;
Sergei Golubchik's avatar
Sergei Golubchik committed
1736
  const char *password, *tablespace;
1737
  LEX_STRING comment;
unknown's avatar
unknown committed
1738 1739
  const char *data_file_name, *index_file_name;
  const char *alias;
1740 1741
  ulonglong max_rows,min_rows;
  ulonglong auto_increment_value;
1742
  ulong table_options;                  ///< HA_OPTION_ values
1743
  ulong avg_row_length;
unknown's avatar
unknown committed
1744
  ulong used_fields;
1745
  ulong key_block_size;
1746
  ulong expression_length;
1747
  ulong field_check_constraints;
1748 1749 1750 1751 1752 1753 1754 1755 1756
  /*
    number of pages to sample during
    stats estimation, if used, otherwise 0.
  */
  uint stats_sample_pages;
  uint null_bits;                       /* NULL bits at start of record */
  uint options;				/* OR of HA_CREATE_ options */
  uint merge_insert_method;
  uint extra_size;                      /* length of extra data segment */
1757
  SQL_I_List<TABLE_LIST> merge_list;
unknown's avatar
unknown committed
1758
  handlerton *db_type;
1759 1760 1761 1762 1763 1764 1765
  /**
    Row type of the table definition.

    Defaults to ROW_TYPE_DEFAULT for all non-ALTER statements.
    For ALTER TABLE defaults to ROW_TYPE_NOT_USED (means "keep the current").

    Can be changed either explicitly by the parser.
1766
    If nothing specified inherits the value of the original table (if present).
1767
  */
1768
  enum row_type row_type;
1769
  enum ha_choice transactional;
Sergei Golubchik's avatar
Sergei Golubchik committed
1770
  enum ha_storage_media storage_media;  ///< DEFAULT, DISK or MEMORY
1771 1772
  enum ha_choice page_checksum;         ///< If we have page_checksums
  engine_option_value *option_list;     ///< list of table create options
1773 1774
  enum_stats_auto_recalc stats_auto_recalc;
  bool varchar;                         ///< 1 if table has a VARCHAR
1775
  bool sequence;                        // If SEQUENCE=1 was used
Sergei Golubchik's avatar
Sergei Golubchik committed
1776

Sergei Golubchik's avatar
Sergei Golubchik committed
1777 1778
  List<Virtual_column_info> *check_constraint_list;

1779
  /* the following three are only for ALTER TABLE, check_if_incompatible_data() */
1780 1781 1782
  ha_table_option_struct *option_struct;           ///< structure with parsed table options
  ha_field_option_struct **fields_option_struct;   ///< array of field option structures
  ha_index_option_struct **indexes_option_struct;  ///< array of index option structures
1783

1784 1785 1786 1787
  /* The following is used to remember the old state for CREATE OR REPLACE */
  TABLE *table;
  TABLE_LIST *pos_in_locked_tables;
  MDL_ticket *mdl_ticket;
1788
  bool table_was_deleted;
1789
  sequence_definition *seq_create_info;
1790

1791
  Vers_parse_info vers_info;
1792

1793 1794 1795 1796 1797
  void init()
  {
    bzero(this, sizeof(*this));
  }
  bool tmp_table() const { return options & HA_LEX_CREATE_TMP_TABLE; }
1798 1799 1800 1801 1802
  void use_default_db_type(THD *thd)
  {
    db_type= tmp_table() ? ha_default_tmp_handlerton(thd)
                         : ha_default_handlerton(thd);
  }
1803

1804
  bool versioned() const
1805
  {
1806
    return options & HA_VERSIONED_TABLE;
1807
  }
1808
};
unknown's avatar
unknown committed
1809

1810

1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823
/**
  This struct is passed to handler table routines, e.g. ha_create().
  It does not include the "OR REPLACE" and "IF NOT EXISTS" parts, as these
  parts are handled on the SQL level and are not needed on the handler level.
*/
struct HA_CREATE_INFO: public Table_scope_and_contents_source_st,
                       public Schema_specification_st
{
  /* Reset both base parts; each base zero-fills its own members. */
  void init()
  {
    Table_scope_and_contents_source_st::init();
    Schema_specification_st::init();
  }
  bool check_conflicting_charset_declarations(CHARSET_INFO *cs);
  /*
    Record @a cs as the default table character set.
    Returns true (and changes nothing) if
    check_conflicting_charset_declarations() reports a conflict,
    false on success.
  */
  bool add_table_option_default_charset(CHARSET_INFO *cs)
  {
    // cs can be NULL, e.g.:  CREATE TABLE t1 (..) CHARACTER SET DEFAULT;
    if (check_conflicting_charset_declarations(cs))
      return true;
    default_table_charset= cs;
    used_fields|= HA_CREATE_USED_DEFAULT_CHARSET;
    return false;
  }
  /*
    Record @a cs as both the table character set and the default table
    character set. Returns true (and changes nothing) on a conflict,
    false on success.
  */
  bool add_alter_list_item_convert_to_charset(CHARSET_INFO *cs)
  {
    /* 
      cs cannot be NULL, as sql_yacc.yy translates
         CONVERT TO CHARACTER SET DEFAULT
      to
         CONVERT TO CHARACTER SET <character-set-of-the-current-database>
      TODO: Shouldn't we postpone resolution of DEFAULT until the
      character set of the table owner database is loaded from its db.opt?
    */
    DBUG_ASSERT(cs);
    if (check_conflicting_charset_declarations(cs))
      return true;
    table_charset= default_table_charset= cs;
    used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET);  
    return false;
  }
};


/**
  This struct is passed to mysql_create_table() and similar creation functions,
  as well as to show_create_table().
*/
struct Table_specification_st: public HA_CREATE_INFO,
                               public DDL_options_st
{
  // Deep initialization
  void init()
  {
    HA_CREATE_INFO::init();
    DDL_options_st::init();
  }
1867
  void init(DDL_options_st::Options options_arg)
1868 1869
  {
    HA_CREATE_INFO::init();
1870
    DDL_options_st::init(options_arg);
1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885
  }
  /*
    Quick initialization, for parser.
    Most of the HA_CREATE_INFO is left uninitialized.
    It gets fully initialized in sql_yacc.yy, only when the parser
    scans a related keyword (e.g. CREATE, ALTER).
  */
  void lex_start()
  {
    HA_CREATE_INFO::options= 0;
    DDL_options_st::init();
  }
};


1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927
/**
  In-place alter handler context.

  This is a superclass intended to be subclassed by individual handlers
  in order to store handler unique context between in-place alter API calls.

  The handler is responsible for creating the object. This can be done
  as early as during check_if_supported_inplace_alter().

  The SQL layer is responsible for destroying the object.
  The class extends Sql_alloc so the memory will be mem root allocated.

  @see Alter_inplace_info
*/

class inplace_alter_handler_ctx : public Sql_alloc
{
public:
  /* The base class carries no state of its own. */
  inplace_alter_handler_ctx() {}

  /*
    Virtual because the object is deleted through a pointer to this base
    class (see Alter_inplace_info::~Alter_inplace_info()).
  */
  virtual ~inplace_alter_handler_ctx() {}
};


/**
  Class describing changes to be done by ALTER TABLE.
  Instance of this class is passed to storage engine in order
  to determine if this ALTER TABLE can be done using in-place
  algorithm. It is also used for executing the ALTER TABLE
  using in-place algorithm.
*/

class Alter_inplace_info
{
public:
  /**
     Bits to show in detail what operations the storage engine is
     to execute.

     All these operations are supported as in-place operations by the
     SQL layer. This means that operations that by their nature must
     be performed by copying the table to a temporary table, will not
1928
     have their own flags here.
1929 1930 1931 1932 1933 1934

     We generally try to specify handler flags only if there are real
     changes. But in cases when it is cumbersome to determine if some
     attribute has really changed we might choose to set flag
     pessimistically, for example, relying on parser output only.
  */
1935
  typedef ulonglong HA_ALTER_FLAGS;
1936 1937

  // Add non-unique, non-primary index
1938 1939 1940 1941 1942
  static const HA_ALTER_FLAGS ADD_INDEX                  = 1ULL << 0;
  //
  // Adds a spatial index. At the moment all engines treat it
  // identically to the ADD_INDEX, so it gets the same code
  static const HA_ALTER_FLAGS ADD_SPATIAL_INDEX          = ADD_INDEX;
1943 1944

  // Drop non-unique, non-primary index
1945
  static const HA_ALTER_FLAGS DROP_INDEX                 = 1ULL << 1;
1946 1947

  // Add unique, non-primary index
1948
  static const HA_ALTER_FLAGS ADD_UNIQUE_INDEX           = 1ULL << 2;
1949 1950

  // Drop unique, non-primary index
1951
  static const HA_ALTER_FLAGS DROP_UNIQUE_INDEX          = 1ULL << 3;
1952 1953

  // Add primary index
1954
  static const HA_ALTER_FLAGS ADD_PK_INDEX               = 1ULL << 4;
1955 1956

  // Drop primary index
1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968
  static const HA_ALTER_FLAGS DROP_PK_INDEX              = 1ULL << 5;

  // Virtual generated column
  static const HA_ALTER_FLAGS ADD_VIRTUAL_COLUMN         = 1ULL << 6;
  // Stored base (non-generated) column
  static const HA_ALTER_FLAGS ADD_STORED_BASE_COLUMN     = 1ULL << 7;
  // Stored generated column
  static const HA_ALTER_FLAGS ADD_STORED_GENERATED_COLUMN= 1ULL << 8;
  // Add generic column (convience constant).
  static const HA_ALTER_FLAGS ADD_COLUMN= ADD_VIRTUAL_COLUMN |
                                          ADD_STORED_BASE_COLUMN |
                                          ADD_STORED_GENERATED_COLUMN;
1969 1970

  // Drop column
1971 1972 1973 1974
  static const HA_ALTER_FLAGS DROP_VIRTUAL_COLUMN        = 1ULL << 9;
  static const HA_ALTER_FLAGS DROP_STORED_COLUMN         = 1ULL << 10;
  static const HA_ALTER_FLAGS DROP_COLUMN= DROP_VIRTUAL_COLUMN |
                                           DROP_STORED_COLUMN;
1975 1976

  // Rename column
1977
  static const HA_ALTER_FLAGS ALTER_COLUMN_NAME          = 1ULL << 11;
1978 1979

  // Change column datatype
1980 1981
  static const HA_ALTER_FLAGS ALTER_VIRTUAL_COLUMN_TYPE  = 1ULL << 12;
  static const HA_ALTER_FLAGS ALTER_STORED_COLUMN_TYPE   = 1ULL << 13;
1982 1983 1984 1985 1986 1987 1988

  /**
    Change column datatype in such way that new type has compatible
    packed representation with old type, so it is theoretically
    possible to perform change by only updating data dictionary
    without changing table rows.
  */
1989 1990 1991 1992
  static const HA_ALTER_FLAGS ALTER_COLUMN_EQUAL_PACK_LENGTH = 1ULL << 14;

  // Reorder column
  static const HA_ALTER_FLAGS ALTER_STORED_COLUMN_ORDER =  1ULL << 15;
1993 1994

  // Reorder column
1995
  static const HA_ALTER_FLAGS ALTER_VIRTUAL_COLUMN_ORDER = 1ULL << 16;
1996 1997

  // Change column from NOT NULL to NULL
1998
  static const HA_ALTER_FLAGS ALTER_COLUMN_NULLABLE      = 1ULL << 17;
1999 2000

  // Change column from NULL to NOT NULL
2001
  static const HA_ALTER_FLAGS ALTER_COLUMN_NOT_NULLABLE  = 1ULL << 18;
2002 2003

  // Set or remove default column value
2004
  static const HA_ALTER_FLAGS ALTER_COLUMN_DEFAULT       = 1ULL << 19;
2005

2006 2007 2008 2009
  // Change column generation expression
  static const HA_ALTER_FLAGS ALTER_VIRTUAL_GCOL_EXPR    = 1ULL << 20;
  static const HA_ALTER_FLAGS ALTER_STORED_GCOL_EXPR     = 1ULL << 21;
  //
2010
  // Add foreign key
2011
  static const HA_ALTER_FLAGS ADD_FOREIGN_KEY            = 1ULL << 22;
2012 2013

  // Drop foreign key
2014
  static const HA_ALTER_FLAGS DROP_FOREIGN_KEY           = 1ULL << 23;
2015 2016

  // table_options changed, see HA_CREATE_INFO::used_fields for details.
2017
  static const HA_ALTER_FLAGS CHANGE_CREATE_OPTION       = 1ULL << 24;
2018 2019

  // Table is renamed
2020
  static const HA_ALTER_FLAGS ALTER_RENAME               = 1ULL << 25;
2021

2022
  // column's engine options changed, something in field->option_struct
2023
  static const HA_ALTER_FLAGS ALTER_COLUMN_OPTION        = 1ULL << 26;
2024 2025

  // MySQL alias for the same thing:
2026
  static const HA_ALTER_FLAGS ALTER_COLUMN_STORAGE_TYPE  = 1ULL << 26;
2027 2028

  // Change the column format of column
2029
  static const HA_ALTER_FLAGS ALTER_COLUMN_COLUMN_FORMAT = 1ULL << 27;
2030 2031

  // Add partition
2032
  static const HA_ALTER_FLAGS ADD_PARTITION              = 1ULL << 28;
2033 2034

  // Drop partition
2035
  static const HA_ALTER_FLAGS DROP_PARTITION             = 1ULL << 29;
2036 2037

  // Changing partition options
2038
  static const HA_ALTER_FLAGS ALTER_PARTITION            = 1ULL << 30;
2039 2040

  // Coalesce partition
2041
  static const HA_ALTER_FLAGS COALESCE_PARTITION         = 1ULL << 31;
2042 2043

  // Reorganize partition ... into
2044
  static const HA_ALTER_FLAGS REORGANIZE_PARTITION       = 1ULL << 32;
2045 2046

  // Reorganize partition
2047
  static const HA_ALTER_FLAGS ALTER_TABLE_REORG          = 1ULL << 33;
2048 2049

  // Remove partitioning
2050
  static const HA_ALTER_FLAGS ALTER_REMOVE_PARTITIONING  = 1ULL << 34;
2051 2052

  // Partition operation with ALL keyword
2053
  static const HA_ALTER_FLAGS ALTER_ALL_PARTITION        = 1ULL << 35;
2054

2055 2056 2057 2058
  /**
    Recreate the table for ALTER TABLE FORCE, ALTER TABLE ENGINE
    and OPTIMIZE TABLE operations.
  */
2059
  static const HA_ALTER_FLAGS RECREATE_TABLE             = 1ULL << 36;
2060

2061 2062 2063 2064 2065 2066
  /**
    Changes in generated columns that affect storage,
    for example, when a vcol type or expression changes
    and this vcol is indexed or used in a partitioning expression
  */
  static const HA_ALTER_FLAGS ALTER_COLUMN_VCOL          = 1ULL << 37;
2067

Sergei Golubchik's avatar
Sergei Golubchik committed
2068 2069 2070 2071
  /**
    ALTER TABLE for a partitioned table. The engine needs to commit
    online alter of all partitions atomically (using group_commit_ctx)
  */
2072
  static const HA_ALTER_FLAGS ALTER_PARTITIONED          = 1ULL << 38;
2073

2074
  static const HA_ALTER_FLAGS ALTER_ADD_CHECK_CONSTRAINT = 1ULL << 39;
2075

2076
  static const HA_ALTER_FLAGS ALTER_DROP_CHECK_CONSTRAINT= 1ULL << 40;
2077

2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147
  /**
    Create options (like MAX_ROWS) for the new version of table.

    @note The referenced instance of HA_CREATE_INFO object was already
          used to create new .FRM file for table being altered. So it
          has been processed by mysql_prepare_create_table() already.
          For example, this means that it has HA_OPTION_PACK_RECORD
          flag in HA_CREATE_INFO::table_options member correctly set.
  */
  HA_CREATE_INFO *create_info;

  /**
    Alter options, fields and keys for the new version of table.

    @note The referenced instance of Alter_info object was already
          used to create new .FRM file for table being altered. So it
          has been processed by mysql_prepare_create_table() already.
          In particular, this means that in Create_field objects for
          fields which were present in some form in the old version
          of table, Create_field::field member points to corresponding
          Field instance for old version of table.
  */
  Alter_info *alter_info;

  /**
    Array of KEYs for new version of table - including KEYs to be added.

    @note Currently this array is produced as result of
          mysql_prepare_create_table() call.
          This means that it follows different convention for
          KEY_PART_INFO::fieldnr values than objects in TABLE::key_info
          array.

    @todo This is mainly due to the fact that we need to keep compatibility
          with removed handler::add_index() call. We plan to switch to
          TABLE::key_info numbering later.

    KEYs are sorted - see sort_keys().
  */
  KEY  *key_info_buffer;

  /** Size of key_info_buffer array. */
  uint key_count;

  /** Size of index_drop_buffer array. */
  uint index_drop_count;

  /**
     Array of pointers to KEYs to be dropped belonging to the TABLE instance
     for the old version of the table.
  */
  KEY  **index_drop_buffer;

  /** Size of index_add_buffer array. */
  uint index_add_count;

  /**
     Array of indexes into key_info_buffer for KEYs to be added,
     sorted in increasing order.
  */
  uint *index_add_buffer;

  /**
     Context information to allow handlers to keep context between in-place
     alter API calls.

     @see inplace_alter_handler_ctx for information about object lifecycle.
  */
  inplace_alter_handler_ctx *handler_ctx;

2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159
  /**
    If the table uses several handlers, like ha_partition uses one handler
    per partition, this contains a Null terminated array of ctx pointers
    that should all be committed together.
    Or NULL if only handler_ctx should be committed.
    Set to NULL if the low level handler::commit_inplace_alter_table uses it,
    to signal to the main handler that everything was committed as atomically.

    @see inplace_alter_handler_ctx for information about object lifecycle.
  */
  inplace_alter_handler_ctx **group_commit_ctx;

2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207
  /**
     Flags describing in detail which operations the storage engine is to execute.
  */
  HA_ALTER_FLAGS handler_flags;

  /**
     Partition_info taking into account the partition changes to be performed.
     Contains all partitions which are present in the old version of the table
     with partitions to be dropped or changed marked as such + all partitions
     to be added in the new version of table marked as such.
  */
  partition_info *modified_part_info;

  /** true for ALTER IGNORE TABLE ... */
  const bool ignore;

  /** true for online operation (LOCK=NONE) */
  bool online;

  /**
     Can be set by handler to describe why a given operation cannot be done
     in-place (HA_ALTER_INPLACE_NOT_SUPPORTED) or why it cannot be done
     online (HA_ALTER_INPLACE_NO_LOCK or
     HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE)
     If set, it will be used with ER_ALTER_OPERATION_NOT_SUPPORTED_REASON if
     results from handler::check_if_supported_inplace_alter() doesn't match
     requirements set by user. If not set, the more generic
     ER_ALTER_OPERATION_NOT_SUPPORTED will be used.

     Please set to a properly localized string, for example using
     my_get_err_msg(), so that the error message as a whole is localized.
  */
  const char *unsupported_reason;

  Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
                     Alter_info *alter_info_arg,
                     KEY *key_info_arg, uint key_count_arg,
                     partition_info *modified_part_info_arg,
                     bool ignore_arg)
    : create_info(create_info_arg),
    alter_info(alter_info_arg),
    key_info_buffer(key_info_arg),
    key_count(key_count_arg),
    index_drop_count(0),
    index_drop_buffer(NULL),
    index_add_count(0),
    index_add_buffer(NULL),
    handler_ctx(NULL),
2208
    group_commit_ctx(NULL),
2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234
    handler_flags(0),
    modified_part_info(modified_part_info_arg),
    ignore(ignore_arg),
    online(false),
    unsupported_reason(NULL)
  {}

  ~Alter_inplace_info()
  {
    delete handler_ctx;
  }

  /**
    Used after check_if_supported_inplace_alter() to report
    error if the result does not match the LOCK/ALGORITHM
    requirements set by the user.

    @param not_supported  Part of statement that was not supported.
    @param try_instead    Suggestion as to what the user should
                          replace not_supported with.
  */
  void report_unsupported_error(const char *not_supported,
                                const char *try_instead);
};


2235 2236 2237 2238 2239
/** Per-key creation options: algorithm, block size, parser name and comment. */
typedef struct st_key_create_information
{
  enum ha_key_alg algorithm;
  ulong block_size;
  LEX_STRING parser_name;
  LEX_STRING comment;
  /**
    A flag to determine if we will check for duplicate indexes.
    This typically means that the key information was specified
    directly by the user (set by the parser).
  */
  bool check_for_duplicate_indexes;
} KEY_CREATE_INFO;


2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266
/*
  Class for maintaining hooks used inside operations on tables such
  as: create table functions, delete table functions, and alter table
  functions.

  Class is using the Template Method pattern to separate the public
  usage interface from the private inheritance interface.  This
  imposes no overhead, since the public non-virtual function is small
  enough to be inlined.

  The hooks are usually used for functions that do several things,
  e.g., create_table_from_items(), which both creates a table and locks
  it.
 */
class TABLEOP_HOOKS
{
public:
2267 2268 2269
  TABLEOP_HOOKS() {}
  virtual ~TABLEOP_HOOKS() {}

2270 2271 2272 2273
  inline void prelock(TABLE **tables, uint count)
  {
    do_prelock(tables, count);
  }
unknown's avatar
unknown committed
2274

2275 2276 2277 2278
  inline int postlock(TABLE **tables, uint count)
  {
    return do_postlock(tables, count);
  }
2279 2280 2281 2282 2283 2284
private:
  /* Function primitive that is called prior to locking tables */
  virtual void do_prelock(TABLE **tables, uint count)
  {
    /* Default is to do nothing */
  }
2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297

  /**
     Primitive called after tables are locked.

     If an error is returned, the tables will be unlocked and error
     handling start.

     @return Error code or zero.
   */
  virtual int do_postlock(TABLE **tables, uint count)
  {
    return 0;                           /* Default is to do nothing */
  }
2298
};
unknown's avatar
unknown committed
2299

2300 2301
typedef struct st_savepoint SAVEPOINT;
extern ulong savepoint_alloc_size;
2302
extern KEY_CREATE_INFO default_key_create_info;
2303

2304
/* Forward declaration for condition pushdown to storage engine */
2305
typedef class Item COND;
unknown's avatar
Merge  
unknown committed
2306

unknown's avatar
unknown committed
2307 2308
typedef struct st_ha_check_opt
{
2309
  st_ha_check_opt() {}                        /* Remove gcc warning */
unknown's avatar
unknown committed
2310 2311
  uint flags;       /* isam layer flags (e.g. for myisamchk) */
  uint sql_flags;   /* sql layer flags - for something myisamchk cannot do */
2312
  time_t start_time;   /* When check/repair starts */
unknown's avatar
unknown committed
2313
  KEY_CACHE *key_cache; /* new key cache when changing key cache */
unknown's avatar
unknown committed
2314
  void init();
unknown's avatar
unknown committed
2315 2316
} HA_CHECK_OPT;

2317

2318 2319 2320 2321 2322 2323 2324 2325
/********************************************************************************
 * MRR
 ********************************************************************************/

typedef void *range_seq_t;

typedef struct st_range_seq_if
{
2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342
  /*
    Get key information
 
    SYNOPSIS
      get_key_info()
        init_params  The seq_init_param parameter 
        length       OUT length of the keys in this range sequence
        map          OUT key_part_map of the keys in this range sequence

    DESCRIPTION
      This function is set only when using HA_MRR_FIXED_KEY mode. In that mode, 
      all ranges are single-point equality ranges that use the same set of key
      parts. This function allows the MRR implementation to get the length of
      a key, and which keyparts it uses.
  */
  void (*get_key_info)(void *init_params, uint *length, key_part_map *map);

2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366
  /*
    Initialize the traversal of range sequence
    
    SYNOPSIS
      init()
        init_params  The seq_init_param parameter 
        n_ranges     The number of ranges obtained 
        flags        A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY

    RETURN
      An opaque value to be used as RANGE_SEQ_IF::next() parameter
  */
  range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);


  /*
    Get the next range in the range sequence

    SYNOPSIS
      next()
        seq    The value returned by RANGE_SEQ_IF::init()
        range  OUT Information about the next range
    
    RETURN
2367 2368
      FALSE - Ok, the range structure filled with info about the next range
      TRUE  - No more ranges
2369
  */
2370
  bool (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386

  /*
    Check whether range_info orders to skip the next record

    SYNOPSIS
      skip_record()
        seq         The value returned by RANGE_SEQ_IF::init()
        range_info  Information about the next range 
                    (Ignored if MRR_NO_ASSOCIATION is set)
        rowid       Rowid of the record to be checked (ignored if set to 0)
    
    RETURN
      1 - Record with this range_info and/or this rowid shall be filtered
          out from the stream of records returned by multi_range_read_next()
      0 - The record shall be left in the stream
  */ 
2387
  bool (*skip_record) (range_seq_t seq, range_id_t range_info, uchar *rowid);
2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399

  /*
    Check if the record combination matches the index condition
    SYNOPSIS
      skip_index_tuple()
        seq         The value returned by RANGE_SEQ_IF::init()
        range_info  Information about the next range 
    
    RETURN
      0 - The record combination satisfies the index condition
      1 - Otherwise
  */ 
2400
  bool (*skip_index_tuple) (range_seq_t seq, range_id_t range_info);
2401 2402
} RANGE_SEQ_IF;

2403
typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
2404

2405
/**
  Cost of an operation broken down into I/O, CPU, remote ("import") and
  memory components. total_cost() folds the components into one number
  using the *_COEFF weights.
*/
class Cost_estimate
{
public:
  double io_count;     /* number of I/O                 */
  double avg_io_cost;  /* cost of an average I/O oper.  */
  double cpu_cost;     /* cost of operations in CPU     */
  double import_cost;  /* cost of remote operations     */
  double mem_cost;     /* cost of used memory           */

  enum { IO_COEFF=1 };
  enum { CPU_COEFF=1 };
  enum { MEM_COEFF=1 };
  enum { IMPORT_COEFF=1 };

  Cost_estimate()
  {
    reset();
  }

  /**
    Weighted sum of all cost components.
    Only reads members, hence const so it can be used on const objects.
  */
  double total_cost() const
  {
    return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
           MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
  }

  /**
    Whether or not all costs in the object are zero

    @return true if all costs are zero, false otherwise
  */
  bool is_zero() const
  {
    return io_count == 0.0 && cpu_cost == 0.0 &&
      import_cost == 0.0 && mem_cost == 0.0;
  }

  /** Set every counter to 0 and the average I/O cost to 1. */
  void reset()
  {
    avg_io_cost= 1.0;
    io_count= cpu_cost= mem_cost= import_cost= 0.0;
  }

  /** Scale the I/O count, CPU cost and import cost by m. */
  void multiply(double m)
  {
    io_count *= m;
    cpu_cost *= m;
    import_cost *= m;
    /* Don't multiply mem_cost */
  }

  /**
    Accumulate another estimate into this one.

    I/O is merged through add_io() so that avg_io_cost stays the weighted
    average; CPU and import costs are summed.

    NOTE(review): mem_cost is not accumulated (unchanged from the previous
    behaviour, and in line with multiply()) - confirm that this is intended.

    @param cost  Estimate to add; must not be NULL.
  */
  void add(const Cost_estimate* cost)
  {
    double io_count_sum= io_count + cost->io_count;
    add_io(cost->io_count, cost->avg_io_cost);
    io_count= io_count_sum;
    cpu_cost += cost->cpu_cost;
    /* Previously dropped: the remote cost of the added estimate */
    import_cost += cost->import_cost;
  }

  /**
    Add add_io_cnt I/O operations whose average cost is add_avg_cost,
    recomputing avg_io_cost as the weighted average of both sets.
  */
  void add_io(double add_io_cnt, double add_avg_cost)
  {
    /* In edge cases add_io_cnt may be zero */
    if (add_io_cnt > 0)
    {
      double io_count_sum= io_count + add_io_cnt;
      avg_io_cost= (io_count * avg_io_cost +
                    add_io_cnt * add_avg_cost) / io_count_sum;
      io_count= io_count_sum;
    }
  }

  /// Add to CPU cost
  void add_cpu(double add_cpu_cost) { cpu_cost+= add_cpu_cost; }

  /// Add to import cost
  void add_import(double add_import_cost) { import_cost+= add_import_cost; }

  /// Add to memory cost
  void add_mem(double add_mem_cost) { mem_cost+= add_mem_cost; }

  /*
    To be used when we go from old single value-based cost calculations to
    the new Cost_estimate-based.
  */
  void convert_from_cost(double cost)
  {
    reset();
    io_count= cost;
  }
};

void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
2496
                         Cost_estimate *cost);
2497 2498

/*
2499 2500 2501
  Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
  The ranges may not use the full key but all of them will use the same number
  of key parts.
2502
*/
2503 2504
#define HA_MRR_SINGLE_POINT 1U
#define HA_MRR_FIXED_KEY  2U
2505 2506 2507 2508 2509

/* 
  Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
  'range' parameter.
*/
2510
#define HA_MRR_NO_ASSOCIATION 4U
2511 2512 2513 2514 2515

/* 
  The MRR user will provide ranges in key order, and MRR implementation
  must return rows in key order.
*/
2516
#define HA_MRR_SORTED 8U
2517 2518

/* MRR implementation doesn't have to retrieve full records */
2519
#define HA_MRR_INDEX_ONLY 16U
2520 2521 2522 2523 2524

/* 
  The passed memory buffer is of maximum possible size, the caller can't
  assume larger buffer.
*/
2525
#define HA_MRR_LIMITS 32U
2526 2527 2528 2529 2530 2531 2532 2533


/*
  Flag set <=> default MRR implementation is used
  (The choice is made by **_info[_const]() function which may set this
   flag. SQL layer remembers the flag value and then passes it to
   multi_range_read_init()).
*/
2534
#define HA_MRR_USE_DEFAULT_IMPL 64U
2535 2536 2537 2538 2539 2540

/*
  Used only as parameter to multi_range_read_info():
  Flag set <=> the caller guarantees that the bounds of the scanned ranges
  will not have NULL values.
*/
2541
#define HA_MRR_NO_NULL_ENDPOINTS 128U
2542

2543 2544 2545 2546
/*
  The MRR user has materialized range keys somewhere in the user's buffer.
  This can be used for optimization of the procedure that sorts these keys
  since in this case key values don't have to be copied into the MRR buffer.
2547 2548 2549 2550

  In other words, it is guaranteed that after RANGE_SEQ_IF::next() call the 
  pointer in range->start_key.key will point to a key value that will remain 
  there until the end of the MRR scan.
2551
*/
2552
#define HA_MRR_MATERIALIZED_KEYS 256U
2553

Sergey Petrunya's avatar
Sergey Petrunya committed
2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569
/*
  The following bits are reserved for use by MRR implementation. The intended
  use scenario:

  * sql layer calls handler->multi_range_read_info[_const]() 
    - MRR implementation figures out what kind of scan it will perform, saves
      the result in *mrr_mode parameter.
  * sql layer remembers what was returned in *mrr_mode

  * the optimizer picks the query plan (which may or may not include the MRR 
    scan that was estimated by the multi_range_read_info[_const] call)

  * if the query is an EXPLAIN statement, sql layer will call 
    handler->multi_range_read_explain_info(mrr_mode) to get a text description
    of the picked MRR scan; the description will be a part of EXPLAIN output.
*/
2570 2571 2572 2573 2574 2575
#define HA_MRR_IMPLEMENTATION_FLAG1 512U
#define HA_MRR_IMPLEMENTATION_FLAG2 1024U
#define HA_MRR_IMPLEMENTATION_FLAG3 2048U
#define HA_MRR_IMPLEMENTATION_FLAG4 4096U
#define HA_MRR_IMPLEMENTATION_FLAG5 8192U
#define HA_MRR_IMPLEMENTATION_FLAG6 16384U

/* Mask of all six implementation-reserved bits defined above */
#define HA_MRR_IMPLEMENTATION_FLAGS \
  (HA_MRR_IMPLEMENTATION_FLAG1 | HA_MRR_IMPLEMENTATION_FLAG2 | \
   HA_MRR_IMPLEMENTATION_FLAG3 | HA_MRR_IMPLEMENTATION_FLAG4 | \
   HA_MRR_IMPLEMENTATION_FLAG5 | HA_MRR_IMPLEMENTATION_FLAG6)
2579

unknown's avatar
unknown committed
2580 2581 2582 2583 2584 2585 2586 2587 2588
/*
  This is a buffer area that the handler can use to store rows.
  'end_of_used_area' should be kept updated after calls to
  read-functions so that other parts of the code can use the
  remaining area (until next read calls is issued).
*/

typedef struct st_handler_buffer
{
  /* const? */uchar *buffer;         /* Buffer one can start using */
  /* const? */uchar *buffer_end;     /* End of buffer */
  uchar *end_of_used_area;     /* End of area that was used by handler */
} HANDLER_BUFFER;

2594 2595
typedef struct system_status_var SSV;

2596 2597 2598 2599 2600 2601 2602 2603 2604
class ha_statistics
{
public:
  ulonglong data_file_length;		/* Length off data file */
  ulonglong max_data_file_length;	/* Length off data file */
  ulonglong index_file_length;
  ulonglong max_index_file_length;
  ulonglong delete_length;		/* Free bytes */
  ulonglong auto_increment_value;
2605 2606 2607 2608 2609 2610 2611 2612 2613
  /*
    The number of records in the table. 
      0    - means the table has exactly 0 rows
    other  - if (table_flags() & HA_STATS_RECORDS_IS_EXACT)
               the value is the exact number of records in the table
             else
               it is an estimate
  */
  ha_rows records;
2614 2615
  ha_rows deleted;			/* Deleted records */
  ulong mean_rec_length;		/* physical reclength */
2616 2617 2618
  time_t create_time;			/* When table was created */
  time_t check_time;
  time_t update_time;
2619 2620
  uint block_size;			/* index block size */

2621 2622 2623 2624 2625
  /*
    number of buffer bytes that native mrr implementation needs,
  */
  uint mrr_length_per_rec; 

2626 2627 2628 2629
  ha_statistics():
    data_file_length(0), max_data_file_length(0),
    index_file_length(0), delete_length(0), auto_increment_value(0),
    records(0), deleted(0), mean_rec_length(0), create_time(0),
2630
    check_time(0), update_time(0), block_size(0), mrr_length_per_rec(0)
2631 2632 2633
  {}
};

2634 2635
extern "C" enum icp_result handler_index_cond_check(void* h_arg);

2636
uint calculate_key_len(TABLE *, uint, const uchar *, key_part_map);
2637 2638 2639 2640
/*
  bitmap with first N+1 bits set
  (keypart_map for a key prefix of [0..N] keyparts)
*/
unknown's avatar
unknown committed
2641
#define make_keypart_map(N) (((key_part_map)2 << (N)) - 1)
2642 2643 2644 2645
/*
  bitmap with first N bits set
  (keypart_map for a key prefix of [0..N-1] keyparts)
*/
unknown's avatar
unknown committed
2646
#define make_prev_keypart_map(N) (((key_part_map)1 << (N)) - 1)
2647

2648

2649 2650
/**
  Base class to be used by handlers' different shares.
  The virtual destructor lets a derived share be deleted through a
  Handler_share pointer.
*/
class Handler_share
{
public:
  Handler_share() {}
  virtual ~Handler_share() {}
};

2657

2658
/**
2659 2660 2661
  The handler class is the interface for dynamically loadable
  storage engines. Do not add ifdefs and take care when adding or
  changing virtual functions to avoid vtable confusion
2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703

  Functions in this class accept and return table columns data. Two data
  representation formats are used:
  1. TableRecordFormat - Used to pass [partial] table records to/from
     storage engine

  2. KeyTupleFormat - used to pass index search tuples (aka "keys") to
     storage engine. See opt_range.cc for description of this format.

  TableRecordFormat
  =================
  [Warning: this description is work in progress and may be incomplete]
  The table record is stored in a fixed-size buffer:
   
    record: null_bytes, column1_data, column2_data, ...
  
  The offsets of the parts of the buffer are also fixed: every column has 
  an offset to its column{i}_data, and if it is nullable it also has its own
  bit in null_bytes. 

  The record buffer only includes data about columns that are marked in the
  relevant column set (table->read_set and/or table->write_set, depending on
  the situation). 
  <not-sure>It could be that it is required that null bits of non-present
  columns are set to 1</not-sure>

  VARIOUS EXCEPTIONS AND SPECIAL CASES

  If the table has no nullable columns, then null_bytes is still 
  present, its length is one byte <not-sure> which must be set to 0xFF 
  at all times. </not-sure>
  
  If the table has columns of type BIT, then certain bits from those columns
  may be stored in null_bytes as well. Grep around for Field_bit for
  details.

  For blob columns (see Field_blob), the record buffer stores length of the 
  data, following by memory pointer to the blob data. The pointer is owned 
  by the storage engine and is valid until the next operation.

  If a blob column has NULL value, then its length and blob data pointer
  must be set to 0.
2704
*/
2705

unknown's avatar
unknown committed
2706 2707
class handler :public Sql_alloc
{
2708 2709
public:
  typedef ulonglong Table_flags;
2710
protected:
Konstantin Osipov's avatar
Konstantin Osipov committed
2711 2712
  TABLE_SHARE *table_share;   /* The table definition */
  TABLE *table;               /* The current open table */
2713
  Table_flags cached_table_flags;       /* Set on init() and open() */
unknown's avatar
unknown committed
2714

2715
  ha_rows estimation_rows_to_insert;
unknown's avatar
unknown committed
2716
public:
2717
  handlerton *ht;                 /* storage engine of this handler */
2718 2719
  uchar *ref;				/* Pointer to current row */
  uchar *dup_ref;			/* Pointer to duplicate row */
2720 2721

  ha_statistics stats;
2722

2723 2724 2725 2726 2727 2728 2729
  /** MultiRangeRead-related members: */
  range_seq_t mrr_iter;    /* Iterator to traverse the range sequence */
  RANGE_SEQ_IF mrr_funcs;  /* Range sequence traversal functions */
  HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
  uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
  /** Current range (the one we're now returning rows from) */
  KEY_MULTI_RANGE mrr_cur_range;
unknown's avatar
unknown committed
2730

2731
  /** The following are for read_range() */
2732 2733 2734 2735
  key_range save_end_range, *end_range;
  KEY_PART_INFO *range_key_part;
  int key_compare_result_on_equal;

2736 2737 2738 2739 2740 2741 2742 2743 2744 2745
  /* TRUE <=> source MRR ranges and the output are ordered */
  bool mrr_is_output_sorted;
  /** TRUE <=> we're currently traversing a range in mrr_cur_range. */
  bool mrr_have_range;
  bool eq_range;
  bool internal_tmp_table;                 /* If internal tmp table */
  bool implicit_emptied;                   /* Can be !=0 only if HEAP */
  bool mark_trx_read_write_done;           /* mark_trx_read_write was called */
  bool check_table_binlog_row_based_done; /* check_table_binlog.. was called */
  bool check_table_binlog_row_based_result; /* cached check_table_binlog... */
2746 2747
  /* Set to 1 if handler logged last insert/update/delete operation */
  bool row_already_logged;
2748 2749 2750 2751 2752 2753
  /* 
    TRUE <=> the engine guarantees that returned records are within the range
    being scanned.
  */
  bool in_range_check_pushed_down;

2754 2755
  uint errkey;                             /* Last dup key */
  uint key_used_on_scan;
2756
  uint active_index, keyread;
2757

2758
  /** Length of ref (1-8 or the clustered key length) */
2759
  uint ref_length;
unknown's avatar
unknown committed
2760
  FT_INFO *ft_handler;
2761
  enum {NONE=0, INDEX, RND} inited;
2762

2763
  const COND *pushed_cond;
2764
  /**
2765 2766 2767 2768 2769 2770 2771 2772 2773
    next_insert_id is the next value which should be inserted into the
    auto_increment column: in a inserting-multi-row statement (like INSERT
    SELECT), for the first row where the autoinc value is not specified by the
    statement, get_auto_increment() called and asked to generate a value,
    next_insert_id is set to the next value, then for all other rows
    next_insert_id is used (and increased each time) without calling
    get_auto_increment().
  */
  ulonglong next_insert_id;
2774
  /**
2775 2776 2777 2778 2779 2780
    insert id for the current row (*autogenerated*; if not
    autogenerated, it's 0).
    At first successful insertion, this variable is stored into
    THD::first_successful_insert_id_in_cur_stmt.
  */
  ulonglong insert_id_for_cur_row;
2781
  /**
2782 2783 2784
    Interval returned by get_auto_increment() and being consumed by the
    inserter.
  */
2785 2786
  /* Statistics  variables */
  ulonglong rows_read;
2787
  ulonglong rows_tmp_read;
2788 2789 2790
  ulonglong rows_changed;
  /* One bigger than needed to avoid to test if key == MAX_KEY */
  ulonglong index_rows_read[MAX_KEY+1];
2791 2792

private:
2793 2794
  /* ANALYZE time tracker, if present */
  Exec_time_tracker *tracker;
2795 2796 2797
public:
  void set_time_tracker(Exec_time_tracker *tracker_arg) { tracker=tracker_arg;}

Sergei Golubchik's avatar
Sergei Golubchik committed
2798 2799 2800
  Item *pushed_idx_cond;
  uint pushed_idx_cond_keyno;  /* The index which the above condition is for */

2801
  Discrete_interval auto_inc_interval_for_cur_row;
Guilhem Bichot's avatar
Guilhem Bichot committed
2802 2803 2804 2805 2806 2807 2808
  /**
     Number of reserved auto-increment intervals. Serves as a heuristic
     when we have no estimation of how many records the statement will insert:
     the more intervals we have reserved, the bigger the next one. Reset in
     handler::ha_release_auto_increment().
  */
  uint auto_inc_intervals_count;
2809

Marc Alff's avatar
Marc Alff committed
2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823
  /**
    Instrumented table associated with this handler.
    This member should be set to NULL when no instrumentation is in place,
    so that linking an instrumented/non instrumented server/plugin works.
    For example:
    - the server is compiled with the instrumentation.
    The server expects either NULL or valid pointers in m_psi.
    - an engine plugin is compiled without instrumentation.
    The plugin can not leave this pointer uninitialized,
    or can not leave a trash value on purpose in this pointer,
    as this would crash the server.
  */
  PSI_table *m_psi;

2824 2825 2826
  virtual void unbind_psi();
  virtual void rebind_psi();

2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841
private:
  /**
    The lock type set by when calling::ha_external_lock(). This is 
    propagated down to the storage engine. The reason for also storing 
    it here, is that when doing MRR we need to create/clone a second handler
    object. This cloned handler object needs to know about the lock_type used.
  */
  int m_lock_type;
  /**
    Pointer where to store/retrieve the Handler_share pointer.
    For non partitioned handlers this is &TABLE_SHARE::ha_share.
  */
  Handler_share **ha_share;

public:
2842
  /**
    Create a handler bound to a storage engine and a table definition.

    No table is attached yet (table is 0), no scan is active (inited is
    NONE, active_index and keyread are MAX_KEY) and the lock type is
    F_UNLCK. The statistics counters are cleared via reset_statistics().

    NOTE(review): several members are not initialised here, e.g.
    cached_table_flags, dup_ref, mrr_iter, multi_range_buffer,
    ranges_in_seq, range_key_part, key_compare_result_on_equal,
    mrr_is_output_sorted, mrr_have_range, eq_range, internal_tmp_table
    and errkey; callers must set them before reading.

    @param ht_arg     Storage engine of this handler.
    @param share_arg  The table definition.
  */
  handler(handlerton *ht_arg, TABLE_SHARE *share_arg)
    :table_share(share_arg), table(0),
    estimation_rows_to_insert(0), ht(ht_arg),
    ref(0), end_range(NULL),
    implicit_emptied(0),
    mark_trx_read_write_done(0),
    check_table_binlog_row_based_done(0),
    check_table_binlog_row_based_result(0),
    row_already_logged(0),
    in_range_check_pushed_down(FALSE),
    key_used_on_scan(MAX_KEY),
    active_index(MAX_KEY), keyread(MAX_KEY),
    ref_length(sizeof(my_off_t)),
    ft_handler(0), inited(NONE),
    pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
    tracker(NULL),
    pushed_idx_cond(NULL),
    pushed_idx_cond_keyno(MAX_KEY),
    auto_inc_intervals_count(0),
    m_psi(NULL), m_lock_type(F_UNLCK), ha_share(NULL)
  {
    DBUG_PRINT("info",
               ("handler created F_UNLCK %d F_RDLCK %d F_WRLCK %d",
                F_UNLCK, F_RDLCK, F_WRLCK));
    reset_statistics();
  }
2868 2869
  virtual ~handler(void)
  {
2870
    DBUG_ASSERT(m_lock_type == F_UNLCK);
Konstantin Osipov's avatar
Konstantin Osipov committed
2871
    DBUG_ASSERT(inited == NONE);
2872
  }
2873
  virtual handler *clone(const char *name, MEM_ROOT *mem_root);
2874
  /** This is called after create to allow us to set up cached variables */
2875 2876 2877 2878
  void init()
  {
    cached_table_flags= table_flags();
  }
2879
  /* ha_ methods: public wrappers for private virtual API */
2880
  
2881
  int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked);
2882 2883
  /**
    Start an index scan by calling index_init().

    Must be called while no scan is active (asserted). On success the
    handler is marked as being in INDEX mode, active_index is set to idx
    and end_range is cleared; on failure the state is left untouched.

    @param idx     Index number, passed to index_init().
    @param sorted  Passed through to index_init().

    @return 0 on success, otherwise the error returned by index_init().
  */
  int ha_index_init(uint idx, bool sorted)
  {
    /* Debug-only fault injection point */
    DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
    int result;
    DBUG_ENTER("ha_index_init");
    DBUG_ASSERT(inited==NONE);
    if (!(result= index_init(idx, sorted)))
    {
      inited=       INDEX;
      active_index= idx;
      end_range= NULL;
    }
    DBUG_RETURN(result);
  }
  int ha_index_end()
  {
    DBUG_ENTER("ha_index_end");
    DBUG_ASSERT(inited==INDEX);
2900 2901
    inited=       NONE;
    active_index= MAX_KEY;
Michael Widenius's avatar
Michael Widenius committed
2902
    end_range=    NULL;
2903 2904
    DBUG_RETURN(index_end());
  }
2905 2906
  /* This is called after index_init() if we need to do a index scan */
  virtual int prepare_index_scan() { return 0; }
2907 2908 2909 2910 2911 2912 2913 2914 2915 2916
  virtual int prepare_index_key_scan_map(const uchar * key, key_part_map keypart_map)
  {
    uint key_len= calculate_key_len(table, active_index, key, keypart_map);
    return  prepare_index_key_scan(key, key_len);
  }
  virtual int prepare_index_key_scan( const uchar * key, uint key_len )
  { return 0; }
  virtual int prepare_range_scan(const key_range *start_key, const key_range *end_key)
  { return 0; }

2917
  int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result))
2918
  {
2919
    DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2920 2921 2922 2923
    int result;
    DBUG_ENTER("ha_rnd_init");
    DBUG_ASSERT(inited==NONE || (inited==RND && scan));
    inited= (result= rnd_init(scan)) ? NONE: RND;
2924
    end_range= NULL;
2925 2926 2927 2928 2929 2930 2931
    DBUG_RETURN(result);
  }
  int ha_rnd_end()
  {
    DBUG_ENTER("ha_rnd_end");
    DBUG_ASSERT(inited==RND);
    inited=NONE;
2932
    end_range= NULL;
2933 2934
    DBUG_RETURN(rnd_end());
  }
2935
  int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result));
2936 2937 2938 2939 2940 2941
  int ha_reset();
  /*
    End whichever kind of scan is currently active, if any.
    Needed in many places, e.g. in the HANDLER command.
    Returns 0 when no scan is active, otherwise the result of
    ha_index_end() or ha_rnd_end().
  */
  int ha_index_or_rnd_end()
  {
    if (inited == INDEX)
      return ha_index_end();
    if (inited == RND)
      return ha_rnd_end();
    return 0;
  }
2942 2943 2944
  /**
    The cached_table_flags is set at ha_open and ha_external_lock
  */
2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957
  Table_flags ha_table_flags() const { return cached_table_flags; }
  /**
    These functions represent the public interface to *users* of the
    handler class, hence they are *not* virtual. For the inheritance
    interface, see the (private) functions write_row(), update_row(),
    and delete_row() below.
  */
  int ha_external_lock(THD *thd, int lock_type);
  int ha_write_row(uchar * buf);
  int ha_update_row(const uchar * old_data, uchar * new_data);
  int ha_delete_row(const uchar * buf);
  void ha_release_auto_increment();

2958 2959
  bool keyread_enabled() { return keyread < MAX_KEY; }
  /**
    Switch on key-only reading for index idx.

    Does nothing and returns 0 if keyread is already enabled; in that
    case the previously recorded index is kept, even if it differs from
    idx.

    @return 0 if already enabled, otherwise the result of
            extra(HA_EXTRA_KEYREAD).
  */
  int ha_start_keyread(uint idx)
  {
    if (keyread_enabled())
      return 0;
    keyread= idx;
    return extra(HA_EXTRA_KEYREAD);
  }
  int ha_end_keyread()
  {
2968
    if (!keyread_enabled())
2969
      return 0;
2970
    keyread= MAX_KEY;
2971 2972 2973
    return extra(HA_EXTRA_NO_KEYREAD);
  }

2974
  int check_collation_compatibility();
2975 2976 2977 2978
  int ha_check_for_upgrade(HA_CHECK_OPT *check_opt);
  /** to be actually called to get 'check()' functionality*/
  int ha_check(THD *thd, HA_CHECK_OPT *check_opt);
  int ha_repair(THD* thd, HA_CHECK_OPT* check_opt);
2979
  /**
    Record the expected number of rows and call start_bulk_insert().

    @param rows   Stored in estimation_rows_to_insert and passed on.
    @param flags  Passed through to start_bulk_insert().
  */
  void ha_start_bulk_insert(ha_rows rows, uint flags= 0)
  {
    DBUG_ENTER("handler::ha_start_bulk_insert");
    estimation_rows_to_insert= rows;
    start_bulk_insert(rows, flags);
    DBUG_VOID_RETURN;
  }
  int ha_end_bulk_insert()
  {
2988
    DBUG_ENTER("handler::ha_end_bulk_insert");
2989
    estimation_rows_to_insert= 0;
2990 2991
    int ret= end_bulk_insert();
    DBUG_RETURN(ret);
2992
  }
2993 2994 2995
  int ha_bulk_update_row(const uchar *old_data, uchar *new_data,
                         uint *dup_key_found);
  int ha_delete_all_rows();
2996
  int ha_truncate();
2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009
  int ha_reset_auto_increment(ulonglong value);
  int ha_optimize(THD* thd, HA_CHECK_OPT* check_opt);
  int ha_analyze(THD* thd, HA_CHECK_OPT* check_opt);
  bool ha_check_and_repair(THD *thd);
  int ha_disable_indexes(uint mode);
  int ha_enable_indexes(uint mode);
  int ha_discard_or_import_tablespace(my_bool discard);
  int ha_rename_table(const char *from, const char *to);
  int ha_delete_table(const char *name);
  void ha_drop_table(const char *name);

  int ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info);

3010
  int ha_create_partitioning_metadata(const char *name, const char *old_name,
3011
                                      int action_flag);
3012 3013 3014

  int ha_change_partitions(HA_CREATE_INFO *create_info,
                           const char *path,
3015 3016
                           ulonglong * const copied,
                           ulonglong * const deleted,
3017 3018 3019 3020
                           const uchar *pack_frm_data,
                           size_t pack_frm_len);
  int ha_drop_partitions(const char *path);
  int ha_rename_partitions(const char *path);
3021

3022
  void adjust_next_insert_id_after_explicit_value(ulonglong nr);
unknown's avatar
unknown committed
3023
  int update_auto_increment();
unknown's avatar
unknown committed
3024
  virtual void print_error(int error, myf errflag);
3025
  virtual bool get_error_message(int error, String *buf);
unknown's avatar
unknown committed
3026
  uint get_dup_key(int error);
Sergei Golubchik's avatar
Sergei Golubchik committed
3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049
  /**
    Retrieves the names of the table and the key for which there was a
    duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.

    If any of the table or key name is not available this method will return
    false and will not change any of child_table_name or child_key_name.

    @param child_table_name[out]    Table name
    @param child_table_name_len[in] Table name buffer size
    @param child_key_name[out]      Key name
    @param child_key_name_len[in]   Key name buffer size

    @retval  true                  table and key names were available
                                   and were written into the corresponding
                                   out parameters.
    @retval  false                 table and key names were not available,
                                   the out parameters were not touched.
  */
  virtual bool get_foreign_dup_key(char *child_table_name,
                                   uint child_table_name_len,
                                   char *child_key_name,
                                   uint child_key_name_len)
  { DBUG_ASSERT(false); return(false); }
3050 3051
  void reset_statistics()
  {
3052
    rows_read= rows_changed= rows_tmp_read= 0;
3053 3054
    bzero(index_rows_read, sizeof(index_rows_read));
  }
3055
  virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
unknown's avatar
unknown committed
3056 3057 3058
  {
    table= table_arg;
    table_share= share;
3059
    reset_statistics();
unknown's avatar
unknown committed
3060
  }
unknown's avatar
unknown committed
3061
  virtual double scan_time()
3062
  { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074

  /**
     The cost of reading a set of ranges from the table using an index
     to access it.
     
     @param index  The index number.
     @param ranges The number of ranges to be read.
     @param rows   Total number of rows to be read.
     
     This method can be used to calculate the total cost of scanning a table
     using an index by calling it using read_time(index, 1, table_size).
  */
unknown's avatar
unknown committed
3075
  virtual double read_time(uint index, uint ranges, ha_rows rows)
  {
    /* Default cost: one unit per range plus one unit per row. */
    return rows2double(rows + ranges);
  }
Sergei Golubchik's avatar
Sergei Golubchik committed
3077 3078 3079 3080 3081 3082 3083 3084 3085 3086

  /**
    Calculate cost of 'keyread' scan for given index and number of records.

    Only declared here (no inline body).

    @param index    index to read
    @param ranges   number of ranges to read
    @param rows     number of records to read

    @return the estimated cost, as a double
  */
  virtual double keyread_time(uint index, uint ranges, ha_rows rows);

unknown's avatar
unknown committed
3087
  virtual const key_map *keys_to_use_for_scanning()
  {
    /* Default: the empty key map. */
    return &key_map_empty;
  }
3088 3089 3090 3091 3092 3093

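  /*
    True if changes to the table can be rolled back, i.e. neither
    HA_NO_TRANSACTIONS nor HA_PERSISTENT_TABLE (persistent changes, no
    rollback) is set in the table flags.
    This is mainly used to decide how to log changes to the table in
    the binary log.
  */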
3094
  bool has_transactions()
3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105
  {
    return ((ha_table_flags() & (HA_NO_TRANSACTIONS | HA_PERSISTENT_TABLE))
            == 0);
  }
  /*
    True if the underlying table supports transactions
    (HA_NO_TRANSACTIONS is not set in the table flags)
  */
  bool has_transaction_manager()
  {
    if (ha_table_flags() & HA_NO_TRANSACTIONS)
      return false;
    return true;
  }
3106

3107
  /**
3108 3109 3110 3111 3112
    This method is used to analyse the error to see whether the error
    is ignorable or not, certain handlers can have more error that are
    ignorable than others. E.g. the partition handler can get inserts
    into a range where there is no partition and this is an ignorable
    error.
3113 3114
    HA_ERR_FOUND_DUPP_UNIQUE is a special case in MyISAM that means the
    same thing as HA_ERR_FOUND_DUPP_KEY but can in some cases lead to
3115
    a slightly different error message.
3116
  */
3117
  virtual bool is_fatal_error(int error, uint flags)
3118 3119
  {
    if (!error ||
3120
        ((flags & HA_CHECK_DUP_KEY) &&
3121
         (error == HA_ERR_FOUND_DUPP_KEY ||
3122
          error == HA_ERR_FOUND_DUPP_UNIQUE)) ||
3123
        error == HA_ERR_AUTOINC_ERANGE ||
3124 3125 3126
        ((flags & HA_CHECK_FK_ERROR) &&
         (error == HA_ERR_ROW_IS_REFERENCED ||
          error == HA_ERR_NO_REFERENCED_ROW)))
3127 3128 3129 3130
      return FALSE;
    return TRUE;
  }

3131
  /**
3132 3133 3134 3135
    Number of rows in table. It will only be called if
    (table_flags() & (HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT)) != 0
  */
  virtual ha_rows records()
  {
    /* Default: the row count held in the handler statistics. */
    return stats.records;
  }
3136
  /**
unknown's avatar
unknown committed
3137 3138 3139 3140 3141 3142
    Return upper bound of current number of records in the table
    (max. of how many records one will retrieve when doing a full table scan)
    If upper bound is not known, HA_POS_ERROR should be returned as a max
    possible upper bound.
  */
  virtual ha_rows estimate_rows_upper_bound()
  {
    /* Default: the statistics row count plus EXTRA_RECORDS. */
    return EXTRA_RECORDS + stats.records;
  }
unknown's avatar
unknown committed
3144

3145
  /**
3146 3147 3148 3149 3150
    Get the row type from the storage engine.  If this method returns
    ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
  */
  virtual enum row_type get_row_type() const
  {
    return ROW_TYPE_NOT_USED;
  }

unknown's avatar
unknown committed
3151 3152
  virtual const char *index_type(uint key_number)
  {
    /* Base version is not expected to be called; it yields an empty string. */
    DBUG_ASSERT(0);
    return "";
  }

3153

3154
  /**
3155 3156 3157 3158 3159
    Signal that the table->read_set and table->write_set table maps changed
    The handler is allowed to set additional bits in the above map in this
    call. Normally the handler should ignore all calls until we have done
    a ha_rnd_init() or ha_index_init(), write_row(), update_row or delete_row()
    as there may be several calls to this routine.
3160
  */
3161
  virtual void column_bitmaps_signal();
3162 3163 3164 3165 3166 3167
  /*
    active_index is only reported while an index scan is initialised,
    because some engines, like innodb, set active_index during a table
    scan as well. In every other state MAX_KEY is returned.
  */
  uint get_index(void) const
  {
    if (inited != INDEX)
      return MAX_KEY;
    return active_index;
  }
3168
  int ha_close(void);
3169

3170 3171 3172
  /**
    @retval  0   Bulk update used by handler
    @retval  1   Bulk update not used, normal operation used
unknown's avatar
unknown committed
3173 3174
  */
  virtual bool start_bulk_update()
  {
    /* Default: bulk update is not used. */
    return true;
  }
3175 3176 3177
  /**
    @retval  0   Bulk delete used by handler
    @retval  1   Bulk delete not used, normal operation used
unknown's avatar
unknown committed
3178 3179
  */
  virtual bool start_bulk_delete()
  {
    /* Default: bulk delete is not used. */
    return true;
  }
3180
  /**
unknown's avatar
unknown committed
3181 3182 3183 3184 3185
    After this call all outstanding updates must be performed. The number
    of duplicate key errors is reported in the duplicate key parameter.
    It is allowed to continue the batched update after this call; the
    handler has to wait until end_bulk_update before changing state.

3186 3187 3188 3189
    @param    dup_key_found       Number of duplicate keys found

    @retval  0           Success
    @retval  >0          Error code
unknown's avatar
unknown committed
3190
  */
unknown's avatar
unknown committed
3191 3192 3193 3194 3195
  virtual int exec_bulk_update(uint *dup_key_found)
  {
    /*
      Not expected to be reached in the base class: start_bulk_update()
      here reports that bulk update is not used. Hence the assertion; the
      error code is what is returned if execution continues past it.
    */
    DBUG_ASSERT(FALSE);
    return HA_ERR_WRONG_COMMAND;
  }
3196
  /**
unknown's avatar
unknown committed
3197 3198 3199
    Perform any needed clean-up, no outstanding updates are there at the
    moment.
  */
unknown's avatar
unknown committed
3200
  virtual void end_bulk_update()
  {
    /* Default: nothing to clean up. */
  }
3201
  /**
unknown's avatar
unknown committed
3202 3203
    Execute all outstanding deletes and close down the bulk delete.

3204 3205
    @retval 0             Success
    @retval >0            Error code
unknown's avatar
unknown committed
3206
  */
unknown's avatar
unknown committed
3207 3208 3209 3210 3211
  virtual int end_bulk_delete()
  {
    /*
      Not expected to be reached in the base class: start_bulk_delete()
      here reports that bulk delete is not used. Hence the assertion; the
      error code is what is returned if execution continues past it.
    */
    DBUG_ASSERT(FALSE);
    return HA_ERR_WRONG_COMMAND;
  }
3212 3213
  /**
     @brief
3214 3215 3216
     Positions an index cursor to the index specified in the
     handle. Fetches the row if available. If the key value is null,
     begin at the first key of the index.
3217
  */
3218
protected:
3219
  virtual int index_read_map(uchar * buf, const uchar * key,
unknown's avatar
unknown committed
3220
                             key_part_map keypart_map,
3221 3222 3223
                             enum ha_rkey_function find_flag)
  {
    uint key_len= calculate_key_len(table, active_index, key, keypart_map);
3224
    return index_read(buf, key, key_len, find_flag);
3225 3226 3227
  }
  /**
     @brief
3228 3229 3230
     Positions an index cursor to the index specified in the
     handle. Fetches the row if available. If the key value is null,
     begin at the first key of the index.
3231 3232 3233 3234
  */
  virtual int index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag);
3235
  virtual int index_next(uchar * buf)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3237
  virtual int index_prev(uchar * buf)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3239
  virtual int index_first(uchar * buf)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3241
  virtual int index_last(uchar * buf)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3243
  virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
3244 3245 3246 3247 3248 3249 3250 3251
  virtual int close(void)=0;
  inline void update_rows_read()
  {
    /* Reads are counted separately for internal temporary tables. */
    if (likely(!internal_tmp_table))
    {
      rows_read++;
      return;
    }
    rows_tmp_read++;
  }
3252
  inline void update_index_statistics()
3253
  {
3254
    index_rows_read[active_index]++;
3255
    update_rows_read();
3256
  }
3257 3258
public:

Sergei Golubchik's avatar
Sergei Golubchik committed
3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269
  /*
    Public, non-virtual counterparts of the protected index_* read methods
    declared above; only declared here.
    NOTE(review): presumably these wrap the corresponding virtual calls
    with common bookkeeping -- confirm against the definitions.
  */
  int ha_index_read_map(uchar * buf, const uchar * key,
                        key_part_map keypart_map,
                        enum ha_rkey_function find_flag);
  int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key,
                            key_part_map keypart_map,
                            enum ha_rkey_function find_flag);
  int ha_index_next(uchar * buf);
  int ha_index_prev(uchar * buf);
  int ha_index_first(uchar * buf);
  int ha_index_last(uchar * buf);
  int ha_index_next_same(uchar *buf, const uchar *key, uint keylen);
3270 3271 3272 3273
  /*
    TODO: should we also add non-virtual ha_func_name wrappers for the
    functions below?
  */
3274 3275 3276
  virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                              void *seq_init_param, 
                                              uint n_ranges, uint *bufsz,
3277 3278
                                              uint *mrr_mode,
                                              Cost_estimate *cost);
3279
  virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
3280
                                        uint key_parts, uint *bufsz, 
3281
                                        uint *mrr_mode, Cost_estimate *cost);
3282
  virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
3283
                                    uint n_ranges, uint mrr_mode, 
3284
                                    HANDLER_BUFFER *buf);
3285
  virtual int multi_range_read_next(range_id_t *range_info);
3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311
  /*
    Return string representation of the MRR plan.

    This is intended to be used for EXPLAIN, via the following scenario:
    1. SQL layer calls handler->multi_range_read_info().
    1.1. Storage engine figures out whether it will use some non-default
         MRR strategy, sets appropriate bits in *mrr_mode, and returns
         control to SQL layer
    2. SQL layer remembers the returned mrr_mode
    3. SQL layer compares various options and chooses the final query plan. As
       a part of that, it makes a choice of whether to use the MRR strategy
       picked in 1.1
    4. EXPLAIN code converts the query plan to its text representation. If MRR
       strategy is part of the plan, it calls
       multi_range_read_explain_info(mrr_mode) to get a text representation of
       the picked MRR strategy.

    @param mrr_mode   Mode which was returned by multi_range_read_info[_const]
    @param str        INOUT string to be printed for EXPLAIN
    @param size       Size of the buffer pointed to by str.
                      NOTE(review): this comment used to describe a str_end
                      pointer; confirm the exact bounds semantics.

    @return The base implementation writes nothing and returns 0.
            NOTE(review): overriding engines presumably return the number
            of bytes they put into str -- confirm.
  */
  virtual int multi_range_read_explain_info(uint mrr_mode, char *str, 
                                            size_t size)
  { return 0; }

3312
  virtual int read_range_first(const key_range *start_key,
unknown's avatar
unknown committed
3313 3314 3315
                               const key_range *end_key,
                               bool eq_range, bool sorted);
  virtual int read_range_next();
3316
  void set_end_range(const key_range *end_key);
3317
  int compare_key(key_range *range);
3318
  int compare_key2(key_range *range) const;
unknown's avatar
unknown committed
3319
  virtual int ft_init()
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3320
  void ft_end()
  {
    /* Forget the current full-text handler pointer. */
    ft_handler= NULL;
  }
3321
  virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
  {
    /* Default: no FT_INFO is produced. */
    return NULL;
  }
3323
private:
3324 3325 3326
  virtual int ft_read(uchar *buf)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
  /* Pure virtual: every concrete handler has to provide these two. */
  virtual int rnd_next(uchar *buf)=0;
  virtual int rnd_pos(uchar * buf, uchar *pos)=0;
3327
  /**
3328 3329 3330
    This function only works for handlers having
    HA_PRIMARY_KEY_REQUIRED_FOR_POSITION set.
    It will return the row with the PK given in the record argument.
3331 3332
  */
  virtual int rnd_pos_by_record(uchar *record)
  {
    DBUG_ASSERT(table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION);

    /* Let position() fill in ref from the record ... */
    position(record);
    /* ... then read the row that ref points at back into the record. */
    return rnd_pos(record, ref);
  }
3338
  virtual int read_first_row(uchar *buf, uint primary_key);
3339 3340 3341
public:

  /* Same as above, but with statistics */
3342
  inline int ha_ft_read(uchar *buf);
Sergei Golubchik's avatar
Sergei Golubchik committed
3343 3344
  int ha_rnd_next(uchar *buf);
  int ha_rnd_pos(uchar *buf, uchar *pos);
3345 3346
  inline int ha_rnd_pos_by_record(uchar *buf);
  inline int ha_read_first_row(uchar *buf, uint primary_key);
3347

3348
  /**
3349
    The following 3 functions are only needed for tables that may be
3350
    internal temporary tables during joins.
unknown's avatar
unknown committed
3351
  */
3352 3353 3354
  virtual int remember_rnd_pos()
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
  virtual int restart_rnd_next(uchar *buf)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3356
  virtual int rnd_same(uchar *buf, uint inx)
  {
    /* Default: operation not provided by the base class. */
    return HA_ERR_WRONG_COMMAND;
  }
3358 3359 3360

  virtual ha_rows records_in_range(uint inx, key_range *min_key,
                                   key_range *max_key)
  {
    /* Default: a fixed estimate of 10 rows, whatever the range is. */
    const ha_rows fixed_estimate= 10;
    return fixed_estimate;
  }
3362 3363 3364 3365 3366 3367
  /*
    If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, then it sets ref
    (reference to the row, aka position, with the primary key given in
    the record).
    Otherwise it sets ref to the current row.
  */
3368
  virtual void position(const uchar *record)=0;
3369
  virtual int info(uint)=0; // see my_base.h for full description
3370
  virtual void get_dynamic_partition_info(PARTITION_STATS *stat_info,
3371
                                          uint part_id);
unknown's avatar
unknown committed
3372 3373
  virtual int extra(enum ha_extra_function operation)
  {
    /* Default: every operation is accepted and ignored. */
    return 0;
  }
unknown's avatar
unknown committed
3374
  virtual int extra_opt(enum ha_extra_function operation, ulong cache_size)
unknown's avatar
unknown committed
3375
  { return extra(operation); }
3376

3377
  /**
3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388
    In an UPDATE or DELETE, if the row under the cursor was locked by another
    transaction, and the engine used an optimistic read of the last
    committed row value under the cursor, then the engine returns 1 from this
    function. MySQL must NOT try to update this optimistic value. If the
    optimistic value does not match the WHERE condition, MySQL can decide to
    skip over this row. Currently only works for InnoDB. This can be used to
    avoid unnecessary lock waits.

    If this method returns nonzero, it will also signal the storage
    engine that the next read will be a locking re-read of the row.
  */
3389
  bool ha_was_semi_consistent_read();
3390
  virtual bool was_semi_consistent_read()
  {
    /* Default: no semi-consistent read took place. */
    return false;
  }
3391
  /**
3392 3393 3394 3395 3396 3397
    Tell the engine whether it should avoid unnecessary lock waits.
    If yes, in an UPDATE or DELETE, if the row under the cursor was locked
    by another transaction, the engine may try an optimistic read of
    the last committed row value under the cursor.
  */
  virtual void try_semi_consistent_read(bool) {}  /* default: request is ignored */
unknown's avatar
unknown committed
3398
  virtual void unlock_row() {}  /* default: does nothing */
unknown's avatar
unknown committed
3399
  virtual int start_stmt(THD *thd, thr_lock_type lock_type)
  {
    /* Default: nothing to do, report success. */
    return 0;
  }
3400 3401 3402 3403
  virtual void get_auto_increment(ulonglong offset, ulonglong increment,
                                  ulonglong nb_desired_values,
                                  ulonglong *first_value,
                                  ulonglong *nb_reserved_values);
3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423
  /**
    Store the given value as next_insert_id.

    @param id  Value to assign to next_insert_id.
  */
  void set_next_insert_id(ulonglong id)
  {
    /*
      Trace the full 64-bit value: casting to ulong and printing with %lu
      would truncate it on platforms where long is 32 bits wide.
    */
    DBUG_PRINT("info",("auto_increment: next value %llu", (ulonglong) id));
    next_insert_id= id;
  }
  void restore_auto_increment(ulonglong prev_insert_id)
  {
    /*
      A row insertion failed, so the auto_increment id generated last is
      re-used for the next row: next_insert_id goes back to the value it
      had before the failed insertion, as supplied by the caller.

      A supplied value of 0 means the failed row was the first row of the
      INSERT. In that case insert_id_for_cur_row decides: 0 there means no
      id has been generated since the INSERT started, so next_insert_id
      becomes 0; anything else is the id generated for that first, failed
      row, and it is used again.
    */
    if (prev_insert_id > 0)
      next_insert_id= prev_insert_id;
    else
      next_insert_id= insert_id_for_cur_row;
  }
3424

unknown's avatar
unknown committed
3425
  virtual void update_create_info(HA_CREATE_INFO *create_info) {}  /* default: create_info is left as is */
unknown's avatar
unknown committed
3426
  int check_old_types();
unknown's avatar
unknown committed
3427 3428 3429 3430 3431 3432
  virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt)
  {
    /* Default: this admin command is not implemented. */
    return HA_ADMIN_NOT_IMPLEMENTED;
  }
  virtual int preload_keys(THD* thd, HA_CHECK_OPT* check_opt)
  {
    /* Default: this admin command is not implemented. */
    return HA_ADMIN_NOT_IMPLEMENTED;
  }
  /* end of the list of admin commands */

3433
  virtual int indexes_are_disabled(void)
  {
    /* Default: 0. */
    return 0;
  }
3434 3435 3436
  virtual char *update_table_comment(const char * comment)
  {
    /* Default: hand back the very same string, merely dropping constness. */
    return const_cast<char*>(comment);
  }
  virtual void append_create_info(String *packet) {}  /* default: nothing is appended */
3437 3438 3439 3440 3441 3442 3443 3444 3445
  /**
    If index == MAX_KEY then a check for table is made and if index <
    MAX_KEY then a check is made if the table has foreign keys and if
    a foreign key uses this index (and thus the index cannot be dropped).

    @param  index            Index to check if foreign key uses it

    @retval   TRUE            Foreign key defined on table or index
    @retval   FALSE           No foreign key defined
unknown's avatar
unknown committed
3446 3447 3448
  */
  virtual bool is_fk_defined_on_table_or_index(uint index)
  {
    /* Default: no foreign key defined. */
    return FALSE;
  }
3449 3450
  /* gets foreign key create string from InnoDB */
  virtual char* get_foreign_key_create_info()
  {
    /* Default: no string available. */
    return NULL;
  }
3451 3452 3453 3454 3455 3456 3457 3458 3459
  /**
    Used in ALTER TABLE to check if changing storage engine is allowed.

    @note Called without holding thr_lock.c lock.

    @retval true   Changing storage engine is allowed.
    @retval false  Changing storage engine not allowed.
  */
  virtual bool can_switch_engines()
  {
    /* Default: changing the storage engine is allowed. */
    return true;
  }
3460
  virtual int can_continue_handler_scan()
  {
    /* Default: 0. */
    return 0;
  }
3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487
  /**
    Get the list of foreign keys in this table.

    @remark Returns the set of foreign keys where this table is the
            dependent or child table.

    @param thd  The thread handle.
    @param f_key_list[out]  The list of foreign keys.

    @return The handler error code or zero for success.
  */
  virtual int
  get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
  {
    /* Default: success, with f_key_list left untouched. */
    return 0;
  }
  /**
    Get the list of foreign keys referencing this table.

    @remark Returns the set of foreign keys where this table is the
            referenced or parent table.

    @param thd  The thread handle.
    @param f_key_list[out]  The list of foreign keys.

    @return The handler error code or zero for success.
  */
  virtual int
  get_parent_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
  {
    /* Default: success, with f_key_list left untouched. */
    return 0;
  }
unknown's avatar
unknown committed
3489
  virtual uint referenced_by_foreign_key()
  {
    /* Default: 0. */
    return 0;
  }
unknown's avatar
unknown committed
3490
  virtual void init_table_handle_for_HANDLER()
unknown's avatar
unknown committed
3491 3492
  { return; }       /* prepare InnoDB for HANDLER */
  virtual void free_foreign_key_create_info(char* str) {}  /* default: str is not touched */
3493
  /** The following can be called without an open handler */
3494
  const char *table_type() const
  {
    /* The name string of this handler's handlerton. */
    return hton_name(ht)->str;
  }
3495
  const char **bas_ext() const
  {
    /* The file extension list kept in the handlerton. */
    return ht->tablefile_extensions;
  }
3496

3497 3498
  virtual int get_default_no_partitions(HA_CREATE_INFO *create_info)
  {
    /* Default: a single partition. */
    return 1;
  }
unknown's avatar
unknown committed
3499 3500 3501 3502 3503 3504 3505 3506
  virtual void set_auto_partitions(partition_info *part_info)
  {
    /* Default: nothing to do. */
  }
  virtual bool get_no_parts(const char *name,
                            uint *no_parts)
  {
    /* Default: report zero partitions and return false. */
    *no_parts= 0;
    return false;
  }
  virtual void set_part_info(partition_info *part_info)
  {
    /* Default: nothing to do. */
  }
3507

3508
  virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0;
3509

unknown's avatar
unknown committed
3510
  /** Engine record length limit, capped at HA_MAX_REC_LENGTH. */
  uint max_record_length() const
  {
    /*
      Fetch the engine limit once: MY_MIN is a macro, so a virtual call
      passed to it directly may be evaluated more than once.
    */
    const uint engine_limit= max_supported_record_length();
    return MY_MIN(HA_MAX_REC_LENGTH, engine_limit);
  }
unknown's avatar
unknown committed
3512
  /** Engine limit on the number of keys, capped at MAX_KEY. */
  uint max_keys() const
  {
    /*
      Fetch the engine limit once: MY_MIN is a macro, so a virtual call
      passed to it directly may be evaluated more than once.
    */
    const uint engine_limit= max_supported_keys();
    return MY_MIN(MAX_KEY, engine_limit);
  }
unknown's avatar
unknown committed
3514
  /** Engine limit on the number of key parts, capped at MAX_REF_PARTS. */
  uint max_key_parts() const
  {
    /*
      Fetch the engine limit once: MY_MIN is a macro, so a virtual call
      passed to it directly may be evaluated more than once.
    */
    const uint engine_limit= max_supported_key_parts();
    return MY_MIN(MAX_REF_PARTS, engine_limit);
  }
unknown's avatar
unknown committed
3516
  /** Engine key length limit, capped at MAX_KEY_LENGTH. */
  uint max_key_length() const
  {
    /*
      Fetch the engine limit once: MY_MIN is a macro, so a virtual call
      passed to it directly may be evaluated more than once.
    */
    const uint engine_limit= max_supported_key_length();
    return MY_MIN(MAX_KEY_LENGTH, engine_limit);
  }
unknown's avatar
unknown committed
3518
  /** Engine key part length limit, capped at MAX_KEY_LENGTH. */
  uint max_key_part_length() const
  {
    /*
      Fetch the engine limit once: MY_MIN is a macro, so a virtual call
      passed to it directly may be evaluated more than once.
    */
    const uint engine_limit= max_supported_key_part_length();
    return MY_MIN(MAX_KEY_LENGTH, engine_limit);
  }
unknown's avatar
unknown committed
3520 3521 3522 3523 3524

  virtual uint max_supported_record_length() const
  {
    /* Default: HA_MAX_REC_LENGTH. */
    return HA_MAX_REC_LENGTH;
  }
  virtual uint max_supported_keys() const
  {
    /* Default: no keys. */
    return 0;
  }
  virtual uint max_supported_key_parts() const
  {
    /* Default: MAX_REF_PARTS. */
    return MAX_REF_PARTS;
  }
  virtual uint max_supported_key_length() const
  {
    /* Default: MAX_KEY_LENGTH. */
    return MAX_KEY_LENGTH;
  }
3525
  virtual uint max_supported_key_part_length() const
  {
    /* Default: 255. */
    return 255;
  }
unknown's avatar
unknown committed
3526
  virtual uint min_record_length(uint options) const
  {
    /* Default: 1, whatever the options are. */
    return 1;
  }
unknown's avatar
unknown committed
3527

unknown's avatar
unknown committed
3528
  virtual uint checksum() const
  {
    /* Default: 0. */
    return 0;
  }
3529
  virtual bool is_crashed() const
  {
    /* Default: not crashed. */
    return false;
  }
3530
  virtual bool auto_repair(int error) const { return 0; }
unknown's avatar
unknown committed
3531

3532 3533
  void update_global_table_stats();
  void update_global_index_stats();
3534

3535
#define CHF_CREATE_FLAG 0
3536 3537
#define CHF_DELETE_FLAG 1
#define CHF_RENAME_FLAG 2
3538
#define CHF_INDEX_FLAG  3
3539

3540 3541 3542
  /**
    @note lock_count() can return > 1 if the table is MERGE or partitioned.
  */
unknown's avatar
unknown committed
3543
  virtual uint lock_count(void) const
  {
    /* Default: a single lock. */
    return 1;
  }
3544 3545
  /**
    Is not invoked for non-transactional temporary tables.
3546

3547 3548 3549
    @note store_lock() can return more than one lock if the table is MERGE
    or partitioned.

3550
    @note that one can NOT rely on table->in_use in store_lock().  It may
unknown's avatar
unknown committed
3551
    refer to a different thread if called from mysql_lock_abort_for_thread().
3552 3553 3554 3555

    @note If the table is MERGE, store_lock() can return less locks
    than lock_count() claimed. This can happen when the MERGE children
    are not attached when this is called from another thread.
unknown's avatar
unknown committed
3556
  */
unknown's avatar
unknown committed
3557 3558 3559
  virtual THR_LOCK_DATA **store_lock(THD *thd,
				     THR_LOCK_DATA **to,
				     enum thr_lock_type lock_type)=0;
3560

3561
  /** Type of table for caching query */
3562
  virtual uint8 table_cache_type()
  {
    /* Default: HA_CACHE_TBL_NONTRANSACT. */
    return HA_CACHE_TBL_NONTRANSACT;
  }
3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594


  /**
    @brief Register a named table with a call back function to the query cache.

    @param thd The thread handle
    @param table_key A pointer to the table name in the table cache
    @param key_length The length of the table name
    @param[out] engine_callback The pointer to the storage engine call back
      function
    @param[out] engine_data Storage engine specific data which could be
      anything

    This method offers the storage engine, the possibility to store a reference
    to a table name which is going to be used with query cache. 
    The method is called each time a statement is written to the cache and can
    be used to verify if a specific statement is cachable. It also offers
    the possibility to register a generic (but static) call back function which
    is called each time a statement is matched against the query cache.

    @note If engine_data supplied with this function is different from
      engine_data supplied with the callback function, and the callback returns
      FALSE, a table invalidation on the current table will occur.

    @return Upon success the engine_callback will point to the storage engine
      call back function, if any, and engine_data will point to any storage
      engine data used in the specific implementation.
      @retval TRUE Success
      @retval FALSE The specified table or current statement should not be
        cached
  */

3595
  virtual my_bool register_query_cache_table(THD *thd, char *table_key,
                                             uint key_length,
                                             qc_engine_callback
                                             *engine_callback,
                                             ulonglong *engine_data)
  {
    /*
      Default: no engine callback is registered and caching is allowed.
      Note that *engine_data is not written here.
    */
    *engine_callback= NULL;
    return TRUE;
  }
3604

unknown's avatar
unknown committed
3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644
  /*
    Count the tables this one is built on that are not visible in the
    all-tables list (as is the case for myisammrg and partitioned tables).

    tables_type          mask to which the types of those tables should be
                         added

    returns number of such tables; the base implementation returns 0 and
    leaves tables_type untouched
  */

  virtual uint count_query_cache_dependant_tables(uint8 *tables_type
                                                  __attribute__((unused)))
  {
    return 0;
  }

  /*
    Register the tables this one is built on that are not visible in the
    all-tables list (as is the case for myisammrg and partitioned tables).

    @note they should be counted by the method above

    thd                  Thread handle
    cache                Query cache pointer
    block                Query cache block to write the table
    n                    Number of the table

    The base implementation uses none of its arguments and returns FALSE.

    @retval FALSE - OK
    @retval TRUE  - Error
  */

  virtual my_bool
    register_query_cache_dependant_tables(THD *thd
                                          __attribute__((unused)),
                                          Query_cache *cache
                                          __attribute__((unused)),
                                          Query_cache_block_table **block
                                          __attribute__((unused)),
                                          uint *n __attribute__((unused)))
  {
    return FALSE;
  }
3645

unknown's avatar
unknown committed
3646
 /*
3647 3648
   Check if the primary key (if there is one) is a clustered and a
   reference key. This means:
3649 3650 3651 3652 3653 3654 3655 3656 3657 3658

   - Data is stored together with the primary key (no secondary lookup
     needed to find the row data). The optimizer uses this to find out
     the cost of fetching data.
   - The primary key is part of each secondary key and is used
     to find the row data in the primary index when reading through
     secondary indexes.
   - When doing a HA_KEYREAD_ONLY we get also all the primary key parts
     into the row. This is critical property used by index_merge.

3659 3660 3661 3662 3663 3664 3665
   All the above is usually true for engines that store the row
   data in the primary key index (e.g. in a b-tree), and use the primary
   key value as a position().  InnoDB is an example of such an engine.

   For such a clustered primary key, the following should also hold:
   index_flags() should contain HA_CLUSTERED_INDEX
   table_flags() should contain HA_TABLE_SCAN_ON_INDEX
3666 3667 3668

   @retval TRUE   yes
   @retval FALSE  No.
unknown's avatar
unknown committed
3669 3670
 */
 virtual bool primary_key_is_clustered()
 {
   /* Default: no. */
   return FALSE;
 }
3671
 virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
unknown's avatar
unknown committed
3672 3673 3674
 {
   return memcmp(ref1, ref2, ref_length);
 }
3675

unknown's avatar
Merge  
unknown committed
3676 3677 3678
 /*
   Condition pushdown to storage engines
 */
3679

3680
 /**
3681
   Push condition down to the table handler.
3682

3683 3684 3685 3686
   @param  cond   Condition to be pushed. The condition tree must not be
                  modified by the caller.

   @return
3687 3688 3689
     The 'remainder' condition that caller must use to filter out records.
     NULL means the handler will not return rows that do not match the
     passed condition.
3690

3691
   @note
3692 3693 3694 3695 3696
   The pushed conditions form a stack (from which one can remove the
   last pushed condition using cond_pop).
   The table handler filters out rows using (pushed_cond1 AND pushed_cond2 
   AND ... AND pushed_condN)
   or less restrictive condition, depending on handler's capabilities.
3697

3698
   handler->ha_reset() call empties the condition stack.
3699 3700 3701
   Calls to rnd_init/rnd_end, index_init/index_end etc do not affect the
   condition stack.
 */ 
unknown's avatar
Merge  
unknown committed
3702
 virtual const COND *cond_push(const COND *cond)
 {
   /* Default: nothing is pushed; the whole condition is the remainder. */
   return cond;
 }
3703
 /**
3704
   Pop the top condition from the condition stack of the handler instance.
3705 3706

   Pops the top of the condition stack, if the stack is not empty.
3707
 */
unknown's avatar
Merge  
unknown committed
3708
 virtual void cond_pop()
 {
   /* Default: nothing to pop. */
 }
3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733

 /**
   Push down an index condition to the handler.

   The server will use this method to push down a condition it wants
   the handler to evaluate when retrieving records using a specified
   index. The pushed index condition will only refer to fields from
   this handler that are contained in the index (but it may also refer
   to fields in other handlers). Before the handler evaluates the
   condition it must read the content of the index entry into the
   record buffer.

   The handler is free to decide if and how much of the condition it
   will take responsibility for evaluating. Based on this evaluation
   it should return the part of the condition it will not evaluate.
   If it decides to evaluate the entire condition it should return
   NULL. If it decides not to evaluate any part of the condition it
   should return a pointer to the same condition as given as argument.

   @param keyno    the index number to evaluate the condition on
   @param idx_cond the condition to be evaluated by the handler

   @return The part of the pushed condition that the handler decides
           not to evaluate
 */
3734
 virtual Item *idx_cond_push(uint keyno, Item* idx_cond)
 {
   /* Default: no part of the condition is taken over by the handler. */
   return idx_cond;
 }
3735

3736 3737 3738 3739 3740 3741 3742
 /** Reset information about pushed index conditions */
 virtual void cancel_pushed_idx_cond()
 {
   /* Put all three pushed-index-condition fields back to "nothing pushed". */
   in_range_check_pushed_down= false;
   pushed_idx_cond_keyno= MAX_KEY;
   pushed_idx_cond= NULL;
 }
3743 3744 3745
 /**
   Part of old, deprecated in-place ALTER API.
 */
unknown's avatar
unknown committed
3746 3747 3748
 virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
                                         uint table_changes)
 {
   /* Default: always answers COMPATIBLE_DATA_NO. */
   return COMPATIBLE_DATA_NO;
 }
3749

3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809
 /* On-line/in-place ALTER TABLE interface. */

 /*
   Here is an outline of on-line/in-place ALTER TABLE execution through
   this interface.

   Phase 1 : Initialization
   ========================
   During this phase we determine which algorithm should be used
   for execution of ALTER TABLE and what level of concurrency it will
   require.

   *) This phase starts by opening the table and preparing description
      of the new version of the table.
   *) Then we check if it is impossible even in theory to carry out
      this ALTER TABLE using the in-place algorithm. For example, because
      we need to change storage engine or the user has explicitly requested
      usage of the "copy" algorithm.
   *) If in-place ALTER TABLE is theoretically possible, we continue
      by compiling differences between old and new versions of the table
      in the form of HA_ALTER_FLAGS bitmap. We also build a few
      auxiliary structures describing requested changes and store
      all these data in the Alter_inplace_info object.
   *) Then the handler::check_if_supported_inplace_alter() method is called
      in order to find if the storage engine can carry out changes requested
      by this ALTER TABLE using the in-place algorithm. To determine this,
      the engine can rely on data in HA_ALTER_FLAGS/Alter_inplace_info
      passed to it as well as on its own checks. If the in-place algorithm
      can be used for this ALTER TABLE, the level of required concurrency for
      its execution is also returned.
      If any errors occur during the handler call, ALTER TABLE is aborted
      and no further handler functions are called.
   *) Locking requirements of the in-place algorithm are compared to any
      concurrency requirements specified by user. If there is a conflict
      between them, we either switch to the copy algorithm or emit an error.

   Phase 2 : Execution
   ===================

   In this phase the operations are executed.

   *) As the first step, we acquire a lock corresponding to the concurrency
      level which was returned by handler::check_if_supported_inplace_alter()
      and requested by the user. This lock is held for most of the
      duration of in-place ALTER (if HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
      or HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE were returned we acquire an
      exclusive lock for duration of the next step only).
   *) After that we call handler::ha_prepare_inplace_alter_table() to give the
      storage engine a chance to update its internal structures with a higher
      lock level than the one that will be used for the main step of algorithm.
      After that we downgrade the lock if it is necessary.
   *) After that, the main step of this phase and algorithm is executed.
      We call the handler::ha_inplace_alter_table() method, which carries out the
      changes requested by ALTER TABLE but does not make them visible to other
      connections yet.
   *) We ensure that no other connection uses the table by upgrading our
      lock on it to exclusive.
   *) a) If the previous step succeeds, handler::ha_commit_inplace_alter_table() is
         called to allow the storage engine to do any final updates to its structures,
         to make all earlier changes durable and visible to other connections.
3810
      b) If we have failed to upgrade lock or any errors have occurred during the
3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974
         handler function calls (including commit), we call
         handler::ha_commit_inplace_alter_table() with commit= false
         to roll back all changes which were done during previous steps.

  Phase 3 : Final
  ===============

  In this phase we:

  *) Update SQL-layer data-dictionary by installing .FRM file for the new version
     of the table.
  *) Inform the storage engine about this change by calling the
     handler::ha_notify_table_changed() method.
  *) Destroy the Alter_inplace_info and handler_ctx objects.

 */

 /**
    Check if a storage engine supports a particular alter table in-place

    @param    altered_table     TABLE object for new version of table.
    @param    ha_alter_info     Structure describing changes to be done
                                by ALTER TABLE and holding data used
                                during in-place alter.

    @retval   HA_ALTER_ERROR                  Unexpected error.
    @retval   HA_ALTER_INPLACE_NOT_SUPPORTED  Not supported, must use copy.
    @retval   HA_ALTER_INPLACE_EXCLUSIVE_LOCK Supported, but requires X lock.
    @retval   HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
                                              Supported, but requires SNW lock
                                              during main phase. Prepare phase
                                              requires X lock.
    @retval   HA_ALTER_INPLACE_SHARED_LOCK    Supported, but requires SNW lock.
    @retval   HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
                                              Supported, concurrent reads/writes
                                              allowed. However, prepare phase
                                              requires X lock.
    @retval   HA_ALTER_INPLACE_NO_LOCK        Supported, concurrent
                                              reads/writes allowed.

    @note The default implementation uses the old in-place ALTER API
    to determine if the storage engine supports in-place ALTER or not.

    @note Called without holding thr_lock.c lock.
 */
 virtual enum_alter_inplace_result
 check_if_supported_inplace_alter(TABLE *altered_table,
                                  Alter_inplace_info *ha_alter_info);


 /**
    Public functions wrapping the actual handler call.
    @see prepare_inplace_alter_table()
 */
 bool ha_prepare_inplace_alter_table(TABLE *altered_table,
                                     Alter_inplace_info *ha_alter_info);


 /**
    Public function wrapping the actual handler call.
    @see inplace_alter_table()
 */
 bool ha_inplace_alter_table(TABLE *altered_table,
                             Alter_inplace_info *ha_alter_info)
 {
   /* Thin forwarder to the engine's inplace_alter_table(). */
   const bool failed= inplace_alter_table(altered_table, ha_alter_info);
   return failed;
 }


 /**
    Public function wrapping the actual handler call.
    Allows us to enforce asserts regardless of handler implementation.
    @see commit_inplace_alter_table()
 */
 bool ha_commit_inplace_alter_table(TABLE *altered_table,
                                    Alter_inplace_info *ha_alter_info,
                                    bool commit);


 /**
    Public function wrapping the actual handler call.
    @see notify_table_changed()
 */
 void ha_notify_table_changed()
 {
   notify_table_changed();
 }


protected:
 /**
    Allows the storage engine to update internal structures with concurrent
    writes blocked. If check_if_supported_inplace_alter() returns
    HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE or
    HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE, this function is called with
    an exclusive lock; otherwise the same level of locking as for
    inplace_alter_table() will be used.

    @note Storage engines are responsible for reporting any errors by
    calling my_error()/print_error()

    @note If this function reports error, commit_inplace_alter_table()
    will be called with commit= false.

    @note For partitioning, failing to prepare one partition, means that
    commit_inplace_alter_table() will be called to roll back changes for
    all partitions. This means that commit_inplace_alter_table() might be
    called without prepare_inplace_alter_table() having been called first
    for a given partition.

    @param    altered_table     TABLE object for new version of table.
    @param    ha_alter_info     Structure describing changes to be done
                                by ALTER TABLE and holding data used
                                during in-place alter.

    @retval   true              Error
    @retval   false             Success
 */
 virtual bool prepare_inplace_alter_table(TABLE *altered_table,
                                          Alter_inplace_info *ha_alter_info)
 {
   /* Default: nothing to prepare; report success. */
   return false;
 }


 /**
    Alter the table structure in-place with operations specified using HA_ALTER_FLAGS
    and Alter_inplace_info. The level of concurrency allowed during this
    operation depends on the return value from check_if_supported_inplace_alter().

    @note Storage engines are responsible for reporting any errors by
    calling my_error()/print_error()

    @note If this function reports error, commit_inplace_alter_table()
    will be called with commit= false.

    @param    altered_table     TABLE object for new version of table.
    @param    ha_alter_info     Structure describing changes to be done
                                by ALTER TABLE and holding data used
                                during in-place alter.

    @retval   true              Error
    @retval   false             Success
 */
 virtual bool inplace_alter_table(TABLE *altered_table,
                                  Alter_inplace_info *ha_alter_info)
 {
   /* Default: no work is done; report success. */
   return false;
 }


 /**
    Commit or rollback the changes made during prepare_inplace_alter_table()
    and inplace_alter_table() inside the storage engine.
    Note that in case of rollback the allowed level of concurrency during
    this operation will be the same as for inplace_alter_table() and thus
    might be higher than during prepare_inplace_alter_table(). (For example,
    concurrent writes were blocked during prepare, but might not be during
    rollback).

    @note Storage engines are responsible for reporting any errors by
    calling my_error()/print_error()

    @note If this function with commit= true reports error, it will be called
    again with commit= false.

    @note In case of partitioning, this function might be called for rollback
    without prepare_inplace_alter_table() having been called first.
3975 3976 3977 3978
    Also, for partitioned tables ha_alter_info->group_commit_ctx is set to a
    NULL-terminated array of the partition handlers. If all of them are
    committed as one, group_commit_ctx should be set to NULL to indicate
    to the partitioning handler that all partition handlers are committed.
3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992
    @see prepare_inplace_alter_table().

    @param    altered_table     TABLE object for new version of table.
    @param    ha_alter_info     Structure describing changes to be done
                                by ALTER TABLE and holding data used
                                during in-place alter.
    @param    commit            True => Commit, False => Rollback.

    @retval   true              Error
    @retval   false             Success
 */
 virtual bool commit_inplace_alter_table(TABLE *altered_table,
                                         Alter_inplace_info *ha_alter_info,
                                         bool commit)
 {
   /*
     Default: nothing to commit or roll back. Clearing group_commit_ctx
     marks all handlers as committed.
   */
   ha_alter_info->group_commit_ctx= NULL;
   return false;
 }
3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010


 /**
    Notify the storage engine that the table structure (.FRM) has been updated.

    @note No errors are allowed during notify_table_changed().
 */
 virtual void notify_table_changed();

public:
 /* End of On-line/in-place ALTER TABLE interface. */


4011
  /**
4012 4013 4014 4015 4016
    use_hidden_primary_key() is called in case of an update/delete when
    (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
    but we don't have a primary key
  */
  virtual void use_hidden_primary_key();
4017 4018 4019 4020 4021
  /**
    Ask the handlerton for its ALTER TABLE capability flags.

    @param flags  passed through unchanged to ht->alter_table_flags()

    @return the handlerton callback's result, or 0 when the handlerton
            provides no alter_table_flags callback
  */
  virtual uint alter_table_flags(uint flags)
  {
    if (!ht->alter_table_flags)
      return 0;
    return ht->alter_table_flags(flags);
  }
4023

4024
  virtual LEX_STRING *engine_name();
4025 4026
  
  /** @return the `table` member of this handler. */
  TABLE* get_table()
  {
    return table;
  }
4027
  /** @return the `table_share` member of this handler. */
  TABLE_SHARE* get_table_share()
  {
    return table_share;
  }
4028 4029 4030 4031
protected:
  /* Service methods for use by storage engines. */
  void **ha_data(THD *) const;
  THD *ha_thd(void) const;
4032

Marc Alff's avatar
Marc Alff committed
4033 4034 4035 4036
  /**
    Acquire the instrumented table information from a table share.
    @return an instrumented table share, or NULL.
  */
4037
  PSI_table_share *ha_table_share_psi() const;
Marc Alff's avatar
Marc Alff committed
4038

4039 4040 4041 4042 4043 4044 4045 4046
  /**
    Default rename_table() and delete_table() rename/delete files with a
    given name and extensions from bas_ext().

    These methods can be overridden, but their default implementation
    provide useful functionality.
  */
  virtual int rename_table(const char *from, const char *to);
4047 4048 4049 4050
  /**
    Delete a table in the engine. Called for base as well as temporary
    tables.
  */
4051
  virtual int delete_table(const char *name);
4052

4053 4054
public:
  inline bool check_table_binlog_row_based(bool binlog_row);
4055
private:
4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067
  /*
    Call mark_trx_read_write_internal() only the first time; the
    mark_trx_read_write_done flag remembers that it has already run.
  */
  inline void mark_trx_read_write()
  {
    const bool first_call= unlikely(!mark_trx_read_write_done);
    if (!first_call)
      return;
    /* Set the flag before the call, as the original code does. */
    mark_trx_read_write_done= 1;
    mark_trx_read_write_internal();
  }
  void mark_trx_read_write_internal();
  bool check_table_binlog_row_based_internal(bool binlog_row);

4068 4069 4070 4071 4072 4073
protected:
  /*
    These are intended to be used only by handler::ha_xxxx() functions
    However, engines that implement read_range_XXX() (like MariaRocks)
    or embed other engines (like ha_partition) may need to call these also
  */
4074 4075
  inline void increment_statistics(ulong SSV::*offset) const;
  inline void decrement_statistics(ulong SSV::*offset) const;
4076

4077
private:
4078
  /*
4079
    Low-level primitives for storage engines.  These should be
4080 4081 4082
    overridden by the storage engine class. To call these methods, use
    the corresponding 'ha_*' method above.
  */
4083

4084
  virtual int open(const char *name, int mode, uint test_if_locked)=0;
Sergei Golubchik's avatar
Sergei Golubchik committed
4085
  /* Note: ha_index_read_idx_map() may bypass index_init() */
4086 4087
  virtual int index_init(uint idx, bool sorted)
  {
    /* Default: no per-index setup; report success. */
    return 0;
  }
  virtual int index_end()
  {
    /* Default: nothing to release; report success. */
    return 0;
  }
4088 4089 4090 4091 4092 4093 4094 4095 4096
  /**
    rnd_init() can be called two times without rnd_end() in between
    (it only makes sense if scan=1).
    In that case the second call should prepare for a new table scan (e.g.
    if rnd_init() allocates a cursor, the second call should position it
    at the start of the table; no need to deallocate and allocate it again).
  */
  virtual int rnd_init(bool scan)= 0;
  virtual int rnd_end()
  {
    /* Default: nothing to clean up; report success. */
    return 0;
  }
4097 4098 4099 4100 4101
  /** Default implementation: reports HA_ERR_WRONG_COMMAND. */
  virtual int write_row(uchar *buf __attribute__((unused)))
  {
    const int rc= HA_ERR_WRONG_COMMAND;
    return rc;
  }

4102 4103 4104 4105 4106 4107 4108 4109
  /**
    Update a single row.

    Note: If HA_ERR_FOUND_DUPP_KEY is returned, the handler must read
    all columns of the row so MySQL can create an error message. If
    the columns required for the error message are not read, the error
    message will contain garbage.
  */
4110 4111 4112 4113 4114 4115
  /* Default implementation: reports HA_ERR_WRONG_COMMAND. */
  virtual int update_row(const uchar *old_data __attribute__((unused)),
                         uchar *new_data __attribute__((unused)))
  {
    const int rc= HA_ERR_WRONG_COMMAND;
    return rc;
  }

4116 4117 4118 4119 4120 4121
  /*
    Optimized function for updating the first row. Only used by sequence
    tables
  */
  virtual int update_first_row(uchar *new_data);

4122 4123 4124 4125
  /** Default implementation: reports HA_ERR_WRONG_COMMAND. */
  virtual int delete_row(const uchar *buf __attribute__((unused)))
  {
    const int rc= HA_ERR_WRONG_COMMAND;
    return rc;
  }
4126 4127 4128 4129 4130 4131 4132
  /**
    Reset state of file to after 'open'.
    This function is called after every statement for all tables used
    by that statement.
  */
  virtual int reset()
  {
    /* Default: no state to reset; report success. */
    return 0;
  }
  virtual Table_flags table_flags(void) const= 0;
4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154
  /**
    Is not invoked for non-transactional temporary tables.

    Tells the storage engine that we intend to read or write data
    from the table. This call is prefixed with a call to handler::store_lock()
    and is invoked only for those handler instances that stored the lock.

    Calls to rnd_init/index_init are prefixed with this call. When table
    IO is complete, we call external_lock(F_UNLCK).
    A storage engine writer should expect that each call to
    ::external_lock(F_[RD|WR]LCK) is followed by a call to
    ::external_lock(F_UNLCK). If it is not, it is a bug in MySQL.

    The name and signature originate from the first implementation
    in MyISAM, which would call fcntl to set/clear an advisory
    lock on the data file in this method.

    @param   lock_type    F_RDLCK, F_WRLCK, F_UNLCK

    @return  non-0 in case of failure, 0 in case of success.
    When lock_type is F_UNLCK, the return value is ignored.
  */
4155 4156 4157 4158 4159
  virtual int external_lock(THD *thd __attribute__((unused)),
                            int lock_type __attribute__((unused)))
  {
    /* Default: no engine-side locking; always succeeds. */
    const int rc= 0;
    return rc;
  }
4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172
  virtual void release_auto_increment()
  {
    /* Default implementation: nothing to release. */
  }
  /** admin commands - called from mysql_admin_table */
  virtual int check_for_upgrade(HA_CHECK_OPT *check_opt)
  {
    /* Default implementation: returns 0. */
    return 0;
  }
  virtual int check(THD* thd, HA_CHECK_OPT* check_opt)
  {
    /* Default implementation: returns HA_ADMIN_NOT_IMPLEMENTED. */
    return HA_ADMIN_NOT_IMPLEMENTED;
  }

  /**
     In this method check_opt can be modified
     to specify CHECK option to use to call check()
     upon the table.
  */
  virtual int repair(THD* thd, HA_CHECK_OPT* check_opt)
  {
    /*
      Default: not implemented. The assert checks that the handler does not
      advertise HA_CAN_REPAIR while still using this default.
    */
    DBUG_ASSERT(!(ha_table_flags() & HA_CAN_REPAIR));
    return HA_ADMIN_NOT_IMPLEMENTED;
  }
4177
  virtual void start_bulk_insert(ha_rows rows, uint flags)
  {
    /* Default implementation: does nothing. */
  }
4178
  virtual int end_bulk_insert()
  {
    /* Default implementation: returns 0. */
    return 0;
  }
4179
protected:
4180 4181 4182
  virtual int index_read(uchar * buf, const uchar * key, uint key_len,
                         enum ha_rkey_function find_flag)
  {
    /* Default implementation: reports HA_ERR_WRONG_COMMAND. */
    return HA_ERR_WRONG_COMMAND;
  }
4183
  friend class ha_partition;
4184
  friend class ha_sequence;
4185
public:
4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208
  /**
    This method is similar to update_row, however the handler doesn't need
    to execute the updates at this point in time. The handler can be certain
    that another call to bulk_update_row will occur OR a call to
    exec_bulk_update before the set of updates in this query is concluded.

    The default implementation hits DBUG_ASSERT(FALSE) and returns
    HA_ERR_WRONG_COMMAND.

    @param    old_data       Old record
    @param    new_data       New record
    @param    dup_key_found  Number of duplicate keys found

    @retval  0   Bulk update used by handler
    @retval  1   Bulk update not used, normal operation used
  */
  virtual int bulk_update_row(const uchar *old_data, uchar *new_data,
                              uint *dup_key_found)
  {
    DBUG_ASSERT(FALSE);
    return HA_ERR_WRONG_COMMAND;
  }
  /**
    This is called to delete all rows in a table.
    If the handler doesn't support this, then this function will
    return HA_ERR_WRONG_COMMAND and MySQL will delete the rows one
4209
    by one.
4210 4211 4212
  */
  virtual int delete_all_rows()
  {
    /* Default: record HA_ERR_WRONG_COMMAND in my_errno and return it. */
    const int rc= HA_ERR_WRONG_COMMAND;
    my_errno= rc;
    return rc;
  }
4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231
  /**
    Quickly remove all rows from a table.

    @remark This method is responsible for implementing MySQL's TRUNCATE
            TABLE statement, which is a DDL operation. As such, an engine
            can bypass certain integrity checks and in some cases avoid
            fine-grained locking (e.g. row locks) which would normally be
            required for a DELETE statement.

    @remark Typically, truncate is not used if it can result in integrity
            violation. For example, truncate is not used when a foreign
            key references the table, but it might be used if foreign key
            checks are disabled.

    @remark Engine is responsible for resetting the auto-increment counter.

    @remark The table is locked in exclusive mode.
  */
  virtual int truncate()
Sergei Golubchik's avatar
Sergei Golubchik committed
4232 4233 4234 4235
  {
    int error= delete_all_rows();
    return error ? error : reset_auto_increment(0);
  }
4236 4237
  /**
    Reset the auto-increment counter to the given value, i.e. the next row
Sergei Golubchik's avatar
Sergei Golubchik committed
4238
    inserted will get the given value.
4239 4240
  */
  virtual int reset_auto_increment(ulonglong value)
  {
    /* Default implementation: ignores `value` and returns 0. */
    return 0;
  }
4242 4243 4244 4245
  virtual int optimize(THD* thd, HA_CHECK_OPT* check_opt)
  {
    /* Default implementation: returns HA_ADMIN_NOT_IMPLEMENTED. */
    return HA_ADMIN_NOT_IMPLEMENTED;
  }
  virtual int analyze(THD* thd, HA_CHECK_OPT* check_opt)
  {
    /* Default implementation: returns HA_ADMIN_NOT_IMPLEMENTED. */
    return HA_ADMIN_NOT_IMPLEMENTED;
  }
unknown's avatar
unknown committed
4246
  virtual bool check_and_repair(THD *thd)
  {
    /* Default implementation: returns TRUE. */
    return TRUE;
  }
4247 4248 4249 4250 4251 4252 4253 4254
  virtual int disable_indexes(uint mode)
  {
    /* Default implementation: reports HA_ERR_WRONG_COMMAND. */
    return HA_ERR_WRONG_COMMAND;
  }
  virtual int enable_indexes(uint mode)
  {
    /* Default implementation: reports HA_ERR_WRONG_COMMAND. */
    return HA_ERR_WRONG_COMMAND;
  }
  virtual int discard_or_import_tablespace(my_bool discard)
  {
    /* Default: record HA_ERR_WRONG_COMMAND in my_errno and return it. */
    const int rc= HA_ERR_WRONG_COMMAND;
    my_errno= rc;
    return rc;
  }
  virtual void prepare_for_alter()
  {
    /* Default implementation: does nothing. */
  }
  virtual void drop_table(const char *name);
  virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0;

4255
  virtual int create_partitioning_metadata(const char *name, const char *old_name,
4256
                                   int action_flag)
4257 4258 4259 4260
  { return FALSE; }

  /** Default implementation: reports HA_ERR_WRONG_COMMAND. */
  virtual int change_partitions(HA_CREATE_INFO *create_info,
                                const char *path,
                                ulonglong * const copied,
                                ulonglong * const deleted,
                                const uchar *pack_frm_data,
                                size_t pack_frm_len)
  {
    return HA_ERR_WRONG_COMMAND;
  }
  virtual int drop_partitions(const char *path)
  {
    /* Default implementation: reports HA_ERR_WRONG_COMMAND. */
    return HA_ERR_WRONG_COMMAND;
  }
  virtual int rename_partitions(const char *path)
  {
    /* Default implementation: reports HA_ERR_WRONG_COMMAND. */
    return HA_ERR_WRONG_COMMAND;
  }
4270 4271 4272 4273 4274 4275 4276 4277 4278 4279
  /**
    Install the Handler_share reference for this handler.

    @param arg_ha_share  reference to store; must not be NULL

    @retval true   ha_share was already set, or arg_ha_share is NULL
    @retval false  reference stored
  */
  virtual bool set_ha_share_ref(Handler_share **arg_ha_share)
  {
    DBUG_ASSERT(!ha_share);
    DBUG_ASSERT(arg_ha_share);
    /* Outside debug builds the same conditions are reported as failure. */
    const bool acceptable= !ha_share && arg_ha_share;
    if (acceptable)
      ha_share= arg_ha_share;
    return !acceptable;
  }
  /** @return the value of the m_lock_type member. */
  int get_lock_type() const
  {
    return m_lock_type;
  }
4280 4281 4282 4283
public:
  /*
    XXX to be removed, see ha_partition::partition_ht().
    The base implementation returns this handler's own handlerton.
  */
  virtual handlerton *partition_ht() const
  {
    return ht;
  }
4284
  inline int ha_write_tmp_row(uchar *buf);
4285
  inline int ha_delete_tmp_row(uchar *buf);
4286
  inline int ha_update_tmp_row(const uchar * old_data, uchar * new_data);
4287

4288
  virtual void set_lock_type(enum thr_lock_type lock);
4289

4290
  friend enum icp_result handler_index_cond_check(void* h_arg);
4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305

  /**
    Find unique record by index or unique constraint

    @param record        record to find (also will be filled with
                         actual record fields)
    @param unique_ref    index or unique constraint number (depends
                         on what is used in the engine)

    @retval -1 Error
    @retval  1 Not found
    @retval  0 Found
  */
  virtual int find_unique_row(uchar *record, uint unique_ref)
  {
    /* Default: unsupported, reported as -1. */
    return -1;
  }
4306 4307 4308 4309 4310
protected:
  Handler_share *get_ha_share_ptr();
  void set_ha_share_ptr(Handler_share *arg_ha_share);
  void lock_shared_ha_data();
  void unlock_shared_ha_data();
unknown's avatar
unknown committed
4311 4312
};

Sergey Petrunya's avatar
Sergey Petrunya committed
4313
#include "multi_range_read.h"
4314
#include "group_by_handler.h"
4315

4316
bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno);
4317

unknown's avatar
unknown committed
4318 4319 4320
	/* Some extern variables used with handlers */

extern const char *ha_row_type[];
4321 4322
extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[];
extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[];
4323
extern TYPELIB tx_isolation_typelib;
4324
extern const char *myisam_stats_method_names[];
4325
extern ulong total_ha, total_ha_2pc;
unknown's avatar
unknown committed
4326

4327
/* lookups */
4328
plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name, bool tmp_table);
4329
plugin_ref ha_lock_engine(THD *thd, const handlerton *hton);
unknown's avatar
unknown committed
4330
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type);
unknown's avatar
unknown committed
4331
handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
unknown's avatar
unknown committed
4332
                         handlerton *db_type);
Sergei Golubchik's avatar
Sergei Golubchik committed
4333
handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute);
unknown's avatar
unknown committed
4334

Sergei Golubchik's avatar
Sergei Golubchik committed
4335 4336 4337 4338 4339
/**
  Convenience overload: resolve a legacy_db_type to its handlerton with
  ha_resolve_by_legacy_type() and forward to the handlerton-based
  ha_checktype().
*/
static inline handlerton *ha_checktype(THD *thd, enum legacy_db_type type,
                                       bool no_substitute = false)
{
  handlerton *const resolved= ha_resolve_by_legacy_type(thd, type);
  return ha_checktype(thd, resolved, no_substitute);
}
unknown's avatar
unknown committed
4340

unknown's avatar
unknown committed
4341
static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type)
unknown's avatar
unknown committed
4342 4343 4344 4345
{
  return (db_type == NULL) ? DB_TYPE_UNKNOWN : db_type->db_type;
}

unknown's avatar
unknown committed
4346
static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type)
unknown's avatar
unknown committed
4347
{
4348
  return db_type == NULL ? "UNKNOWN" : hton_name(db_type)->str;
unknown's avatar
unknown committed
4349 4350
}

unknown's avatar
unknown committed
4351
static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag)
unknown's avatar
unknown committed
4352
{
4353
  return db_type == NULL ? FALSE : MY_TEST(db_type->flags & flag);
unknown's avatar
unknown committed
4354 4355
}

unknown's avatar
unknown committed
4356
static inline bool ha_storage_engine_is_enabled(const handlerton *db_type)
unknown's avatar
unknown committed
4357
{
4358
  return (db_type && db_type->create) ?
unknown's avatar
unknown committed
4359 4360
         (db_type->state == SHOW_OPTION_YES) : FALSE;
}
4361

4362 4363
#define view_pseudo_hton ((handlerton *)1)

4364
/* basic stuff */
4365
int ha_init_errors(void);
unknown's avatar
unknown committed
4366
int ha_init(void);
4367
int ha_end(void);
unknown's avatar
unknown committed
4368 4369
int ha_initialize_handlerton(st_plugin_int *plugin);
int ha_finalize_handlerton(st_plugin_int *plugin);
unknown's avatar
unknown committed
4370

4371
TYPELIB *ha_known_exts(void);
unknown's avatar
unknown committed
4372
int ha_panic(enum ha_panic_function flag);
4373
void ha_close_connection(THD* thd);
4374
void ha_kill_query(THD* thd, enum thd_kill_levels level);
unknown's avatar
unknown committed
4375
bool ha_flush_logs(handlerton *db_type);
4376
void ha_drop_database(char* path);
4377
void ha_checkpoint_state(bool disable);
4378
void ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *));
unknown's avatar
unknown committed
4379 4380
int ha_create_table(THD *thd, const char *path,
                    const char *db, const char *table_name,
4381
                    HA_CREATE_INFO *create_info, LEX_CUSTRING *frm);
unknown's avatar
unknown committed
4382
int ha_delete_table(THD *thd, handlerton *db_type, const char *path,
unknown's avatar
unknown committed
4383
                    const char *db, const char *alias, bool generate_warning);
4384

4385
/* statistics and info */
unknown's avatar
unknown committed
4386
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat);
4387

4388
/* discovery */
4389 4390 4391 4392 4393
#ifdef MYSQL_SERVER
class Discovered_table_list: public handlerton::discovered_list
{
  THD *thd;
  const char *wild, *wend;
4394
  bool with_temps; // whether to include temp tables in the result
4395 4396 4397 4398 4399
public:
  Dynamic_array<LEX_STRING*> *tables;

  Discovered_table_list(THD *thd_arg, Dynamic_array<LEX_STRING*> *tables_arg,
                        const LEX_STRING *wild_arg);
4400 4401
  Discovered_table_list(THD *thd_arg, Dynamic_array<LEX_STRING*> *tables_arg)
    : thd(thd_arg), wild(NULL), with_temps(true), tables(tables_arg) {}
4402 4403 4404 4405 4406 4407 4408 4409 4410
  ~Discovered_table_list() {}

  bool add_table(const char *tname, size_t tlen);
  bool add_file(const char *fname);

  void sort();
  void remove_duplicates(); // assumes that the list is sorted
};

4411
int ha_discover_table(THD *thd, TABLE_SHARE *share);
4412
int ha_discover_table_names(THD *thd, LEX_STRING *db, MY_DIR *dirp,
4413
                            Discovered_table_list *result, bool reusable);
4414
bool ha_table_exists(THD *thd, const char *db, const char *table_name,
4415
                     handlerton **hton= 0, bool *is_sequence= 0);
4416
#endif
4417 4418

/* key cache */
4419
extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *);
unknown's avatar
unknown committed
4420 4421
int ha_resize_key_cache(KEY_CACHE *key_cache);
int ha_change_key_cache_param(KEY_CACHE *key_cache);
4422
int ha_repartition_key_cache(KEY_CACHE *key_cache);
4423 4424
int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);

unknown's avatar
unknown committed
4425
/* report to InnoDB that control passes to the client */
4426
int ha_release_temporary_latches(THD *thd);
4427 4428 4429

/* transactions: interface to handlerton functions */
int ha_start_consistent_snapshot(THD *thd);
unknown's avatar
unknown committed
4430
int ha_commit_or_rollback_by_xid(XID *xid, bool commit);
4431
int ha_commit_one_phase(THD *thd, bool all);
Konstantin Osipov's avatar
Konstantin Osipov committed
4432
int ha_commit_trans(THD *thd, bool all);
4433 4434 4435 4436 4437
int ha_rollback_trans(THD *thd, bool all);
int ha_prepare(THD *thd);
int ha_recover(HASH *commit_list);

/* transactions: these functions never call handlerton functions directly */
4438
int ha_enable_transaction(THD *thd, bool on);
4439 4440 4441

/* savepoints */
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv);
4442
bool ha_rollback_to_savepoint_can_release_mdl(THD *thd);
4443 4444
int ha_savepoint(THD *thd, SAVEPOINT *sv);
int ha_release_savepoint(THD *thd, SAVEPOINT *sv);
4445 4446 4447
#ifdef WITH_WSREP
int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal);
void ha_fake_trx_id(THD *thd);
4448 4449 4450
#else
inline void ha_fake_trx_id(THD *thd) { }
#endif
4451

unknown's avatar
unknown committed
4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462
/*
  Called by storage engines.
*/
void trans_register_ha(THD *thd,
                       bool all,
                       handlerton *ht);

/*
  Storage engine has to assume the transaction will end up with 2pc if
   - there is more than one 2pc-capable storage engine available
   - in the current transaction 2pc was not disabled yet
*/
#define trans_need_2pc(thd, all)                   ((total_ha_2pc > 1) && \
        !((all ? &thd->transaction.all : &thd->transaction.stmt)->no_2pc))

4463 4464 4465
const char *get_canonical_filename(handler *file, const char *path,
                                   char *tmp_path);
bool mysql_xa_recover(THD *thd);
4466
void commit_checkpoint_notify_ha(handlerton *hton, void *cookie);
4467 4468 4469 4470 4471

/*
  Pick the table name to use for the given create info.

  Returns info->alias when lower_case_table_names is 2 and an alias is set;
  in every other case returns name unchanged.
*/
inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{
  if (lower_case_table_names != 2)
    return name;
  if (!info->alias)
    return name;
  return info->alias;
}
4472

4473 4474 4475 4476 4477
/*
  Signature of the row-logging callback handed to binlog_log_row().
  NOTE(review): the two uchar pointers presumably carry the before and
  after row images, mirroring binlog_log_row()'s parameters - confirm.
*/
typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*);

/* Log one row change of "table" through log_func. */
int binlog_log_row(TABLE* table, const uchar *before_record,
                   const uchar *after_record, Log_func *log_func);
4478 4479 4480

/*
  Execute MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) and, when
  TRACKER is a non-NULL Exec_time_tracker pointer, bracket it with
  start_tracking() / stop_tracking().

  TRACKER is evaluated exactly once; the captured pointer is used for both
  calls. The macro therefore honours its first argument instead of relying
  on a variable literally named "tracker" being in scope at the expansion
  site. Call sites that already pass "tracker" behave as before.

  The expansion is a plain brace-enclosed block, so existing call sites
  expand the same way whether or not they add a trailing semicolon.
*/
#define TABLE_IO_WAIT(TRACKER, PSI, OP, INDEX, FLAGS, PAYLOAD) \
  { \
    Exec_time_tracker *this_tracker; \
    if (unlikely((this_tracker= (TRACKER)))) \
      this_tracker->start_tracking(); \
    \
    MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD); \
    \
    if (unlikely(this_tracker)) \
      this_tracker->stop_tracking(); \
  }

4491 4492
void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag);
void print_keydup_error(TABLE *table, KEY *key, myf errflag);
4493 4494 4495

int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info);
int del_global_table_stat(THD *thd, LEX_STRING *db, LEX_STRING *table);
Aleksey Midenkov's avatar
Aleksey Midenkov committed
4496 4497

inline
Aleksey Midenkov's avatar
Aleksey Midenkov committed
4498
bool handlerton::versioned() const
Aleksey Midenkov's avatar
Aleksey Midenkov committed
4499 4500 4501
{
  return flags & HTON_SUPPORTS_SYS_VERSIONING;
}
4502
#endif /* HANDLER_INCLUDED */