row0merge.h 17.6 KB
Newer Older
1 2
/*****************************************************************************

Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2017, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

20 21
/**************************************************//**
@file include/row0merge.h
Index build routines using a merge sort

Created 13/06/2005 Jan Lindstrom
*******************************************************/

#ifndef row0merge_h
#define row0merge_h

#include "univ.i"
#include "data0data.h"
#include "dict0types.h"
#include "trx0types.h"
#include "que0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
#include "rem0rec.h"
#include "btr0types.h"
#include "row0mysql.h"
40
#include "lock0types.h"
41
#include "srv0srv.h"
42 43 44 45
#include "ut0stage.h"

/* Amount of free space reserved in every block for key_version.
NOTE(review): presumably a byte count, since row_merge_block_t is a
byte typedef; confirm against the code that fills the blocks. */
#define ROW_MERGE_RESERVE_SIZE 4
46

47 48 49 50 51 52 53 54 55 56 57 58
/* The clustered index read task is mandatory */
#define COST_READ_CLUSTERED_INDEX            1.0

/* Basic fixed cost of building any type of index */
#define COST_BUILD_INDEX_STATIC              0.5
/* Dynamic cost of building any type of index; the dynamic cost will be
re-distributed based on the page count ratio of each index */
#define COST_BUILD_INDEX_DYNAMIC             0.5

/* The sum of the two values below must be 1.0 */
#define PCT_COST_MERGESORT_INDEX                 0.4
#define PCT_COST_INSERT_INDEX                    0.6

59 60 61
// Forward declaration; this header only uses ib_sequence_t by reference
// (see the sequence parameter of row_merge_build_indexes()).
struct ib_sequence_t;

62 63 64 65 66 67
/** @brief Block size for I/O operations in merge sort.

The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
rounded to a power of 2.

When not creating a PRIMARY KEY that contains column prefixes, this
can be set as small as UNIV_PAGE_SIZE / 2. */
typedef byte	row_merge_block_t;
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84

/** @brief Secondary buffer for I/O operations of merge records.

This buffer is used for writing or reading a record that spans two
row_merge_block_t.  Thus, it must be able to hold one merge record,
whose maximum size is the same as the minimum size of
row_merge_block_t.  The buffer is UNIV_PAGE_SIZE_MAX bytes long. */
typedef byte	mrec_buf_t[UNIV_PAGE_SIZE_MAX];

/** @brief Merge record in row_merge_block_t.

The format is the same as a record in ROW_FORMAT=COMPACT with the
exception that the REC_N_NEW_EXTRA_BYTES are omitted.  A merge record
is addressed through a plain byte pointer (mrec_t*). */
typedef byte	mrec_t;

85 86 87 88 89
/** Merge record in row_merge_buf_t (element type of
row_merge_buf_t::tuples and row_merge_buf_t::tmp_tuples) */
struct mtuple_t {
	dfield_t*	fields;		/*!< data fields */
};

90
/** Buffer for sorting in main memory. */
91
struct row_merge_buf_t {
92 93 94 95 96
	mem_heap_t*	heap;		/*!< memory heap where allocated */
	dict_index_t*	index;		/*!< the index the tuples belong to */
	ulint		total_size;	/*!< total amount of data bytes */
	ulint		n_tuples;	/*!< number of data tuples */
	ulint		max_tuples;	/*!< maximum number of data tuples */
97 98
	mtuple_t*	tuples;		/*!< array of data tuples */
	mtuple_t*	tmp_tuples;	/*!< temporary copy of tuples,
99 100 101 102
					for sorting */
};

/** Information about temporary files used in merge sort */
103
struct merge_file_t {
104
	pfs_os_file_t	fd;		/*!< file descriptor */
105 106 107 108
	ulint		offset;		/*!< file offset (end of file) */
	ib_uint64_t	n_rec;		/*!< number of records in the file */
};

109
/** Index field definition */
110 111
struct index_field_t {
	ulint		col_no;		/*!< column offset */
112 113
	ulint		prefix_len;	/*!< column prefix length, or 0
					if indexing the whole column */
114
	bool		is_v_col;	/*!< whether this is a virtual column */
115 116
};

117
/** Definition of an index being created */
118 119
struct index_def_t {
	const char*	name;		/*!< index name */
120
	bool		rebuild;	/*!< whether the table is rebuilt */
121 122 123 124 125 126
	ulint		ind_type;	/*!< 0, DICT_UNIQUE,
					or DICT_CLUSTERED */
	ulint		key_number;	/*!< MySQL key number,
					or ULINT_UNDEFINED if none */
	ulint		n_fields;	/*!< number of fields in index */
	index_field_t*	fields;		/*!< field definitions */
127 128
	st_mysql_ftparser*
			parser;		/*!< fulltext parser plugin */
129 130
};

131
/** Structure for reporting duplicate records. */
132 133 134 135 136 137 138 139
struct row_merge_dup_t {
	dict_index_t*		index;	/*!< index being sorted */
	struct TABLE*		table;	/*!< MySQL table object */
	const ulint*		col_map;/*!< mapping of column numbers
					in table to the rebuilt table
					(index->table), or NULL if not
					rebuilding table */
	ulint			n_dup;	/*!< number of duplicates */
140 141
};

142 143 144 145 146 147 148
/*************************************************************//**
Report a duplicate key. */
void
row_merge_dup_report(
/*=================*/
	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
	const dfield_t*		entry)	/*!< in: duplicate index entry */
	MY_ATTRIBUTE((nonnull));
150

151
/*********************************************************************//**
152
Sets an exclusive lock on a table, for the duration of creating indexes.
153
@return error code or DB_SUCCESS */
154
dberr_t
155 156
row_merge_lock_table(
/*=================*/
157 158
	trx_t*		trx,		/*!< in/out: transaction */
	dict_table_t*	table,		/*!< in: table to lock */
159
	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
160 161
	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));

162
/*********************************************************************//**
163 164 165
Drop indexes that were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
marko's avatar
marko committed
166
void
167 168 169 170
row_merge_drop_indexes_dict(
/*========================*/
	trx_t*		trx,	/*!< in/out: dictionary transaction */
	table_id_t	table_id)/*!< in: table identifier */
Sergei Golubchik's avatar
Sergei Golubchik committed
171
	MY_ATTRIBUTE((nonnull));
172

173
/*********************************************************************//**
174 175 176
Drop those indexes which were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
marko's avatar
marko committed
177 178 179
void
row_merge_drop_indexes(
/*===================*/
180 181 182 183
	trx_t*		trx,	/*!< in/out: transaction */
	dict_table_t*	table,	/*!< in/out: table containing the indexes */
	ibool		locked)	/*!< in: TRUE=table locked,
				FALSE=may need to do a lazy drop */
Sergei Golubchik's avatar
Sergei Golubchik committed
184
	MY_ATTRIBUTE((nonnull));
185

186
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
void
row_merge_drop_temp_indexes(void);
/*=============================*/
191

Sergei Golubchik's avatar
Sergei Golubchik committed
192 193
/** Create temporary merge files in the given paramater path, and if
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
194
@param[in]	path	location for creating temporary merge files, or NULL
195
@return File descriptor */
196
pfs_os_file_t
197 198
row_merge_file_create_low(
	const char*	path)
Sergei Golubchik's avatar
Sergei Golubchik committed
199
	MY_ATTRIBUTE((warn_unused_result));
200 201 202 203 204 205
/*********************************************************************//**
Destroy a merge file. And de-register the file from Performance Schema
if UNIV_PFS_IO is defined. */
void
row_merge_file_destroy_low(
/*=======================*/
206
	const pfs_os_file_t&	fd);	/*!< in: merge file descriptor */
207 208 209 210 211

/*********************************************************************//**
Provide a new pathname for a table that is being renamed if it belongs to
a file-per-table tablespace.  The caller is responsible for freeing the
memory allocated for the return value.
212
@return new pathname of tablespace file, or NULL if space = 0 */
213 214 215 216
char*
row_make_new_pathname(
/*==================*/
	dict_table_t*	table,		/*!< in: table to be renamed */
217 218 219
	const char*	new_name)	/*!< in: new name */
	MY_ATTRIBUTE((nonnull, warn_unused_result));

220
/*********************************************************************//**
221 222
Rename the tables in the data dictionary.  The data dictionary must
have been locked exclusively by the caller, because the transaction
223
will not be committed.
224
@return error code or DB_SUCCESS */
225
dberr_t
226 227
row_merge_rename_tables_dict(
/*=========================*/
228
	dict_table_t*	old_table,	/*!< in/out: old table, renamed to
229
					tmp_name */
230
	dict_table_t*	new_table,	/*!< in/out: new table, renamed to
231
					old_table->name */
232
	const char*	tmp_name,	/*!< in: new name for old_table */
233
	trx_t*		trx)		/*!< in/out: dictionary transaction */
Sergei Golubchik's avatar
Sergei Golubchik committed
234
	MY_ATTRIBUTE((nonnull, warn_unused_result));
235

236
/*********************************************************************//**
237 238 239
Rename an index in the dictionary that was created. The data
dictionary must have been locked exclusively by the caller, because
the transaction will not be committed.
240
@return DB_SUCCESS if all OK */
241 242 243 244 245 246
dberr_t
row_merge_rename_index_to_add(
/*==========================*/
	trx_t*		trx,		/*!< in/out: transaction */
	table_id_t	table_id,	/*!< in: table identifier */
	index_id_t	index_id)	/*!< in: index identifier */
247 248
	MY_ATTRIBUTE((nonnull(1), warn_unused_result));

249
/*********************************************************************//**
250 251 252
Rename an index in the dictionary that is to be dropped. The data
dictionary must have been locked exclusively by the caller, because
the transaction will not be committed.
253
@return DB_SUCCESS if all OK */
254 255 256
dberr_t
row_merge_rename_index_to_drop(
/*===========================*/
257
	trx_t*		trx,		/*!< in/out: transaction */
258 259
	table_id_t	table_id,	/*!< in: table identifier */
	index_id_t	index_id)	/*!< in: index identifier */
260 261
	MY_ATTRIBUTE((nonnull(1), warn_unused_result));

262 263 264 265 266 267
/** Create the index and load in to the dictionary.
@param[in,out]	table		the index is on this table
@param[in]	index_def	the index definition
@param[in]	add_v		new virtual columns added along with add
				index call
@return index, or NULL on error */
268
dict_index_t*
269
row_merge_create_index(
270
	dict_table_t*		table,
271
	const index_def_t*	index_def,
272
	const dict_add_v_col_t*	add_v)
273 274
	MY_ATTRIBUTE((warn_unused_result));

275
/*********************************************************************//**
276
Check if a transaction can use an index.
277 278
@return whether the index can be used by the transaction */
bool
279 280
row_merge_is_index_usable(
/*======================*/
281
	const trx_t*		trx,	/*!< in: transaction */
282 283 284
	const dict_index_t*	index)	/*!< in: index to check */
	MY_ATTRIBUTE((nonnull, warn_unused_result));

285
/*********************************************************************//**
286 287
Drop a table. The caller must have ensured that the background stats
thread is not processing the table. This can be done by calling
288
dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
289
before calling this function.
290
@return DB_SUCCESS or error code */
291
dberr_t
292 293
row_merge_drop_table(
/*=================*/
294
	trx_t*		trx,		/*!< in: transaction */
295
	dict_table_t*	table)		/*!< in: table instance to drop */
296
	MY_ATTRIBUTE((nonnull, warn_unused_result));
297 298 299 300 301 302 303 304 305 306 307 308 309 310

/** Build indexes on a table by reading a clustered index, creating a temporary
file containing index entries, merge sorting these index entries and inserting
sorted index entries to indexes.
@param[in]	trx		transaction
@param[in]	old_table	table where rows are read from
@param[in]	new_table	table where indexes are created; identical to
old_table unless creating a PRIMARY KEY
@param[in]	online		true if creating indexes online
@param[in]	indexes		indexes to be created
@param[in]	key_numbers	MySQL key numbers
@param[in]	n_indexes	size of indexes[]
@param[in,out]	table		MySQL table, for reporting erroneous key value
if applicable
311
@param[in]	defaults	default values of added, changed columns, or NULL
312 313 314 315 316 317 318 319 320 321 322
@param[in]	col_map		mapping of old column numbers to new ones, or
NULL if old_table == new_table
@param[in]	add_autoinc	number of added AUTO_INCREMENT columns, or
ULINT_UNDEFINED if none is added
@param[in,out]	sequence	autoinc sequence
@param[in]	skip_pk_sort	whether the new PRIMARY KEY will follow
existing order
@param[in,out]	stage		performance schema accounting object, used by
ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
this function and it will be passed to other functions for further accounting.
@param[in]	add_v		new virtual columns added along with indexes
323 324
@param[in]	eval_table	mysql table used to evaluate virtual column
				value, see innobase_get_computed_value().
325
@param[in]	drop_historical	whether to drop historical system rows
326
@return DB_SUCCESS or error code */
327
dberr_t
328
row_merge_build_indexes(
329 330 331 332 333 334 335 336
	trx_t*			trx,
	dict_table_t*		old_table,
	dict_table_t*		new_table,
	bool			online,
	dict_index_t**		indexes,
	const ulint*		key_numbers,
	ulint			n_indexes,
	struct TABLE*		table,
337
	const dtuple_t*		defaults,
338 339 340 341 342
	const ulint*		col_map,
	ulint			add_autoinc,
	ib_sequence_t&		sequence,
	bool			skip_pk_sort,
	ut_stage_alter_t*	stage,
343
	const dict_add_v_col_t*	add_v,
344 345
	struct TABLE*		eval_table,
	bool			drop_historical)
346
	MY_ATTRIBUTE((warn_unused_result));
347

348 349 350 351 352 353 354
/********************************************************************//**
Write a buffer to a block. */
void
row_merge_buf_write(
/*================*/
	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
	const merge_file_t*	of,	/*!< in: output file */
355
	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
Sergei Golubchik's avatar
Sergei Golubchik committed
356
	MY_ATTRIBUTE((nonnull));
357

358 359 360 361 362
/********************************************************************//**
Sort a buffer. */
void
row_merge_buf_sort(
/*===============*/
363 364 365
	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
					(NULL if non-unique index) */
Sergei Golubchik's avatar
Sergei Golubchik committed
366
	MY_ATTRIBUTE((nonnull(1)));
367

368 369
/********************************************************************//**
Write a merge block to the file system.
Marko Mäkelä's avatar
Marko Mäkelä committed
370
@return whether the request was completed successfully */
371
UNIV_INTERN
372
bool
373 374
row_merge_write(
/*============*/
375
	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
376 377
	ulint		offset,	/*!< in: offset where to write,
				in number of row_merge_block_t elements */
378 379
	const void*	buf,	/*!< in: data */
	void*		crypt_buf,		/*!< in: crypt buf or NULL */
380 381
	ulint		space)			/*!< in: space id */
	MY_ATTRIBUTE((warn_unused_result));
382

383 384 385 386 387 388
/********************************************************************//**
Empty a sort buffer.
@return sort buffer */
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
	MY_ATTRIBUTE((warn_unused_result, nonnull));
Sergei Golubchik's avatar
Sergei Golubchik committed
391 392 393

/** Create a merge file in the given location.
@param[out]	merge_file	merge file structure
394
@param[in]	path		location for creating temporary file, or NULL
395
@return file descriptor, or -1 on failure */
396
pfs_os_file_t
397
row_merge_file_create(
398 399
	merge_file_t*	merge_file,
	const char*	path)
400
	MY_ATTRIBUTE((warn_unused_result, nonnull(1)));
401 402 403 404 405 406 407

/** Merge disk files.
@param[in]	trx	transaction
@param[in]	dup	descriptor of index being created
@param[in,out]	file	file containing index entries
@param[in,out]	block	3 buffers
@param[in,out]	tmpfd	temporary file handle
408 409 410 411 412
@param[in]      update_progress true, if we should update progress status
@param[in]      pct_progress total progress percent until now
@param[in]      pct_ocst current progress percent
@param[in]      crypt_block crypt buf or NULL
@param[in]      space    space_id
413 414 415
@param[in,out]	stage	performance schema accounting object, used by
ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
and then stage->inc() will be called for each record processed.
416
@return DB_SUCCESS or error code */
417
dberr_t
418 419
row_merge_sort(
/*===========*/
420 421 422 423
	trx_t*			trx,
	const row_merge_dup_t*	dup,
	merge_file_t*		file,
	row_merge_block_t*	block,
424
	pfs_os_file_t*		tmpfd,
425
	const bool		update_progress,
426 427
	const double	pct_progress,
	const double	pct_cost,
428 429
	row_merge_block_t*	crypt_block,
	ulint			space,
430
	ut_stage_alter_t*	stage = NULL)
431 432
	MY_ATTRIBUTE((warn_unused_result));

433 434 435 436 437 438
/*********************************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
	dict_index_t*	index)	/*!< in: secondary index */
	MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
441

442 443 444 445 446
/*********************************************************************//**
Deallocate a sort buffer. */
void
row_merge_buf_free(
/*===============*/
447
	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
Sergei Golubchik's avatar
Sergei Golubchik committed
448
	MY_ATTRIBUTE((nonnull));
449

450 451 452 453 454
/*********************************************************************//**
Destroy a merge file. */
void
row_merge_file_destroy(
/*===================*/
455
	merge_file_t*	merge_file)	/*!< in/out: merge file structure */
Sergei Golubchik's avatar
Sergei Golubchik committed
456
	MY_ATTRIBUTE((nonnull));
457

Marko Mäkelä's avatar
Marko Mäkelä committed
458
/** Read a merge block from the file system.
Marko Mäkelä's avatar
Marko Mäkelä committed
459
@return whether the request was completed successfully */
Marko Mäkelä's avatar
Marko Mäkelä committed
460
bool
461 462
row_merge_read(
/*===========*/
463
	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
464 465 466
	ulint			offset,	/*!< in: offset where to read
					in number of row_merge_block_t
					elements */
467 468
	row_merge_block_t*	buf,	/*!< out: data */
	row_merge_block_t*	crypt_buf, /*!< in: crypt buf or NULL */
469 470
	ulint			space)	   /*!< in: space id */
	MY_ATTRIBUTE((warn_unused_result));
471

472 473 474 475 476 477 478 479 480 481
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
const byte*
row_merge_read_rec(
/*===============*/
	row_merge_block_t*	block,	/*!< in/out: file buffer */
	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
	const byte*		b,	/*!< in: pointer to record */
	const dict_index_t*	index,	/*!< in: index of the record */
	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
	ulint*			foffs,	/*!< in/out: file offset */
	const mrec_t**		mrec,	/*!< out: pointer to merge record,
					or NULL on end of list
					(non-NULL on I/O error) */
	ulint*			offsets,/*!< out: offsets of mrec */
	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
	ulint			space)	   /*!< in: space id */
	MY_ATTRIBUTE((warn_unused_result));
491
#endif /* row0merge.h */