Commit 8b0d4cff authored by Thirunarayanan Balathandayuthapani's avatar Thirunarayanan Balathandayuthapani Committed by Marko Mäkelä

MDEV-15855 Deadlock between purge thread and DDL statement

Problem:
========
Truncate operation holds MDL on the table (t1) and tries to
acquire InnoDB dict_operation_lock. Purge holds dict_operation_lock
and tries to acquire MDL on the table (t1) to evaluate virtual
column expressions for indexed virtual columns.
It leads to deadlock of purge and truncate table (DDL).

Solution:
=========
If purge tries to acquire MDL on the table then it should do the following:

i) Purge should release all innodb latches (including dict_operation_lock)
before acquiring metadata lock on the table.

ii) After acquiring metadata lock on the table, it should check whether the
table was dropped or renamed. If the table is dropped then purge should
ignore the undo log record. If the table is renamed then it should
release the old MDL and acquire MDL on the new name.

iii) Once purge acquires MDL, it should use the SQL table handle for all
the remaining virtual index for the purge record.

purge_node_t: Introduce new virtual column information to know whether
the MDL was acquired successfully.

This is joint work with Marko Mäkelä.
parent e3207b6c
......@@ -175,8 +175,38 @@ SET DEBUG_SYNC='now WAIT_FOR halfway';
COMMIT;
InnoDB 0 transactions not purged
SET DEBUG_SYNC='now SIGNAL purged';
disconnect prevent_purge;
connection default;
DROP TABLE t1;
CREATE TABLE t1 (y YEAR, vy YEAR AS (y) VIRTUAL UNIQUE, pk INT PRIMARY KEY)
ENGINE=InnoDB;
INSERT INTO t1 (pk,y) VALUES (1,2022);
CREATE TABLE t2(f1 INT NOT NULL, PRIMARY KEY(f1))ENGINE=InnoDB;
SET GLOBAL debug_dbug = '+d,ib_purge_virtual_index_callback';
BEGIN;
INSERT INTO t2(f1) VALUES(1);
connection prevent_purge;
SET DEBUG_SYNC=RESET;
start transaction with consistent snapshot;
connection default;
COMMIT;
connect truncate,localhost,root,,;
REPLACE INTO t1(pk, y) SELECT pk,y FROM t1;
SET DEBUG_SYNC='row_trunc_before_dict_lock SIGNAL commit WAIT_FOR release';
TRUNCATE TABLE t1;
connection prevent_purge;
SET DEBUG_SYNC='now WAIT_FOR commit';
COMMIT;
SET DEBUG_SYNC='now SIGNAL purge_start';
disconnect prevent_purge;
connection default;
SET DEBUG_SYNC='now WAIT_FOR purge_start';
InnoDB 1 transactions not purged
SET DEBUG_SYNC='now SIGNAL release';
SET GLOBAL debug_dbug=@old_dbug;
connection truncate;
disconnect truncate;
connection default;
InnoDB 0 transactions not purged
DROP TABLE t1, t2;
set debug_sync=reset;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
......@@ -217,12 +217,54 @@ SET DEBUG_SYNC='now WAIT_FOR halfway';
COMMIT;
--source ../../innodb/include/wait_all_purged.inc
SET DEBUG_SYNC='now SIGNAL purged';
disconnect prevent_purge;
connection default;
reap;
DROP TABLE t1;
CREATE TABLE t1 (y YEAR, vy YEAR AS (y) VIRTUAL UNIQUE, pk INT PRIMARY KEY)
ENGINE=InnoDB;
INSERT INTO t1 (pk,y) VALUES (1,2022);
CREATE TABLE t2(f1 INT NOT NULL, PRIMARY KEY(f1))ENGINE=InnoDB;
SET GLOBAL debug_dbug = '+d,ib_purge_virtual_index_callback';
BEGIN;
INSERT INTO t2(f1) VALUES(1);
connection prevent_purge;
SET DEBUG_SYNC=RESET;
start transaction with consistent snapshot;
connection default;
COMMIT;
connect(truncate,localhost,root,,);
REPLACE INTO t1(pk, y) SELECT pk,y FROM t1;
SET DEBUG_SYNC='row_trunc_before_dict_lock SIGNAL commit WAIT_FOR release';
send TRUNCATE TABLE t1;
connection prevent_purge;
SET DEBUG_SYNC='now WAIT_FOR commit';
COMMIT;
SET DEBUG_SYNC='now SIGNAL purge_start';
disconnect prevent_purge;
connection default;
SET DEBUG_SYNC='now WAIT_FOR purge_start';
let $wait_all_purged=1;
--source ../../innodb/include/wait_all_purged.inc
let $wait_all_purged=0;
SET DEBUG_SYNC='now SIGNAL release';
SET GLOBAL debug_dbug=@old_dbug;
connection truncate;
reap;
disconnect truncate;
connection default;
--source ../../innodb/include/wait_all_purged.inc
DROP TABLE t1, t2;
--source include/wait_until_count_sessions.inc
set debug_sync=reset;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
......@@ -4404,13 +4404,6 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
DBUG_RETURN(error ? NULL : tl->table);
}
TABLE *get_purge_table(THD *thd)
{
/* see above, at most one table can be opened */
DBUG_ASSERT(thd->open_tables == NULL || thd->open_tables->next == NULL);
return thd->open_tables;
}
/** Find an open table in the list of prelocked tabled
......
......@@ -129,7 +129,7 @@ void destroy_thd(MYSQL_THD thd);
void reset_thd(MYSQL_THD thd);
TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
const char *tb, size_t tblen);
TABLE *get_purge_table(THD *thd);
void close_thread_tables(THD* thd);
/** Check if user has used xtradb extended system variable that
is not currently supported by innodb or marked as deprecated. */
......@@ -21603,63 +21603,154 @@ innobase_index_cond(
return handler_index_cond_check(file);
}
/** Find or open a mysql table for the virtual column template
@param[in] thd mysql thread handle
@param[in,out] table InnoDB table whose virtual column template is to be updated
@return TABLE if successful or NULL */
static TABLE *
innobase_find_mysql_table_for_vc(
/*=============================*/
THD* thd,
dict_table_t* table)
/** Parse the table file name into table name and database name.
@param[in] tbl_name InnoDB table name
@param[out] dbname database name buffer (NAME_LEN + 1 bytes)
@param[out] tblname table name buffer (NAME_LEN + 1 bytes)
@param[out] dbnamelen database name length
@param[out] tblnamelen table name length
@return true if the table name is parsed properly. */
static bool table_name_parse(
const table_name_t& tbl_name,
char* dbname,
char* tblname,
ulint& dbnamelen,
ulint& tblnamelen)
{
TABLE *mysql_table;
bool bg_thread = THDVAR(thd, background_thread);
dbnamelen = dict_get_db_name_len(tbl_name.m_name);
char db_buf[MAX_DATABASE_NAME_LEN + 1];
char tbl_buf[MAX_TABLE_NAME_LEN + 1];
if (bg_thread) {
if ((mysql_table = get_purge_table(thd))) {
return mysql_table;
ut_ad(dbnamelen > 0);
ut_ad(dbnamelen <= MAX_DATABASE_NAME_LEN);
memcpy(db_buf, tbl_name.m_name, dbnamelen);
db_buf[dbnamelen] = 0;
tblnamelen = strlen(tbl_name.m_name + dbnamelen + 1);
memcpy(tbl_buf, tbl_name.m_name + dbnamelen + 1, tblnamelen);
tbl_buf[tblnamelen] = 0;
filename_to_tablename(db_buf, dbname, MAX_DATABASE_NAME_LEN + 1, true);
if (tblnamelen > TEMP_FILE_PREFIX_LENGTH
&& !strncmp(tbl_buf, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH)) {
return false;
}
} else {
if (table->vc_templ->mysql_table_query_id == thd_get_query_id(thd)) {
return table->vc_templ->mysql_table;
if (char *is_part = strchr(tbl_buf, '#')) {
*is_part = '\0';
}
filename_to_tablename(tbl_buf, tblname, MAX_TABLE_NAME_LEN + 1, true);
return true;
}
/** Acquire metadata lock and MariaDB table handle for an InnoDB table.
@param[in,out] thd thread handle
@param[in,out] table InnoDB table
@return MariaDB table handle
@retval NULL if the table does not exist, is unaccessible or corrupted. */
static TABLE* innodb_acquire_mdl(THD* thd, dict_table_t* table)
{
char db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
char tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
ulint db_buf_len, db_buf1_len;
ulint tbl_buf_len, tbl_buf1_len;
if (!table_name_parse(table->name, db_buf, tbl_buf,
db_buf_len, tbl_buf_len)) {
ut_ad(!"invalid table name");
return NULL;
}
char dbname[MAX_DATABASE_NAME_LEN + 1];
char tbname[MAX_TABLE_NAME_LEN + 1];
char* name = table->name.m_name;
uint dbnamelen = (uint) dict_get_db_name_len(name);
uint tbnamelen = (uint) strlen(name) - dbnamelen - 1;
char t_dbname[MAX_DATABASE_NAME_LEN + 1];
char t_tbname[MAX_TABLE_NAME_LEN + 1];
const table_id_t table_id = table->id;
retry_mdl:
const bool unaccessible = !table->is_readable() || table->corrupted;
table->release();
strncpy(dbname, name, dbnamelen);
dbname[dbnamelen] = 0;
strncpy(tbname, name + dbnamelen + 1, tbnamelen);
tbname[tbnamelen] =0;
if (unaccessible) {
return NULL;
}
/* For partition table, remove the partition name and use the
"main" table name to build the template */
char* is_part = is_partition(tbname);
TABLE* mariadb_table = open_purge_table(thd, db_buf, db_buf_len,
tbl_buf, tbl_buf_len);
if (is_part != NULL) {
*is_part = '\0';
table = dict_table_open_on_id(table_id, false, DICT_TABLE_OP_NORMAL);
if (table == NULL) {
/* Table is dropped. */
goto fail;
}
dbnamelen = filename_to_tablename(dbname, t_dbname,
MAX_DATABASE_NAME_LEN + 1);
tbnamelen = filename_to_tablename(tbname, t_tbname,
MAX_TABLE_NAME_LEN + 1);
if (!fil_table_accessible(table)) {
release_fail:
table->release();
fail:
if (mariadb_table) {
close_thread_tables(thd);
}
return NULL;
}
if (!table_name_parse(table->name, db_buf1, tbl_buf1,
db_buf1_len, tbl_buf1_len)) {
ut_ad(!"invalid table name");
goto release_fail;
}
if (bg_thread) {
return open_purge_table(thd, t_dbname, dbnamelen,
t_tbname, tbnamelen);
if (!mariadb_table) {
} else if (!strcmp(db_buf, db_buf1) && !strcmp(tbl_buf, tbl_buf1)) {
return mariadb_table;
} else {
/* Table is renamed. So release MDL for old name and try
to acquire the MDL for new table name. */
close_thread_tables(thd);
}
strcpy(tbl_buf, tbl_buf1);
strcpy(db_buf, db_buf1);
tbl_buf_len = tbl_buf1_len;
db_buf_len = db_buf1_len;
goto retry_mdl;
}
/** Find or open a table handle for the virtual column template
@param[in] thd thread handle
@param[in,out] table InnoDB table whose virtual column template
is to be updated
@return table handle
@retval NULL if the table is dropped, unaccessible or corrupted
for purge thread */
static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table)
{
if (THDVAR(thd, background_thread)) {
/* Purge thread acquires dict_operation_lock while
processing undo log record. Release the dict_operation_lock
before acquiring MDL on the table. */
rw_lock_s_unlock(dict_operation_lock);
return innodb_acquire_mdl(thd, table);
} else {
if (table->vc_templ->mysql_table_query_id
== thd_get_query_id(thd)) {
return table->vc_templ->mysql_table;
}
}
char db_buf[NAME_LEN + 1];
char tbl_buf[NAME_LEN + 1];
ulint db_buf_len, tbl_buf_len;
if (!table_name_parse(table->name, db_buf, tbl_buf,
db_buf_len, tbl_buf_len)) {
ut_ad(!"invalid table name");
return NULL;
}
mysql_table = find_fk_open_table(thd, t_dbname, dbnamelen,
t_tbname, tbnamelen);
TABLE* mysql_table = find_fk_open_table(thd, db_buf, db_buf_len,
tbl_buf, tbl_buf_len);
table->vc_templ->mysql_table = mysql_table;
table->vc_templ->mysql_table_query_id = thd_get_query_id(thd);
......@@ -21678,7 +21769,7 @@ innobase_init_vc_templ(
table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
TABLE *mysql_table= innobase_find_mysql_table_for_vc(current_thd, table);
TABLE *mysql_table= innodb_find_table_for_vc(current_thd, table);
ut_ad(mysql_table);
if (!mysql_table) {
......@@ -21775,12 +21866,13 @@ innobase_get_field_from_update_vector(
@param[in] thd MariaDB THD
@param[in] index Index in use
@param[out] heap Heap that holds temporary row
@param[in,out] mysql_table MariaDB table
@param[out] rec Pointer to allocated MariaDB record
@param[in,out] table MariaDB table
@param[out] record Pointer to allocated MariaDB record
@param[out] storage Internal storage for blobs etc
@return FALSE ok
@return TRUE malloc failure
@retval false on success
@retval true on malloc failure or failed to open the maria table
for purge thread.
*/
bool innobase_allocate_row_for_vcol(
......@@ -21794,7 +21886,12 @@ bool innobase_allocate_row_for_vcol(
TABLE *maria_table;
String *blob_value_storage;
if (!*table)
*table= innobase_find_mysql_table_for_vc(thd, index->table);
*table= innodb_find_table_for_vc(thd, index->table);
/* For purge thread, there is a possiblity that table could have
dropped, corrupted or unaccessible. */
if (!*table)
return true;
maria_table= *table;
if (!*heap && !(*heap= mem_heap_create(srv_page_size)))
{
......
......@@ -115,7 +115,15 @@ enum btr_latch_mode {
/** Attempt to purge a secondary index record
while holding the dict_index_t::lock S-latch. */
BTR_PURGE_LEAF_ALREADY_S_LATCHED = BTR_PURGE_LEAF
| BTR_ALREADY_S_LATCHED
| BTR_ALREADY_S_LATCHED,
/** In the case of BTR_MODIFY_TREE, the caller specifies
the intention to delete record only. It is used to optimize
block->lock range.*/
BTR_LATCH_FOR_DELETE = 65536,
/** Attempt to purge a secondary index record in the tree. */
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE
};
/** This flag ORed to btr_latch_mode says that we do the search in query
......@@ -131,10 +139,6 @@ the insert buffer to speed up inserts */
to insert record only. It is used to optimize block->lock range.*/
#define BTR_LATCH_FOR_INSERT 32768U
/** In the case of BTR_MODIFY_TREE, the caller specifies the intention
to delete record only. It is used to optimize block->lock range.*/
#define BTR_LATCH_FOR_DELETE 65536U
/** This flag is for undo insert of rtree. For rtree, we need this flag
to find proper rec to undo insert.*/
#define BTR_RTREE_UNDO_INS 131072U
......
......@@ -296,6 +296,21 @@ btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr to commit */
/** Commits the mtr and sets the clustered index pcur and secondary index
pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached.
Function btr_pcur_store_position should be used for both cursor before
calling this, if restoration of cursor is wanted later.
@param[in] pcur persistent cursor
@param[in] sec_pcur secondary index persistent cursor
@param[in] mtr mtr to commit */
UNIV_INLINE
void
btr_pcurs_commit_specify_mtr(
btr_pcur_t* pcur,
btr_pcur_t* sec_pcur,
mtr_t* mtr);
/*********************************************************//**
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'.
......
......@@ -389,6 +389,32 @@ btr_pcur_commit_specify_mtr(
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/** Commits the mtr and sets the clustered index pcur and secondary index
pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached.
Function btr_pcur_store_position should be used for both cursor before
calling this, if restoration of cursor is wanted later.
@param[in] pcur persistent cursor
@param[in] sec_pcur secondary index persistent cursor
@param[in] mtr mtr to commit */
UNIV_INLINE
void
btr_pcurs_commit_specify_mtr(
btr_pcur_t* pcur,
btr_pcur_t* sec_pcur,
mtr_t* mtr)
{
ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(sec_pcur->pos_state == BTR_PCUR_IS_POSITIONED);
pcur->latch_mode = BTR_NO_LATCHES;
sec_pcur->latch_mode = BTR_NO_LATCHES;
mtr_commit(mtr);
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
sec_pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/**************************************************************//**
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
......
......@@ -36,6 +36,7 @@ Created 3/14/1997 Heikki Tuuri
#include "que0types.h"
#include "row0types.h"
#include "ut0vec.h"
#include "row0mysql.h"
/** Create a purge node to a query graph.
@param[in] parent parent node, i.e., a thr node
......@@ -47,8 +48,7 @@ row_purge_node_create(
mem_heap_t* heap)
MY_ATTRIBUTE((warn_unused_result));
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
......@@ -61,14 +61,27 @@ inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@param[in,out] node row purge node
@param[in] index secondary index
@param[in] entry secondary index entry
@param[in,out] sec_pcur secondary index cursor or NULL
if it is called for purge buffering
operation.
@param[in,out] sec_mtr mini-transaction which holds
secondary index entry or NULL if it is
called for purge buffering operation.
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return true if the secondary index record can be purged */
bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
const dtuple_t* entry) /*!< in: secondary index entry */
MY_ATTRIBUTE((nonnull, warn_unused_result));
purge_node_t* node,
dict_index_t* index,
const dtuple_t* entry,
btr_pcur_t* sec_pcur=NULL,
mtr_t* sec_mtr=NULL,
bool is_tree=false);
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
......@@ -117,6 +130,10 @@ struct purge_node_t{
ibool done; /* Debug flag */
trx_id_t trx_id; /*!< trx id for this purging record */
/** Virtual column information about opening of MariaDB table.
It resets after processing each undo log record. */
purge_vcol_info_t vcol_info;
#ifdef UNIV_DEBUG
/***********************************************************//**
Validate the persisent cursor. The purge node has two references
......@@ -127,6 +144,11 @@ struct purge_node_t{
the ref member.*/
bool validate_pcur();
#endif
/** Whether purge failed to open the maria table for virtual column
computation.
@return true if the table failed to open. */
bool vcol_op_failed() const { return !vcol_info.validate(); }
};
#endif
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -52,4 +53,55 @@ struct row_log_t;
/* MySQL data types */
struct TABLE;
/** Purge virtual column node information. */
struct purge_vcol_info_t
{
/** Is there a possible need to evaluate virtual columns? */
bool requested;
/** Do we have to evaluate virtual columns (using mariadb_table)? */
bool used;
/** True if it is used for the first time. */
bool first_use;
/** MariaDB table opened for virtual column computation. */
TABLE* mariadb_table;
/** Reset the state. */
void reset()
{
requested = false;
used = false;
first_use = false;
mariadb_table = NULL;
}
/** Validate the virtual column information.
@return true if the mariadb table opened successfully
or doesn't try to calculate virtual column. */
bool validate() const { return !used || mariadb_table; }
/** Note that the virtual column information is needed. */
void set_used()
{
ut_ad(requested);
if (first_use) {
first_use = false;
ut_ad(used);
return;
}
first_use = used = true;
}
/** Check whether it fetches mariadb table for the first time.
@return true if first time tries to open mariadb table. */
bool is_first_fetch() const
{
ut_ad(!first_use || used);
return first_use;
}
};
#endif
......@@ -35,6 +35,7 @@ Created 2/6/1997 Heikki Tuuri
#include "rem0types.h"
#include "mtr0mtr.h"
#include "dict0mem.h"
#include "row0types.h"
// Forward declaration
class ReadView;
......@@ -68,27 +69,34 @@ row_vers_must_preserve_del_marked(
const table_name_t& name,
mtr_t* mtr);
/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
/** Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param[in] also_curr TRUE if also rec is included in the versions
to search; otherwise only versions prior
to it are searched
@param[in] rec record in the clustered index; the caller
must have a latch on the page
@param[in] mtr mtr holding the latch on rec; it will
also hold the latch on purge_view
@param[in] index secondary index
@param[in] ientry secondary index entry
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@param[in,out] vcol_info virtual column information for purge thread.
@return TRUE if earlier version should have */
ibool
bool
row_vers_old_has_index_entry(
/*=========================*/
ibool also_curr,/*!< in: TRUE if also rec is included in the
versions to search; otherwise only versions
prior to it are searched */
const rec_t* rec, /*!< in: record in the clustered index; the
caller must have a latch on the page */
mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
dict_index_t* index, /*!< in: the secondary index */
const dtuple_t* ientry, /*!< in: the secondary index entry */
roll_ptr_t roll_ptr,/*!< in: roll_ptr for the purge record */
trx_id_t trx_id);/*!< in: transaction ID on the purging record */
bool also_curr,
const rec_t* rec,
mtr_t* mtr,
dict_index_t* index,
const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
purge_vcol_info_t* vcol_info=NULL);
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
......
......@@ -136,7 +136,8 @@ row_purge_remove_clust_if_poss_low(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|| node->vcol_info.used);
index = dict_table_get_first_index(node->table);
......@@ -230,8 +231,55 @@ row_purge_remove_clust_if_poss(
return(false);
}
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
/** Tries to store secondary index cursor before openin mysql table for
virtual index condition computation.
@param[in,out] node row purge node
@param[in] index secondary index
@param[in,out] sec_pcur secondary index cursor
@param[in,out] sec_mtr mini-transaction which holds
secondary index entry */
static void row_purge_store_vsec_cur(
purge_node_t* node,
dict_index_t* index,
btr_pcur_t* sec_pcur,
mtr_t* sec_mtr)
{
row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, sec_mtr);
if (!node->found_clust) {
return;
}
node->vcol_info.requested = true;
btr_pcur_store_position(sec_pcur, sec_mtr);
btr_pcurs_commit_specify_mtr(&node->pcur, sec_pcur, sec_mtr);
}
/** Tries to restore secondary index cursor after opening the mysql table
@param[in,out] node row purge node
@param[in] index secondary index
@param[in,out] sec_mtr mini-transaction which holds secondary index entry
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return false in case of restore failure. */
static bool row_purge_restore_vsec_cur(
purge_node_t* node,
dict_index_t* index,
btr_pcur_t* sec_pcur,
mtr_t* sec_mtr,
bool is_tree)
{
sec_mtr->start();
sec_mtr->set_named_space(index->space);
return btr_pcur_restore_position(
is_tree ? BTR_PURGE_TREE : BTR_PURGE_LEAF,
sec_pcur, sec_mtr);
}
/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
......@@ -244,25 +292,66 @@ inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@param[in,out] node row purge node
@param[in] index secondary index
@param[in] entry secondary index entry
@param[in,out] sec_pcur secondary index cursor or NULL
if it is called for purge buffering
operation.
@param[in,out] sec_mtr mini-transaction which holds
secondary index entry or NULL if it is
called for purge buffering operation.
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return true if the secondary index record can be purged */
bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
const dtuple_t* entry) /*!< in: secondary index entry */
purge_node_t* node,
dict_index_t* index,
const dtuple_t* entry,
btr_pcur_t* sec_pcur,
mtr_t* sec_mtr,
bool is_tree)
{
bool can_delete;
mtr_t mtr;
ut_ad(!dict_index_is_clust(index));
const bool store_cur = sec_mtr && !node->vcol_info.used
&& dict_index_has_virtual(index);
if (store_cur) {
row_purge_store_vsec_cur(node, index, sec_pcur, sec_mtr);
/* The PRIMARY KEY value was not found in the clustered
index. The secondary index record found. We can purge
the secondary index record. */
if (!node->vcol_info.requested) {
ut_ad(!node->found_clust);
return true;
}
}
retry_purge_sec:
mtr_start(&mtr);
can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
|| !row_vers_old_has_index_entry(TRUE,
|| !row_vers_old_has_index_entry(true,
btr_pcur_get_rec(&node->pcur),
&mtr, index, entry,
node->roll_ptr, node->trx_id);
node->roll_ptr, node->trx_id,
&node->vcol_info);
if (node->vcol_info.is_first_fetch()) {
if (node->vcol_info.mariadb_table) {
goto retry_purge_sec;
}
node->table = NULL;
sec_pcur = NULL;
return false;
}
/* Persistent cursor is closed if reposition fails. */
if (node->found_clust) {
......@@ -271,7 +360,12 @@ row_purge_poss_sec(
mtr_commit(&mtr);
}
return(can_delete);
if (store_cur && !row_purge_restore_vsec_cur(
node, index, sec_pcur, sec_mtr, is_tree)) {
return false;
}
return can_delete;
}
/***************************************************************
......@@ -287,7 +381,6 @@ row_purge_remove_sec_if_poss_tree(
const dtuple_t* entry) /*!< in: index entry */
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
ibool success = TRUE;
dberr_t err;
mtr_t mtr;
......@@ -348,16 +441,16 @@ row_purge_remove_sec_if_poss_tree(
ut_error;
}
btr_cur = btr_pcur_get_btr_cur(&pcur);
/* We should remove the index record if no later version of the row,
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
if (row_purge_poss_sec(node, index, entry)) {
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
/* Remove the index record, which should have been
marked for deletion. */
if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
if (!rec_get_deleted_flag(btr_cur_get_rec(
btr_pcur_get_btr_cur(&pcur)),
dict_table_is_comp(index->table))) {
ib::error()
<< "tried to purge non-delete-marked record"
......@@ -365,15 +458,18 @@ row_purge_remove_sec_if_poss_tree(
<< " of table " << index->table->name
<< ": tuple: " << *entry
<< ", record: " << rec_index_print(
btr_cur_get_rec(btr_cur), index);
btr_cur_get_rec(
btr_pcur_get_btr_cur(&pcur)),
index);
ut_ad(0);
goto func_exit;
}
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
false, &mtr);
btr_cur_pessimistic_delete(&err, FALSE,
btr_pcur_get_btr_cur(&pcur),
0, false, &mtr);
switch (UNIV_EXPECT(err, DB_SUCCESS)) {
case DB_SUCCESS:
break;
......@@ -385,6 +481,13 @@ row_purge_remove_sec_if_poss_tree(
}
}
if (node->vcol_op_failed()) {
ut_ad(mtr.has_committed());
ut_ad(!pcur.old_rec_buf);
ut_ad(pcur.pos_state == BTR_PCUR_NOT_POSITIONED);
return false;
}
func_exit:
btr_pcur_close(&pcur);
func_exit_no_pcur:
......@@ -445,8 +548,10 @@ row_purge_remove_sec_if_poss_leaf(
index->is_committed(). */
ut_ad(!dict_index_is_online_ddl(index));
/* Change buffering is disabled for spatial index. */
mode = dict_index_is_spatial(index)
/* Change buffering is disabled for spatial index and
virtual index. */
mode = (dict_index_is_spatial(index)
|| dict_index_has_virtual(index))
? BTR_MODIFY_LEAF
: BTR_PURGE_LEAF;
}
......@@ -474,7 +579,7 @@ row_purge_remove_sec_if_poss_leaf(
case ROW_FOUND:
/* Before attempting to purge a record, check
if it is safe to do so. */
if (row_purge_poss_sec(node, index, entry)) {
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
/* Only delete-marked records should be purged. */
......@@ -540,6 +645,12 @@ row_purge_remove_sec_if_poss_leaf(
success = false;
}
}
if (node->vcol_op_failed()) {
btr_pcur_close(&pcur);
return false;
}
/* (The index entry is still needed,
or the deletion succeeded) */
/* fall through */
......@@ -586,6 +697,10 @@ row_purge_remove_sec_if_poss(
return;
}
retry:
if (node->vcol_op_failed()) {
return;
}
success = row_purge_remove_sec_if_poss_tree(node, index, entry);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
......@@ -652,6 +767,12 @@ row_purge_del_mark(
node->row, NULL, node->index,
heap, ROW_BUILD_FOR_PURGE);
row_purge_remove_sec_if_poss(node, node->index, entry);
if (node->vcol_op_failed()) {
mem_heap_free(heap);
return false;
}
mem_heap_empty(heap);
}
......@@ -678,7 +799,8 @@ row_purge_upd_exist_or_extern_func(
{
mem_heap_t* heap;
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|| node->vcol_info.used);
ut_ad(!node->table->skip_alter_undo);
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
......@@ -1016,10 +1138,15 @@ row_purge(
bool purged = row_purge_record(
node, undo_rec, thr, updated_extern);
if (!node->vcol_info.used) {
rw_lock_s_unlock(dict_operation_lock);
}
ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_S));
if (purged
|| srv_shutdown_state != SRV_SHUTDOWN_NONE) {
|| srv_shutdown_state != SRV_SHUTDOWN_NONE
|| node->vcol_op_failed()) {
return;
}
......@@ -1051,6 +1178,8 @@ row_purge_end(
node->done = TRUE;
node->vcol_info.reset();
ut_a(thr->run_node != NULL);
mem_heap_empty(node->heap);
......@@ -1098,6 +1227,7 @@ row_purge_step(
row_purge_end(thr);
} else {
thr->run_node = node;
node->vcol_info.reset();
}
} else {
row_purge_end(thr);
......
......@@ -1810,6 +1810,8 @@ row_truncate_table_for_mysql(
trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
}
DEBUG_SYNC_C("row_trunc_before_dict_lock");
/* Step-3: Validate ownership of needed locks (Exclusive lock).
Ownership will also ensure there is no active SQL queries, INSERT,
SELECT, .....*/
......
......@@ -508,7 +508,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
any no-redo rollback segment undo logs. */
if (dict_table_is_temporary(node->table)
|| row_vers_old_has_index_entry(
FALSE, btr_pcur_get_rec(&(node->pcur)),
false, btr_pcur_get_rec(&(node->pcur)),
&mtr_vers, index, entry, 0, 0)) {
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
btr_cur, TRUE, thr, &mtr);
......
......@@ -436,14 +436,16 @@ row_vers_must_preserve_del_marked(
@param[in,out] row the cluster index row in dtuple form
@param[in] clust_index clustered index
@param[in] index the secondary index
@param[in] heap heap used to build virtual dtuple */
@param[in] heap heap used to build virtual dtuple
@param[in,out] vcol_info virtual column information. */
static
void
row_vers_build_clust_v_col(
dtuple_t* row,
dict_index_t* clust_index,
dict_index_t* index,
mem_heap_t* heap)
mem_heap_t* heap,
purge_vcol_info_t* vcol_info)
{
mem_heap_t* local_heap = NULL;
VCOL_STORAGE *vcol_storage= NULL;
......@@ -453,12 +455,23 @@ row_vers_build_clust_v_col(
ut_ad(dict_index_has_virtual(index));
if (vcol_info != NULL) {
vcol_info->set_used();
maria_table = vcol_info->mariadb_table;
}
innobase_allocate_row_for_vcol(thd, index,
&local_heap,
&maria_table,
&record,
&vcol_storage);
if (vcol_info && !vcol_info->mariadb_table) {
vcol_info->mariadb_table = maria_table;
ut_ad(!maria_table || vcol_info->is_first_fetch());
goto func_exit;
}
for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
const dict_field_t* ind_field = dict_index_get_nth_field(
index, i);
......@@ -476,6 +489,7 @@ row_vers_build_clust_v_col(
}
}
func_exit:
if (local_heap) {
if (vcol_storage)
innobase_free_row_for_vcol(vcol_storage);
......@@ -505,7 +519,7 @@ row_vers_build_cur_vrow_low(
roll_ptr_t roll_ptr,
trx_id_t trx_id,
mem_heap_t* v_heap,
const dtuple_t**vrow,
const dtuple_t** vrow,
mtr_t* mtr)
{
const rec_t* version;
......@@ -784,6 +798,7 @@ row_vers_vc_matches_cluster(
@param[in,out] heap heap memory
@param[in,out] v_heap heap memory to keep virtual colum dtuple
@param[in] mtr mtr holding the latch on rec
@param[in,out] vcol_info virtual column information for purge thread
@return dtuple contains virtual column data */
static
const dtuple_t*
......@@ -798,7 +813,8 @@ row_vers_build_cur_vrow(
trx_id_t trx_id,
mem_heap_t* heap,
mem_heap_t* v_heap,
mtr_t* mtr)
mtr_t* mtr,
purge_vcol_info_t* vcol_info)
{
const dtuple_t* cur_vrow = NULL;
......@@ -818,8 +834,17 @@ row_vers_build_cur_vrow(
rec, *clust_offsets,
NULL, NULL, NULL, NULL, heap);
if (vcol_info && !vcol_info->used) {
mtr->commit();
}
row_vers_build_clust_v_col(
row, clust_index, index, heap);
row, clust_index, index, heap, vcol_info);
if (vcol_info != NULL && vcol_info->is_first_fetch()) {
return NULL;
}
cur_vrow = dtuple_copy(row, v_heap);
dtuple_dup_v_fld(cur_vrow, v_heap);
} else {
......@@ -834,27 +859,34 @@ row_vers_build_cur_vrow(
return(cur_vrow);
}
/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
/** Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry are identified in
the alphabetical ordering; exactly in this case we return TRUE.
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param[in] also_curr TRUE if also rec is included in the versions
to search; otherwise only versions prior
to it are searched
@param[in] rec record in the clustered index; the caller
must have a latch on the page
@param[in] mtr mtr holding the latch on rec; it will
also hold the latch on purge_view
@param[in] index secondary index
@param[in] ientry secondary index entry
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@param[in,out] vcol_info virtual column information for purge thread.
@return TRUE if earlier version should have */
ibool
bool
row_vers_old_has_index_entry(
/*=========================*/
ibool also_curr,/*!< in: TRUE if also rec is included in the
versions to search; otherwise only versions
prior to it are searched */
const rec_t* rec, /*!< in: record in the clustered index; the
caller must have a latch on the page */
mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
dict_index_t* index, /*!< in: the secondary index */
const dtuple_t* ientry, /*!< in: the secondary index entry */
roll_ptr_t roll_ptr,/*!< in: roll_ptr for the purge record */
trx_id_t trx_id) /*!< in: transaction ID on the purging record */
bool also_curr,
const rec_t* rec,
mtr_t* mtr,
dict_index_t* index,
const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
purge_vcol_info_t* vcol_info)
{
const rec_t* version;
rec_t* prev_version;
......@@ -922,8 +954,18 @@ row_vers_old_has_index_entry(
columns need to be computed */
if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
|| dbug_v_purge) {
if (vcol_info && !vcol_info->used) {
mtr->commit();
}
row_vers_build_clust_v_col(
row, clust_index, index, heap);
row, clust_index, index, heap,
vcol_info);
if (vcol_info && vcol_info->is_first_fetch()) {
goto unsafe_to_purge;
}
entry = row_build_index_entry(
row, ext, index, heap);
......@@ -988,7 +1030,7 @@ row_vers_old_has_index_entry(
if (v_heap) {
mem_heap_free(v_heap);
}
return(TRUE);
return true;
}
}
} else if (dict_index_has_virtual(index)) {
......@@ -996,9 +1038,15 @@ row_vers_old_has_index_entry(
deleted, but the previous version of it might not. We will
need to get the virtual column data from undo record
associated with current cluster index */
cur_vrow = row_vers_build_cur_vrow(
also_curr, rec, clust_index, &clust_offsets,
index, ientry, roll_ptr, trx_id, heap, v_heap, mtr);
index, ientry, roll_ptr, trx_id, heap, v_heap, mtr,
vcol_info);
if (vcol_info && vcol_info->is_first_fetch()) {
goto unsafe_to_purge;
}
}
version = rec;
......@@ -1017,14 +1065,14 @@ row_vers_old_has_index_entry(
if (!prev_version) {
/* Versions end here */
unsafe_to_purge:
mem_heap_free(heap);
if (v_heap) {
mem_heap_free(v_heap);
}
return(FALSE);
return false;
}
clust_offsets = rec_get_offsets(prev_version, clust_index,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment