Commit 2c4844c9 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-17813 Crash in instant ALTER TABLE due to purge concurrently emptying table

Several race conditions between MDEV-15562 instant ALTER TABLE and purge
were observed.

The most obvious race condition resulted in a reported assertion failure in
dict_index_t::instant_add_field(): instant.n_core_fields == n_core_fields
would not hold if the table was emptied by purge after the time
dict_table_t::prepare_instant() was called.

During purge, it can turn out that the table is logically empty, only
containing a metadata record. If the metadata record is of the type
created by MDEV-11369 instant ADD COLUMN, it can be removed and
dict_index_t::clear_instant_add() can be called. This will convert the
table to the canonical non-instant format. (If the metadata record is
of the MDEV-15562 type, then it can only be deleted if the table becomes
empty as the result of rollback of an instant ALTER TABLE operation.)

row_purge_remove_clust_if_poss_low(): Add a debug check that ensures
that purge can never remove a MDEV-15562 metadata record.

ha_innobase::open(): Add a comment about the necessity of rolling
back any recovered instant ALTER TABLE transaction on the table.

instant_metadata_lock(): An auxiliary function to acquire a page latch
on the metadata record, to prevent race conditions.

dict_table_t::prepare_instant(), dict_index_t::instant_add_field(),
dict_table_t::rollback_instant(), innobase_instant_try():
Invoke instant_metadata_lock() in order to prevent race conditions.

dict_index_t::instant_add_field(): Correct debug assertions.
The == was guaranteed by code in dict_table_t::prepare_instant()
that was introduced in MDEV-15562. Due to the race condition,
we could occasionally have <=, but never >= like the code was
after MDEV-11369.

ha_innobase_inplace_ctx::instant_column(): Wrapper for
dict_table_t::instant_column(). Add debug assertions.
parent 46a41108
...@@ -21,4 +21,26 @@ ALTER TABLE t1 DROP extra; ...@@ -21,4 +21,26 @@ ALTER TABLE t1 DROP extra;
disconnect prevent_purge; disconnect prevent_purge;
InnoDB 0 transactions not purged InnoDB 0 transactions not purged
DROP TABLE t1; DROP TABLE t1;
#
# MDEV-17813 Crash in instant ALTER TABLE due to purge
# concurrently emptying table
#
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
INSERT INTO t1 SET f2=1;
ALTER TABLE t1 ADD COLUMN f1 INT;
connect purge_control,localhost,root;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
DELETE FROM t1;
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
ALTER TABLE t1 ADD COLUMN f3 INT;
connection purge_control;
SET DEBUG_SYNC='now WAIT_FOR go';
COMMIT;
InnoDB 0 transactions not purged
SET DEBUG_SYNC='now SIGNAL do';
disconnect purge_control;
connection default;
SET DEBUG_SYNC=RESET;
DROP TABLE t1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency; SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
...@@ -30,4 +30,34 @@ disconnect prevent_purge; ...@@ -30,4 +30,34 @@ disconnect prevent_purge;
let $wait_all_purged= 0; let $wait_all_purged= 0;
--source include/wait_all_purged.inc --source include/wait_all_purged.inc
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # MDEV-17813 Crash in instant ALTER TABLE due to purge
--echo # concurrently emptying table
--echo #
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
INSERT INTO t1 SET f2=1;
ALTER TABLE t1 ADD COLUMN f1 INT;
connect (purge_control,localhost,root);
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
DELETE FROM t1;
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
send ALTER TABLE t1 ADD COLUMN f3 INT;
connection purge_control;
SET DEBUG_SYNC='now WAIT_FOR go';
COMMIT;
--source include/wait_all_purged.inc
SET DEBUG_SYNC='now SIGNAL do';
disconnect purge_control;
connection default;
reap;
SET DEBUG_SYNC=RESET;
DROP TABLE t1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency; SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
...@@ -6046,6 +6046,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field) ...@@ -6046,6 +6046,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
int int
ha_innobase::open(const char* name, int, uint) ha_innobase::open(const char* name, int, uint)
{ {
/* TODO: If trx_rollback_recovered(bool all=false) is ever
removed, the first-time open() must hold (or acquire and release)
a table lock that conflicts with trx_resurrect_table_locks(),
to ensure that any recovered incomplete ALTER TABLE will have been
rolled back. Otherwise, dict_table_t::instant could be cleared by
the rollback invoking dict_index_t::clear_instant_alter() while
open table handles exist in client connections. */
dict_table_t* ib_table; dict_table_t* ib_table;
char norm_name[FN_REFLEN]; char norm_name[FN_REFLEN];
dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE; dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
......
...@@ -136,6 +136,30 @@ static const alter_table_operations INNOBASE_ALTER_INSTANT ...@@ -136,6 +136,30 @@ static const alter_table_operations INNOBASE_ALTER_INSTANT
| ALTER_COLUMN_UNVERSIONED | ALTER_COLUMN_UNVERSIONED
| ALTER_DROP_VIRTUAL_COLUMN; | ALTER_DROP_VIRTUAL_COLUMN;
/** Acquire a page latch on the possible metadata record,
to prevent concurrent invocation of dict_index_t::clear_instant_alter()
by purge when the table turns out to be empty.
@param[in,out] index clustered index
@param[in,out] mtr mini-transaction */
static void instant_metadata_lock(dict_index_t& index, mtr_t& mtr)
{
DBUG_ASSERT(index.is_primary());
if (!index.is_instant()) {
/* dict_index_t::clear_instant_alter() cannot be called.
No need for a latch. */
return;
}
btr_cur_t btr_cur;
btr_cur_open_at_index_side(true, &index, BTR_SEARCH_LEAF,
&btr_cur, 0, &mtr);
ut_ad(page_cur_is_before_first(btr_cur_get_page_cur(&btr_cur)));
ut_ad(page_is_leaf(btr_cur_get_page(&btr_cur)));
ut_ad(!page_has_prev(btr_cur_get_page(&btr_cur)));
ut_ad(!buf_block_get_page_zip(btr_cur_get_block(&btr_cur)));
}
/** Set is_instant() before instant_column(). /** Set is_instant() before instant_column().
@param[in] old previous table definition @param[in] old previous table definition
@param[in] col_map map from old.cols[] and old.v_cols[] to this @param[in] col_map map from old.cols[] and old.v_cols[] to this
...@@ -152,10 +176,16 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old, ...@@ -152,10 +176,16 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
DBUG_ASSERT(old.supports_instant()); DBUG_ASSERT(old.supports_instant());
DBUG_ASSERT(supports_instant()); DBUG_ASSERT(supports_instant());
const dict_index_t& oindex = *old.indexes.start; dict_index_t& oindex = *old.indexes.start;
dict_index_t& index = *indexes.start; dict_index_t& index = *indexes.start;
first_alter_pos = 0; first_alter_pos = 0;
mtr_t mtr;
mtr.start();
/* Prevent oindex.n_core_fields and others, so that
purge cannot invoke dict_index_t::clear_instant_alter(). */
instant_metadata_lock(oindex, mtr);
for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols; for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols;
i++) { i++) {
if (col_map[i] != i) { if (col_map[i] != i) {
...@@ -315,6 +345,7 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old, ...@@ -315,6 +345,7 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
DBUG_ASSERT(n_dropped() >= old.n_dropped()); DBUG_ASSERT(n_dropped() >= old.n_dropped());
DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields); DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields);
DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes); DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes);
mtr.commit();
} }
...@@ -335,8 +366,15 @@ inline void dict_index_t::instant_add_field(const dict_index_t& instant) ...@@ -335,8 +366,15 @@ inline void dict_index_t::instant_add_field(const dict_index_t& instant)
DBUG_ASSERT(n_uniq == instant.n_uniq); DBUG_ASSERT(n_uniq == instant.n_uniq);
DBUG_ASSERT(instant.n_fields >= n_fields); DBUG_ASSERT(instant.n_fields >= n_fields);
DBUG_ASSERT(instant.n_nullable >= n_nullable); DBUG_ASSERT(instant.n_nullable >= n_nullable);
DBUG_ASSERT(instant.n_core_fields == n_core_fields); /* dict_table_t::prepare_instant() initialized n_core_fields
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes); to be equal. However, after that purge could have emptied the
table and invoked dict_index_t::clear_instant_alter(). */
DBUG_ASSERT(instant.n_core_fields <= n_core_fields);
DBUG_ASSERT(instant.n_core_null_bytes <= n_core_null_bytes);
DBUG_ASSERT(instant.n_core_fields == n_core_fields
|| (!is_instant() && instant.is_instant()));
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes
|| (!is_instant() && instant.is_instant()));
/* instant will have all fields (including ones for columns /* instant will have all fields (including ones for columns
that have been or are being instantly dropped) in the same position that have been or are being instantly dropped) in the same position
...@@ -627,7 +665,13 @@ inline void dict_table_t::rollback_instant( ...@@ -627,7 +665,13 @@ inline void dict_table_t::rollback_instant(
const ulint* col_map) const ulint* col_map)
{ {
ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
dict_index_t* index = indexes.start; dict_index_t* index = indexes.start;
mtr_t mtr;
mtr.start();
/* Prevent concurrent execution of dict_index_t::clear_instant_alter()
by acquiring a latch on the leftmost leaf page. */
instant_metadata_lock(*index, mtr);
/* index->is_instant() does not necessarily hold here, because /* index->is_instant() does not necessarily hold here, because
the table may have been emptied */ the table may have been emptied */
DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS); DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
...@@ -667,6 +711,7 @@ inline void dict_table_t::rollback_instant( ...@@ -667,6 +711,7 @@ inline void dict_table_t::rollback_instant(
n_t_def = n_t_cols = n_cols + n_v_cols; n_t_def = n_t_cols = n_cols + n_v_cols;
index->fields = old_fields; index->fields = old_fields;
mtr.commit();
while ((index = dict_table_get_next_index(index)) != NULL) { while ((index = dict_table_get_next_index(index)) != NULL) {
if (index->to_be_dropped) { if (index->to_be_dropped) {
...@@ -924,6 +969,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx ...@@ -924,6 +969,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
first_alter_pos); first_alter_pos);
} }
/** Adjust table metadata for instant ADD/DROP/reorder COLUMN. */
void instant_column()
{
DBUG_ASSERT(is_instant());
DBUG_ASSERT(old_n_fields
== old_table->indexes.start->n_fields);
old_table->instant_column(*instant_table, col_map);
}
/** Revert prepare_instant() if the transaction is rolled back. */ /** Revert prepare_instant() if the transaction is rolled back. */
void rollback_instant() void rollback_instant()
{ {
...@@ -5287,13 +5341,24 @@ static bool innobase_instant_try( ...@@ -5287,13 +5341,24 @@ static bool innobase_instant_try(
dict_table_t* user_table = ctx->old_table; dict_table_t* user_table = ctx->old_table;
dict_index_t* index = dict_table_get_first_index(user_table); dict_index_t* index = dict_table_get_first_index(user_table);
uint n_old_fields = index->n_fields; mtr_t mtr;
mtr.start();
/* Prevent purge from calling dict_index_t::clear_instant_alter(),
to protect index->n_core_fields, index->table->instant and others
from changing during ctx->instant_column(). */
instant_metadata_lock(*index, mtr);
const unsigned n_old_fields = index->n_fields;
const dict_col_t* old_cols = user_table->cols; const dict_col_t* old_cols = user_table->cols;
DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols); DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols);
user_table->instant_column(*ctx->instant_table, ctx->col_map); ctx->instant_column();
DBUG_ASSERT(index->n_fields >= n_old_fields); DBUG_ASSERT(index->n_fields >= n_old_fields);
/* Release the page latch. Between this and the next
btr_pcur_open_at_index_side(), data fields such as
index->n_core_fields and index->table->instant could change,
but we would handle that in empty_table: below. */
mtr.commit();
/* The table may have been emptied and may have lost its /* The table may have been emptied and may have lost its
'instantness' during this ALTER TABLE. */ 'instantness' during this ALTER TABLE. */
...@@ -5441,7 +5506,6 @@ static bool innobase_instant_try( ...@@ -5441,7 +5506,6 @@ static bool innobase_instant_try(
memset(roll_ptr, 0, sizeof roll_ptr); memset(roll_ptr, 0, sizeof roll_ptr);
dtuple_t* entry = index->instant_metadata(*row, ctx->heap); dtuple_t* entry = index->instant_metadata(*row, ctx->heap);
mtr_t mtr;
mtr.start(); mtr.start();
index->set_modified(mtr); index->set_modified(mtr);
btr_pcur_t pcur; btr_pcur_t pcur;
......
...@@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low( ...@@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low(
purge_node_t* node, /*!< in/out: row purge node */ purge_node_t* node, /*!< in/out: row purge node */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{ {
dict_index_t* index;
bool success = true;
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
ulint* offsets;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S) ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|| node->vcol_info.is_used()); || node->vcol_info.is_used());
index = dict_table_get_first_index(node->table); dict_index_t* index = dict_table_get_first_index(node->table);
log_free_check(); log_free_check();
mtr_start(&mtr);
index->set_modified(mtr); mtr_t mtr;
mtr.start();
if (!row_purge_reposition_pcur(mode, node, &mtr)) { if (!row_purge_reposition_pcur(mode, node, &mtr)) {
/* The record was already removed. */ /* The record was already removed. */
goto func_exit; mtr.commit();
return true;
} }
rec = btr_pcur_get_rec(&node->pcur); ut_d(const bool was_instant = !!index->table->instant);
index->set_modified(mtr);
offsets = rec_get_offsets( rec_t* rec = btr_pcur_get_rec(&node->pcur);
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
mem_heap_t* heap = NULL;
ulint* offsets = rec_get_offsets(
rec, index, offsets_, true, ULINT_UNDEFINED, &heap); rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
bool success = true;
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) { if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
/* Someone else has modified the record later: do not remove */ /* Someone else has modified the record later: do not remove */
...@@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low( ...@@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low(
} }
} }
/* Prove that dict_index_t::clear_instant_alter() was
not called with index->table->instant != NULL. */
ut_ad(!was_instant || index->table->instant);
func_exit: func_exit:
if (heap) { if (heap) {
mem_heap_free(heap); mem_heap_free(heap);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment