Commit f545e3cf authored by Marko Mäkelä

MDEV-15562: Remove dict_table_t::rollback_instant(unsigned n)

On the rollback of changes to SYS_COLUMNS, MDEV-15562 will
break the assumption that the only instantaneous changes to columns
are the addition to the end of the column list.

The function dict_table_t::rollback_instant(unsigned n)
is inherently incompatible with instantly dropping or reordering
columns.

When a change to SYS_COLUMNS is rolled back, we must simply evict
the affected table definition, at the end of the rollback. We cannot
free the table object immediately, because the current transaction
that is being rolled back may be holding a lock on the table and
its metadata record.

dict_table_remove_from_cache_low(): Replaced
by dict_table_remove_from_cache().

dict_table_remove_from_cache(): Add a third parameter keep=false,
so that the table can be freed by the caller.

trx_lock_t::evicted_tables: List of tables on which trx_t::evict_table()
was invoked.

trx_t::evict_table(): Evict a table definition during rollback.

trx_commit_in_memory(): Empty the trx->lock.evicted_tables list
after the locks were released, by freeing the table objects.

row_undo_ins_remove_clust_rec(), row_undo_mod_clust_low():
Invoke trx_t::evict_table() on the affected table if a change to
SYS_COLUMNS is being rolled back.
parent f58a0b3a
...@@ -1435,7 +1435,7 @@ dict_make_room_in_cache( ...@@ -1435,7 +1435,7 @@ dict_make_room_in_cache(
ut_ad(0); ut_ad(0);
} }
};); };);
dict_table_remove_from_cache_low(table, TRUE); dict_table_remove_from_cache(table, true);
++n_evicted; ++n_evicted;
} }
...@@ -1969,14 +1969,11 @@ dict_table_change_id_in_cache( ...@@ -1969,14 +1969,11 @@ dict_table_change_id_in_cache(
ut_fold_ull(table->id), table); ut_fold_ull(table->id), table);
} }
/**********************************************************************//** /** Evict a table definition from the InnoDB data dictionary cache.
Removes a table object from the dictionary cache. */ @param[in,out] table cached table definition to be evicted
void @param[in] lru whether this is part of least-recently-used eviction
dict_table_remove_from_cache_low( @param[in] keep whether to keep (not free) the object */
/*=============================*/ void dict_table_remove_from_cache(dict_table_t* table, bool lru, bool keep)
dict_table_t* table, /*!< in, own: table */
ibool lru_evict) /*!< in: TRUE if table being evicted
to make room in the table LRU list */
{ {
dict_foreign_t* foreign; dict_foreign_t* foreign;
dict_index_t* index; dict_index_t* index;
...@@ -2009,7 +2006,7 @@ dict_table_remove_from_cache_low( ...@@ -2009,7 +2006,7 @@ dict_table_remove_from_cache_low(
index != NULL; index != NULL;
index = UT_LIST_GET_LAST(table->indexes)) { index = UT_LIST_GET_LAST(table->indexes)) {
dict_index_remove_from_cache_low(table, index, lru_evict); dict_index_remove_from_cache_low(table, index, lru);
} }
/* Remove table from the hash tables of tables */ /* Remove table from the hash tables of tables */
...@@ -2031,7 +2028,7 @@ dict_table_remove_from_cache_low( ...@@ -2031,7 +2028,7 @@ dict_table_remove_from_cache_low(
ut_ad(dict_lru_validate()); ut_ad(dict_lru_validate());
if (lru_evict && table->drop_aborted) { if (lru && table->drop_aborted) {
/* When evicting the table definition, /* When evicting the table definition,
drop the orphan indexes from the data dictionary drop the orphan indexes from the data dictionary
and free the index pages. */ and free the index pages. */
...@@ -2056,17 +2053,9 @@ dict_table_remove_from_cache_low( ...@@ -2056,17 +2053,9 @@ dict_table_remove_from_cache_low(
UT_DELETE(table->vc_templ); UT_DELETE(table->vc_templ);
} }
dict_mem_table_free(table); if (!keep) {
} dict_mem_table_free(table);
}
/**********************************************************************//**
Removes a table object from the dictionary cache. */
void
dict_table_remove_from_cache(
/*=========================*/
dict_table_t* table) /*!< in, own: table */
{
dict_table_remove_from_cache_low(table, FALSE);
} }
/****************************************************************//** /****************************************************************//**
......
...@@ -1346,6 +1346,7 @@ dict_table_t::rollback_instant( ...@@ -1346,6 +1346,7 @@ dict_table_t::rollback_instant(
DBUG_ASSERT(n_cols >= old_n_cols); DBUG_ASSERT(n_cols >= old_n_cols);
DBUG_ASSERT(n_cols == n_def); DBUG_ASSERT(n_cols == n_def);
DBUG_ASSERT(index->n_def == index->n_fields); DBUG_ASSERT(index->n_def == index->n_fields);
DBUG_ASSERT(index->n_core_fields <= index->n_fields);
const unsigned n_remove = n_cols - old_n_cols; const unsigned n_remove = n_cols - old_n_cols;
...@@ -1405,81 +1406,6 @@ dict_table_t::rollback_instant( ...@@ -1405,81 +1406,6 @@ dict_table_t::rollback_instant(
} while ((index = dict_table_get_next_index(index)) != NULL); } while ((index = dict_table_get_next_index(index)) != NULL);
} }
/** Trim the instantly added columns when an insert into SYS_COLUMNS
is rolled back during ALTER TABLE or recovery.
Removes the user columns that follow the first n from the cached table
definition: shrinks the first index by the removed fields, moves the
system column names (DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR) and the
DATA_N_SYS_COLS trailing dict_col_t entries down over the removed ones,
and re-points the index fields that refer to system columns.
The caller must hold dict_sys->mutex (asserted on entry).
@param[in] n number of surviving non-system columns */
void dict_table_t::rollback_instant(unsigned n)
{
ut_ad(mutex_own(&dict_sys->mutex));
/* First index of the table; presumably the clustered index, since
dict_index_is_auto_gen_clust() is applied to it below. */
dict_index_t* index = indexes.start;
DBUG_ASSERT(index->is_instant());
DBUG_ASSERT(index->n_def == index->n_fields);
DBUG_ASSERT(n_cols == n_def);
DBUG_ASSERT(n >= index->n_uniq);
DBUG_ASSERT(n_cols > n + DATA_N_SYS_COLS);
/* Number of user columns to remove: everything between the first n
columns and the trailing DATA_N_SYS_COLS system columns. */
const unsigned n_remove = n_cols - n - DATA_N_SYS_COLS;
/* names: start of the name of column n, the first name to be removed.
sys: advanced past n_remove NUL-terminated names, so that it points to
the system column names (checked by the memcmp() assertion below). */
char* names = const_cast<char*>(dict_table_get_col_name(this, n));
const char* sys = names;
for (unsigned i = n_remove; i--; ) {
sys += strlen(sys) + 1;
}
static const char system[] = "DB_ROW_ID\0DB_TRX_ID\0DB_ROLL_PTR";
DBUG_ASSERT(!memcmp(sys, system, sizeof system));
/* Adjust index->n_nullable for every nullable column among the last
n_remove fields of the index, which are dropped right after. */
for (unsigned i = index->n_fields - n_remove; i < index->n_fields;
i++) {
if (index->fields[i].col->is_nullable()) {
index->n_nullable--;
}
}
index->n_fields -= n_remove;
index->n_def = index->n_fields;
/* Slide the system column names and the system column objects down
over the removed ones; memmove() is used because source and
destination may overlap. */
memmove(names, sys, sizeof system);
memmove(cols + n, cols + n_cols - DATA_N_SYS_COLS,
DATA_N_SYS_COLS * sizeof *cols);
/* Shrink all column counters by the number of removed columns. */
n_cols -= n_remove;
n_def = n_cols;
n_t_cols -= n_remove;
n_t_def -= n_remove;
/* NOTE(review): inside the body i takes the values
DATA_N_SYS_COLS - 1 down to 0, so this touches
cols[n_cols - DATA_N_SYS_COLS + 1] .. cols[n_cols] and decrements
each ind by 1 regardless of n_remove. Confirm that this is the
intended range (the relocated system columns start at cols[n]) and
that the function is only meant to be used with n_remove == 1. */
for (unsigned i = DATA_N_SYS_COLS; i--; ) {
cols[n_cols - i].ind--;
}
if (dict_index_is_auto_gen_clust(index)) {
/* Auto-generated clustered key: the first three fields of the index
are re-pointed to DB_ROW_ID, DB_TRX_ID and DB_ROLL_PTR. */
DBUG_ASSERT(index->n_uniq == 1);
dict_field_t* field = index->fields;
/* NOTE(review): sys still holds the address that the system column
names had before the memmove() above; the names now start at
`names`. Confirm whether `names` was intended for the field names
assigned from sys here and below. */
field->name = sys;
field->col = dict_table_get_sys_col(this, DATA_ROW_ID);
field++;
field->name = sys + sizeof "DB_ROW_ID";
field->col = dict_table_get_sys_col(this, DATA_TRX_ID);
field++;
field->name = sys + sizeof "DB_ROW_ID\0DB_TRX_ID";
field->col = dict_table_get_sys_col(this, DATA_ROLL_PTR);
/* Replace the DB_ROW_ID column in secondary indexes.
The col pointer of the last field of each secondary index is asserted
to designate the DB_TRX_ID column object at this point and is stepped
back by one dict_col_t; presumably that is the relocated DB_ROW_ID
entry, which would hold only for n_remove == 1 (TODO confirm). */
while ((index = dict_table_get_next_index(index)) != NULL) {
field = &index->fields[index->n_fields - 1];
DBUG_ASSERT(field->col->mtype == DATA_SYS);
DBUG_ASSERT(field->col->prtype
== DATA_NOT_NULL + DATA_TRX_ID);
field->col--;
field->name = sys;
}
return;
}
/* Otherwise the two fields that follow the n_uniq key fields of the
first index are re-pointed to DB_TRX_ID and DB_ROLL_PTR. */
dict_field_t* field = &index->fields[index->n_uniq];
field->name = sys + sizeof "DB_ROW_ID";
field->col = dict_table_get_sys_col(this, DATA_TRX_ID);
field++;
field->name = sys + sizeof "DB_ROW_ID\0DB_TRX_ID";
field->col = dict_table_get_sys_col(this, DATA_ROLL_PTR);
}
/** Check if record in clustered index is historical row. /** Check if record in clustered index is historical row.
@param[in] rec clustered row @param[in] rec clustered row
@param[in] offsets offsets @param[in] offsets offsets
......
...@@ -17212,7 +17212,7 @@ innodb_internal_table_validate( ...@@ -17212,7 +17212,7 @@ innodb_internal_table_validate(
DBUG_EXECUTE_IF("innodb_evict_autoinc_table", DBUG_EXECUTE_IF("innodb_evict_autoinc_table",
mutex_enter(&dict_sys->mutex); mutex_enter(&dict_sys->mutex);
dict_table_remove_from_cache_low(user_table, TRUE); dict_table_remove_from_cache(user_table, true);
mutex_exit(&dict_sys->mutex); mutex_exit(&dict_sys->mutex);
); );
} }
......
...@@ -374,21 +374,12 @@ dict_table_add_system_columns( ...@@ -374,21 +374,12 @@ dict_table_add_system_columns(
dict_table_t* table, /*!< in/out: table */ dict_table_t* table, /*!< in/out: table */
mem_heap_t* heap) /*!< in: temporary heap */ mem_heap_t* heap) /*!< in: temporary heap */
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
/**********************************************************************//** /** Evict a table definition from the InnoDB data dictionary cache.
Removes a table object from the dictionary cache. */ @param[in,out] table cached table definition to be evicted
void @param[in] lru whether this is part of least-recently-used eviction
dict_table_remove_from_cache( @param[in] keep whether to keep (not free) the object */
/*=========================*/ void dict_table_remove_from_cache(dict_table_t* table, bool lru = false,
dict_table_t* table) /*!< in, own: table */ bool keep = false)
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
Removes a table object from the dictionary cache. */
void
dict_table_remove_from_cache_low(
/*=============================*/
dict_table_t* table, /*!< in, own: table */
ibool lru_evict) /*!< in: TRUE if table being evicted
to make room in the table LRU list */
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
/**********************************************************************//** /**********************************************************************//**
Renames a table object. Renames a table object.
......
...@@ -1551,11 +1551,6 @@ struct dict_table_t { ...@@ -1551,11 +1551,6 @@ struct dict_table_t {
dict_col_t* old_cols, dict_col_t* old_cols,
const char* old_col_names); const char* old_col_names);
/** Trim the instantly added columns when an insert into SYS_COLUMNS
is rolled back during ALTER TABLE or recovery.
@param[in] n number of surviving non-system columns */
void rollback_instant(unsigned n);
/** Add the table definition to the data dictionary cache */ /** Add the table definition to the data dictionary cache */
void add_to_cache(); void add_to_cache();
......
...@@ -482,6 +482,7 @@ Check transaction state */ ...@@ -482,6 +482,7 @@ Check transaction state */
ut_ad(!(t)->read_view.is_open()); \ ut_ad(!(t)->read_view.is_open()); \
ut_ad((t)->lock.wait_thr == NULL); \ ut_ad((t)->lock.wait_thr == NULL); \
ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \ ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \
ut_ad(UT_LIST_GET_LEN((t)->lock.evicted_tables) == 0); \
ut_ad((t)->dict_operation == TRX_DICT_OP_NONE); \ ut_ad((t)->dict_operation == TRX_DICT_OP_NONE); \
} while(0) } while(0)
...@@ -606,6 +607,9 @@ struct trx_lock_t { ...@@ -606,6 +607,9 @@ struct trx_lock_t {
lock_list table_locks; /*!< All table locks requested by this lock_list table_locks; /*!< All table locks requested by this
transaction, including AUTOINC locks */ transaction, including AUTOINC locks */
/** List of pending trx_t::evict_table() */
UT_LIST_BASE_NODE_T(dict_table_t) evicted_tables;
bool cancel; /*!< true if the transaction is being bool cancel; /*!< true if the transaction is being
rolled back either via deadlock rolled back either via deadlock
detection or due to lock timeout. The detection or due to lock timeout. The
...@@ -1112,6 +1116,10 @@ struct trx_t { ...@@ -1112,6 +1116,10 @@ struct trx_t {
return(assign_temp_rseg()); return(assign_temp_rseg());
} }
/** Evict a table definition due to the rollback of ALTER TABLE.
@param[in] table_id table identifier */
void evict_table(table_id_t table_id);
bool is_referenced() bool is_referenced()
{ {
......
...@@ -140,10 +140,11 @@ row_undo_ins_remove_clust_rec( ...@@ -140,10 +140,11 @@ row_undo_ins_remove_clust_rec(
break; break;
case DICT_COLUMNS_ID: case DICT_COLUMNS_ID:
/* This is rolling back an INSERT into SYS_COLUMNS. /* This is rolling back an INSERT into SYS_COLUMNS.
If it was part of an instant ADD COLUMN operation, we If it was part of an instant ALTER TABLE operation, we
must modify the table definition. At this point, any must evict the table definition, so that it can be
corresponding operation to the metadata record will have reloaded after the dictionary operation has been
been rolled back. */ completed. At this point, any corresponding operation
to the metadata record will have been rolled back. */
ut_ad(!online); ut_ad(!online);
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH); ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
...@@ -158,33 +159,7 @@ row_undo_ins_remove_clust_rec( ...@@ -158,33 +159,7 @@ row_undo_ins_remove_clust_rec(
if (len != 8) { if (len != 8) {
break; break;
} }
const table_id_t table_id = mach_read_from_8(data); node->trx->evict_table(mach_read_from_8(data));
data = rec_get_nth_field_old(rec, DICT_FLD__SYS_COLUMNS__POS,
&len);
if (len != 4) {
break;
}
const unsigned pos = mach_read_from_4(data);
if (pos == 0 || pos >= (1U << 16)) {
break;
}
dict_table_t* table = dict_table_open_on_id(
table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
if (!table) {
break;
}
dict_index_t* index = dict_table_get_first_index(table);
if (index && index->is_instant()
&& DATA_N_SYS_COLS + 1 + pos == table->n_cols) {
/* This is the rollback of an instant ADD COLUMN.
Remove the column from the dictionary cache,
but keep the system columns. */
table->rollback_instant(pos);
}
dict_table_close(table, true, false);
} }
if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) { if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
......
...@@ -147,6 +147,20 @@ row_undo_mod_clust_low( ...@@ -147,6 +147,20 @@ row_undo_mod_clust_low(
ut_a(!dummy_big_rec); ut_a(!dummy_big_rec);
} }
if (err == DB_SUCCESS
&& btr_cur_get_index(btr_cur)->table->id == DICT_COLUMNS_ID) {
/* This is rolling back an UPDATE or DELETE on SYS_COLUMNS.
If it was part of an instant ALTER TABLE operation, we
must evict the table definition, so that it can be
reloaded after the dictionary operation has been
completed. At this point, any corresponding operation
to the metadata record will have been rolled back. */
const dfield_t& table_id = *dtuple_get_nth_field(node->row, 0);
ut_ad(dfield_get_len(&table_id) == 8);
node->trx->evict_table(mach_read_from_8(static_cast<byte*>(
table_id.data)));
}
return(err); return(err);
} }
......
...@@ -214,6 +214,9 @@ struct TrxFactory { ...@@ -214,6 +214,9 @@ struct TrxFactory {
lock_trx_lock_list_init(&trx->lock.trx_locks); lock_trx_lock_list_init(&trx->lock.trx_locks);
UT_LIST_INIT(trx->lock.evicted_tables,
&dict_table_t::table_LRU);
UT_LIST_INIT( UT_LIST_INIT(
trx->trx_savepoints, trx->trx_savepoints,
&trx_named_savept_t::trx_savepoints); &trx_named_savept_t::trx_savepoints);
...@@ -238,6 +241,7 @@ struct TrxFactory { ...@@ -238,6 +241,7 @@ struct TrxFactory {
} }
ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
ut_ad(UT_LIST_GET_LEN(trx->lock.evicted_tables) == 0);
UT_DELETE(trx->xid); UT_DELETE(trx->xid);
ut_free(trx->detailed_error); ut_free(trx->detailed_error);
...@@ -390,6 +394,7 @@ trx_t *trx_create() ...@@ -390,6 +394,7 @@ trx_t *trx_create()
ut_ad(trx->lock.n_rec_locks == 0); ut_ad(trx->lock.n_rec_locks == 0);
ut_ad(trx->lock.table_cached == 0); ut_ad(trx->lock.table_cached == 0);
ut_ad(trx->lock.rec_cached == 0); ut_ad(trx->lock.rec_cached == 0);
ut_ad(UT_LIST_GET_LEN(trx->lock.evicted_tables) == 0);
#ifdef WITH_WSREP #ifdef WITH_WSREP
trx->wsrep_event = NULL; trx->wsrep_event = NULL;
...@@ -1250,6 +1255,37 @@ trx_update_mod_tables_timestamp( ...@@ -1250,6 +1255,37 @@ trx_update_mod_tables_timestamp(
trx->mod_tables.clear(); trx->mod_tables.clear();
} }
/** Evict a table definition due to the rollback of ALTER TABLE.
Nothing is done if the table is not in the dictionary cache, or if it
is still referenced after the reference taken here has been released.
Otherwise the definition is removed from the cache. If a table lock
still exists on it, the object is kept (not freed) and is added to
lock.evicted_tables so that it can be freed later.
@param[in]	table_id	table identifier */
void trx_t::evict_table(table_id_t table_id)
{
	ut_ad(in_rollback);

	dict_table_t*	cached = dict_table_open_on_id(
		table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);

	if (cached == NULL) {
		/* The definition is not cached; there is nothing to evict. */
		return;
	}

	const bool	released_last = cached->release();

	if (!released_last) {
		/* This must be a DDL operation that is being rolled
		back in an active connection. */
		ut_a(cached->get_ref_count() == 1);
		ut_ad(!is_recovered);
		ut_ad(mysql_thd);
		return;
	}

	/* This table should only be locked by this transaction, if at all. */
	ut_ad(UT_LIST_GET_LEN(cached->locks) <= 1);
	const bool	has_lock = UT_LIST_GET_LEN(cached->locks) != 0;
	ut_ad(!has_lock || UT_LIST_GET_FIRST(cached->locks)->trx == this);

	if (has_lock) {
		/* Keep the object alive while the lock refers to it, and
		remember it for later freeing. */
		dict_table_remove_from_cache(cached, true, true);
		UT_LIST_ADD_FIRST(lock.evicted_tables, cached);
	} else {
		dict_table_remove_from_cache(cached, true, false);
	}
}
/****************************************************************//** /****************************************************************//**
Commits a transaction in memory. */ Commits a transaction in memory. */
static static
...@@ -1315,9 +1351,16 @@ trx_commit_in_memory( ...@@ -1315,9 +1351,16 @@ trx_commit_in_memory(
trx_update_mod_tables_timestamp(trx); trx_update_mod_tables_timestamp(trx);
MONITOR_INC(MONITOR_TRX_RW_COMMIT); MONITOR_INC(MONITOR_TRX_RW_COMMIT);
} }
while (dict_table_t* table = UT_LIST_GET_FIRST(
trx->lock.evicted_tables)) {
UT_LIST_REMOVE(trx->lock.evicted_tables, table);
dict_mem_table_free(table);
}
} }
ut_ad(!trx->rsegs.m_redo.undo); ut_ad(!trx->rsegs.m_redo.undo);
ut_ad(UT_LIST_GET_LEN(trx->lock.evicted_tables) == 0);
if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) { if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) {
mutex_enter(&rseg->mutex); mutex_enter(&rseg->mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment