Commit 5d3c3b49 authored by Marko Mäkelä

MDEV-15090 Reduce the overhead of writing undo log records

Inside InnoDB, each mini-transaction that generates any redo log records
will acquire log_sys->mutex during mtr_t::commit() in order to copy the
records into the global log_sys->buf for writing into the redo log file.

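For single-row transactions, this incurs quite a bit of overhead.
Previously, we would use two mini-transactions for writing an undo log
record: one for assigning the undo log, and another for writing the
record. Now, a single mini-transaction will do both, writing the record
into the freshly updated undo log page. (Only if the undo record does
not fit in that page will we have to commit and restart the
mini-transaction.)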

trx_undo_assign(): Assign undo log for a persistent transaction,
or return the already assigned one.

trx_undo_assign_low(): Assign undo log for an operation on a
persistent or temporary table.

trx_undo_create(), trx_undo_reuse_cached(): Remove redundant parameters.
Merge the logic from trx_undo_mark_as_dict_operation().
parent 4981f95f
...@@ -252,18 +252,31 @@ ulint ...@@ -252,18 +252,31 @@ ulint
trx_undo_lists_init( trx_undo_lists_init(
/*================*/ /*================*/
trx_rseg_t* rseg); /*!< in: rollback segment memory object */ trx_rseg_t* rseg); /*!< in: rollback segment memory object */
/** Assign an undo log for a persistent transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign(trx_t* trx, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Assign an undo log for a transaction. /** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused. A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction @param[in,out] trx transaction
@param[in] rseg rollback segment @param[in] rseg rollback segment
@param[out] undo the undo log @param[out] undo the undo log
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success @retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS @retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE @retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY @retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */ @retval DB_OUT_OF_MEMORY */
dberr_t dberr_t
trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, mtr_t*mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result)); MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//** /******************************************************************//**
Sets the state of the undo log segment at a transaction finish. Sets the state of the undo log segment at a transaction finish.
......
...@@ -3400,8 +3400,12 @@ row_import_for_mysql( ...@@ -3400,8 +3400,12 @@ row_import_for_mysql(
mutex_enter(&trx->undo_mutex); mutex_enter(&trx->undo_mutex);
/* TODO: Do not write any undo log for the IMPORT cleanup. */ /* TODO: Do not write any undo log for the IMPORT cleanup. */
err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, {
&trx->rsegs.m_redo.undo); mtr_t mtr;
mtr.start();
err = trx_undo_assign(trx, &mtr);
mtr.commit();
}
mutex_exit(&trx->undo_mutex); mutex_exit(&trx->undo_mutex);
......
...@@ -1819,10 +1819,12 @@ row_truncate_table_for_mysql( ...@@ -1819,10 +1819,12 @@ row_truncate_table_for_mysql(
/* Step-6: Truncate operation can be rolled back in case of error /* Step-6: Truncate operation can be rolled back in case of error
till some point. Associate rollback segment to record undo log. */ till some point. Associate rollback segment to record undo log. */
if (!dict_table_is_temporary(table)) { if (!table->is_temporary()) {
mutex_enter(&trx->undo_mutex); mutex_enter(&trx->undo_mutex);
err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, mtr_t mtr;
&trx->rsegs.m_redo.undo); mtr.start();
err = trx_undo_assign(trx, &mtr);
mtr.commit();
mutex_exit(&trx->undo_mutex); mutex_exit(&trx->undo_mutex);
DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log", DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log",
......
...@@ -1901,17 +1901,12 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table) ...@@ -1901,17 +1901,12 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
ut_ad(trx->id); ut_ad(trx->id);
ut_ad(!table->is_temporary()); ut_ad(!table->is_temporary());
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; mtr_t mtr;
trx_undo_t** pundo = &trx->rsegs.m_redo.undo; mtr.start();
mutex_enter(&trx->undo_mutex); mutex_enter(&trx->undo_mutex);
dberr_t err = *pundo dberr_t err = trx_undo_assign(trx, &mtr);
? DB_SUCCESS ut_ad((err == DB_SUCCESS) == (trx->rsegs.m_redo.undo != NULL));
: trx_undo_assign_undo(trx, rseg, pundo); if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
ut_ad((err == DB_SUCCESS) == (*pundo != NULL));
if (trx_undo_t* undo = *pundo) {
mtr_t mtr;
mtr.start(trx);
buf_block_t* block = buf_page_get_gen( buf_block_t* block = buf_page_get_gen(
page_id_t(undo->space, undo->last_page_no), page_id_t(undo->space, undo->last_page_no),
univ_page_size, RW_X_LATCH, univ_page_size, RW_X_LATCH,
...@@ -1934,12 +1929,13 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table) ...@@ -1934,12 +1929,13 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
undo->top_undo_no = trx->undo_no++; undo->top_undo_no = trx->undo_no++;
undo->guess_block = block; undo->guess_block = block;
trx->undo_rseg_space = rseg->space; trx->undo_rseg_space
= trx->rsegs.m_redo.rseg->space;
err = DB_SUCCESS; err = DB_SUCCESS;
break; break;
} else { } else {
mtr.commit(); mtr.commit();
mtr.start(trx); mtr.start();
block = trx_undo_add_page(trx, undo, &mtr); block = trx_undo_add_page(trx, undo, &mtr);
if (!block) { if (!block) {
err = DB_OUT_OF_FILE_SPACE; err = DB_OUT_OF_FILE_SPACE;
...@@ -2006,7 +2002,7 @@ trx_undo_report_row_operation( ...@@ -2006,7 +2002,7 @@ trx_undo_report_row_operation(
mtr.start(); mtr.start();
trx_undo_t** pundo; trx_undo_t** pundo;
trx_rseg_t* rseg; trx_rseg_t* rseg;
const bool is_temp = dict_table_is_temporary(index->table); const bool is_temp = index->table->is_temporary();
if (is_temp) { if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO);
...@@ -2021,9 +2017,9 @@ trx_undo_report_row_operation( ...@@ -2021,9 +2017,9 @@ trx_undo_report_row_operation(
} }
mutex_enter(&trx->undo_mutex); mutex_enter(&trx->undo_mutex);
dberr_t err = *pundo ? DB_SUCCESS : trx_undo_assign_undo( dberr_t err = *pundo
trx, rseg, pundo); ? DB_SUCCESS : trx_undo_assign_low(trx, rseg, pundo, &mtr);
trx_undo_t* undo = *pundo; trx_undo_t* undo = *pundo;
ut_ad((err == DB_SUCCESS) == (undo != NULL)); ut_ad((err == DB_SUCCESS) == (undo != NULL));
if (undo == NULL) { if (undo == NULL) {
...@@ -2051,12 +2047,6 @@ trx_undo_report_row_operation( ...@@ -2051,12 +2047,6 @@ trx_undo_report_row_operation(
cmpl_info, clust_entry, &mtr); cmpl_info, clust_entry, &mtr);
if (UNIV_UNLIKELY(offset == 0)) { if (UNIV_UNLIKELY(offset == 0)) {
/* The record did not fit on the page. We erase the
end segment of the undo log page and write a log
record of it: this is to ensure that in the debug
version the replicate page constructed using the log
records stays identical to the original page */
if (!trx_undo_erase_page_end(undo_page)) { if (!trx_undo_erase_page_end(undo_page)) {
/* The record did not fit on an empty /* The record did not fit on an empty
undo page. Discard the freshly allocated undo page. Discard the freshly allocated
...@@ -2071,8 +2061,8 @@ trx_undo_report_row_operation( ...@@ -2071,8 +2061,8 @@ trx_undo_report_row_operation(
first, because it may be holding lower-level first, because it may be holding lower-level
latches, such as SYNC_FSP and SYNC_FSP_PAGE. */ latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
mtr_commit(&mtr); mtr.commit();
mtr.start(trx); mtr.start();
if (is_temp) { if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO);
} }
...@@ -2132,7 +2122,7 @@ trx_undo_report_row_operation( ...@@ -2132,7 +2122,7 @@ trx_undo_report_row_operation(
/* We have to extend the undo log by one page */ /* We have to extend the undo log by one page */
ut_ad(++loop_count < 2); ut_ad(++loop_count < 2);
mtr.start(trx); mtr.start();
if (is_temp) { if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO);
......
...@@ -1299,9 +1299,6 @@ trx_undo_create( ...@@ -1299,9 +1299,6 @@ trx_undo_create(
/*============*/ /*============*/
trx_t* trx, /*!< in: transaction */ trx_t* trx, /*!< in: transaction */
trx_rseg_t* rseg, /*!< in: rollback segment memory copy */ trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
const XID* xid, /*!< in: X/Open transaction identification*/
trx_undo_t** undo, /*!< out: the new undo log object, undefined trx_undo_t** undo, /*!< out: the new undo log object, undefined
* if did not succeed */ * if did not succeed */
mtr_t* mtr) /*!< in: mtr */ mtr_t* mtr) /*!< in: mtr */
...@@ -1332,17 +1329,36 @@ trx_undo_create( ...@@ -1332,17 +1329,36 @@ trx_undo_create(
page_no = page_get_page_no(undo_page); page_no = page_get_page_no(undo_page);
offset = trx_undo_header_create(undo_page, trx_id, mtr); offset = trx_undo_header_create(undo_page, trx->id, mtr);
trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
*undo = trx_undo_mem_create(rseg, id, trx_id, xid, page_no, offset); *undo = trx_undo_mem_create(rseg, id, trx->id, trx->xid,
page_no, offset);
if (*undo == NULL) { if (*undo == NULL) {
err = DB_OUT_OF_MEMORY; return DB_OUT_OF_MEMORY;
} else if (rseg != trx->rsegs.m_redo.rseg) {
return DB_SUCCESS;
} }
return(err); switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
break;
case TRX_DICT_OP_INDEX:
/* Do not discard the table on recovery. */
trx->table_id = 0;
/* fall through */
case TRX_DICT_OP_TABLE:
(*undo)->table_id = trx->table_id;
(*undo)->dict_operation = TRUE;
mlog_write_ulint(undo_page + offset + TRX_UNDO_DICT_TRANS,
TRUE, MLOG_1BYTE, mtr);
mlog_write_ull(undo_page + offset + TRX_UNDO_TABLE_ID,
trx->table_id, mtr);
}
return DB_SUCCESS;
} }
/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/ /*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
...@@ -1356,9 +1372,6 @@ trx_undo_reuse_cached( ...@@ -1356,9 +1372,6 @@ trx_undo_reuse_cached(
/*==================*/ /*==================*/
trx_t* trx, /*!< in: transaction */ trx_t* trx, /*!< in: transaction */
trx_rseg_t* rseg, /*!< in: rollback segment memory object */ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is used */
const XID* xid, /*!< in: X/Open XA transaction identification */
mtr_t* mtr) /*!< in: mtr */ mtr_t* mtr) /*!< in: mtr */
{ {
trx_undo_t* undo; trx_undo_t* undo;
...@@ -1380,50 +1393,72 @@ trx_undo_reuse_cached( ...@@ -1380,50 +1393,72 @@ trx_undo_reuse_cached(
undo_page = trx_undo_page_get( undo_page = trx_undo_page_get(
page_id_t(undo->space, undo->hdr_page_no), mtr); page_id_t(undo->space, undo->hdr_page_no), mtr);
offset = trx_undo_header_create(undo_page, trx_id, mtr); offset = trx_undo_header_create(undo_page, trx->id, mtr);
trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
return(undo); trx_undo_mem_init_for_reuse(undo, trx->id, trx->xid, offset);
}
/**********************************************************************//**
Marks an undo log header as a header of a data dictionary operation
transaction. */
static
void
trx_undo_mark_as_dict_operation(
/*============================*/
trx_t* trx, /*!< in: dict op transaction */
trx_undo_t* undo, /*!< in: assigned undo log */
mtr_t* mtr) /*!< in: mtr */
{
page_t* hdr_page;
hdr_page = trx_undo_page_get( if (rseg != trx->rsegs.m_redo.rseg) {
page_id_t(undo->space, undo->hdr_page_no), mtr); return undo;
}
switch (trx_get_dict_operation(trx)) { switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE: case TRX_DICT_OP_NONE:
ut_error; return undo;
case TRX_DICT_OP_INDEX: case TRX_DICT_OP_INDEX:
/* Do not discard the table on recovery. */ /* Do not discard the table on recovery. */
undo->table_id = 0; trx->table_id = 0;
break; /* fall through */
case TRX_DICT_OP_TABLE: case TRX_DICT_OP_TABLE:
undo->table_id = trx->table_id; undo->table_id = trx->table_id;
break; undo->dict_operation = TRUE;
mlog_write_ulint(undo_page + offset + TRX_UNDO_DICT_TRANS,
TRUE, MLOG_1BYTE, mtr);
mlog_write_ull(undo_page + offset + TRX_UNDO_TABLE_ID,
trx->table_id, mtr);
} }
mlog_write_ulint(hdr_page + undo->hdr_offset return(undo);
+ TRX_UNDO_DICT_TRANS, }
TRUE, MLOG_1BYTE, mtr);
mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, /** Assign an undo log for a persistent transaction.
undo->table_id, mtr); A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign(trx_t* trx, mtr_t* mtr)
{
dberr_t err = DB_SUCCESS;
ut_ad(mutex_own(&trx->undo_mutex));
ut_ad(mtr->get_log_mode() == MTR_LOG_ALL);
undo->dict_operation = TRUE; if (trx->rsegs.m_redo.undo) {
return DB_SUCCESS;
}
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
mutex_enter(&rseg->mutex);
if (!(trx->rsegs.m_redo.undo= trx_undo_reuse_cached(trx, rseg, mtr))) {
err = trx_undo_create(trx, rseg, &trx->rsegs.m_redo.undo, mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
}
UT_LIST_ADD_FIRST(rseg->undo_list, trx->rsegs.m_redo.undo);
func_exit:
mutex_exit(&rseg->mutex);
return err;
} }
/** Assign an undo log for a transaction. /** Assign an undo log for a transaction.
...@@ -1431,16 +1466,16 @@ A new undo log is created or a cached undo log reused. ...@@ -1431,16 +1466,16 @@ A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction @param[in,out] trx transaction
@param[in] rseg rollback segment @param[in] rseg rollback segment
@param[out] undo the undo log @param[out] undo the undo log
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success @retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS @retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE @retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY @retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */ @retval DB_OUT_OF_MEMORY */
dberr_t dberr_t
trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, mtr_t*mtr)
{ {
const bool is_temp = rseg == trx->rsegs.m_noredo.rseg; const bool is_temp = rseg == trx->rsegs.m_noredo.rseg;
mtr_t mtr;
dberr_t err = DB_SUCCESS; dberr_t err = DB_SUCCESS;
ut_ad(mutex_own(&trx->undo_mutex)); ut_ad(mutex_own(&trx->undo_mutex));
...@@ -1449,12 +1484,9 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) ...@@ -1449,12 +1484,9 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
ut_ad(undo == (is_temp ut_ad(undo == (is_temp
? &trx->rsegs.m_noredo.undo ? &trx->rsegs.m_noredo.undo
: &trx->rsegs.m_redo.undo)); : &trx->rsegs.m_redo.undo));
ut_ad(!*undo);
mtr.start(trx); ut_ad(mtr->get_log_mode()
== (is_temp ? MTR_LOG_NO_REDO : MTR_LOG_ALL));
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
}
mutex_enter(&rseg->mutex); mutex_enter(&rseg->mutex);
...@@ -1464,10 +1496,8 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) ...@@ -1464,10 +1496,8 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
goto func_exit; goto func_exit;
); );
*undo = trx_undo_reuse_cached(trx, rseg, trx->id, trx->xid, &mtr); if (!(*undo= trx_undo_reuse_cached(trx, rseg, mtr))) {
if (*undo == NULL) { err = trx_undo_create(trx, rseg, undo, mtr);
err = trx_undo_create(trx, rseg, trx->id, trx->xid,
undo, &mtr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
goto func_exit; goto func_exit;
} }
...@@ -1475,14 +1505,8 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) ...@@ -1475,14 +1505,8 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
UT_LIST_ADD_FIRST(rseg->undo_list, *undo); UT_LIST_ADD_FIRST(rseg->undo_list, *undo);
if (!is_temp && trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
trx_undo_mark_as_dict_operation(trx, *undo, &mtr);
}
func_exit: func_exit:
mutex_exit(&rseg->mutex); mutex_exit(&rseg->mutex);
mtr.commit();
return(err); return(err);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment