Commit 8ea923f5 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24818: Optimize multi-statement INSERT into an empty table

If the user "opts in" (as in the parent
commit 92b2a911),
we can optimize multiple INSERT statements to use table-level locking
and undo logging.

There will be a change of behavior:

    CREATE TABLE t(a PRIMARY KEY) ENGINE=InnoDB;
    SET foreign_key_checks=0, unique_checks=0;
    BEGIN; INSERT INTO t SET a=1; INSERT INTO t SET a=1; COMMIT;

will end up with an empty table, because in case of an error,
the entire transaction will be rolled back, instead of rolling
back the failing statement. Previously, the second INSERT statement
would have been logged row by row, and only that second statement
would have been rolled back, leaving the first INSERT intact.

lock_table_x_unlock(), trx_mod_table_time_t::WAS_BULK: Remove.
Because we cannot really support statement rollback in this
optimized mode, we will not optimize the locking. The exclusive
table lock will be held until the end of the transaction.
parent 92b2a911
......@@ -51,3 +51,54 @@ DROP TEMPORARY TABLE t,t2;
ERROR 25006: Cannot execute statement in a READ ONLY transaction
SET tx_read_only=0;
DROP TEMPORARY TABLE t,t2;
#
# MDEV-24818 Optimize multiple INSERT into empty table
#
CREATE TABLE t1(f1 INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
INSERT INTO t1 VALUES (4),(5),(6);
ERROR 23000: Duplicate entry '5' for key 'PRIMARY'
COMMIT;
SELECT * FROM t1;
f1
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
SAVEPOINT a;
INSERT INTO t1 VALUES (4),(5),(6);
ERROR 23000: Duplicate entry '5' for key 'PRIMARY'
ROLLBACK TO SAVEPOINT a;
COMMIT;
SELECT * FROM t1;
f1
5
6
7
DROP TABLE t1;
SET foreign_key_checks=1;
CREATE TABLE t1(f1 INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
INSERT INTO t1 VALUES (4),(5),(6);
ERROR 23000: Duplicate entry '5' for key 'PRIMARY'
COMMIT;
SELECT * FROM t1;
f1
5
6
7
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
ERROR 23000: Duplicate entry '5' for key 'PRIMARY'
SAVEPOINT a;
INSERT INTO t1 VALUES (4),(5),(6);
ERROR 23000: Duplicate entry '5' for key 'PRIMARY'
ROLLBACK TO SAVEPOINT a;
COMMIT;
SELECT * FROM t1;
f1
5
6
7
DROP TABLE t1;
SET foreign_key_checks=0;
......@@ -55,3 +55,47 @@ INSERT INTO t VALUES(0);
DROP TEMPORARY TABLE t,t2;
SET tx_read_only=0;
DROP TEMPORARY TABLE t,t2;
--echo #
--echo # MDEV-24818 Optimize multiple INSERT into empty table
--echo #
CREATE TABLE t1(f1 INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (4),(5),(6);
COMMIT;
SELECT * FROM t1;
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
SAVEPOINT a;
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (4),(5),(6);
ROLLBACK TO SAVEPOINT a;
COMMIT;
SELECT * FROM t1;
DROP TABLE t1;
# Repeat the same with the MDEV-515 test disabled
SET foreign_key_checks=1;
CREATE TABLE t1(f1 INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 VALUES (5),(6),(7);
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (4),(5),(6);
COMMIT;
SELECT * FROM t1;
BEGIN;
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (5),(6),(7);
SAVEPOINT a;
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (4),(5),(6);
ROLLBACK TO SAVEPOINT a;
COMMIT;
SELECT * FROM t1;
DROP TABLE t1;
SET foreign_key_checks=0;
......@@ -3547,12 +3547,11 @@ btr_cur_optimistic_insert(
DATA_TRX_ID_LEN));
} else {
ut_ad(thr->graph->trx->id);
ut_ad(thr->graph->trx->id
ut_ad(thr->graph->trx->bulk_insert
|| thr->graph->trx->id
== trx_read_trx_id(
static_cast<const byte*>(
trx_id->data))
|| static_cast<ins_node_t*>(
thr->run_node)->bulk_insert);
trx_id->data)));
}
}
#endif
......
......@@ -3062,9 +3062,6 @@ ha_innobase::reset_template(void)
m_prebuilt->pk_filter = NULL;
m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
}
if (ins_node_t* node = m_prebuilt->ins_node) {
node->bulk_insert = false;
}
}
/*****************************************************************//**
......@@ -3119,6 +3116,7 @@ ha_innobase::init_table_handle_for_HANDLER(void)
m_prebuilt->used_in_HANDLER = TRUE;
reset_template();
m_prebuilt->trx->bulk_insert = false;
}
#ifdef WITH_INNODB_DISALLOW_WRITES
......@@ -15091,9 +15089,7 @@ ha_innobase::extra(
shared lock instead of an exclusive lock. */
stmt_boundary:
trx->end_bulk_insert(*m_prebuilt->table);
if (ins_node_t* node = m_prebuilt->ins_node) {
node->bulk_insert = false;
}
trx->bulk_insert = false;
break;
case HA_EXTRA_NO_KEYREAD:
m_prebuilt->read_just_key = 0;
......@@ -15109,12 +15105,22 @@ ha_innobase::extra(
goto stmt_boundary;
case HA_EXTRA_NO_IGNORE_DUP_KEY:
trx->duplicates &= ~TRX_DUP_IGNORE;
if (trx->is_bulk_insert()) {
/* Allow a subsequent INSERT into an empty table
if !unique_checks && !foreign_key_checks. */
break;
}
goto stmt_boundary;
case HA_EXTRA_WRITE_CAN_REPLACE:
trx->duplicates |= TRX_DUP_REPLACE;
goto stmt_boundary;
case HA_EXTRA_WRITE_CANNOT_REPLACE:
trx->duplicates &= ~TRX_DUP_REPLACE;
if (trx->is_bulk_insert()) {
/* Allow a subsequent INSERT into an empty table
if !unique_checks && !foreign_key_checks. */
break;
}
goto stmt_boundary;
case HA_EXTRA_BEGIN_ALTER_COPY:
m_prebuilt->table->skip_alter_undo = 1;
......@@ -15192,17 +15198,34 @@ ha_innobase::start_stmt(
/* Reset the AUTOINC statement level counter for multi-row INSERTs. */
trx->n_autoinc_rows = 0;
m_prebuilt->sql_stat_start = TRUE;
const auto sql_command = thd_sql_command(thd);
m_prebuilt->hint_need_to_fetch_extra_cols = 0;
reset_template();
trx->end_bulk_insert(*m_prebuilt->table);
switch (sql_command) {
case SQLCOM_INSERT:
case SQLCOM_INSERT_SELECT:
if (trx->is_bulk_insert()) {
/* Allow a subsequent INSERT into an empty table
if !unique_checks && !foreign_key_checks. */
break;
}
/* fall through */
default:
trx->end_bulk_insert(*m_prebuilt->table);
m_prebuilt->sql_stat_start = TRUE;
if (!trx->bulk_insert) {
break;
}
trx->bulk_insert = false;
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
}
if (m_prebuilt->table->is_temporary()
&& m_mysql_has_locked
&& m_prebuilt->select_lock_type == LOCK_NONE) {
dberr_t error;
switch (thd_sql_command(thd)) {
switch (sql_command) {
case SQLCOM_INSERT:
case SQLCOM_UPDATE:
case SQLCOM_DELETE:
......@@ -15210,12 +15233,9 @@ ha_innobase::start_stmt(
init_table_handle_for_HANDLER();
m_prebuilt->select_lock_type = LOCK_X;
m_prebuilt->stored_select_lock_type = LOCK_X;
error = row_lock_table(m_prebuilt);
if (error != DB_SUCCESS) {
int st = convert_error_code_to_mysql(
error, 0, thd);
DBUG_RETURN(st);
if (dberr_t error = row_lock_table(m_prebuilt)) {
DBUG_RETURN(convert_error_code_to_mysql(
error, 0, thd));
}
break;
}
......@@ -15229,9 +15249,9 @@ ha_innobase::start_stmt(
m_prebuilt->select_lock_type = LOCK_X;
} else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
&& thd_sql_command(thd) == SQLCOM_SELECT
&& lock_type == TL_READ) {
} else if (sql_command == SQLCOM_SELECT
&& lock_type == TL_READ
&& trx->isolation_level != TRX_ISO_SERIALIZABLE) {
/* For other than temporary tables, we obtain
no lock for consistent read (plain SELECT). */
......@@ -15339,9 +15359,11 @@ ha_innobase::external_lock(
}
}
const auto sql_command = thd_sql_command(thd);
/* Check for UPDATEs in read-only mode. */
if (srv_read_only_mode) {
switch (thd_sql_command(thd)) {
switch (sql_command) {
case SQLCOM_CREATE_TABLE:
if (lock_type != F_WRLCK) {
break;
......@@ -15368,13 +15390,29 @@ ha_innobase::external_lock(
m_prebuilt->hint_need_to_fetch_extra_cols = 0;
reset_template();
trx->end_bulk_insert(*m_prebuilt->table);
switch (sql_command) {
case SQLCOM_INSERT:
case SQLCOM_INSERT_SELECT:
if (trx->is_bulk_insert()) {
/* Allow a subsequent INSERT into an empty table
if !unique_checks && !foreign_key_checks. */
break;
}
/* fall through */
default:
trx->end_bulk_insert(*m_prebuilt->table);
if (!trx->bulk_insert) {
break;
}
trx->bulk_insert = false;
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
}
switch (m_prebuilt->table->quiesce) {
case QUIESCE_START:
/* Check for FLUSH TABLE t WITH READ LOCK; */
if (!srv_read_only_mode
&& thd_sql_command(thd) == SQLCOM_FLUSH
&& sql_command == SQLCOM_FLUSH
&& lock_type == F_RDLCK) {
if (!m_prebuilt->table->space) {
......@@ -15458,7 +15496,7 @@ ha_innobase::external_lock(
if (m_prebuilt->select_lock_type != LOCK_NONE) {
if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
if (sql_command == SQLCOM_LOCK_TABLES
&& THDVAR(thd, table_locks)
&& thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
&& thd_in_lock_tables(thd)) {
......
......@@ -379,12 +379,6 @@ lock_table(
@param mode LOCK_X or LOCK_IX */
void lock_table_resurrect(dict_table_t *table, trx_t *trx, lock_mode mode);
/** Release a table X lock after rolling back an insert into an empty table
(which was covered by a TRX_UNDO_EMPTY record).
@param table table to be X-unlocked
@param trx transaction */
void lock_table_x_unlock(dict_table_t *table, trx_t *trx);
/** Sets a lock on a table based on the given mode.
@param[in] table table to lock
@param[in,out] trx transaction
......
......@@ -207,9 +207,6 @@ struct ins_node_t
and buffers for sys fields in row allocated */
void vers_update_end(row_prebuilt_t *prebuilt, bool history_row);
bool vers_history_row() const; /* true if 'row' is historical */
/** Bulk insert enabled for this table */
bool bulk_insert= false;
};
/** Create an insert object.
......
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2020, MariaDB Corporation.
Copyright (c) 2015, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -34,14 +34,6 @@ Created 3/26/1996 Heikki Tuuri
extern bool trx_rollback_is_active;
extern const trx_t* trx_roll_crash_recv_trx;
/*******************************************************************//**
Returns a transaction savepoint taken at this point in time.
@return savepoint */
trx_savept_t
trx_savept_take(
/*============*/
trx_t* trx); /*!< in: transaction */
/** Report progress when rolling back a row of a recovered transaction. */
void trx_roll_report_progress();
/*******************************************************************//**
......
......@@ -491,12 +491,8 @@ class trx_mod_table_time_t
covered by a TRX_UNDO_EMPTY record (for the first statement to
insert into an empty table) */
static constexpr undo_no_t BULK= 1ULL << 63;
/** Flag in 'first' to indicate that some operations were
covered by a TRX_UNDO_EMPTY record (for the first statement to
insert into an empty table) */
static constexpr undo_no_t WAS_BULK= 1ULL << 62;
/** First modification of the table, possibly ORed with BULK or WAS_BULK */
/** First modification of the table, possibly ORed with BULK */
undo_no_t first;
/** First modification of a system versioned column (or NONE) */
undo_no_t first_versioned= NONE;
......@@ -525,15 +521,13 @@ class trx_mod_table_time_t
}
/** Notify the start of a bulk insert operation */
void start_bulk_insert() { first|= BULK | WAS_BULK; }
void start_bulk_insert() { first|= BULK; }
/** Notify the end of a bulk insert operation */
void end_bulk_insert() { first&= ~BULK; }
/** @return whether an insert is covered by TRX_UNDO_EMPTY record */
bool is_bulk_insert() const { return first & BULK; }
/** @return whether an insert was covered by TRX_UNDO_EMPTY record */
bool was_bulk_insert() const { return first & WAS_BULK; }
/** Invoked after partial rollback
@param limit number of surviving modified rows (trx_t::undo_no)
......@@ -788,6 +782,8 @@ struct trx_t : ilist_node<> {
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
/** whether an insert into an empty table is active */
bool bulk_insert;
/*------------------------------*/
/* MySQL has a transaction coordinator to coordinate two phase
commit between multiple storage engines and the binary log. When
......@@ -1090,6 +1086,17 @@ struct trx_t : ilist_node<> {
t.second.end_bulk_insert();
}
/** @return whether a bulk insert into empty table is in progress */
bool is_bulk_insert() const
{
if (!bulk_insert || check_unique_secondary || check_foreigns)
return false;
for (const auto& t : mod_tables)
if (t.second.is_bulk_insert())
return true;
return false;
}
private:
/** Assign a rollback segment for modifying temporary tables.
@return the assigned rollback segment */
......
......@@ -3543,40 +3543,6 @@ static void lock_table_dequeue(lock_t *in_lock, bool owns_wait_mutex)
}
}
/** Release a table X lock after rolling back an insert into an empty table
(which was covered by a TRX_UNDO_EMPTY record).
@param table table to be X-unlocked
@param trx transaction */
void lock_table_x_unlock(dict_table_t *table, trx_t *trx)
{
ut_ad(!trx->is_recovered);
for (lock_t*& lock : trx->lock.table_locks)
{
ut_ad(!lock || lock->trx == trx);
if (!lock)
continue;
ut_ad(!lock->is_waiting());
if (lock->type_mode != (LOCK_TABLE | LOCK_X) ||
lock->un_member.tab_lock.table != table)
continue;
lock_sys.rd_lock(SRW_LOCK_CALL);
table->lock_mutex_lock();
mysql_mutex_lock(&lock_sys.wait_mutex);
trx->mutex_lock();
/* There is no need to check for new deadlocks, because only one
exclusive lock may exist on an object at a time. */
lock_table_dequeue(lock, true);
mysql_mutex_unlock(&lock_sys.wait_mutex);
trx->mutex_unlock();
table->lock_mutex_unlock();
lock_sys.rd_unlock();
lock= nullptr;
return;
}
ut_ad("lock not found" == 0);
}
/** Sets a lock on a table based on the given mode.
@param[in] table table to lock
@param[in,out] trx transaction
......
......@@ -2689,7 +2689,7 @@ row_ins_clust_index_entry_low(
#endif /* BTR_CUR_HASH_ADAPT */
}
static_cast<ins_node_t*>(thr->run_node)->bulk_insert = true;
trx->bulk_insert = true;
}
#ifndef DBUG_OFF
......
......@@ -730,7 +730,13 @@ row_mysql_handle_errors(
trx->rollback(savept);
}
/* MySQL will roll back the latest SQL statement */
if (!trx->bulk_insert) {
/* MariaDB will roll back the latest SQL statement */
break;
}
/* MariaDB will roll back the entire transaction. */
trx->bulk_insert = false;
trx->last_sql_stat_start.least_undo_no = 0;
break;
case DB_LOCK_WAIT:
err = lock_wait(thr);
......@@ -1089,10 +1095,10 @@ row_get_prebuilt_insert_row(
&& prebuilt->ins_node->entry_list.size()
== UT_LIST_GET_LEN(table->indexes)) {
if (prebuilt->ins_node->bulk_insert
if (prebuilt->trx->bulk_insert
&& prebuilt->ins_node->trx_id
!= prebuilt->trx->id) {
prebuilt->ins_node->bulk_insert= false;
prebuilt->trx->bulk_insert = false;
}
return(prebuilt->ins_node->row);
......@@ -1362,7 +1368,12 @@ row_insert_for_mysql(
node->vers_update_end(prebuilt, ins_mode == ROW_INS_HISTORICAL);
}
savept = trx_savept_take(trx);
/* Because we now allow multiple INSERT into the same
initially empty table in bulk insert mode, on error we must
roll back to the start of the transaction. For correctness, it
would suffice to roll back to the start of the first insert
into this empty table, but we will keep it simple and efficient. */
savept.least_undo_no = trx->bulk_insert ? 0 : trx->undo_no;
thr = que_fork_get_first_thr(prebuilt->ins_graph);
......@@ -1776,7 +1787,7 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
generated for the table: MySQL does not know anything about
the row id used as the clustered index key */
savept = trx_savept_take(trx);
savept.least_undo_no = trx->undo_no;
thr = que_fork_get_first_thr(prebuilt->upd_graph);
......
......@@ -2012,12 +2012,9 @@ trx_undo_report_row_operation(
/* We already wrote a TRX_UNDO_EMPTY record. */
ut_ad(thr->run_node);
ut_ad(que_node_get_type(thr->run_node) == QUE_NODE_INSERT);
ut_ad(static_cast<ins_node_t*>(thr->run_node)->bulk_insert);
ut_ad(trx->bulk_insert);
return DB_SUCCESS;
} else if (m.second
&& thr->run_node
&& que_node_get_type(thr->run_node) == QUE_NODE_INSERT
&& static_cast<ins_node_t*>(thr->run_node)->bulk_insert) {
} else if (m.second && trx->bulk_insert) {
m.first->second.start_bulk_insert();
} else {
bulk = false;
......
......@@ -135,11 +135,7 @@ inline void trx_t::rollback_low(trx_savept_t *savept)
trx_mod_tables_t::iterator j= i++;
ut_ad(j->second.valid());
if (j->second.rollback(limit))
{
if (j->second.was_bulk_insert() && !j->first->is_temporary())
lock_table_x_unlock(j->first, this);
mod_tables.erase(j);
}
}
MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
}
......@@ -303,11 +299,11 @@ trx_rollback_last_sql_stat_for_mysql(
if (trx->fts_trx != NULL) {
fts_savepoint_rollback_last_stmt(trx);
fts_savepoint_laststmt_refresh(trx);
}
/* The following call should not be needed,
but we play it safe: */
trx_mark_sql_stat_end(trx);
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
trx->end_bulk_insert();
trx->op_info = "";
......@@ -532,7 +528,8 @@ trx_savepoint_for_mysql(
savep->name = mem_strdup(savepoint_name);
savep->savept = trx_savept_take(trx);
savep->savept.least_undo_no = trx->undo_no;
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
savep->mysql_binlog_cache_pos = binlog_cache_pos;
......@@ -569,21 +566,6 @@ trx_release_savepoint_for_mysql(
return(savep != NULL ? DB_SUCCESS : DB_NO_SAVEPOINT);
}
/*******************************************************************//**
Returns a transaction savepoint taken at this point in time.
@return savepoint */
trx_savept_t
trx_savept_take(
/*============*/
trx_t* trx) /*!< in: transaction */
{
trx_savept_t savept;
savept.least_undo_no = trx->undo_no;
return(savept);
}
/*******************************************************************//**
Roll back an active transaction. */
static
......
......@@ -137,6 +137,8 @@ trx_init(
trx->internal = false;
trx->bulk_insert = false;
ut_d(trx->start_file = 0);
ut_d(trx->start_line = 0);
......@@ -1683,12 +1685,17 @@ trx_mark_sql_stat_end(
trx->undo_no = 0;
/* fall through */
case TRX_STATE_ACTIVE:
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
if (trx->fts_trx != NULL) {
fts_savepoint_laststmt_refresh(trx);
}
if (trx->is_bulk_insert()) {
/* Allow a subsequent INSERT into an empty table
if !unique_checks && !foreign_key_checks. */
return;
}
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
trx->end_bulk_insert();
return;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment