Commit 4c800623 authored by unknown's avatar unknown

Merge MWL#163 into mariadb-5.2-rpl

parents 31ed214a d4fa9ef0
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (k INT NOT NULL, a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, PRIMARY KEY(k)) ENGINE=InnoDB;
INSERT INTO t1 (k, a, b, c) VALUES (1, 0, 0, 0);
INSERT INTO t1 (k, a, b, c) VALUES (2, 0, 0, 0);
INSERT INTO t1 (k, a, b, c) VALUES (3, 0, 0, 0);
INSERT INTO t1 (k, a, b, c) VALUES (4, 0, 0, 0);
RESET MASTER;
SET DEBUG_SYNC= 'RESET';
# Connection c1
BEGIN;
UPDATE t1 SET a=10 WHERE k=1;
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
COMMIT;
# Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
BEGIN;
SELECT * FROM t1 WHERE k=1 FOR UPDATE;
k a b c
1 10 0 0
UPDATE t1 SET a=20 WHERE k=1;
SET DEBUG_SYNC="now SIGNAL c2_committing";
COMMIT;
# Connection c1
BEGIN;
UPDATE t1 SET a=10 WHERE k=2;
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
COMMIT;
# Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
BEGIN;
SELECT * FROM t1 WHERE k=2 FOR UPDATE;
k a b c
2 10 0 0
UPDATE t1 SET a=20 WHERE k=2;
SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
COMMIT;
# Connection c1
# This should warn about DEBUG_SYNC timeout
Warnings:
Warning 1639 debug sync point wait timed out
# Connection c2
SHOW BINLOG EVENTS LIMIT 2,12;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 265 use `test`; UPDATE t1 SET a=10 WHERE k=1
master-bin.000001 265 Xid 1 292 COMMIT /* xid=XX */
master-bin.000001 292 Query 1 360 BEGIN
master-bin.000001 360 Query 1 451 use `test`; UPDATE t1 SET a=20 WHERE k=1
master-bin.000001 451 Xid 1 478 COMMIT /* xid=XX */
master-bin.000001 478 Query 1 546 BEGIN
master-bin.000001 546 Query 1 637 use `test`; UPDATE t1 SET a=10 WHERE k=2
master-bin.000001 637 Xid 1 664 COMMIT /* xid=XX */
master-bin.000001 664 Query 1 732 BEGIN
master-bin.000001 732 Query 1 823 use `test`; UPDATE t1 SET a=20 WHERE k=2
master-bin.000001 823 Xid 1 850 COMMIT /* xid=XX */
# Connection c1
RESET MASTER;
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
UPDATE t1 SET a=10 WHERE k=3;
# Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
SELECT * FROM t1 WHERE k=3 FOR UPDATE;
k a b c
3 10 0 0
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c2_committing";
UPDATE t1 SET a=20 WHERE k=3;
# Connection c1
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
UPDATE t1 SET a=10 WHERE k=4;
# Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
SELECT * FROM t1 WHERE k=4 FOR UPDATE;
k a b c
4 10 0 0
SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
UPDATE t1 SET a=20 WHERE k=4;
# Connection c1
# This should warn about DEBUG_SYNC timeout
Warnings:
Warning 1639 debug sync point wait timed out
# Connection c2
SHOW BINLOG EVENTS LIMIT 1,12;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 106 Query 1 174 BEGIN
master-bin.000001 174 Query 1 265 use `test`; UPDATE t1 SET a=10 WHERE k=3
master-bin.000001 265 Xid 1 292 COMMIT /* xid=XX */
master-bin.000001 292 Query 1 360 BEGIN
master-bin.000001 360 Query 1 451 use `test`; UPDATE t1 SET a=20 WHERE k=3
master-bin.000001 451 Xid 1 478 COMMIT /* xid=XX */
master-bin.000001 478 Query 1 546 BEGIN
master-bin.000001 546 Query 1 637 use `test`; UPDATE t1 SET a=10 WHERE k=4
master-bin.000001 637 Xid 1 664 COMMIT /* xid=XX */
master-bin.000001 664 Query 1 732 BEGIN
master-bin.000001 732 Query 1 823 use `test`; UPDATE t1 SET a=20 WHERE k=4
master-bin.000001 823 Xid 1 850 COMMIT /* xid=XX */
SELECT * FROM t1 ORDER BY k;
k a b c
1 20 0 0
2 20 0 0
3 20 0 0
4 20 0 0
DROP TABLE t1;
SET DEBUG_SYNC= 'RESET';
--source include/have_debug_sync.inc
--source include/have_innodb.inc
--source include/have_log_bin.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (k INT NOT NULL, a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, PRIMARY KEY(k)) ENGINE=InnoDB;
INSERT INTO t1 (k, a, b, c) VALUES (1, 0, 0, 0);
INSERT INTO t1 (k, a, b, c) VALUES (2, 0, 0, 0);
INSERT INTO t1 (k, a, b, c) VALUES (3, 0, 0, 0);
INSERT INTO t1 (k, a, b, c) VALUES (4, 0, 0, 0);
RESET MASTER;
SET DEBUG_SYNC= 'RESET';
# Two transactions A,B that update the same row.
# A releases row locks during the prepare phase, and waits using DEBUG_SYNC.
# B then updates the same row.
# Verify that
# - B's update can proceed while A is waiting for commit, showing that
# locks are released early.
# - B cannot be binlogged before A.
connect(c1,127.0.0.1,root,,test,$MASTER_MYPORT,);
connect(c2,127.0.0.1,root,,test,$MASTER_MYPORT,);
connection c1;
--echo # Connection c1
# First verify that row locks are released early.
BEGIN;
UPDATE t1 SET a=10 WHERE k=1;
# Wait until c2 starts COMMIT, to verify that we release our locks in prepare.
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
send COMMIT;
connection c2;
--echo # Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
BEGIN;
SELECT * FROM t1 WHERE k=1 FOR UPDATE;
UPDATE t1 SET a=20 WHERE k=1;
SET DEBUG_SYNC="now SIGNAL c2_committing";
COMMIT;
connection c1;
--echo # Connection c1
reap;
# Now verify that binlog order is correct.
BEGIN;
UPDATE t1 SET a=10 WHERE k=2;
# This time wait until c2 is binlogged. This should time out, as we must not
# allow c2 to finish commit before c1.
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
send COMMIT;
connection c2;
--echo # Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
BEGIN;
SELECT * FROM t1 WHERE k=2 FOR UPDATE;
UPDATE t1 SET a=20 WHERE k=2;
SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
send COMMIT;
connection c1;
--echo # Connection c1
--echo # This should warn about DEBUG_SYNC timeout
reap;
connection c2;
--echo # Connection c2
reap;
--replace_regex /xid=[0-9]+/xid=XX/
SHOW BINLOG EVENTS LIMIT 2,12;
connection c1;
--echo # Connection c1
# Now the same thing, but using autocommit.
RESET MASTER;
# First verify that row locks are released early.
# Wait until c2 starts COMMIT, to verify that we release our locks in prepare.
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
send UPDATE t1 SET a=10 WHERE k=3;
connection c2;
--echo # Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
SELECT * FROM t1 WHERE k=3 FOR UPDATE;
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c2_committing";
UPDATE t1 SET a=20 WHERE k=3;
connection c1;
--echo # Connection c1
reap;
# Now verify that binlog order is correct, this time with autocommit.
# This time wait until c2 is binlogged. This should time out, as we must not
# allow c2 to finish commit before c1.
SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
send UPDATE t1 SET a=10 WHERE k=4;
connection c2;
--echo # Connection c2
SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
SELECT * FROM t1 WHERE k=4 FOR UPDATE;
SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
send UPDATE t1 SET a=20 WHERE k=4;
connection c1;
--echo # Connection c1
--echo # This should warn about DEBUG_SYNC timeout
reap;
connection c2;
--echo # Connection c2
reap;
--replace_regex /xid=[0-9]+/xid=XX/
SHOW BINLOG EVENTS LIMIT 1,12;
SELECT * FROM t1 ORDER BY k;
DROP TABLE t1;
SET DEBUG_SYNC= 'RESET';
...@@ -4991,6 +4991,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) ...@@ -4991,6 +4991,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered"); DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
} }
pthread_mutex_unlock(&LOCK_prepare_ordered); pthread_mutex_unlock(&LOCK_prepare_ordered);
DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered");
/* /*
The first in the queue handle group commit for all; the others just wait The first in the queue handle group commit for all; the others just wait
...@@ -6499,6 +6500,8 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, ...@@ -6499,6 +6500,8 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
else else
err= binlog_flush_trx_cache(thd, trx_data, NULL, all); err= binlog_flush_trx_cache(thd, trx_data, NULL, all);
DEBUG_SYNC(thd, "binlog_after_log_and_order");
DBUG_RETURN(!err); DBUG_RETURN(!err);
} }
......
...@@ -220,6 +220,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = { ...@@ -220,6 +220,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name); static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share); static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd); static int innobase_close_connection(handlerton *hton, THD* thd);
static void innobase_prepare_ordered(handlerton *hton, THD* thd, bool all);
static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all); static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all); static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all); static int innobase_rollback(handlerton *hton, THD* thd, bool all);
...@@ -2041,6 +2042,10 @@ innobase_init( ...@@ -2041,6 +2042,10 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint; innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint; innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint; innobase_hton->savepoint_release=innobase_release_savepoint;
if (innobase_release_locks_early)
innobase_hton->prepare_ordered=innobase_prepare_ordered;
else
innobase_hton->prepare_ordered=NULL;
innobase_hton->commit_ordered=innobase_commit_ordered; innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit; innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback; innobase_hton->rollback=innobase_rollback;
...@@ -2726,6 +2731,54 @@ innobase_start_trx_and_assign_read_view( ...@@ -2726,6 +2731,54 @@ innobase_start_trx_and_assign_read_view(
DBUG_RETURN(0); DBUG_RETURN(0);
} }
/*****************************************************************//**
Release row locks early during prepare phase.
Only enabled if --innodb-release-locks-early=1. In this case, a prepared
transaction is treated as committed to memory (but not to disk), and we
release row locks at the end of the prepare phase.
The consistent commit ordering guarantees of prepare_ordered() calls means
that transactions will not be binlogged in different order than locks are
released (which would cause trouble for statement-based replication).
This optimisation is not 100% safe, so is not enabled by default. But some
applications may decide to enable it to reduce contention on hotspot rows.
The consequences of enabling this are:
- It is not possible to rollback after successful prepare(). If there is
a need to rollback (ie. failure to binlog the transaction), we crash the
server (!)
- If we crash during commit, it is possible that an application/user can have
seen another transaction committed that is not recovered by XA crash
recovery. Thus durability is partially lost. However, consistency is still
guaranteed, we never recover a transaction and not recover another
transaction that committed before. */
static
void
innobase_prepare_ordered(
/*============*/
handlerton *hton, /*!< in: Innodb handlerton */
THD* thd, /*!< in: MySQL thread handle of the user for whom
the transaction should be committed */
bool all) /*!< in: TRUE - commit transaction
FALSE - the current SQL statement ended */
{
trx_t* trx;
DBUG_ENTER("innobase_prepare_ordered");
DBUG_ASSERT(hton == innodb_hton_ptr);
trx = check_trx_exists(thd);
mutex_enter(&kernel_mutex);
lock_release_off_kernel(trx);
mutex_exit(&kernel_mutex);
DBUG_VOID_RETURN;
}
static static
void void
innobase_commit_ordered_2( innobase_commit_ordered_2(
...@@ -2966,6 +3019,15 @@ innobase_rollback( ...@@ -2966,6 +3019,15 @@ innobase_rollback(
row_unlock_table_autoinc_for_mysql(trx); row_unlock_table_autoinc_for_mysql(trx);
/* if transaction has already released locks, it is too late to
rollback */
if (innobase_release_locks_early && trx->conc_state == TRX_PREPARED
&& UT_LIST_GET_LEN(trx->trx_locks) == 0) {
sql_print_error("Rollback after releasing locks! "
"errno=%d, dberr=%d", errno, trx->error_state);
ut_error;
}
if (all if (all
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
...@@ -11691,6 +11753,11 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table, ...@@ -11691,6 +11753,11 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table,
"except for the deletion.", "except for the deletion.",
NULL, NULL, 0, 0, 1, 0); NULL, NULL, 0, 0, 1, 0);
static MYSQL_SYSVAR_BOOL(release_locks_early, innobase_release_locks_early,
PLUGIN_VAR_READONLY,
"Release row locks in the prepare stage instead of in the commit stage",
NULL, NULL, FALSE);
static struct st_mysql_sys_var* innobase_system_variables[]= { static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(additional_mem_pool_size),
...@@ -11778,6 +11845,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -11778,6 +11845,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(auto_lru_dump), MYSQL_SYSVAR(auto_lru_dump),
MYSQL_SYSVAR(use_purge_thread), MYSQL_SYSVAR(use_purge_thread),
MYSQL_SYSVAR(pass_corrupt_table), MYSQL_SYSVAR(pass_corrupt_table),
MYSQL_SYSVAR(release_locks_early),
NULL NULL
}; };
......
...@@ -344,6 +344,9 @@ extern ulint srv_buf_pool_reads; ...@@ -344,6 +344,9 @@ extern ulint srv_buf_pool_reads;
/** Time in seconds between automatic buffer pool dumps */ /** Time in seconds between automatic buffer pool dumps */
extern uint srv_auto_lru_dump; extern uint srv_auto_lru_dump;
/** Release row locks already in the prepare phase */
extern my_bool innobase_release_locks_early;
/** Status variables to be passed to MySQL */ /** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc; typedef struct export_var_struct export_struc;
......
...@@ -338,6 +338,12 @@ trx_list_get_min_trx_id(void) ...@@ -338,6 +338,12 @@ trx_list_get_min_trx_id(void)
/****************************************************************//** /****************************************************************//**
Checks if a transaction with the given id is active. Checks if a transaction with the given id is active.
IMPORTANT ASSUMPTION:
It is assumed that this function is only used for the purpose of
determining if locks need to be created for the input transaction.
So if innobase_release_locks_early global option is set, and if
the transaction is already in prepared state, this returns FALSE,
as locks are no longer needed for the transaction.
@return TRUE if active */ @return TRUE if active */
UNIV_INLINE UNIV_INLINE
ibool ibool
...@@ -366,7 +372,8 @@ trx_is_active( ...@@ -366,7 +372,8 @@ trx_is_active(
trx = trx_get_on_id(trx_id); trx = trx_get_on_id(trx_id);
if (trx && (trx->conc_state == TRX_ACTIVE if (trx && (trx->conc_state == TRX_ACTIVE
|| trx->conc_state == TRX_PREPARED)) { || (trx->conc_state == TRX_PREPARED &&
!innobase_release_locks_early))) {
return(TRUE); return(TRUE);
} }
......
...@@ -482,6 +482,9 @@ UNIV_INTERN FILE* srv_misc_tmpfile; ...@@ -482,6 +482,9 @@ UNIV_INTERN FILE* srv_misc_tmpfile;
UNIV_INTERN ulint srv_main_thread_process_no = 0; UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0; UNIV_INTERN ulint srv_main_thread_id = 0;
/* Release row locks already in the prepare phase */
UNIV_INTERN my_bool innobase_release_locks_early = FALSE;
/* The following count work done by srv_master_thread. */ /* The following count work done by srv_master_thread. */
/* Iterations by the 'once per second' loop. */ /* Iterations by the 'once per second' loop. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment