Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
34a48dd4
Commit
34a48dd4
authored
Dec 25, 2010
by
unknown
Browse files
Options
Browse Files
Download
Plain Diff
Merge MWL#116 into mariadb-5.2-rpl.
parents
b3c72b9a
a2d921be
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
1921 additions
and
443 deletions
+1921
-443
mysql-test/r/group_commit.result
mysql-test/r/group_commit.result
+63
-0
mysql-test/r/group_commit_binlog_pos.result
mysql-test/r/group_commit_binlog_pos.result
+35
-0
mysql-test/r/group_commit_crash.result
mysql-test/r/group_commit_crash.result
+120
-0
mysql-test/suite/binlog/r/binlog_ioerr.result
mysql-test/suite/binlog/r/binlog_ioerr.result
+28
-0
mysql-test/suite/binlog/t/binlog_ioerr.test
mysql-test/suite/binlog/t/binlog_ioerr.test
+29
-0
mysql-test/suite/pbxt/r/pbxt_xa_binlog.result
mysql-test/suite/pbxt/r/pbxt_xa_binlog.result
+31
-0
mysql-test/suite/pbxt/t/pbxt_xa_binlog.test
mysql-test/suite/pbxt/t/pbxt_xa_binlog.test
+31
-0
mysql-test/t/group_commit.test
mysql-test/t/group_commit.test
+115
-0
mysql-test/t/group_commit_binlog_pos-master.opt
mysql-test/t/group_commit_binlog_pos-master.opt
+1
-0
mysql-test/t/group_commit_binlog_pos.test
mysql-test/t/group_commit_binlog_pos.test
+85
-0
mysql-test/t/group_commit_crash-master.opt
mysql-test/t/group_commit_crash-master.opt
+1
-0
mysql-test/t/group_commit_crash.test
mysql-test/t/group_commit_crash.test
+80
-0
sql/handler.cc
sql/handler.cc
+117
-74
sql/handler.h
sql/handler.h
+90
-1
sql/log.cc
sql/log.cc
+780
-225
sql/log.h
sql/log.h
+114
-8
sql/mysqld.cc
sql/mysqld.cc
+3
-0
sql/sql_class.cc
sql/sql_class.cc
+23
-0
sql/sql_class.h
sql/sql_class.h
+18
-0
sql/sql_parse.cc
sql/sql_parse.cc
+4
-0
storage/xtradb/handler/ha_innodb.cc
storage/xtradb/handler/ha_innodb.cc
+140
-122
storage/xtradb/handler/ha_innodb.h
storage/xtradb/handler/ha_innodb.h
+5
-10
storage/xtradb/include/trx0trx.h
storage/xtradb/include/trx0trx.h
+8
-3
No files found.
mysql-test/r/group_commit.result
0 → 100644
View file @
34a48dd4
CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
SELECT variable_value INTO @commits FROM information_schema.global_status
WHERE variable_name = 'binlog_commits';
SELECT variable_value INTO @group_commits FROM information_schema.global_status
WHERE variable_name = 'binlog_group_commits';
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
INSERT INTO t1 VALUES ("con1");
set DEBUG_SYNC= "now WAIT_FOR group1_running";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
INSERT INTO t1 VALUES ("con2");
SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
INSERT INTO t1 VALUES ("con3");
SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
INSERT INTO t1 VALUES ("con4");
SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SELECT * FROM t1 ORDER BY a;
a
SET DEBUG_SYNC= "now SIGNAL group2_queued";
SELECT * FROM t1 ORDER BY a;
a
con1
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
INSERT INTO t1 VALUES ("con5");
SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
INSERT INTO t1 VALUES ("con6");
SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
SELECT * FROM t1 ORDER BY a;
a
con1
SET DEBUG_SYNC= "now SIGNAL group3_committed";
SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
SELECT * FROM t1 ORDER BY a;
a
con1
con2
con3
con4
SET DEBUG_SYNC= "now SIGNAL group2_checked";
SELECT * FROM t1 ORDER BY a;
a
con1
con2
con3
con4
con5
con6
SELECT variable_value - @commits FROM information_schema.global_status
WHERE variable_name = 'binlog_commits';
variable_value - @commits
6
SELECT variable_value - @group_commits FROM information_schema.global_status
WHERE variable_name = 'binlog_group_commits';
variable_value - @group_commits
3
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;
mysql-test/r/group_commit_binlog_pos.result
0 → 100644
View file @
34a48dd4
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
INSERT INTO t1 VALUES (1);
SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
INSERT INTO t1 VALUES (2);
SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
INSERT INTO t1 VALUES (3);
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SELECT * FROM t1 ORDER BY a;
a
0
1
2
SET SESSION debug="+d,crash_dispatch_command_before";
SELECT 1;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
SELECT * FROM t1 ORDER BY a;
a
0
1
2
3
InnoDB: Last MySQL binlog file position 0 767, file name ./master-bin.000001
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;
mysql-test/r/group_commit_crash.result
0 → 100644
View file @
34a48dd4
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
c CHAR(255),
d CHAR(255),
id INT AUTO_INCREMENT,
PRIMARY KEY(id)) ENGINE=InnoDB;
create table t2 like t1;
create procedure setcrash(IN i INT)
begin
CASE i
WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
ELSE BEGIN END;
END CASE;
end //
FLUSH TABLES;
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(5);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(4);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
a b c d 1
a b c d 2
a b c d 3
a b c d 4
a b c d 5
a b c d 6
a b c d 7
a b c d 8
a b c d 9
a b c d 10
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(3);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
a b c d 1
a b c d 2
a b c d 3
a b c d 4
a b c d 5
a b c d 6
a b c d 7
a b c d 8
a b c d 9
a b c d 10
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(2);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
a b c d 1
a b c d 2
a b c d 3
a b c d 4
a b c d 5
a b c d 6
a b c d 7
a b c d 8
a b c d 9
a b c d 10
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(1);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
delete from t1;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE setcrash;
mysql-test/suite/binlog/r/binlog_ioerr.result
0 → 100644
View file @
34a48dd4
CALL mtr.add_suppression("Error writing file 'master-bin'");
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES(0);
SET SESSION debug='+d,fail_binlog_write_1';
INSERT INTO t1 VALUES(1);
ERROR HY000: Error writing file 'master-bin' (errno: 28)
INSERT INTO t1 VALUES(2);
ERROR HY000: Error writing file 'master-bin' (errno: 28)
SET SESSION debug='';
INSERT INTO t1 VALUES(3);
SELECT * FROM t1;
a
0
3
SHOW BINLOG EVENTS;
Log_name Pos Event_type Server_id End_log_pos Info
BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: #
BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0)
BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3)
BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
DROP TABLE t1;
mysql-test/suite/binlog/t/binlog_ioerr.test
0 → 100644
View file @
34a48dd4
source
include
/
have_debug
.
inc
;
source
include
/
have_innodb
.
inc
;
source
include
/
have_log_bin
.
inc
;
source
include
/
have_binlog_format_mixed_or_statement
.
inc
;
CALL
mtr
.
add_suppression
(
"Error writing file 'master-bin'"
);
RESET
MASTER
;
CREATE
TABLE
t1
(
a
INT
PRIMARY
KEY
)
ENGINE
=
innodb
;
INSERT
INTO
t1
VALUES
(
0
);
SET
SESSION
debug
=
'+d,fail_binlog_write_1'
;
--
error
ER_ERROR_ON_WRITE
INSERT
INTO
t1
VALUES
(
1
);
--
error
ER_ERROR_ON_WRITE
INSERT
INTO
t1
VALUES
(
2
);
SET
SESSION
debug
=
''
;
INSERT
INTO
t1
VALUES
(
3
);
SELECT
*
FROM
t1
;
# Actually the output from this currently shows a bug.
# The injected IO error leaves partially written transactions in the binlog in
# the form of stray "BEGIN" events.
# These should disappear from the output if binlog error handling is improved.
--
replace_regex
/
\
/
\
*
xid
=.*
\
*
\
//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
--
replace_column
1
BINLOG
2
POS
5
ENDPOS
SHOW
BINLOG
EVENTS
;
DROP
TABLE
t1
;
mysql-test/suite/pbxt/r/pbxt_xa_binlog.result
0 → 100644
View file @
34a48dd4
drop table if exists t1, t2;
SET binlog_format = 'mixed';
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=pbxt;
BEGIN;
SELECT @@log_bin;
@@log_bin
1
INSERT INTO t1 VALUES (1);
INSERT INTO t2 VALUES (2);
COMMIT;
select * from t1;
a
1
select * from t2;
b
2
SET sql_log_bin = 0;
INSERT INTO t1 VALUES (3);
INSERT INTO t2 VALUES (4);
COMMIT;
select * from t1 order by a;
a
1
3
select * from t2 order by b;
b
2
4
drop table t1, t2;
drop database pbxt;
mysql-test/suite/pbxt/t/pbxt_xa_binlog.test
0 → 100644
View file @
34a48dd4
--
source
include
/
have_innodb
.
inc
--
source
include
/
have_log_bin
.
inc
--
disable_warnings
drop
table
if
exists
t1
,
t2
;
--
enable_warnings
SET
binlog_format
=
'mixed'
;
CREATE
TABLE
t1
(
a
INT
PRIMARY
KEY
)
ENGINE
=
innodb
;
CREATE
TABLE
t2
(
b
INT
PRIMARY
KEY
)
ENGINE
=
pbxt
;
BEGIN
;
# verify that binlog is on
SELECT
@@
log_bin
;
INSERT
INTO
t1
VALUES
(
1
);
INSERT
INTO
t2
VALUES
(
2
);
COMMIT
;
select
*
from
t1
;
select
*
from
t2
;
# Test 2-phase commit when we disable binlogging.
SET
sql_log_bin
=
0
;
INSERT
INTO
t1
VALUES
(
3
);
INSERT
INTO
t2
VALUES
(
4
);
COMMIT
;
select
*
from
t1
order
by
a
;
select
*
from
t2
order
by
b
;
drop
table
t1
,
t2
;
drop
database
pbxt
;
mysql-test/t/group_commit.test
0 → 100644
View file @
34a48dd4
--
source
include
/
have_debug_sync
.
inc
--
source
include
/
have_innodb
.
inc
--
source
include
/
have_log_bin
.
inc
# Test some group commit code paths by using debug_sync to do controlled
# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
# group.
#
# Group 3 is allowed to race as far as possible ahead before group 2 finishes
# to check some edge case for concurrency control.
CREATE
TABLE
t1
(
a
VARCHAR
(
10
)
PRIMARY
KEY
)
ENGINE
=
innodb
;
SELECT
variable_value
INTO
@
commits
FROM
information_schema
.
global_status
WHERE
variable_name
=
'binlog_commits'
;
SELECT
variable_value
INTO
@
group_commits
FROM
information_schema
.
global_status
WHERE
variable_name
=
'binlog_group_commits'
;
connect
(
con1
,
localhost
,
root
,,);
connect
(
con2
,
localhost
,
root
,,);
connect
(
con3
,
localhost
,
root
,,);
connect
(
con4
,
localhost
,
root
,,);
connect
(
con5
,
localhost
,
root
,,);
connect
(
con6
,
localhost
,
root
,,);
# Start group1 (with one thread) doing commit, waiting for
# group2 to queue up before finishing.
connection
con1
;
SET
DEBUG_SYNC
=
"commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"
;
send
INSERT
INTO
t1
VALUES
(
"con1"
);
# Make group2 (with three threads) queue up.
# Make sure con2 is the group commit leader for group2.
# Make group2 wait with running commit_ordered() until group3 has committed.
connection
con2
;
set
DEBUG_SYNC
=
"now WAIT_FOR group1_running"
;
SET
DEBUG_SYNC
=
"commit_after_prepare_ordered SIGNAL group2_con2"
;
SET
DEBUG_SYNC
=
"commit_after_release_LOCK_log WAIT_FOR group3_committed"
;
SET
DEBUG_SYNC
=
"commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"
;
send
INSERT
INTO
t1
VALUES
(
"con2"
);
connection
con3
;
SET
DEBUG_SYNC
=
"now WAIT_FOR group2_con2"
;
SET
DEBUG_SYNC
=
"commit_after_prepare_ordered SIGNAL group2_con3"
;
send
INSERT
INTO
t1
VALUES
(
"con3"
);
connection
con4
;
SET
DEBUG_SYNC
=
"now WAIT_FOR group2_con3"
;
SET
DEBUG_SYNC
=
"commit_after_prepare_ordered SIGNAL group2_con4"
;
send
INSERT
INTO
t1
VALUES
(
"con4"
);
# When group2 is queued, let group1 continue and queue group3.
connection
default
;
SET
DEBUG_SYNC
=
"now WAIT_FOR group2_con4"
;
# At this point, transaction 1 is still not visible as commit_ordered() has not
# been called yet.
SET
SESSION
TRANSACTION
ISOLATION
LEVEL
READ
COMMITTED
;
SELECT
*
FROM
t1
ORDER
BY
a
;
SET
DEBUG_SYNC
=
"now SIGNAL group2_queued"
;
connection
con1
;
reap
;
# Now transaction 1 is visible.
connection
default
;
SELECT
*
FROM
t1
ORDER
BY
a
;
connection
con5
;
SET
DEBUG_SYNC
=
"commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"
;
SET
DEBUG_SYNC
=
"commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"
;
send
INSERT
INTO
t1
VALUES
(
"con5"
);
connection
con6
;
SET
DEBUG_SYNC
=
"now WAIT_FOR con5_leader"
;
SET
DEBUG_SYNC
=
"commit_after_prepare_ordered SIGNAL con6_queued"
;
send
INSERT
INTO
t1
VALUES
(
"con6"
);
connection
default
;
SET
DEBUG_SYNC
=
"now WAIT_FOR group3_con5"
;
# Still only transaction 1 visible, as group2 has not yet run commit_ordered().
SELECT
*
FROM
t1
ORDER
BY
a
;
SET
DEBUG_SYNC
=
"now SIGNAL group3_committed"
;
SET
DEBUG_SYNC
=
"now WAIT_FOR group2_visible"
;
# Now transactions 1-4 visible.
SELECT
*
FROM
t1
ORDER
BY
a
;
SET
DEBUG_SYNC
=
"now SIGNAL group2_checked"
;
connection
con2
;
reap
;
connection
con3
;
reap
;
connection
con4
;
reap
;
connection
con5
;
reap
;
connection
con6
;
reap
;
connection
default
;
# Check all transactions finally visible.
SELECT
*
FROM
t1
ORDER
BY
a
;
SELECT
variable_value
-
@
commits
FROM
information_schema
.
global_status
WHERE
variable_name
=
'binlog_commits'
;
SELECT
variable_value
-
@
group_commits
FROM
information_schema
.
global_status
WHERE
variable_name
=
'binlog_group_commits'
;
SET
DEBUG_SYNC
=
'RESET'
;
DROP
TABLE
t1
;
mysql-test/t/group_commit_binlog_pos-master.opt
0 → 100644
View file @
34a48dd4
--skip-stack-trace --skip-core-file
mysql-test/t/group_commit_binlog_pos.test
0 → 100644
View file @
34a48dd4
--
source
include
/
have_debug_sync
.
inc
--
source
include
/
have_innodb
.
inc
--
source
include
/
have_log_bin
.
inc
--
source
include
/
have_binlog_format_mixed_or_statement
.
inc
# Need DBUG to crash the server intentionally
--
source
include
/
have_debug
.
inc
# Don't test this under valgrind, memory leaks will occur as we crash
--
source
include
/
not_valgrind
.
inc
# XtraDB stores the binlog position corresponding to the last commit, and
# prints it during crash recovery.
# Test that we get the correct position when we group commit several
# transactions together.
CREATE
TABLE
t1
(
a
INT
PRIMARY
KEY
)
ENGINE
=
innodb
;
INSERT
INTO
t1
VALUES
(
0
);
connect
(
con1
,
localhost
,
root
,,);
connect
(
con2
,
localhost
,
root
,,);
connect
(
con3
,
localhost
,
root
,,);
# Queue up three commits for group commit.
connection
con1
;
SET
DEBUG_SYNC
=
"commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"
;
SET
DEBUG_SYNC
=
"commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"
;
send
INSERT
INTO
t1
VALUES
(
1
);
connection
con2
;
SET
DEBUG_SYNC
=
"now WAIT_FOR con1_waiting"
;
SET
DEBUG_SYNC
=
"commit_after_prepare_ordered SIGNAL con2_queued"
;
send
INSERT
INTO
t1
VALUES
(
2
);
connection
con3
;
SET
DEBUG_SYNC
=
"now WAIT_FOR con2_queued"
;
SET
DEBUG_SYNC
=
"commit_after_prepare_ordered SIGNAL con3_queued"
;
send
INSERT
INTO
t1
VALUES
(
3
);
connection
default
;
SET
DEBUG_SYNC
=
"now WAIT_FOR con1_loop"
;
# At this point, no transactions are committed.
SET
DEBUG_SYNC
=
"now SIGNAL con1_loop_cont"
;
SET
DEBUG_SYNC
=
"now WAIT_FOR con1_loop"
;
# At this point, 1 transaction is committed.
SET
DEBUG_SYNC
=
"now SIGNAL con1_loop_cont"
;
SET
DEBUG_SYNC
=
"now WAIT_FOR con1_loop"
;
# At this point, 2 transactions are committed.
SELECT
*
FROM
t1
ORDER
BY
a
;
connection
con2
;
reap
;
# Now crash the server with 1+2 in-memory committed, 3 only prepared.
connection
default
;
system
echo
wait
-
group_commit_binlog_pos
.
test
>>
$MYSQLTEST_VARDIR
/
tmp
/
mysqld
.
1.
expect
;
SET
SESSION
debug
=
"+d,crash_dispatch_command_before"
;
--
error
2006
,
2013
SELECT
1
;
connection
con1
;
--
error
2006
,
2013
reap
;
connection
con3
;
--
error
2006
,
2013
reap
;
system
echo
restart
-
group_commit_binlog_pos
.
test
>>
$MYSQLTEST_VARDIR
/
tmp
/
mysqld
.
1.
expect
;
connection
default
;
--
enable_reconnect
--
source
include
/
wait_until_connected_again
.
inc
# Crash recovery should recover all three transactions.
SELECT
*
FROM
t1
ORDER
BY
a
;
# Check that the binlog position reported by InnoDB is the correct one
# for the end of the second transaction (as can be checked with
# mysqlbinlog).
let
$MYSQLD_DATADIR
=
`SELECT @@datadir`
;
--
exec
grep
'InnoDB: Last MySQL binlog file position'
$MYSQLD_DATADIR
/../../
log
/
mysqld
.
1.
err
|
tail
-
1
SET
DEBUG_SYNC
=
'RESET'
;
DROP
TABLE
t1
;
mysql-test/t/group_commit_crash-master.opt
0 → 100644
View file @
34a48dd4
--skip-stack-trace --skip-core-file
mysql-test/t/group_commit_crash.test
0 → 100644
View file @
34a48dd4
# Testing group commit by crashing a few times.
# Test adapted from the Facebook patch: lp:mysqlatfacebook
--
source
include
/
not_embedded
.
inc
# Don't test this under valgrind, memory leaks will occur
--
source
include
/
not_valgrind
.
inc
# Binary must be compiled with debug for crash to occur
--
source
include
/
have_debug
.
inc
--
source
include
/
have_innodb
.
inc
--
source
include
/
have_log_bin
.
inc
let
$file_format_check
=
`SELECT @@innodb_file_format_check`
;
CREATE
TABLE
t1
(
a
CHAR
(
255
),
b
CHAR
(
255
),
c
CHAR
(
255
),
d
CHAR
(
255
),
id
INT
AUTO_INCREMENT
,
PRIMARY
KEY
(
id
))
ENGINE
=
InnoDB
;
create
table
t2
like
t1
;
delimiter
//;
create
procedure
setcrash
(
IN
i
INT
)
begin
CASE
i
WHEN
1
THEN
SET
SESSION
debug
=
"d,crash_commit_after_prepare"
;
WHEN
2
THEN
SET
SESSION
debug
=
"d,crash_commit_after_log"
;
WHEN
3
THEN
SET
SESSION
debug
=
"d,crash_commit_before_unlog"
;
WHEN
4
THEN
SET
SESSION
debug
=
"d,crash_commit_after"
;
WHEN
5
THEN
SET
SESSION
debug
=
"d,crash_commit_before"
;
ELSE
BEGIN
END
;
END
CASE
;
end
//
delimiter
;
//
# Avoid getting a crashed mysql.proc table.
FLUSH
TABLES
;
let
$numtests
=
5
;
let
$numinserts
=
10
;
while
(
$numinserts
)
{
dec
$numinserts
;
INSERT
INTO
t2
(
a
,
b
,
c
,
d
)
VALUES
(
'a'
,
'b'
,
'c'
,
'd'
);
}
--
enable_reconnect
while
(
$numtests
)
{
RESET
MASTER
;
START
TRANSACTION
;
insert
into
t1
select
*
from
t2
;
# Write file to make mysql-test-run.pl expect crash
--
exec
echo
"restart"
>
$MYSQLTEST_VARDIR
/
tmp
/
mysqld
.
1.
expect
eval
call
setcrash
(
$numtests
);
# Run the crashing query
--
error
2006
,
2013
COMMIT
;
# Poll the server waiting for it to be back online again.
--
source
include
/
wait_until_connected_again
.
inc
# table and binlog should be in sync.
SELECT
*
FROM
t1
ORDER
BY
id
;
SHOW
BINLOG
EVENTS
LIMIT
2
,
1
;
delete
from
t1
;
dec
$numtests
;
}
# final cleanup
DROP
TABLE
t1
;
DROP
TABLE
t2
;
DROP
PROCEDURE
setcrash
;
--
disable_query_log
eval
SET
GLOBAL
innodb_file_format_check
=
$file_format_check
;
--
enable_query_log
sql/handler.cc
View file @
34a48dd4
...
...
@@ -78,6 +78,8 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
static
TYPELIB
known_extensions
=
{
0
,
"known_exts"
,
NULL
,
NULL
};
uint
known_extensions_id
=
0
;
static
int
commit_one_phase_2
(
THD
*
thd
,
bool
all
,
THD_TRANS
*
trans
,
bool
is_real_trans
);
static
plugin_ref
ha_default_plugin
(
THD
*
thd
)
...
...
@@ -1076,7 +1078,7 @@ ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
*/
int
ha_commit_trans
(
THD
*
thd
,
bool
all
)
{
int
error
=
0
,
cookie
=
0
;
int
error
=
0
,
cookie
;
/*
'all' means that this is either an explicit commit issued by
user, or an implicit commit issued by a DDL.
...
...
@@ -1091,7 +1093,8 @@ int ha_commit_trans(THD *thd, bool all)
*/
bool
is_real_trans
=
all
||
thd
->
transaction
.
all
.
ha_list
==
0
;
Ha_trx_info
*
ha_info
=
trans
->
ha_list
;
my_xid
xid
=
thd
->
transaction
.
xid_state
.
xid
.
get_my_xid
();
bool
need_prepare_ordered
,
need_commit_ordered
;
my_xid
xid
;
DBUG_ENTER
(
"ha_commit_trans"
);
/* Just a random warning to test warnings pushed during autocommit. */
...
...
@@ -1130,85 +1133,112 @@ int ha_commit_trans(THD *thd, bool all)
DBUG_RETURN
(
2
);
}
#ifdef USING_TRANSACTIONS
if
(
ha_info
)
if
(
!
ha_info
)
{
uint
rw_ha_count
;
bool
rw_trans
;
/* Free resources and perform other cleanup even for 'empty' transactions. */
if
(
is_real_trans
)
thd
->
transaction
.
cleanup
();
DBUG_RETURN
(
0
);
}
DBUG_EXECUTE_IF
(
"crash_commit_before"
,
DBUG_SUICIDE
(););
DBUG_EXECUTE_IF
(
"crash_commit_before"
,
DBUG_SUICIDE
(););
/* Close all cursors that can not survive COMMIT */
if
(
is_real_trans
)
/* not a statement commit */
thd
->
stmt_map
.
close_transient_cursors
();
/* Close all cursors that can not survive COMMIT */
if
(
is_real_trans
)
/* not a statement commit */
thd
->
stmt_map
.
close_transient_cursors
();
rw_ha_count
=
ha_check_and_coalesce_trx_read_only
(
thd
,
ha_info
,
all
);
/* rw_trans is TRUE when we are in a transaction changing data */
rw_trans
=
is_real_trans
&&
(
rw_ha_count
>
0
);
uint
rw_ha_count
=
ha_check_and_coalesce_trx_read_only
(
thd
,
ha_info
,
all
);
/* rw_trans is TRUE when we are in a transaction changing data */
bool
rw_trans
=
is_real_trans
&&
(
rw_ha_count
>
0
);
if
(
rw_trans
&&
wait_if_global_read_lock
(
thd
,
0
,
0
))
{
ha_rollback_trans
(
thd
,
all
);
DBUG_RETURN
(
1
);
}
if
(
rw_trans
&&
wait_if_global_read_lock
(
thd
,
0
,
0
))
{
ha_rollback_trans
(
thd
,
all
);
DBUG_RETURN
(
1
);
}
if
(
rw_trans
&&
opt_readonly
&&
!
(
thd
->
security_ctx
->
master_access
&
SUPER_ACL
)
&&
!
thd
->
slave_thread
)
{
my_error
(
ER_OPTION_PREVENTS_STATEMENT
,
MYF
(
0
),
"--read-only"
);
ha_rollback_trans
(
thd
,
all
);
error
=
1
;
goto
end
;
}
if
(
rw_trans
&&
opt_readonly
&&
!
(
thd
->
security_ctx
->
master_access
&
SUPER_ACL
)
&&
!
thd
->
slave_thread
)
{
my_error
(
ER_OPTION_PREVENTS_STATEMENT
,
MYF
(
0
),
"--read-only"
);
goto
err
;
}
if
(
!
trans
->
no_2pc
&&
(
rw_ha_count
>
1
))
{
for
(;
ha_info
&&
!
error
;
ha_info
=
ha_info
->
next
())
{
int
err
;
handlerton
*
ht
=
ha_info
->
ht
();
/*
Do not call two-phase commit if this particular
transaction is read-only. This allows for simpler
implementation in engines that are always read-only.
*/
if
(
!
ha_info
->
is_trx_read_write
())
continue
;
/*
Sic: we know that prepare() is not NULL since otherwise
trans->no_2pc would have been set.
*/
if
((
err
=
ht
->
prepare
(
ht
,
thd
,
all
)))
{
my_error
(
ER_ERROR_DURING_COMMIT
,
MYF
(
0
),
err
);
error
=
1
;
}
status_var_increment
(
thd
->
status_var
.
ha_prepare_count
);
}
DBUG_EXECUTE_IF
(
"crash_commit_after_prepare"
,
DBUG_SUICIDE
(););
if
(
error
||
(
is_real_trans
&&
xid
&&
(
error
=
!
(
cookie
=
tc_log
->
log_xid
(
thd
,
xid
)))))
{
ha_rollback_trans
(
thd
,
all
);
error
=
1
;
goto
end
;
}
DBUG_EXECUTE_IF
(
"crash_commit_after_log"
,
DBUG_SUICIDE
(););
}
error
=
ha_commit_one_phase
(
thd
,
all
)
?
(
cookie
?
2
:
1
)
:
0
;
DBUG_EXECUTE_IF
(
"crash_commit_before_unlog"
,
DBUG_SUICIDE
(););
if
(
cookie
)
tc_log
->
unlog
(
cookie
,
xid
);
if
(
trans
->
no_2pc
||
(
rw_ha_count
<=
1
))
{
error
=
ha_commit_one_phase
(
thd
,
all
);
DBUG_EXECUTE_IF
(
"crash_commit_after"
,
DBUG_SUICIDE
(););
end:
if
(
rw_trans
)
start_waiting_global_read_lock
(
thd
);
goto
end
;
}
/* Free resources and perform other cleanup even for 'empty' transactions. */
else
if
(
is_real_trans
)
thd
->
transaction
.
cleanup
();
need_prepare_ordered
=
FALSE
;
need_commit_ordered
=
FALSE
;
xid
=
thd
->
transaction
.
xid_state
.
xid
.
get_my_xid
();
for
(
Ha_trx_info
*
hi
=
ha_info
;
hi
;
hi
=
hi
->
next
())
{
int
err
;
handlerton
*
ht
=
hi
->
ht
();
/*
Do not call two-phase commit if this particular
transaction is read-only. This allows for simpler
implementation in engines that are always read-only.
*/
if
(
!
hi
->
is_trx_read_write
())
continue
;
/*
Sic: we know that prepare() is not NULL since otherwise
trans->no_2pc would have been set.
*/
if
((
err
=
ht
->
prepare
(
ht
,
thd
,
all
)))
my_error
(
ER_ERROR_DURING_COMMIT
,
MYF
(
0
),
err
);
status_var_increment
(
thd
->
status_var
.
ha_prepare_count
);
if
(
err
)
goto
err
;
if
(
ht
->
prepare_ordered
)
need_prepare_ordered
=
TRUE
;
if
(
ht
->
commit_ordered
)
need_commit_ordered
=
TRUE
;
}
DBUG_EXECUTE_IF
(
"crash_commit_after_prepare"
,
DBUG_SUICIDE
(););
if
(
!
is_real_trans
)
{
error
=
commit_one_phase_2
(
thd
,
all
,
trans
,
is_real_trans
);
DBUG_EXECUTE_IF
(
"crash_commit_after"
,
DBUG_SUICIDE
(););
goto
end
;
}
cookie
=
tc_log
->
log_and_order
(
thd
,
xid
,
all
,
need_prepare_ordered
,
need_commit_ordered
);
if
(
!
cookie
)
goto
err
;
DBUG_EXECUTE_IF
(
"crash_commit_after_log"
,
DBUG_SUICIDE
(););
error
=
commit_one_phase_2
(
thd
,
all
,
trans
,
is_real_trans
)
?
2
:
0
;
DBUG_EXECUTE_IF
(
"crash_commit_after"
,
DBUG_SUICIDE
(););
DBUG_EXECUTE_IF
(
"crash_commit_before_unlog"
,
DBUG_SUICIDE
(););
tc_log
->
unlog
(
cookie
,
xid
);
DBUG_EXECUTE_IF
(
"crash_commit_after"
,
DBUG_SUICIDE
(););
goto
end
;
/* Come here if error and we need to rollback. */
err:
if
(
!
error
)
error
=
1
;
ha_rollback_trans
(
thd
,
all
);
end:
if
(
rw_trans
)
start_waiting_global_read_lock
(
thd
);
#endif
/* USING_TRANSACTIONS */
DBUG_RETURN
(
error
);
}
...
...
@@ -1219,7 +1249,6 @@ int ha_commit_trans(THD *thd, bool all)
*/
int
ha_commit_one_phase
(
THD
*
thd
,
bool
all
)
{
int
error
=
0
;
THD_TRANS
*
trans
=
all
?
&
thd
->
transaction
.
all
:
&
thd
->
transaction
.
stmt
;
/*
"real" is a nick name for a transaction for which a commit will
...
...
@@ -1229,8 +1258,16 @@ int ha_commit_one_phase(THD *thd, bool all)
enclosing 'all' transaction is rolled back.
*/
bool
is_real_trans
=
all
||
thd
->
transaction
.
all
.
ha_list
==
0
;
Ha_trx_info
*
ha_info
=
trans
->
ha_list
,
*
ha_info_next
;
DBUG_ENTER
(
"ha_commit_one_phase"
);
DBUG_RETURN
(
commit_one_phase_2
(
thd
,
all
,
trans
,
is_real_trans
));
}
static
int
commit_one_phase_2
(
THD
*
thd
,
bool
all
,
THD_TRANS
*
trans
,
bool
is_real_trans
)
{
int
error
=
0
;
Ha_trx_info
*
ha_info
=
trans
->
ha_list
,
*
ha_info_next
;
DBUG_ENTER
(
"commit_one_phase_2"
);
#ifdef USING_TRANSACTIONS
if
(
ha_info
)
{
...
...
@@ -1847,7 +1884,13 @@ int ha_start_consistent_snapshot(THD *thd)
{
bool
warn
=
true
;
/*
Holding the LOCK_commit_ordered mutex ensures that for any transaction
we either see it committed in all engines, or in none.
*/
pthread_mutex_lock
(
&
LOCK_commit_ordered
);
plugin_foreach
(
thd
,
snapshot_handlerton
,
MYSQL_STORAGE_ENGINE_PLUGIN
,
&
warn
);
pthread_mutex_unlock
(
&
LOCK_commit_ordered
);
/*
Same idea as when one wants to CREATE TABLE in one engine which does not
...
...
sql/handler.h
View file @
34a48dd4
...
...
@@ -764,9 +764,98 @@ struct handlerton
NOTE 'all' is also false in auto-commit mode where 'end of statement'
and 'real commit' mean the same event.
*/
int
(
*
commit
)(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
int
(
*
commit
)(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
/*
The commit_ordered() method is called prior to the commit() method, after
the transaction manager has decided to commit (not rollback) the
transaction. Unlike commit(), commit_ordered() is called only when the
full transaction is committed, not for each commit of statement
transaction in a multi-statement transaction.
Note that like prepare(), commit_ordered() is only called when 2-phase
commit takes place. Ie. when no binary log and only a single engine
participates in a transaction, one commit() is called, no
commit_ordered(). So engines must be prepared for this.
The calls to commit_ordered() in multiple parallel transactions are
guaranteed to happen in the same order in every participating
handler. This can be used to ensure the same commit order among multiple
handlers (eg. in table handler and binlog). So if transaction T1 calls
into commit_ordered() of handler A before T2, then T1 will also call
commit_ordered() of handler B before T2.
Engines that implement this method should during this call make the
transaction visible to other transactions, thereby making the order of
transaction commits be defined by the order of commit_ordered() calls.
The intention is that commit_ordered() should do the minimal amount of
work that needs to happen in consistent commit order among handlers. To
preserve ordering, calls need to be serialised on a global mutex, so
doing any time-consuming or blocking operations in commit_ordered() will
limit scalability.
Handlers can rely on commit_ordered() calls to be serialised (no two
calls can run in parallel, so no extra locking on the handler part is
required to ensure this).
Note that commit_ordered() can be called from a different thread than the
one handling the transaction! So it can not do anything that depends on
thread local storage, in particular it can not call my_error() and
friends (instead it can store the error code and delay the call of
my_error() to the commit() method).
Similarly, since commit_ordered() returns void, any return error code
must be saved and returned from the commit() method instead.
The commit_ordered method is optional, and can be left unset if not
needed in a particular handler (then there will be no ordering guarantees
wrt. other engines and binary log).
*/
void
(
*
commit_ordered
)(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
int
(
*
rollback
)(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
int
(
*
prepare
)(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
/*
The prepare_ordered method is optional. If set, it will be called after
successful prepare() in all handlers participating in 2-phase
commit. Like commit_ordered(), it is called only when the full
transaction is committed, not for each commit of statement transaction.
The calls to prepare_ordered() among multiple parallel transactions are
ordered consistently with calls to commit_ordered(). This means that
calls to prepare_ordered() effectively define the commit order, and that
each handler will see the same sequence of transactions calling into
prepare_ordered() and commit_ordered().
Thus, prepare_ordered() can be used to define commit order for handlers
that need to do this in the prepare step (like binlog). It can also be
used to release transaction's locks early in an order consistent with the
order transactions will be eventually committed.
Like commit_ordered(), prepare_ordered() calls are serialised to maintain
ordering, so the intention is that they should execute fast, with only
the minimal amount of work needed to define commit order. Handlers can
rely on this serialisation, and do not need to do any extra locking to
avoid two prepare_ordered() calls running in parallel.
Like commit_ordered(), prepare_ordered() is not guaranteed to be called
in the context of the thread handling the rest of the transaction. So it
cannot invoke code that relies on thread local storage, in particular it
cannot call my_error().
prepare_ordered() cannot cause a rollback by returning an error, all
possible errors must be handled in prepare() (the prepare_ordered()
method returns void). In case of some fatal error, a record of the error
must be made internally by the engine and returned from commit() later.
Note that for user-level XA SQL commands, no consistent ordering among
prepare_ordered() and commit_ordered() is guaranteed (as that would
require blocking all other commits for an indefinite time).
When 2-phase commit is not used (eg. only one engine (and no binlog) in
transaction), prepare() is not called and in such cases prepare_ordered()
also is not called.
*/
void
(
*
prepare_ordered
)(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
int
(
*
recover
)(
handlerton
*
hton
,
XID
*
xid_list
,
uint
len
);
int
(
*
commit_by_xid
)(
handlerton
*
hton
,
XID
*
xid
);
int
(
*
rollback_by_xid
)(
handlerton
*
hton
,
XID
*
xid
);
...
...
sql/log.cc
View file @
34a48dd4
...
...
@@ -38,6 +38,7 @@
#endif
#include <mysql/plugin.h>
#include "debug_sync.h"
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
...
...
@@ -154,7 +155,7 @@ class binlog_trx_data {
public:
binlog_trx_data
()
:
at_least_one_stmt_committed
(
0
),
incident
(
FALSE
),
m_pending
(
0
),
before_stmt_pos
(
MY_OFF_T_UNDEF
)
before_stmt_pos
(
MY_OFF_T_UNDEF
)
,
commit_bin_log_file_pos
(
0
),
using_xa
(
0
)
{
trans_log
.
end_of_file
=
max_binlog_cache_size
;
}
...
...
@@ -208,11 +209,13 @@ class binlog_trx_data {
completely.
*/
void
reset
()
{
if
(
!
empty
())
if
(
trans_log
.
type
!=
WRITE_CACHE
||
!
empty
())
truncate
(
0
);
before_stmt_pos
=
MY_OFF_T_UNDEF
;
incident
=
FALSE
;
trans_log
.
end_of_file
=
max_binlog_cache_size
;
using_xa
=
FALSE
;
commit_bin_log_file_pos
=
0
;
DBUG_ASSERT
(
empty
());
}
...
...
@@ -257,6 +260,17 @@ class binlog_trx_data {
Binlog position before the start of the current statement.
*/
my_off_t
before_stmt_pos
;
/*
Binlog position after current commit, available to storage engines during
commit_ordered() and commit().
*/
ulonglong
commit_bin_log_file_pos
;
/*
Flag set true if this transaction is committed with log_xid() as part of
XA, false if not.
*/
bool
using_xa
;
};
handlerton
*
binlog_hton
;
...
...
@@ -1416,103 +1430,131 @@ static int binlog_close_connection(handlerton *hton, THD *thd)
}
/*
End a transaction.
End a transaction
, writing events to the binary log
.
SYNOPSIS
binlog_
end_trans
()
binlog_
flush_trx_cache
()
thd The thread whose transaction should be ended
trx_data Pointer to the transaction data to use
end_ev The end event to use, or NULL
all True if the entire transaction should be ended, false if
only the statement transaction should be ended.
end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
DESCRIPTION
End the currently open transaction. The transaction can be either
a real transaction (if 'all' is true) or a statement transaction
(if 'all' is false).
a real transaction or a statement transaction.
If 'end_ev' is NULL, the transaction is a rollback of only
transactional tables, so the transaction cache will be truncated
to either just before the last opened statement transaction (if
'all' is false), or reset completely (if 'all' is true)
.
This can be to commit a transaction, with a COMMIT query event or an XA
commit XID event. But it can also be to rollback a transaction with a
ROLLBACK query event, used for rolling back transactions which also
contain updates to non-transactional tables
.
*/
static
int
binlog_
end_trans
(
THD
*
thd
,
binlog_trx_data
*
trx_data
,
Log_event
*
end_ev
,
bool
all
)
binlog_
flush_trx_cache
(
THD
*
thd
,
binlog_trx_data
*
trx_data
,
Log_event
*
end_ev
,
bool
all
)
{
DBUG_ENTER
(
"binlog_end_trans"
);
int
error
=
0
;
DBUG_ENTER
(
"binlog_flush_trx_cache"
);
IO_CACHE
*
trans_log
=
&
trx_data
->
trans_log
;
DBUG_PRINT
(
"enter"
,
(
"transaction: %s end_ev: 0x%lx"
,
all
?
"all"
:
"stmt"
,
(
long
)
end_ev
));
DBUG_PRINT
(
"info"
,
(
"thd->options={ %s%s}"
,
FLAGSTR
(
thd
->
options
,
OPTION_NOT_AUTOCOMMIT
),
FLAGSTR
(
thd
->
options
,
OPTION_BEGIN
)));
if
(
thd
->
binlog_flush_pending_rows_event
(
TRUE
))
DBUG_RETURN
(
1
);
/*
NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
only transactional tables. If the transaction contains changes to
any non-transactional tables, we need to write the transaction and log
a ROLLBACK last.
*/
if
(
end_ev
!=
NULL
)
{
if
(
thd
->
binlog_flush_pending_rows_event
(
TRUE
))
DBUG_RETURN
(
1
);
/*
Doing a commit or a rollback including non-transactional tables,
i.e., ending a transaction where we might write the transaction
cache to the binary log.
We can always end the statement when ending a transaction since
transactions are not allowed inside stored functions. If they
were, we would have to ensure that we're not ending a statement
inside a stored function.
*/
error
=
mysql_bin_log
.
write
(
thd
,
&
trx_data
->
trans_log
,
end_ev
,
trx_data
->
has_incident
());
trx_data
->
reset
();
Doing a commit or a rollback including non-transactional tables,
i.e., ending a transaction where we might write the transaction
cache to the binary log.
We can always end the statement when ending a transaction since
transactions are not allowed inside stored functions. If they
were, we would have to ensure that we're not ending a statement
inside a stored function.
*/
int
error
=
mysql_bin_log
.
write_transaction_to_binlog
(
thd
,
trx_data
,
end_ev
,
all
);
statistic_increment
(
binlog_cache_use
,
&
LOCK_status
);
if
(
trans_log
->
disk_writes
!=
0
)
{
statistic_increment
(
binlog_cache_disk_use
,
&
LOCK_status
);
trans_log
->
disk_writes
=
0
;
}
}
else
trx_data
->
reset
();
statistic_increment
(
binlog_cache_use
,
&
LOCK_status
);
if
(
trans_log
->
disk_writes
!=
0
)
{
/*
If rolling back an entire transaction or a single statement not
inside a transaction, we reset the transaction cache.
statistic_increment
(
binlog_cache_disk_use
,
&
LOCK_status
);
trans_log
->
disk_writes
=
0
;
}
If rolling back a statement in a transaction, we truncate the
transaction cache to remove the statement.
*/
thd
->
binlog_remove_pending_rows_event
(
TRUE
);
if
(
all
||
!
(
thd
->
options
&
(
OPTION_BEGIN
|
OPTION_NOT_AUTOCOMMIT
)))
{
if
(
trx_data
->
has_incident
())
error
=
mysql_bin_log
.
write_incident
(
thd
,
TRUE
);
trx_data
->
reset
();
}
else
// ...statement
trx_data
->
truncate
(
trx_data
->
before_stmt_pos
);
DBUG_ASSERT
(
thd
->
binlog_get_pending_rows_event
()
==
NULL
);
DBUG_RETURN
(
error
);
}
/*
  Throw away binlog cache contents for a rollback that has nothing to
  replicate (only transactional tables were updated).

  SYNOPSIS
    binlog_truncate_trx_cache()
    thd       Thread whose transaction is being rolled back
    trx_data  Binlog transaction data owning the cache
    all       True to end the entire transaction, false to end only the
              current statement transaction

  DESCRIPTION
    Any pending rows event is dropped first. After that:

    - When rolling back a single statement inside a multi-statement
      transaction ('all' is false and OPTION_BEGIN or OPTION_NOT_AUTOCOMMIT
      is set), the cache is truncated back to before_stmt_pos so that only
      the statement is removed.

    - Otherwise (the whole transaction, or a statement that is not inside a
      transaction) the cache is reset completely. If the transaction was
      flagged with an incident, an incident event is written to the binary
      log before the reset.

  RETURN
    0 unless an incident had to be written, in which case the result of
    mysql_bin_log.write_incident() is returned.
*/
static int
binlog_truncate_trx_cache(THD *thd, binlog_trx_data *trx_data, bool all)
{
  DBUG_ENTER("binlog_truncate_trx_cache");
  int error= 0;

  DBUG_PRINT("enter", ("transaction: %s", all ? "all" : "stmt"));
  DBUG_PRINT("info", ("thd->options={ %s%s}",
                      FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
                      FLAGSTR(thd->options, OPTION_BEGIN)));

  /* Nothing is going to be replicated, so the pending event is not needed. */
  thd->binlog_remove_pending_rows_event(TRUE);

  const bool inside_multi_stmt_trx=
    (thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)) != 0;

  if (!all && inside_multi_stmt_trx)
  {
    /* Statement rollback inside a transaction: drop just the statement. */
    trx_data->truncate(trx_data->before_stmt_pos);
  }
  else
  {
    /* Whole transaction (or autocommit statement): drop everything. */
    if (trx_data->has_incident())
      error= mysql_bin_log.write_incident(thd);
    trx_data->reset();
  }

  DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
  DBUG_RETURN(error);
}
/*
  Message text handed to the Incident_log_event (INCIDENT_LOST_EVENTS) that
  write_transaction_to_binlog() builds for a transaction flagged with an
  incident.
*/
static
LEX_STRING
const
write_error_msg
=
{
C_STRING_WITH_LEN
(
"error writing to the binary log"
)
};
static
int
binlog_prepare
(
handlerton
*
hton
,
THD
*
thd
,
bool
all
)
{
/*
  Intentionally does nothing and always reports success (0).
  We only pretend to support 2-phase commit here, so that MySQL won't
  switch to 1-phase commit.
  The real work will be done in MYSQL_BIN_LOG::log_and_order().
  None of the parameters are used.
*/
return
0
;
}
...
...
@@ -1563,8 +1605,8 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
(
trans_has_no_stmt_committed
(
thd
,
all
)
&&
!
stmt_has_updated_trans_table
(
thd
)
&&
stmt_has_updated_non_trans_table
(
thd
)))
{
Query_log_event
q
ev
(
thd
,
STRING_WITH_LEN
(
"COMMIT"
),
TRUE
,
TRUE
,
0
);
error
=
binlog_
end_trans
(
thd
,
trx_data
,
&
q
ev
,
all
);
Query_log_event
end_
ev
(
thd
,
STRING_WITH_LEN
(
"COMMIT"
),
TRUE
,
TRUE
,
0
);
error
=
binlog_
flush_trx_cache
(
thd
,
trx_data
,
&
end_
ev
,
all
);
}
trx_data
->
at_least_one_stmt_committed
=
my_b_tell
(
&
trx_data
->
trans_log
)
>
0
;
...
...
@@ -1628,7 +1670,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
(
thd
->
options
&
OPTION_KEEP_LOG
))
&&
mysql_bin_log
.
check_write_error
(
thd
))
trx_data
->
set_incident
();
error
=
binlog_
end_trans
(
thd
,
trx_data
,
0
,
all
);
error
=
binlog_
truncate_trx_cache
(
thd
,
trx_data
,
all
);
}
else
{
...
...
@@ -1647,8 +1689,8 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
stmt_has_updated_non_trans_table
(
thd
)
&&
thd
->
current_stmt_binlog_row_based
))
{
Query_log_event
q
ev
(
thd
,
STRING_WITH_LEN
(
"ROLLBACK"
),
TRUE
,
TRUE
,
0
);
error
=
binlog_
end_trans
(
thd
,
trx_data
,
&
q
ev
,
all
);
Query_log_event
end_
ev
(
thd
,
STRING_WITH_LEN
(
"ROLLBACK"
),
TRUE
,
TRUE
,
0
);
error
=
binlog_
flush_trx_cache
(
thd
,
trx_data
,
&
end_
ev
,
all
);
}
/*
Otherwise, we simply truncate the cache as there is no change on
...
...
@@ -1656,7 +1698,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
*/
else
if
(
ending_trans
(
thd
,
all
)
||
(
!
(
thd
->
options
&
OPTION_KEEP_LOG
)
&&
!
stmt_has_updated_non_trans_table
(
thd
)))
error
=
binlog_
end_trans
(
thd
,
trx_data
,
0
,
all
);
error
=
binlog_
truncate_trx_cache
(
thd
,
trx_data
,
all
);
}
if
(
!
all
)
trx_data
->
before_stmt_pos
=
MY_OFF_T_UNDEF
;
// part of the stmt rollback
...
...
@@ -2494,6 +2536,7 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
MYSQL_BIN_LOG
::
MYSQL_BIN_LOG
()
:
bytes_written
(
0
),
prepared_xids
(
0
),
file_id
(
1
),
open_count
(
1
),
need_start_event
(
TRUE
),
group_commit_queue
(
0
),
num_commits
(
0
),
num_group_commits
(
0
),
is_relay_log
(
0
),
description_event_for_exec
(
0
),
description_event_for_queue
(
0
)
{
...
...
@@ -3972,6 +4015,10 @@ bool MYSQL_BIN_LOG::appendv(const char* buf, uint len,...)
}
#ifndef DBUG_OFF
static
ulong
opt_binlog_dbug_fsync_sleep
=
0
;
#endif
bool
MYSQL_BIN_LOG
::
flush_and_sync
()
{
int
err
=
0
,
fd
=
log_file
.
file
;
...
...
@@ -3982,6 +4029,11 @@ bool MYSQL_BIN_LOG::flush_and_sync()
{
sync_binlog_counter
=
0
;
err
=
my_sync
(
fd
,
MYF
(
MY_WME
));
#ifndef DBUG_OFF
ulong
usec_sleep
=
opt_binlog_dbug_fsync_sleep
;
if
(
usec_sleep
>
0
)
my_sleep
(
usec_sleep
);
#endif
}
return
err
;
}
...
...
@@ -4280,44 +4332,41 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
if
(
Rows_log_event
*
pending
=
trx_data
->
pending
())
{
IO_CACHE
*
file
=
&
log_file
;
/*
Decide if we should write to the log file directly or to the
transaction log.
*/
if
(
pending
->
get_cache_stmt
()
||
my_b_tell
(
&
trx_data
->
trans_log
))
file
=
&
trx_data
->
trans_log
;
/*
If we are not writing to the log file directly, we could avoid
locking the log.
*/
pthread_mutex_lock
(
&
LOCK_log
);
/*
Write pending event to log file or transaction cache
*/
if
(
pending
->
write
(
file
))
{
pthread_mutex_unlock
(
&
LOCK_log
);
set_write_error
(
thd
);
DBUG_RETURN
(
1
);
/* Write to transaction log/cache. */
if
(
pending
->
write
(
&
trx_data
->
trans_log
))
{
set_write_error
(
thd
);
DBUG_RETURN
(
1
);
}
}
delete
pending
;
if
(
file
==
&
log_file
)
else
{
/* Write directly to log file. */
pthread_mutex_lock
(
&
LOCK_log
);
if
(
pending
->
write
(
&
log_file
))
{
pthread_mutex_unlock
(
&
LOCK_log
);
set_write_error
(
thd
);
DBUG_RETURN
(
1
);
}
error
=
flush_and_sync
();
if
(
!
error
)
{
signal_update
();
rotate_and_purge
(
RP_LOCK_LOG_IS_ALREADY_LOCKED
);
}
pthread_mutex_unlock
(
&
LOCK_log
);
}
pthread_mutex_unlock
(
&
LOCK_log
)
;
delete
pending
;
}
thd
->
binlog_set_pending_rows_event
(
event
);
...
...
@@ -4347,11 +4396,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
}
/*
Flush the pending rows event to the transaction cache or to the
log file. Since this function potentially acquires the LOCK_log
mutex, we do this before acquiring the LOCK_log mutex in this
function.
We only end the statement if we are in a top-level statement. If
we are inside a stored function, we do not end the statement since
this will close all tables on the slave.
...
...
@@ -4361,8 +4405,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
if
(
thd
->
binlog_flush_pending_rows_event
(
end_stmt
))
DBUG_RETURN
(
error
);
pthread_mutex_lock
(
&
LOCK_log
);
/*
In most cases this is only called if 'is_open()' is true; in fact this is
mostly called if is_open() *was* true a few instructions before, but it
...
...
@@ -4384,7 +4426,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
thd
->
lex
->
sql_command
!=
SQLCOM_SAVEPOINT
&&
!
binlog_filter
->
db_ok
(
local_db
)))
{
VOID
(
pthread_mutex_unlock
(
&
LOCK_log
));
DBUG_RETURN
(
0
);
}
#endif
/* HAVE_REPLICATION */
...
...
@@ -4428,15 +4469,11 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
thd
->
binlog_start_trans_and_stmt
();
file
=
trans_log
;
}
/*
TODO as Mats suggested, for all the cases above where we write to
trans_log, it sounds unnecessary to lock LOCK_log. We should rather
test first if we want to write to trans_log, and if not, lock
LOCK_log.
*/
}
#endif
/* USING_TRANSACTIONS */
DBUG_PRINT
(
"info"
,(
"event type: %d"
,
event_info
->
get_type_code
()));
if
(
file
==
&
log_file
)
pthread_mutex_lock
(
&
LOCK_log
);
/*
No check for auto events flag here - this write method should
...
...
@@ -4460,7 +4497,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
Intvar_log_event
e
(
thd
,(
uchar
)
LAST_INSERT_ID_EVENT
,
thd
->
first_successful_insert_id_in_prev_stmt_for_binlog
);
if
(
e
.
write
(
file
))
goto
err
;
goto
err
_unlock
;
}
if
(
thd
->
auto_inc_intervals_in_cur_stmt_for_binlog
.
nb_elements
()
>
0
)
{
...
...
@@ -4471,13 +4508,13 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
thd
->
auto_inc_intervals_in_cur_stmt_for_binlog
.
minimum
());
if
(
e
.
write
(
file
))
goto
err
;
goto
err
_unlock
;
}
if
(
thd
->
rand_used
)
{
Rand_log_event
e
(
thd
,
thd
->
rand_saved_seed1
,
thd
->
rand_saved_seed2
);
if
(
e
.
write
(
file
))
goto
err
;
goto
err
_unlock
;
}
if
(
thd
->
user_var_events
.
elements
)
{
...
...
@@ -4492,7 +4529,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
user_var_event
->
type
,
user_var_event
->
charset_number
);
if
(
e
.
write
(
file
))
goto
err
;
goto
err
_unlock
;
}
}
}
...
...
@@ -4501,7 +4538,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
/* Write the SQL command */
if
(
event_info
->
write
(
file
)
||
DBUG_EVALUATE_IF
(
"injecting_fault_writing"
,
1
,
0
))
goto
err
;
goto
err
_unlock
;
if
(
file
==
&
log_file
)
// we are writing to the real log (disk)
{
...
...
@@ -4509,18 +4546,21 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
status_var_add
(
thd
->
status_var
.
binlog_bytes_written
,
data_written
);
if
(
flush_and_sync
())
goto
err
;
goto
err
_unlock
;
signal_update
();
rotate_and_purge
(
RP_LOCK_LOG_IS_ALREADY_LOCKED
);
}
error
=
0
;
err_unlock:
if
(
file
==
&
log_file
)
pthread_mutex_unlock
(
&
LOCK_log
);
err:
if
(
error
)
set_write_error
(
thd
);
}
pthread_mutex_unlock
(
&
LOCK_log
);
DBUG_RETURN
(
error
);
}
...
...
@@ -4643,19 +4683,14 @@ uint MYSQL_BIN_LOG::next_file_id()
write_cache()
thd Current_thread
cache Cache to write to the binary log
lock_log True if the LOCK_log mutex should be acquired, false otherwise
sync_log True if the log should be flushed and sync:ed
DESCRIPTION
Write the contents of the cache to the binary log. The cache will
be reset as a READ_CACHE to be able to read the contents from it.
*/
int
MYSQL_BIN_LOG
::
write_cache
(
THD
*
thd
,
IO_CACHE
*
cache
,
bool
lock_log
,
bool
sync_log
)
int
MYSQL_BIN_LOG
::
write_cache
(
THD
*
thd
,
IO_CACHE
*
cache
)
{
Mutex_sentry
sentry
(
lock_log
?
&
LOCK_log
:
NULL
);
if
(
reinit_io_cache
(
cache
,
READ_CACHE
,
0
,
0
,
0
))
return
ER_ERROR_ON_WRITE
;
uint
length
=
my_b_bytes_in_cache
(
cache
),
group
,
carry
,
hdr_offs
;
...
...
@@ -4767,6 +4802,8 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
}
/* Write data to the binary log file */
DBUG_EXECUTE_IF
(
"fail_binlog_write_1"
,
errno
=
28
;
return
ER_ERROR_ON_WRITE
;);
if
(
my_b_write
(
&
log_file
,
cache
->
read_pos
,
length
))
return
ER_ERROR_ON_WRITE
;
status_var_add
(
thd
->
status_var
.
binlog_bytes_written
,
length
);
...
...
@@ -4776,9 +4813,6 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
DBUG_ASSERT
(
carry
==
0
);
if
(
sync_log
)
flush_and_sync
();
return
0
;
// All OK
}
...
...
@@ -4811,27 +4845,23 @@ int query_error_code(THD *thd, bool not_killed)
return
error
;
}
bool
MYSQL_BIN_LOG
::
write_incident
(
THD
*
thd
,
bool
lock
)
bool
MYSQL_BIN_LOG
::
write_incident
(
THD
*
thd
)
{
uint
error
=
0
;
DBUG_ENTER
(
"MYSQL_BIN_LOG::write_incident"
);
LEX_STRING
const
write_error_msg
=
{
C_STRING_WITH_LEN
(
"error writing to the binary log"
)
};
Incident
incident
=
INCIDENT_LOST_EVENTS
;
Incident_log_event
ev
(
thd
,
incident
,
write_error_msg
);
if
(
lock
)
pthread_mutex_lock
(
&
LOCK_log
);
pthread_mutex_lock
(
&
LOCK_log
);
error
=
ev
.
write
(
&
log_file
);
status_var_add
(
thd
->
status_var
.
binlog_bytes_written
,
ev
.
data_written
);
if
(
lock
)
if
(
!
error
&&
!
(
error
=
flush_and_sync
())
)
{
if
(
!
error
&&
!
(
error
=
flush_and_sync
()))
{
signal_update
();
rotate_and_purge
(
RP_LOCK_LOG_IS_ALREADY_LOCKED
);
}
pthread_mutex_unlock
(
&
LOCK_log
);
signal_update
();
rotate_and_purge
(
RP_LOCK_LOG_IS_ALREADY_LOCKED
);
}
pthread_mutex_unlock
(
&
LOCK_log
);
DBUG_RETURN
(
error
);
}
...
...
@@ -4859,110 +4889,315 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
'cache' needs to be reinitialized after this function returns.
*/
bool
MYSQL_BIN_LOG
::
write
(
THD
*
thd
,
IO_CACHE
*
cache
,
Log_event
*
commit_event
,
bool
incident
)
/*
  Package one transaction into a group_commit_entry and submit it for
  binlog group commit.

  thd       Thread running the transaction
  trx_data  Binlog transaction data (holds the transaction cache)
  end_ev    Event that terminates the transaction in the binlog
  all       Passed through to the entry; tells whether the whole transaction
            or only the statement transaction is being ended

  The BEGIN event (and, if the transaction is flagged with an incident, the
  incident event) are constructed here, in the thread that owns the THD,
  because with group commit the actual write to the binlog may be carried
  out by a different thread. Both events live on this stack frame, so the
  entry must not be used after this function returns.

  Returns the result of write_transaction_to_binlog_events().
*/
bool
MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
                                           binlog_trx_data *trx_data,
                                           Log_event *end_ev, bool all)
{
  group_commit_entry entry;
  DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");

  Query_log_event begin_ev(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);

  entry.thd= thd;
  entry.trx_data= trx_data;
  entry.all= all;
  entry.error= 0;
  entry.begin_event= &begin_ev;
  entry.end_event= end_ev;
  entry.incident_event= NULL;

  /* Common case: no incident to report, submit the entry as it is. */
  if (!trx_data->has_incident())
    DBUG_RETURN(write_transaction_to_binlog_events(&entry));

  /* The incident event is only built when it is actually needed. */
  Incident_log_event lost_events_ev(thd, INCIDENT_LOST_EVENTS,
                                    write_error_msg);
  entry.incident_event= &lost_events_ev;
  DBUG_RETURN(write_transaction_to_binlog_events(&entry));
}
/*
  Queue a transaction for binlog group commit and wait until it has been
  processed.

  The entry is pushed onto the head of group_commit_queue under
  LOCK_prepare_ordered. For transactions using XA, run_prepare_ordered() is
  called while that mutex is still held. The thread that found the queue
  empty becomes the leader and runs trx_group_commit_leader(); every other
  thread sleeps until it is woken up.

  Error reporting is done here, after wakeup, using the code left in
  entry->error (and entry->commit_errno) by whoever processed the entry.

  Returns 0 on success, 1 if an error was reported.
*/
bool
MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
{
  THD *const thd= entry->thd;

  thd->clear_wakeup_ready();

  /* Enter the queue; remember what was at the head before us. */
  pthread_mutex_lock(&LOCK_prepare_ordered);
  group_commit_entry *const prev_head= group_commit_queue;
  entry->next= prev_head;
  group_commit_queue= entry;

  if (entry->trx_data->using_xa)
  {
    DEBUG_SYNC(thd, "commit_before_prepare_ordered");
    run_prepare_ordered(thd, entry->all);
    DEBUG_SYNC(thd, "commit_after_prepare_ordered");
  }
  pthread_mutex_unlock(&LOCK_prepare_ordered);

  /*
    An empty queue before us means we are first and must do the group commit
    for everybody; otherwise just wait for the leader to signal us.
  */
  if (prev_head == NULL)
    trx_group_commit_leader(entry);
  else
    thd->wait_for_wakeup_ready();

  if (entry->error == 0)
    return 0;

  if (entry->error == ER_ERROR_ON_WRITE)
  {
    my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name,
             entry->commit_errno);
  }
  else if (entry->error == ER_ERROR_ON_READ)
  {
    my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
             entry->trx_data->trans_log.file_name, entry->commit_errno);
  }
  else
  {
    /*
      No other error codes are expected; this is only a catch-all in case
      one is added later without extending the cases above.
    */
    my_printf_error(entry->error,
                    "Error writing transaction to binary log: %d",
                    MYF(ME_NOREFRESH), entry->error);
  }

  /*
    The transaction XID will not be committed since we return an error, and
    unlog() is not called when log_xid() fails, so mark the XID as not
    needed for recovery here.
  */
  if (entry->trx_data->using_xa)
    mark_xid_done();

  return 1;
}
/*
Do binlog group commit as the lead thread.
This must be called when this thread/transaction is queued at the start of
the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
commit all the transactions in the queue (more may have entered while waiting
for LOCK_log). After commit is done, all other threads in the queue will be
signalled.
*/
void
MYSQL_BIN_LOG
::
trx_group_commit_leader
(
group_commit_entry
*
leader
)
{
DBUG_ENTER
(
"MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)"
);
DBUG_ENTER
(
"MYSQL_BIN_LOG::trx_group_commit_leader"
);
uint
xid_count
=
0
;
uint
write_count
=
0
;
/*
Lock the LOCK_log(), and once we get it, collect any additional writes
that queued up while we were waiting.
*/
VOID
(
pthread_mutex_lock
(
&
LOCK_log
));
DEBUG_SYNC
(
leader
->
thd
,
"commit_after_get_LOCK_log"
);
pthread_mutex_lock
(
&
LOCK_prepare_ordered
);
group_commit_entry
*
current
=
group_commit_queue
;
group_commit_queue
=
NULL
;
pthread_mutex_unlock
(
&
LOCK_prepare_ordered
);
/* NULL would represent nothing to replicate after ROLLBACK */
DBUG_ASSERT
(
commit_event
!=
NULL
);
/* As the queue is in reverse order of entering, reverse it. */
group_commit_entry
*
queue
=
NULL
;
while
(
current
)
{
group_commit_entry
*
next
=
current
->
next
;
current
->
next
=
queue
;
queue
=
current
;
current
=
next
;
}
DBUG_ASSERT
(
leader
==
queue
/* the leader should be first in queue */
);
/* Now we have in queue the list of transactions to be committed in order. */
DBUG_ASSERT
(
is_open
());
if
(
likely
(
is_open
()))
// Should always be true
{
/*
We only bother to write to the binary log if there is anything
to write.
*/
if
(
my_b_tell
(
cache
)
>
0
)
Commit every transaction in the queue.
Note that we are doing this in a different thread than the one running
the transaction! So we are limited in the operations we can do. In
particular, we cannot call my_error() on behalf of a transaction, as
that obtains the THD from thread local storage. Instead, we must set
current->error and let the thread do the error reporting itself once
we wake it up.
*/
for
(
current
=
queue
;
current
!=
NULL
;
current
=
current
->
next
)
{
/*
Log "BEGIN" at the beginning of every transaction. Here, a
transaction is either a BEGIN..COMMIT block or a single
statement in autocommit mode.
*/
Query_log_event
qinfo
(
thd
,
STRING_WITH_LEN
(
"BEGIN"
),
TRUE
,
TRUE
,
0
)
;
binlog_trx_data
*
trx_data
=
current
->
trx_data
;
IO_CACHE
*
cache
=
&
trx_data
->
trans_log
;
/* Skip log_xid for transactions without xid, marked by NULL end_event. */
if
(
!
current
->
end_event
)
continue
;
/*
Now this Query_log_event has artificial log_pos 0. It must be
adjusted to reflect the real position in the log. Not doing it
would confuse the slave: it would prevent this one from
knowing where he is in the master's binlog, which would result
in wrong positions being shown to the user, MASTER_POS_WAIT
undue waiting etc.
We only bother to write to the binary log if there is anything
to write.
*/
if
(
qinfo
.
write
(
&
log_file
))
goto
err
;
status_var_add
(
thd
->
status_var
.
binlog_bytes_written
,
qinfo
.
data_written
);
DBUG_EXECUTE_IF
(
"crash_before_writing_xid"
,
{
if
((
write_error
=
write_cache
(
thd
,
cache
,
FALSE
,
TRUE
)))
DBUG_PRINT
(
"info"
,
(
"error writing binlog cache: %d"
,
write_error
));
DBUG_PRINT
(
"info"
,
(
"crashing before writing xid"
));
DBUG_SUICIDE
();
});
if
((
write_error
=
write_cache
(
thd
,
cache
,
FALSE
,
FALSE
)))
goto
err
;
if
(
commit_event
)
if
(
my_b_tell
(
cache
)
>
0
)
{
if
(
commit_event
->
write
(
&
log_file
))
goto
err
;
status_var_add
(
thd
->
status_var
.
binlog_bytes_written
,
commit_event
->
data_written
);
current
->
error
=
write_transaction
(
current
);
if
(
current
->
error
)
current
->
commit_errno
=
errno
;
write_count
++
;
}
if
(
incident
&&
write_incident
(
thd
,
FALSE
))
goto
err
;
trx_data
->
commit_bin_log_file_pos
=
log_file
.
pos_in_file
+
(
log_file
.
write_pos
-
log_file
.
write_buffer
);
if
(
trx_data
->
using_xa
)
xid_count
++
;
}
if
(
write_count
>
0
)
{
if
(
flush_and_sync
())
goto
err
;
DBUG_EXECUTE_IF
(
"half_binlogged_transaction"
,
DBUG_SUICIDE
(););
if
(
cache
->
error
)
// Error on read
{
sql_print_error
(
ER
(
ER_ERROR_ON_READ
),
cache
->
file_name
,
errno
);
write_error
=
1
;
// Don't give more errors
goto
err
;
for
(
current
=
queue
;
current
!=
NULL
;
current
=
current
->
next
)
{
if
(
!
current
->
error
)
{
current
->
error
=
ER_ERROR_ON_WRITE
;
current
->
commit_errno
=
errno
;
}
}
}
else
{
signal_update
();
}
signal_update
();
}
/*
if
commit_event is
Xid_log_event, increase the number of
prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
if
any commit_events are
Xid_log_event, increase the number of
prepared_xids (it's decreas
e
d in ::unlog()). Binlog cannot be rotated
if there're prepared xids in it - see the comment in new_file() for
an explanation.
If
the commit_event is not Xid_log_event (then it's a Query_log_event)
rotate binlog,
if necessary.
If
no Xid_log_events (then it's all Query_log_event) rotate binlog,
if necessary.
*/
if
(
commit_event
&&
commit_event
->
get_type_code
()
==
XID_EVENT
)
if
(
xid_count
>
0
)
{
pthread_mutex_lock
(
&
LOCK_prep_xids
);
prepared_xids
++
;
pthread_mutex_unlock
(
&
LOCK_prep_xids
);
mark_xids_active
(
xid_count
);
}
else
rotate_and_purge
(
RP_LOCK_LOG_IS_ALREADY_LOCKED
);
}
VOID
(
pthread_mutex_unlock
(
&
LOCK_log
));
DBUG_RETURN
(
0
);
DEBUG_SYNC
(
leader
->
thd
,
"commit_before_get_LOCK_commit_ordered"
);
pthread_mutex_lock
(
&
LOCK_commit_ordered
);
/*
We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
otherwise scheduling could allow the next group commit to run ahead of us,
messing up the order of commit_ordered() calls. But as soon as
LOCK_commit_ordered is obtained, we can let the next group commit start.
*/
pthread_mutex_unlock
(
&
LOCK_log
);
DEBUG_SYNC
(
leader
->
thd
,
"commit_after_release_LOCK_log"
);
++
num_group_commits
;
err:
if
(
!
write_error
)
/*
Wakeup each participant waiting for our group commit, first calling the
commit_ordered() methods for any transactions doing 2-phase commit.
*/
current
=
queue
;
while
(
current
!=
NULL
)
{
write_error
=
1
;
sql_print_error
(
ER
(
ER_ERROR_ON_WRITE
),
name
,
errno
);
DEBUG_SYNC
(
leader
->
thd
,
"commit_loop_entry_commit_ordered"
);
++
num_commits
;
if
(
current
->
trx_data
->
using_xa
&&
!
current
->
error
)
run_commit_ordered
(
current
->
thd
,
current
->
all
);
/*
Careful not to access current->next after waking up the other thread! As
it may change immediately after wakeup.
*/
group_commit_entry
*
next
=
current
->
next
;
if
(
current
!=
leader
)
// Don't wake up ourself
current
->
thd
->
signal_wakeup_ready
();
current
=
next
;
}
VOID
(
pthread_mutex_unlock
(
&
LOCK_log
));
DBUG_RETURN
(
1
);
DEBUG_SYNC
(
leader
->
thd
,
"commit_after_group_run_commit_ordered"
);
pthread_mutex_unlock
(
&
LOCK_commit_ordered
);
DBUG_VOID_RETURN
;
}
/*
  Write a single queued transaction to the binary log file.

  The pieces are written in this order: the BEGIN event, the contents of the
  transaction cache, the end event, and finally the incident event if the
  entry carries one.

  Returns 0 on success, ER_ERROR_ON_WRITE if any write to the binlog failed,
  or ER_ERROR_ON_READ if the transaction cache reported an error.
*/
int
MYSQL_BIN_LOG::write_transaction(group_commit_entry *entry)
{
  THD *const thd= entry->thd;
  IO_CACHE *const cache= &entry->trx_data->trans_log;

  /*
    Every transaction starts with "BEGIN" in the binlog. A transaction here
    is either a BEGIN..COMMIT block or a single autocommit statement. The
    event itself was built in write_transaction_to_binlog(), by the thread
    that ran the transaction.

    That Query_log_event carries an artificial log_pos of 0, which has to be
    adjusted to the real position in the log. Otherwise the slave would not
    know where it is in the master's binlog, leading to wrong positions shown
    to the user, undue waiting in MASTER_POS_WAIT, etc.
  */
  if (entry->begin_event->write(&log_file))
    return ER_ERROR_ON_WRITE;
  status_var_add(thd->status_var.binlog_bytes_written,
                 entry->begin_event->data_written);

  /* Debug-only crash injection point, hit before the XID/end event. */
  DBUG_EXECUTE_IF("crash_before_writing_xid",
                  {
                    if ((write_cache(thd, cache)))
                    {
                      DBUG_PRINT("info", ("error writing binlog cache"));
                    }
                    else
                    {
                      flush_and_sync();
                    }
                    DBUG_PRINT("info", ("crashing before writing xid"));
                    DBUG_SUICIDE();
                  });

  /* Transaction body, copied from the per-transaction cache. */
  if (write_cache(thd, cache))
    return ER_ERROR_ON_WRITE;

  /* The event that ends the transaction. */
  if (entry->end_event->write(&log_file))
    return ER_ERROR_ON_WRITE;
  status_var_add(thd->status_var.binlog_bytes_written,
                 entry->end_event->data_written);

  /* Optional incident event. */
  if (entry->incident_event && entry->incident_event->write(&log_file))
    return ER_ERROR_ON_WRITE;

  /* A cache error at this point means reading the cache failed. */
  if (cache->error)
    return ER_ERROR_ON_READ;

  return 0;
}
/**
Wait until we get a signal that the binary log has been updated.
...
...
@@ -5395,6 +5630,172 @@ void sql_print_information(const char *format, ...)
}
static
my_bool
mutexes_inited
;
pthread_mutex_t
LOCK_prepare_ordered
;
pthread_mutex_t
LOCK_commit_ordered
;
/*
  Initialise the two global mutexes that serialise calls to the handler
  prepare_ordered() and commit_ordered() methods, and remember that this was
  done so TC_destroy() knows whether there is anything to tear down.
*/
void TC_init()
{
  my_pthread_mutex_init(&LOCK_prepare_ordered, MY_MUTEX_INIT_SLOW,
                        "LOCK_prepare_ordered", MYF(0));
  my_pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_SLOW,
                        "LOCK_commit_ordered", MYF(0));
  mutexes_inited= TRUE;
}
/*
  Destroy the mutexes created by TC_init(). Does nothing if TC_init() has
  not run (or if the mutexes were already destroyed).
*/
void TC_destroy()
{
  if (!mutexes_inited)
    return;

  pthread_mutex_destroy(&LOCK_prepare_ordered);
  pthread_mutex_destroy(&LOCK_commit_ordered);
  mutexes_inited= FALSE;
}
/*
  Invoke prepare_ordered() in every handler participating in the transaction
  (or statement, when `all' is false) that provides that method.

  The caller is expected to hold LOCK_prepare_ordered; this is asserted
  before each handler call.
*/
void TC_LOG::run_prepare_ordered(THD *thd, bool all)
{
  Ha_trx_info *ha_info;

  if (all)
    ha_info= thd->transaction.all.ha_list;
  else
    ha_info= thd->transaction.stmt.ha_list;

  while (ha_info)
  {
    handlerton *ht= ha_info->ht();
    if (ht->prepare_ordered)
    {
      safe_mutex_assert_owner(&LOCK_prepare_ordered);
      ht->prepare_ordered(ht, thd, all);
    }
    ha_info= ha_info->next();
  }
}
/*
  Invoke commit_ordered() in every handler participating in the transaction
  (or statement, when `all' is false) that provides that method.

  The caller is expected to hold LOCK_commit_ordered; this is asserted
  before each handler call. A debug sync point is hit after each handler
  call.
*/
void TC_LOG::run_commit_ordered(THD *thd, bool all)
{
  Ha_trx_info *ha_info;

  if (all)
    ha_info= thd->transaction.all.ha_list;
  else
    ha_info= thd->transaction.stmt.ha_list;

  while (ha_info)
  {
    handlerton *ht= ha_info->ht();
    if (ht->commit_ordered)
    {
      safe_mutex_assert_owner(&LOCK_commit_ordered);
      ht->commit_ordered(ht, thd, all);
      DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
    }
    ha_info= ha_info->next();
  }
}
/*
  Log a transaction in the mmap-based transaction coordinator log and run
  the handler prepare_ordered() / commit_ordered() hooks as requested.

  When both flags are set, commit_ordered() is run in the same sequence as
  prepare_ordered() was: threads enqueue themselves under
  LOCK_prepare_ordered, log their xid in parallel, and are then passed the
  "baton" one after the other starting with the first thread that queued up.

  Returns the cookie from log_one_transaction(), or 0 if xid is 0.
*/
int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
                               bool need_prepare_ordered,
                               bool need_commit_ordered)
{
  int cookie;
  struct commit_entry entry;
  bool is_group_commit_leader;
  LINT_INIT(is_group_commit_leader);

  if (need_prepare_ordered)
  {
    pthread_mutex_lock(&LOCK_prepare_ordered);
    run_prepare_ordered(thd, all);
    if (need_commit_ordered)
    {
      /*
        Enqueue ourselves so that run_commit_ordered() can later be done in
        the same order as run_prepare_ordered() was.
      */
      thd->clear_wakeup_ready();
      entry.thd= thd;
      commit_entry *old_head= commit_ordered_queue;
      entry.next= old_head;
      commit_ordered_queue= &entry;
      /* Whoever finds the queue empty becomes the leader of this run. */
      is_group_commit_leader= (old_head == NULL);
    }
    pthread_mutex_unlock(&LOCK_prepare_ordered);
  }

  cookie= xid ? log_one_transaction(xid) : 0;

  if (!need_commit_ordered)
    return cookie;

  if (need_prepare_ordered)
  {
    /*
      run_prepare_ordered() was serialised and the xid logging ran in
      parallel. Now run_commit_ordered() must again be serialised, in the
      same sequence as run_prepare_ordered(). This is done from the head of
      the queue, each thread running its commit_ordered() and then
      signalling the next one in the queue.
    */
    if (!is_group_commit_leader)
    {
      /* Not first in queue: sleep until the thread before us wakes us. */
      thd->wait_for_wakeup_ready();
    }
    else
    {
      /* First in queue: grab the queue and get things going. */
      pthread_mutex_lock(&LOCK_prepare_ordered);
      while (commit_ordered_queue_busy)
      {
        pthread_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
      }
      commit_entry *pending= commit_ordered_queue;
      commit_ordered_queue= NULL;
      /*
        Reserve the queue while it is being handed from one thread to the
        next.
      */
      commit_ordered_queue_busy= true;
      pthread_mutex_unlock(&LOCK_prepare_ordered);

      /* The list was built by prepending; reverse it to get queue order. */
      commit_entry *reversed= NULL;
      while (pending)
      {
        commit_entry *rest= pending->next;
        pending->next= reversed;
        reversed= pending;
        pending= rest;
      }
      DBUG_ASSERT(reversed == &entry && reversed->thd == thd);
    }
  }

  /* commit_ordered() is run only when logging the xid succeeded. */
  if (cookie)
  {
    pthread_mutex_lock(&LOCK_commit_ordered);
    run_commit_ordered(thd, all);
    pthread_mutex_unlock(&LOCK_commit_ordered);
  }

  if (need_prepare_ordered)
  {
    commit_entry *successor= entry.next;
    if (!successor)
    {
      /* Last in this run: release the queue for the next leader. */
      pthread_mutex_lock(&LOCK_prepare_ordered);
      commit_ordered_queue_busy= false;
      pthread_cond_signal(&COND_queue_busy);
      pthread_mutex_unlock(&LOCK_prepare_ordered);
    }
    else
    {
      /* Hand over to the next thread in the queue. */
      successor->thd->signal_wakeup_ready();
    }
  }

  return cookie;
}
/********* transaction coordinator log for 2pc - mmap() based solution *******/
/*
...
...
@@ -5531,6 +5932,7 @@ int TC_LOG_MMAP::open(const char *opt_name)
pthread_mutex_init
(
&
LOCK_pool
,
MY_MUTEX_INIT_FAST
);
pthread_cond_init
(
&
COND_active
,
0
);
pthread_cond_init
(
&
COND_pool
,
0
);
pthread_cond_init
(
&
COND_queue_busy
,
0
);
inited
=
6
;
...
...
@@ -5538,6 +5940,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
active
=
pages
;
pool
=
pages
+
1
;
pool_last
=
pages
+
npages
-
1
;
commit_ordered_queue
=
NULL
;
commit_ordered_queue_busy
=
false
;
return
0
;
...
...
@@ -5643,7 +6047,7 @@ int TC_LOG_MMAP::overflow()
to the position in memory where xid was logged to.
*/
int
TC_LOG_MMAP
::
log_
xid
(
THD
*
thd
,
my_xid
xid
)
int
TC_LOG_MMAP
::
log_
one_transaction
(
my_xid
xid
)
{
int
err
;
PAGE
*
p
;
...
...
@@ -5812,6 +6216,8 @@ void TC_LOG_MMAP::close()
pthread_mutex_destroy
(
&
LOCK_active
);
pthread_mutex_destroy
(
&
LOCK_pool
);
pthread_cond_destroy
(
&
COND_pool
);
pthread_cond_destroy
(
&
COND_active
);
pthread_cond_destroy
(
&
COND_queue_busy
);
case
5
:
data
[
0
]
=
'A'
;
// garble the first (signature) byte, in case my_delete fails
case
4
:
...
...
@@ -6020,30 +6426,66 @@ void TC_LOG_BINLOG::close()
pthread_cond_destroy
(
&
COND_prep_xids
);
}
/**
@todo
group commit
@retval
0 error
@retval
1 success
/*
Do a binlog log_xid() for a group of transactions, linked through
thd->next_commit_ordered.
*/
int
TC_LOG_BINLOG
::
log_xid
(
THD
*
thd
,
my_xid
xid
)
int
TC_LOG_BINLOG
::
log_and_order
(
THD
*
thd
,
my_xid
xid
,
bool
all
,
bool
need_prepare_ordered
__attribute__
((
unused
)),
bool
need_commit_ordered
__attribute__
((
unused
)))
{
DBUG_ENTER
(
"TC_LOG_BINLOG::log"
);
Xid_log_event
xle
(
thd
,
xid
);
binlog_trx_data
*
trx_data
=
int
err
;
DBUG_ENTER
(
"TC_LOG_BINLOG::log_and_order"
);
binlog_trx_data
*
const
trx_data
=
(
binlog_trx_data
*
)
thd_get_ha_data
(
thd
,
binlog_hton
);
/*
We always commit the entire transaction when writing an XID. Also
note that the return value is inverted.
*/
DBUG_RETURN
(
!
binlog_end_trans
(
thd
,
trx_data
,
&
xle
,
TRUE
));
trx_data
->
using_xa
=
TRUE
;
if
(
xid
)
{
Xid_log_event
xid_event
(
thd
,
xid
);
err
=
binlog_flush_trx_cache
(
thd
,
trx_data
,
&
xid_event
,
all
);
}
else
err
=
binlog_flush_trx_cache
(
thd
,
trx_data
,
NULL
,
all
);
DBUG_RETURN
(
!
err
);
}
void
TC_LOG_BINLOG
::
unlog
(
ulong
cookie
,
my_xid
xid
)
/*
After an XID is logged, we need to hold on to the current binlog file until
it is fully committed in the storage engine. The reason is that crash
recovery only looks at the latest binlog, so we must make sure there are no
outstanding prepared (but not committed) transactions before rotating the
binlog.
To handle this, we keep a count of outstanding XIDs. This function is used
to increase this count when committing one or more transactions to the
binary log.
*/
/*
  Add xid_count to the counter of outstanding prepared XIDs, under
  LOCK_prep_xids.
*/
void
TC_LOG_BINLOG::mark_xids_active(uint xid_count)
{
  DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
  DBUG_PRINT("info", ("xid_count=%u", xid_count));

  pthread_mutex_lock(&LOCK_prep_xids);
  prepared_xids= prepared_xids + xid_count;
  pthread_mutex_unlock(&LOCK_prep_xids);

  DBUG_VOID_RETURN;
}
/*
Once an XID is committed, it is safe to rotate the binary log, as it can no
longer be needed during crash recovery.
This function is called to mark an XID this way. It needs to decrease the
count of pending XIDs, and signal the log rotator thread when it reaches zero.
*/
void
TC_LOG_BINLOG
::
mark_xid_done
()
{
DBUG_ENTER
(
"TC_LOG_BINLOG::mark_xid_done"
);
pthread_mutex_lock
(
&
LOCK_prep_xids
);
DBUG_ASSERT
(
prepared_xids
>
0
);
if
(
--
prepared_xids
==
0
)
{
...
...
@@ -6051,7 +6493,16 @@ void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
pthread_cond_signal
(
&
COND_prep_xids
);
}
pthread_mutex_unlock
(
&
LOCK_prep_xids
);
rotate_and_purge
(
0
);
// as ::write() did not rotate
DBUG_VOID_RETURN
;
}
/*
  Called when a transaction is no longer needed in the log. If it had an
  XID, mark that XID as done; then attempt binlog rotation and purge, since
  ::write_transaction_to_binlog() did not rotate.
*/
void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
{
  DBUG_ENTER("TC_LOG_BINLOG::unlog");

  if (xid)
  {
    mark_xid_done();
  }
  rotate_and_purge(0);     // as ::write_transaction_to_binlog() did not rotate

  DBUG_VOID_RETURN;
}
int
TC_LOG_BINLOG
::
recover
(
IO_CACHE
*
log
,
Format_description_log_event
*
fdle
)
...
...
@@ -6120,9 +6571,105 @@ ulonglong mysql_bin_log_file_pos(void)
{
return
(
ulonglong
)
mysql_bin_log
.
get_log_file
()
->
pos_in_file
;
}
/*
Get the current position of the MySQL binlog for transaction currently being
committed.
This is valid to call from within storage engine commit_ordered() and
commit() methods only.
Since it stores the position inside THD, it is safe to call without any
locking.
Note that currently the binlog file name is not stored inside THD, but this
is still safe as it can only change when the log is rotated, and we never
rotate the binlog while commits are pending inside storage engines.
*/
/*
  Return the binlog position and file name recorded for the transaction
  that `thd' is currently committing.

  @param thd       Thread whose binlog transaction data is consulted.
  @param out_pos   Receives the commit position stored in the transaction
                   data, or 0 if the thread has no binlog transaction data.
  @param out_file  Receives the current binlog file name, or NULL if the
                   thread has no binlog transaction data.
*/
void
mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
{
  binlog_trx_data *const trx_data=
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);

  if (trx_data)
  {
    *out_pos= trx_data->commit_bin_log_file_pos;
    *out_file= mysql_bin_log.get_log_fname();
  }
  else
  {
    /* out_pos is an integer offset, not a pointer: use 0 rather than NULL. */
    *out_pos= 0;
    *out_file= NULL;
  }
}
#endif
/* INNODB_COMPATIBILITY_HOOKS */
static
ulonglong
binlog_status_var_num_commits
;
static
ulonglong
binlog_status_var_num_group_commits
;
static
SHOW_VAR
binlog_status_vars_detail
[]
=
{
{
"commits"
,
(
char
*
)
&
binlog_status_var_num_commits
,
SHOW_LONGLONG
},
{
"group_commits"
,
(
char
*
)
&
binlog_status_var_num_group_commits
,
SHOW_LONGLONG
},
{
NullS
,
NullS
,
SHOW_LONG
}
};
/*
  SHOW_FUNC callback for the "binlog" status variable group: refresh the
  static counters from the binary log object and point `var' at the array
  describing them.
*/
static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
{
  /* Copy the current counter values into the static status variables. */
  mysql_bin_log.set_status_variables();

  var->value= (char *)&binlog_status_vars_detail;
  var->type= SHOW_ARRAY;
  return 0;
}
static
SHOW_VAR
binlog_status_vars_top
[]
=
{
{
"binlog"
,
(
char
*
)
&
show_binlog_vars
,
SHOW_FUNC
},
{
NullS
,
NullS
,
SHOW_LONG
}
};
#ifndef DBUG_OFF
static
MYSQL_SYSVAR_ULONG
(
dbug_fsync_sleep
,
opt_binlog_dbug_fsync_sleep
,
PLUGIN_VAR_RQCMDARG
,
"Extra sleep (in microseconds) to add to binlog fsync(), for debugging"
,
NULL
,
NULL
,
0
,
0
,
ULONG_MAX
,
0
);
static
struct
st_mysql_sys_var
*
binlog_sys_vars
[]
=
{
MYSQL_SYSVAR
(
dbug_fsync_sleep
),
NULL
};
#endif
/*
Copy out current values of status variables, for SHOW STATUS or
information_schema.global_status.
This is called only under LOCK_status, so we can fill in a static array.
*/
void
TC_LOG_BINLOG
::
set_status_variables
()
{
ulonglong
num_commits
,
num_group_commits
;
pthread_mutex_lock
(
&
LOCK_commit_ordered
);
num_commits
=
this
->
num_commits
;
num_group_commits
=
this
->
num_group_commits
;
pthread_mutex_unlock
(
&
LOCK_commit_ordered
);
binlog_status_var_num_commits
=
num_commits
;
binlog_status_var_num_group_commits
=
num_group_commits
;
}
struct
st_mysql_storage_engine
binlog_storage_engine
=
{
MYSQL_HANDLERTON_INTERFACE_VERSION
};
...
...
@@ -6137,8 +6684,12 @@ mysql_declare_plugin(binlog)
binlog_init
,
/* Plugin Init */
NULL
,
/* Plugin Deinit */
0x0100
/* 1.0 */
,
NULL
,
/* status variables */
binlog_status_vars_top
,
/* status variables */
#ifndef DBUG_OFF
binlog_sys_vars
,
/* system variables */
#else
NULL
,
/* system variables */
#endif
NULL
/* config options */
}
mysql_declare_plugin_end
;
...
...
@@ -6153,8 +6704,12 @@ maria_declare_plugin(binlog)
binlog_init
,
/* Plugin Init */
NULL
,
/* Plugin Deinit */
0x0100
/* 1.0 */
,
NULL
,
/* status variables */
binlog_status_vars_top
,
/* status variables */
#ifndef DBUG_OFF
binlog_sys_vars
,
/* system variables */
#else
NULL
,
/* system variables */
#endif
"1.0"
,
/* string version */
MariaDB_PLUGIN_MATURITY_STABLE
/* maturity */
}
...
...
sql/log.h
View file @
34a48dd4
...
...
@@ -38,17 +38,58 @@ class TC_LOG
virtual
int
open
(
const
char
*
opt_name
)
=
0
;
virtual
void
close
()
=
0
;
virtual
int
log_xid
(
THD
*
thd
,
my_xid
xid
)
=
0
;
virtual
int
log_and_order
(
THD
*
thd
,
my_xid
xid
,
bool
all
,
bool
need_prepare_ordered
,
bool
need_commit_ordered
)
=
0
;
virtual
void
unlog
(
ulong
cookie
,
my_xid
xid
)
=
0
;
protected:
/*
These methods are meant to be invoked from log_and_order() implementations
to run any prepare_ordered() respectively commit_ordered() methods in
participating handlers.
They must be called using suitable thread synchronisation to ensure that
they are each called in the correct commit order among all
transactions. However, it is only necessary to call them if the
corresponding flag passed to log_and_order is set (it is safe, but not
required, to call them when the flag is false).
The caller must be holding LOCK_prepare_ordered respectively
LOCK_commit_ordered when calling these methods.
*/
void
run_prepare_ordered
(
THD
*
thd
,
bool
all
);
void
run_commit_ordered
(
THD
*
thd
,
bool
all
);
};
/*
Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
and TC_LOG::run_commit_ordered(), or any other code that calls handler
prepare_ordered() or commit_ordered() methods.
*/
extern
pthread_mutex_t
LOCK_prepare_ordered
;
extern
pthread_mutex_t
LOCK_commit_ordered
;
extern
void
TC_init
();
extern
void
TC_destroy
();
class
TC_LOG_DUMMY
:
public
TC_LOG
// use it to disable the logging
{
public:
TC_LOG_DUMMY
()
{}
int
open
(
const
char
*
opt_name
)
{
return
0
;
}
void
close
()
{
}
int
log_xid
(
THD
*
thd
,
my_xid
xid
)
{
return
1
;
}
/*
TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
only use internal XA during commit when >= 2 XA-capable engines
participate.
*/
int
log_and_order
(
THD
*
thd
,
my_xid
xid
,
bool
all
,
bool
need_prepare_ordered
,
bool
need_commit_ordered
)
{
DBUG_ASSERT
(
0
/* Internal error - TC_LOG_DUMMY::log_and_order() called */
);
return
1
;
}
void
unlog
(
ulong
cookie
,
my_xid
xid
)
{
}
};
...
...
@@ -74,6 +115,13 @@ class TC_LOG_MMAP: public TC_LOG
pthread_cond_t
cond
;
// to wait for a sync
}
PAGE
;
/* List of THDs for which to invoke commit_ordered(), in order. */
struct
commit_entry
{
struct
commit_entry
*
next
;
THD
*
thd
;
};
char
logname
[
FN_REFLEN
];
File
fd
;
my_off_t
file_length
;
...
...
@@ -88,16 +136,38 @@ class TC_LOG_MMAP: public TC_LOG
*/
pthread_mutex_t
LOCK_active
,
LOCK_pool
,
LOCK_sync
;
pthread_cond_t
COND_pool
,
COND_active
;
/*
Queue of threads that need to call commit_ordered().
Access to this queue must be protected by LOCK_prepare_ordered.
*/
commit_entry
*
commit_ordered_queue
;
/*
This flag and condition is used to reserve the queue while threads in it
each run the commit_ordered() methods one after the other. Only once the
last commit_ordered() in the queue is done can we start on a new queue
run.
Since we start this process in the first thread in the queue and finish in
the last (and possibly different) thread, we need a condition variable for
this (we cannot unlock a mutex in a different thread than the one who
locked it).
The condition is used together with the LOCK_prepare_ordered mutex.
*/
my_bool
commit_ordered_queue_busy
;
pthread_cond_t
COND_queue_busy
;
public:
TC_LOG_MMAP
()
:
inited
(
0
)
{}
int
open
(
const
char
*
opt_name
);
void
close
();
int
log_xid
(
THD
*
thd
,
my_xid
xid
);
int
log_and_order
(
THD
*
thd
,
my_xid
xid
,
bool
all
,
bool
need_prepare_ordered
,
bool
need_commit_ordered
);
void
unlog
(
ulong
cookie
,
my_xid
xid
);
int
recover
();
private:
int
log_one_transaction
(
my_xid
xid
);
void
get_active_from_pool
();
int
sync
();
int
overflow
();
...
...
@@ -232,9 +302,31 @@ class MYSQL_QUERY_LOG: public MYSQL_LOG
time_t
last_time
;
};
class
binlog_trx_data
;
class
MYSQL_BIN_LOG
:
public
TC_LOG
,
private
MYSQL_LOG
{
private:
struct
group_commit_entry
{
struct
group_commit_entry
*
next
;
THD
*
thd
;
binlog_trx_data
*
trx_data
;
/*
Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
written during group commit. The incident_event is only valid if
trx_data->has_incident() is true.
*/
Log_event
*
begin_event
;
Log_event
*
end_event
;
Log_event
*
incident_event
;
/* Set during group commit to record any per-thread error. */
int
error
;
int
commit_errno
;
/* This is the `all' parameter for ha_commit_ordered(). */
bool
all
;
/* True if we come in through XA log_and_order(), false otherwise. */
};
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
pthread_mutex_t
LOCK_index
;
pthread_mutex_t
LOCK_prep_xids
;
...
...
@@ -276,6 +368,12 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
In 5.0 it's 0 for relay logs too!
*/
bool
no_auto_events
;
/* Queue of transactions queued up to participate in group commit. */
group_commit_entry
*
group_commit_queue
;
/* Total number of committed transactions. */
ulonglong
num_commits
;
/* Number of group commits done. */
ulonglong
num_group_commits
;
int
write_to_file
(
IO_CACHE
*
cache
);
/*
...
...
@@ -285,6 +383,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
void
new_file_without_locking
();
void
new_file_impl
(
bool
need_lock
);
int
write_transaction
(
group_commit_entry
*
entry
);
bool
write_transaction_to_binlog_events
(
group_commit_entry
*
entry
);
void
trx_group_commit_leader
(
group_commit_entry
*
leader
);
void
mark_xid_done
();
void
mark_xids_active
(
uint
xid_count
);
public:
MYSQL_LOG
::
generate_name
;
...
...
@@ -313,7 +416,8 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
int
open
(
const
char
*
opt_name
);
void
close
();
int
log_xid
(
THD
*
thd
,
my_xid
xid
);
int
log_and_order
(
THD
*
thd
,
my_xid
xid
,
bool
all
,
bool
need_prepare_ordered
,
bool
need_commit_ordered
);
void
unlog
(
ulong
cookie
,
my_xid
xid
);
int
recover
(
IO_CACHE
*
log
,
Format_description_log_event
*
fdle
);
#if !defined(MYSQL_CLIENT)
...
...
@@ -358,10 +462,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
void
reset_gathered_updates
(
THD
*
thd
);
bool
write
(
Log_event
*
event_info
);
// binary log write
bool
write
(
THD
*
thd
,
IO_CACHE
*
cache
,
Log_event
*
commit_event
,
bool
incident
);
bool
write_incident
(
THD
*
thd
,
bool
lock
);
int
write_cache
(
THD
*
thd
,
IO_CACHE
*
cache
,
bool
lock_log
,
bool
flush_and_sync
);
bool
write_transaction_to_binlog
(
THD
*
thd
,
binlog_trx_data
*
trx_data
,
Log_event
*
end_ev
,
bool
all
);
bool
write_incident
(
THD
*
thd
);
int
write_cache
(
THD
*
thd
,
IO_CACHE
*
cache
);
void
set_write_error
(
THD
*
thd
);
bool
check_write_error
(
THD
*
thd
);
...
...
@@ -416,6 +521,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
inline
void
unlock_index
()
{
pthread_mutex_unlock
(
&
LOCK_index
);}
inline
IO_CACHE
*
get_index_file
()
{
return
&
index_file
;}
inline
uint32
get_open_count
()
{
return
open_count
;
}
void
set_status_variables
();
};
class
Log_event_handler
...
...
sql/mysqld.cc
View file @
34a48dd4
...
...
@@ -1391,6 +1391,7 @@ void clean_up(bool print_message)
ha_end
();
if
(
tc_log
)
tc_log
->
close
();
TC_destroy
();
xid_cache_free
();
wt_end
();
delete_elements
(
&
key_caches
,
(
void
(
*
)(
const
char
*
,
uchar
*
))
free_key_cache
);
...
...
@@ -4241,6 +4242,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
if
(
!
errmesg
[
0
][
0
])
unireg_abort
(
1
);
TC_init
();
/* We have to initialize the storage engines before CSV logging */
if
(
ha_init
())
{
...
...
sql/sql_class.cc
View file @
34a48dd4
...
...
@@ -761,6 +761,8 @@ THD::THD()
active_vio
=
0
;
#endif
pthread_mutex_init
(
&
LOCK_thd_data
,
MY_MUTEX_INIT_FAST
);
pthread_mutex_init
(
&
LOCK_wakeup_ready
,
MY_MUTEX_INIT_FAST
);
pthread_cond_init
(
&
COND_wakeup_ready
,
0
);
/* Variables with default values */
proc_info
=
"login"
;
...
...
@@ -1147,6 +1149,8 @@ THD::~THD()
free_root
(
&
transaction
.
mem_root
,
MYF
(
0
));
#endif
mysys_var
=
0
;
// Safety (shouldn't be needed)
pthread_cond_destroy
(
&
COND_wakeup_ready
);
pthread_mutex_destroy
(
&
LOCK_wakeup_ready
);
pthread_mutex_destroy
(
&
LOCK_thd_data
);
#ifndef DBUG_OFF
dbug_sentry
=
THD_SENTRY_GONE
;
...
...
@@ -4150,6 +4154,25 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
DBUG_RETURN
(
0
);
}
/*
  Block until the wakeup_ready flag of this THD has been set by
  signal_wakeup_ready().
*/
void
THD::wait_for_wakeup_ready()
{
  pthread_mutex_lock(&LOCK_wakeup_ready);
  /* Loop on the flag: returning from the wait alone does not imply it is set. */
  for (;;)
  {
    if (wakeup_ready)
      break;
    pthread_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready);
  }
  pthread_mutex_unlock(&LOCK_wakeup_ready);
}
/*
  Set the wakeup_ready flag of this THD and signal its condition variable,
  releasing a thread blocked in wait_for_wakeup_ready().
*/
void
THD::signal_wakeup_ready()
{
  pthread_mutex_lock(&LOCK_wakeup_ready);
  /* Flag is set and signalled while holding the mutex the waiter uses. */
  wakeup_ready= true;
  pthread_cond_signal(&COND_wakeup_ready);
  pthread_mutex_unlock(&LOCK_wakeup_ready);
}
bool
Discrete_intervals_list
::
append
(
ulonglong
start
,
ulonglong
val
,
ulonglong
incr
)
{
...
...
sql/sql_class.h
View file @
34a48dd4
...
...
@@ -2454,6 +2454,14 @@ class THD :public Statement,
return
backup
;
}
void
clear_wakeup_ready
()
{
wakeup_ready
=
false
;
}
/*
Sleep waiting for others to wake us up with signal_wakeup_ready().
Must call clear_wakeup_ready() before waiting.
*/
void
wait_for_wakeup_ready
();
/* Wake this thread up from wait_for_wakeup_ready(). */
void
signal_wakeup_ready
();
private
:
/** The current internal error handler for this thread, or NULL. */
Internal_error_handler
*
m_internal_handler
;
...
...
@@ -2492,6 +2500,16 @@ class THD :public Statement,
*/
LEX_STRING
invoker_user
;
LEX_STRING
invoker_host
;
/*
Flag, mutex and condition for a thread to wait for a signal from another
thread.
Currently used to wait for group commit to complete, can also be used for
other purposes.
*/
bool
wakeup_ready
;
pthread_mutex_t
LOCK_wakeup_ready
;
pthread_cond_t
COND_wakeup_ready
;
}
;
/** A short cut for thd->main_da.set_ok_status(). */
...
...
sql/sql_parse.cc
View file @
34a48dd4
...
...
@@ -1007,6 +1007,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ENTER
(
"dispatch_command"
);
DBUG_PRINT
(
"info"
,
(
"command: %d"
,
command
));
DBUG_EXECUTE_IF
(
"crash_dispatch_command_before"
,
{
DBUG_PRINT
(
"crash_dispatch_command_before"
,
(
"now"
));
DBUG_ABORT
();
});
thd
->
command
=
command
;
/*
Commands which always take a long time are logged into
...
...
storage/xtradb/handler/ha_innodb.cc
View file @
34a48dd4
...
...
@@ -117,8 +117,6 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static
pthread_mutex_t
innobase_share_mutex
;
/** to force correct commit order in binlog */
static
pthread_mutex_t
prepare_commit_mutex
;
static
ulong
commit_threads
=
0
;
static
pthread_mutex_t
commit_threads_m
;
static
pthread_cond_t
commit_cond
;
...
...
@@ -222,6 +220,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static
INNOBASE_SHARE
*
get_share
(
const
char
*
table_name
);
static
void
free_share
(
INNOBASE_SHARE
*
share
);
static
int
innobase_close_connection
(
handlerton
*
hton
,
THD
*
thd
);
static
void
innobase_commit_ordered
(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
static
int
innobase_commit
(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
static
int
innobase_rollback
(
handlerton
*
hton
,
THD
*
thd
,
bool
all
);
static
int
innobase_rollback_to_savepoint
(
handlerton
*
hton
,
THD
*
thd
,
...
...
@@ -1365,7 +1364,6 @@ innobase_trx_init(
trx_t
*
trx
)
/*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER
(
"innobase_trx_init"
);
DBUG_ASSERT
(
EQ_CURRENT_THD
(
thd
));
DBUG_ASSERT
(
thd
==
trx
->
mysql_thd
);
trx
->
check_foreigns
=
!
thd_test_options
(
...
...
@@ -1424,8 +1422,6 @@ check_trx_exists(
{
trx_t
*&
trx
=
thd_to_trx
(
thd
);
ut_ad
(
EQ_CURRENT_THD
(
thd
));
if
(
trx
==
NULL
)
{
trx
=
innobase_trx_allocate
(
thd
);
}
else
if
(
UNIV_UNLIKELY
(
trx
->
magic_n
!=
TRX_MAGIC_N
))
{
...
...
@@ -1717,10 +1713,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
if
(
trx
->
active_trans
==
0
)
{
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
)
{
innobase_register_trx_and_stmt
(
innodb_hton_ptr
,
thd
);
trx
->
active_trans
=
1
;
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
}
if
(
row_search_check_if_query_cache_permitted
(
trx
,
norm_name
))
{
...
...
@@ -1990,11 +1986,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
if
(
prebuilt
->
trx
->
active_trans
==
0
)
{
if
(
(
prebuilt
->
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
)
{
innobase_register_trx_and_stmt
(
ht
,
user_thd
);
prebuilt
->
trx
->
active_trans
=
1
;
prebuilt
->
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
}
/* We did the necessary inits in this function, no need to repeat them
...
...
@@ -2045,6 +2041,7 @@ innobase_init(
innobase_hton
->
savepoint_set
=
innobase_savepoint
;
innobase_hton
->
savepoint_rollback
=
innobase_rollback_to_savepoint
;
innobase_hton
->
savepoint_release
=
innobase_release_savepoint
;
innobase_hton
->
commit_ordered
=
innobase_commit_ordered
;
innobase_hton
->
commit
=
innobase_commit
;
innobase_hton
->
rollback
=
innobase_rollback
;
innobase_hton
->
prepare
=
innobase_xa_prepare
;
...
...
@@ -2547,7 +2544,6 @@ innobase_init(
innobase_open_tables
=
hash_create
(
200
);
pthread_mutex_init
(
&
innobase_share_mutex
,
MY_MUTEX_INIT_FAST
);
pthread_mutex_init
(
&
prepare_commit_mutex
,
MY_MUTEX_INIT_FAST
);
pthread_mutex_init
(
&
commit_threads_m
,
MY_MUTEX_INIT_FAST
);
pthread_mutex_init
(
&
commit_cond_m
,
MY_MUTEX_INIT_FAST
);
pthread_cond_init
(
&
commit_cond
,
NULL
);
...
...
@@ -2602,7 +2598,6 @@ innobase_end(
my_free
(
internal_innobase_data_file_path
,
MYF
(
MY_ALLOW_ZERO_PTR
));
pthread_mutex_destroy
(
&
innobase_share_mutex
);
pthread_mutex_destroy
(
&
prepare_commit_mutex
);
pthread_mutex_destroy
(
&
commit_threads_m
);
pthread_mutex_destroy
(
&
commit_cond_m
);
pthread_cond_destroy
(
&
commit_cond
);
...
...
@@ -2723,14 +2718,119 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
if
(
trx
->
active_trans
==
0
)
{
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
)
{
innobase_register_trx_and_stmt
(
hton
,
thd
);
trx
->
active_trans
=
1
;
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
}
DBUG_RETURN
(
0
);
}
/* Helper doing the actual work of the ordered part of an InnoDB commit:
throttle on innobase_commit_concurrency (when it is > 0), record the binlog
position of the commit in the transaction, and commit inside InnoDB with
the log flush deferred. */
static
void
innobase_commit_ordered_2(
/*============*/
	trx_t*	trx, 	/*!< in: Innodb transaction */
	THD*	thd)	/*!< in: MySQL thread handle */
{
	ulonglong tmp_pos;
	/* Use this function's own name so that DBUG traces can tell it
	apart from innobase_commit_ordered(). */
	DBUG_ENTER("innobase_commit_ordered_2");

retry:
	if (innobase_commit_concurrency > 0) {
		pthread_mutex_lock(&commit_cond_m);
		commit_threads++;

		if (commit_threads > innobase_commit_concurrency) {
			/* Too many concurrent committers: back out our
			slot, wait to be signalled, then try again. */
			commit_threads--;
			pthread_cond_wait(&commit_cond,
				&commit_cond_m);
			pthread_mutex_unlock(&commit_cond_m);
			goto retry;
		}
		else {
			pthread_mutex_unlock(&commit_cond_m);
		}
	}

	/* We need current binlog position for ibbackup to work.
	Note, the position is current because commit_ordered is guaranteed
	to be called in same sequence as writing to binlog. */
	mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
	trx->mysql_log_offset = (ib_int64_t) tmp_pos;

	/* Don't do write + flush right now. For group commit
	to work we want to do the flush in the innobase_commit()
	method, which runs without holding any locks. */
	trx->flush_log_later = TRUE;
	innobase_commit_low(trx);
	trx->flush_log_later = FALSE;

	if (innobase_commit_concurrency > 0) {
		/* Give up our slot and wake one waiting committer. */
		pthread_mutex_lock(&commit_cond_m);
		commit_threads--;
		pthread_cond_signal(&commit_cond);
		pthread_mutex_unlock(&commit_cond_m);
	}

	DBUG_VOID_RETURN;
}
/*****************************************************************//**
Perform the first, fast part of InnoDB commit.
Doing it in this call ensures that we get the same commit order here
as in binlog and any other participating transactional storage engines.
Note that we want to do as little as really needed here, as we run
under a global mutex. The expensive fsync() is done later, in
innobase_commit(), without a lock so group commit can take place.
Note also that this method can be called from a different thread than
the one handling the rest of the transaction. */
static
void
innobase_commit_ordered(
/*============*/
	handlerton *hton, /*!< in: Innodb handlerton */
	THD*	thd,	/*!< in: MySQL thread handle of the user for whom
			the transaction should be committed */
	bool	all)	/*!< in:	TRUE - commit transaction
				FALSE - the current SQL statement ended */
{
	DBUG_ENTER("innobase_commit_ordered");
	DBUG_ASSERT(hton == innodb_hton_ptr);

	trx_t*	trx = check_trx_exists(thd);

	/* The kernel mutex will be reserved further down; to respect the
	latching order the search system latch has to be released first. */
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (!(trx->active_trans & TRX_ACTIVE_IN_MYSQL)
	    && trx->conc_state != TRX_NOT_STARTED) {
		/* No error can be raised from here; the same condition is
		detected again in innobase_commit(), which reports it. */
		DBUG_VOID_RETURN;
	}

	/* commit_ordered is only invoked for a commit of the whole
	transaction (or of an SQL statement when autocommit is on). */
	DBUG_ASSERT(all ||
		(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));

	innobase_commit_ordered_2(trx, thd);

	/* Record in the transaction that the ordered part of the commit
	has been done. */
	trx->active_trans |= TRX_ACTIVE_COMMIT_ORDERED;

	DBUG_VOID_RETURN;
}
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
...
...
@@ -2756,11 +2856,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
if
(
trx
->
has_search_latch
)
{
if
(
trx
->
has_search_latch
&&
(
trx
->
active_trans
&
TRX_ACTIVE_COMMIT_ORDERED
)
==
0
)
{
trx_search_latch_release_if_reserved
(
trx
);
}
/* The flag
trx->active_trans is set to 1
in
/* The flag
TRX_ACTIVE_IN_MYSQL in trx->active_trans is set
in
1. ::external_lock(),
2. ::start_stmt(),
...
...
@@ -2770,79 +2871,31 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
and it is only
set to 0 in a commit or a rollback. If it is 0
we know
and it is only
cleared in a commit or a rollback. If it is unset
we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
if
(
trx
->
active_trans
==
0
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
&&
trx
->
conc_state
!=
TRX_NOT_STARTED
)
{
sql_print_error
(
"trx->active_trans == 0, but"
" trx->conc_state != TRX_NOT_STARTED"
);
}
if
(
all
||
(
!
thd_test_options
(
thd
,
OPTION_NOT_AUTOCOMMIT
|
OPTION_BEGIN
)))
{
/* We were instructed to commit the whole transaction, or
this is an SQL statement end and autocommit is on */
/* We need current binlog position for ibbackup to work.
Note, the position is current because of
prepare_commit_mutex */
retry:
if
(
innobase_commit_concurrency
>
0
)
{
pthread_mutex_lock
(
&
commit_cond_m
);
commit_threads
++
;
if
(
commit_threads
>
innobase_commit_concurrency
)
{
commit_threads
--
;
pthread_cond_wait
(
&
commit_cond
,
&
commit_cond_m
);
pthread_mutex_unlock
(
&
commit_cond_m
);
goto
retry
;
}
else
{
pthread_mutex_unlock
(
&
commit_cond_m
);
}
}
/* The following calls to read the MySQL binary log
file name and the position return consistent results:
1) Other InnoDB transactions cannot intervene between
these calls as we are holding prepare_commit_mutex.
2) Binary logging of other engines is not relevant
to InnoDB as all InnoDB requires is that committing
InnoDB transactions appear in the same order in the
MySQL binary log as they appear in InnoDB logs.
3) A MySQL log file rotation cannot happen because
MySQL protects against this by having a counter of
transactions in prepared state and it only allows
a rotation when the counter drops to zero. See
LOCK_prep_xids and COND_prep_xids in log.cc. */
trx
->
mysql_log_file_name
=
mysql_bin_log_file_name
();
trx
->
mysql_log_offset
=
(
ib_int64_t
)
mysql_bin_log_file_pos
();
/* Don't do write + flush right now. For group commit
to work we want to do the flush after releasing the
prepare_commit_mutex. */
trx
->
flush_log_later
=
TRUE
;
innobase_commit_low
(
trx
);
trx
->
flush_log_later
=
FALSE
;
if
(
innobase_commit_concurrency
>
0
)
{
pthread_mutex_lock
(
&
commit_cond_m
);
commit_threads
--
;
pthread_cond_signal
(
&
commit_cond
);
pthread_mutex_unlock
(
&
commit_cond_m
);
/* Run the fast part of commit if we did not already. */
if
((
trx
->
active_trans
&
TRX_ACTIVE_COMMIT_ORDERED
)
==
0
)
{
innobase_commit_ordered_2
(
trx
,
thd
);
}
if
(
trx
->
active_trans
==
2
)
{
pthread_mutex_unlock
(
&
prepare_commit_mutex
);
}
/* We were instructed to commit the whole transaction, or
this is an SQL statement end and autocommit is on */
/* Now do a write + flush of logs. */
/* We did the first part already in innobase_commit_ordered(),
Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql
(
trx
);
trx
->
active_trans
=
0
;
...
...
@@ -3061,7 +3114,7 @@ innobase_savepoint(
innobase_release_stat_resources
(
trx
);
/* cannot happen outside of transaction */
DBUG_ASSERT
(
trx
->
active_trans
);
DBUG_ASSERT
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
);
/* TODO: use provided savepoint data area to store savepoint data */
char
name
[
64
];
...
...
@@ -3091,7 +3144,7 @@ innobase_close_connection(
ut_a
(
trx
);
if
(
trx
->
active_trans
==
0
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
&&
trx
->
conc_state
!=
TRX_NOT_STARTED
)
{
sql_print_error
(
"trx->active_trans == 0, but"
...
...
@@ -5019,7 +5072,7 @@ ha_innobase::write_row(
/* Altering to InnoDB format */
innobase_commit
(
ht
,
user_thd
,
1
);
/* Note that this transaction is still active. */
prebuilt
->
trx
->
active_trans
=
1
;
prebuilt
->
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
/* We will need an IX lock on the destination table. */
prebuilt
->
sql_stat_start
=
TRUE
;
}
else
{
...
...
@@ -5035,7 +5088,7 @@ ha_innobase::write_row(
locks, so they have to be acquired again. */
innobase_commit
(
ht
,
user_thd
,
1
);
/* Note that this transaction is still active. */
prebuilt
->
trx
->
active_trans
=
1
;
prebuilt
->
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql
(
prebuilt
,
src_table
,
mode
);
/* We will need an IX lock on the destination table. */
...
...
@@ -8944,10 +8997,10 @@ ha_innobase::start_stmt(
trx
->
detailed_error
[
0
]
=
'\0'
;
/* Set the MySQL flag to mark that there is an active transaction */
if
(
trx
->
active_trans
==
0
)
{
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
)
{
innobase_register_trx_and_stmt
(
ht
,
thd
);
trx
->
active_trans
=
1
;
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
}
else
{
innobase_register_stmt
(
ht
,
thd
);
}
...
...
@@ -9045,10 +9098,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
if
(
trx
->
active_trans
==
0
)
{
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
)
{
innobase_register_trx_and_stmt
(
ht
,
thd
);
trx
->
active_trans
=
1
;
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
}
else
if
(
trx
->
n_mysql_tables_in_use
==
0
)
{
innobase_register_stmt
(
ht
,
thd
);
}
...
...
@@ -9146,7 +9199,7 @@ ha_innobase::external_lock(
prebuilt
->
used_in_HANDLER
=
FALSE
;
if
(
!
thd_test_options
(
thd
,
OPTION_NOT_AUTOCOMMIT
|
OPTION_BEGIN
))
{
if
(
trx
->
active_trans
!=
0
)
{
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
!=
0
)
{
innobase_commit
(
ht
,
thd
,
TRUE
);
}
}
else
{
...
...
@@ -9231,10 +9284,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
if
(
trx
->
active_trans
==
0
)
{
if
(
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
)
{
innobase_register_trx_and_stmt
(
ht
,
thd
);
trx
->
active_trans
=
1
;
trx
->
active_trans
|=
TRX_ACTIVE_IN_MYSQL
;
}
if
(
THDVAR
(
thd
,
table_locks
)
&&
thd_in_lock_tables
(
thd
))
{
...
...
@@ -10288,7 +10341,8 @@ innobase_xa_prepare(
innobase_release_stat_resources
(
trx
);
if
(
trx
->
active_trans
==
0
&&
trx
->
conc_state
!=
TRX_NOT_STARTED
)
{
if
((
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
)
==
0
&&
trx
->
conc_state
!=
TRX_NOT_STARTED
)
{
sql_print_error
(
"trx->active_trans == 0, but trx->conc_state != "
"TRX_NOT_STARTED"
);
...
...
@@ -10300,7 +10354,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
ut_ad
(
trx
->
active_trans
);
ut_ad
(
trx
->
active_trans
&
TRX_ACTIVE_IN_MYSQL
);
error
=
(
int
)
trx_prepare_for_mysql
(
trx
);
}
else
{
...
...
@@ -10324,36 +10378,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread
();
if
(
thd_sql_command
(
thd
)
!=
SQLCOM_XA_PREPARE
&&
(
all
||
!
thd_test_options
(
thd
,
OPTION_NOT_AUTOCOMMIT
|
OPTION_BEGIN
)))
{
if
(
srv_enable_unsafe_group_commit
&&
!
THDVAR
(
thd
,
support_xa
))
{
/* choose group commit rather than binlog order */
return
(
error
);
}
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time. */
pthread_mutex_lock
(
&
prepare_commit_mutex
);
trx
->
active_trans
=
2
;
}
return
(
error
);
}
...
...
@@ -11638,11 +11662,6 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
"Enable/Disable flushing along modified age. (none, reflex, [estimate])"
,
NULL
,
innodb_adaptive_checkpoint_update
,
2
,
&
adaptive_checkpoint_typelib
);
/* Declares the plugin system variable enable_unsafe_group_commit, backed
by the global srv_enable_unsafe_group_commit and flagged
PLUGIN_VAR_RQCMDARG.
NOTE(review): the trailing arguments are presumably, in order, the check
callback, the update callback, the default value, the minimum, the maximum
and the block size (i.e. no callbacks, default 0, range 0..1) - confirm
against the MYSQL_SYSVAR_ULONG definition in plugin.h. */
static
MYSQL_SYSVAR_ULONG
(
enable_unsafe_group_commit
,
srv_enable_unsafe_group_commit
,
PLUGIN_VAR_RQCMDARG
,
"Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine."
,
NULL
,
NULL
,
0
,
0
,
1
,
0
);
static
MYSQL_SYSVAR_ULONG
(
expand_import
,
srv_expand_import
,
PLUGIN_VAR_RQCMDARG
,
"Enable/Disable converting automatically *.ibd files when import tablespace."
,
...
...
@@ -11746,7 +11765,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR
(
read_ahead
),
MYSQL_SYSVAR
(
adaptive_checkpoint
),
MYSQL_SYSVAR
(
flush_log_at_trx_commit_session
),
MYSQL_SYSVAR
(
enable_unsafe_group_commit
),
MYSQL_SYSVAR
(
expand_import
),
MYSQL_SYSVAR
(
extra_rsegments
),
MYSQL_SYSVAR
(
dict_size_limit
),
...
...
storage/xtradb/handler/ha_innodb.h
View file @
34a48dd4
...
...
@@ -241,16 +241,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char
**
thd_query
(
MYSQL_THD
thd
);
#endif
/** Get the file name of the MySQL binlog.
* @return the name of the binlog file
*/
const
char
*
mysql_bin_log_file_name
(
void
);
/** Get the current position of the MySQL binlog.
* @return byte offset from the beginning of the binlog
*/
ulonglong
mysql_bin_log_file_pos
(
void
);
/**
Check if a user thread is a replication slave thread
@param thd user thread
...
...
@@ -291,6 +281,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif
/* MYSQL_VERSION_ID > 50140 */
}
/** Get the file name and position of the MySQL binlog corresponding to the
* current commit.
*/
extern
void
mysql_bin_log_commit_pos
(
THD
*
thd
,
ulonglong
*
out_pos
,
const
char
**
out_file
);
typedef
struct
trx_struct
trx_t
;
/********************************************************************//**
@file handler/ha_innodb.h
...
...
storage/xtradb/include/trx0trx.h
View file @
34a48dd4
...
...
@@ -511,9 +511,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint
duplicates
;
/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
ulint
active_trans
;
/*!< 1 - if a transaction in MySQL
is active. 2 - if prepare_commit_mutex
was taken */
ulint
active_trans
;
/*!< TRX_ACTIVE_IN_MYSQL - set if a
transaction in MySQL is active.
TRX_ACTIVE_COMMIT_ORDERED - set if
innobase_commit_ordered has run */
ulint
has_search_latch
;
/* TRUE if this trx has latched the
search system latch in S-mode */
...
...
@@ -824,6 +825,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1
/* sent by another session (which
must hold rights to this) */
/* Flag bits for trx_struct.active_trans */
#define TRX_ACTIVE_IN_MYSQL (1<<0)
#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
/** Commit node states */
enum
commit_node_state
{
COMMIT_NODE_SEND
=
1
,
/*!< about to send a commit signal to
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment