Bug#16228/Bug#20697 - related.

Bug#23831  deadlock not noticed

RBR bug: when a replicated msta (multi-statement transaction) deadlocks with a
local transaction at a write-row event, or times out, the event handler did not
return the correct error code.
The wrong error code stops the slave SQL thread instead of letting it proceed
with rollback and replay.

The correct code is now printed to the error log and stored for the error-handling
routine to conduct rollback and replay of the transaction. The handling for RBR
remains the same as for SBR events.
In particular, a timed-out transaction is still rolled back - see the related bugs.
parent 3a3d673d
......@@ -31,7 +31,9 @@ stop slave;
connection master;
begin;
# Let's keep BEGIN and the locked statement in two different relay logs.
let $1=200;
insert into t2 values (0); # t2,t1 actors of deadlock in repl-ed ta
#insert into t3 select * from t2 for update;
let $1=10;
disable_query_log;
while ($1)
{
......@@ -39,16 +41,14 @@ while ($1)
dec $1;
}
enable_query_log;
insert into t3 select * from t2 for update;
insert into t1 values(1);
commit;
save_master_pos;
connection slave;
begin;
# Let's make our transaction large so that it's slave who is chosen as
# victim
let $1=1000;
# Let's make our transaction large so that it's repl-ed msta that's victim
let $1=100;
disable_query_log;
while ($1)
{
......@@ -56,14 +56,21 @@ while ($1)
dec $1;
}
enable_query_log;
select * from t1 for update;
select * from t1 for update; # t1,t2 on local slave's
start slave;
# bad option, todo: replicate a non-transactional t_sync with the transaction
# and use wait_until_rows_count macro below
--real_sleep 3 # hope that slave is blocked now
insert into t2 values(201); # provoke deadlock, slave should be victim
#let $count=11;
#let $table=t_sync;
#--include wait_until_rows_count.inc
select * from t2 for update /* dl */; # provoke deadlock, repl-ed should be victim
commit;
sync_with_master;
select * from t1; # check that slave succeeded finally
select * from t2;
select * from t1; # check that repl-ed succeeded finally
select * from t2 /* must be 1 */;
# check that no error is reported
--replace_column 1 # 7 # 8 # 9 # 16 # 22 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
......@@ -79,11 +86,11 @@ change master to master_log_pos=544; # the BEGIN log event
begin;
select * from t2 for update; # hold lock
start slave;
--real_sleep 10 # slave should have blocked, and be retrying
--real_sleep 10 # repl-ed should have blocked, and be retrying
select count(*) from t3 /* must be zero */; # replaying begins after rollback
commit;
sync_with_master;
select * from t1; # check that slave succeeded finally
select * from t1; # check that repl-ed succeeded finally
select * from t2;
# check that no error is reported
--replace_column 1 # 7 # 8 # 9 # 11 # 16 # 22 # 23 # 33 #
......
......@@ -28,21 +28,22 @@ Variable_name Value
slave_transaction_retries 2
stop slave;
begin;
insert into t3 select * from t2 for update;
insert into t2 values (0);
insert into t1 values(1);
commit;
begin;
select * from t1 for update;
a
start slave;
insert into t2 values(201);
select * from t2 for update /* dl */;
a
commit;
select * from t1;
a
1
select * from t2;
select * from t2 /* must be 1 */;
a
201
0
show slave status;
Slave_IO_State #
Master_Host 127.0.0.1
......@@ -83,7 +84,7 @@ change master to master_log_pos=544;
begin;
select * from t2 for update;
a
201
0
start slave;
select count(*) from t3 /* must be zero */;
count(*)
......@@ -95,7 +96,8 @@ a
1
select * from t2;
a
201
0
0
show slave status;
Slave_IO_State #
Master_Host 127.0.0.1
......@@ -137,7 +139,8 @@ change master to master_log_pos=544;
begin;
select * from t2 for update;
a
201
0
0
start slave;
select count(*) from t3 /* must be zero */;
count(*)
......@@ -150,7 +153,9 @@ a
1
select * from t2;
a
201
0
0
0
show slave status;
Slave_IO_State #
Master_Host 127.0.0.1
......
......@@ -5803,9 +5803,10 @@ int Rows_log_event::exec_event(st_relay_log_info *rli)
STMT_END_F.
For now we code, knowing that error is not skippable and so slave SQL
thread is certainly going to stop.
rollback at the caller along with sbr.
*/
thd->reset_current_stmt_binlog_row_based();
rli->cleanup_context(thd, 1);
rli->cleanup_context(thd, 0); /* rollback at caller in step with sbr */
thd->query_error= 1;
DBUG_RETURN(error);
}
......@@ -6595,6 +6596,11 @@ replace_record(THD *thd, TABLE *table,
while ((error= table->file->ha_write_row(table->record[0])))
{
if (error == HA_ERR_LOCK_DEADLOCK || error == HA_ERR_LOCK_WAIT_TIMEOUT)
{
table->file->print_error(error, MYF(0)); /* to check at exec_relay_log_event */
DBUG_RETURN(error);
}
if ((keynum= table->file->get_dup_key(error)) < 0)
{
/* We failed to retrieve the duplicate key */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment