Commit dafc5fb9 authored by Hugo Wen, committed by Otto Kekäläinen

MDEV-27342: Fix issue of recovery failure using new server id

Commit 6c39eaeb made crash recovery dependent on server_id.
Crash recovery could fail when restoring a new instance from the
original crashed data directory USING A NEW SERVER ID.

The issue doesn't exist in major versions before 10.6.

The root cause is that, when generating the input XID to be searched in the
hash, the server id is populated with the current server id.
So if the server id changed before recovery, the XID cannot be found
in the hash because the server id doesn't match.

The fix is to use the original server id when creating the input XID
object in function `xarecover_do_commit_or_rollback`.

All new code of the whole pull request, including one or several files
that are either new files or modified ones, are contributed under the
BSD-new license. I am contributing on behalf of my employer Amazon Web
Services, Inc.
parent 572e3430
========= Set server_id to 99 and prepare test table.
SET GLOBAL server_id= 99;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
========= Crash the server.
SET SESSION debug_dbug="+d,crash_commit_after_log";
INSERT INTO t1 VALUES (1, NULL);
Got one of the listed errors
========= Restart the server with default config file in which server_id= 1.
========= Check that recover succeeds and server is up.
connection default;
========= Check that all transactions are recovered.
SELECT a FROM t1 ORDER BY a;
a
1
========= Cleanup.
connection default;
DROP TABLE t1;
# This test verifies that XA recovery succeeds when the server is restarted
# with a new server id that differs from the transaction's original server_id.
#
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_binlog_format_row.inc
# Valgrind does not work well with tests that crash the server
--source include/not_valgrind.inc
--echo ========= Set server_id to 99 and prepare test table.
SET GLOBAL server_id= 99;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
--echo ========= Crash the server.
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait-binlog_xa_recover_using_new_server_id.test
EOF
SET SESSION debug_dbug="+d,crash_commit_after_log";
--error 2006,2013
INSERT INTO t1 VALUES (1, NULL);
--echo ========= Restart the server with default config file in which server_id= 1.
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart-binlog_xa_recover_using_new_server_id.test
EOF
--echo ========= Check that recover succeeds and server is up.
connection default;
--enable_reconnect
--source include/wait_until_connected_again.inc
--echo ========= Check that all transactions are recovered.
SELECT a FROM t1 ORDER BY a;
--echo ========= Cleanup.
connection default;
DROP TABLE t1;
......@@ -2414,7 +2414,7 @@ struct xarecover_st
*/
static xid_recovery_member*
xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root,
XID *full_xid_arg)
XID *full_xid_arg, decltype(::server_id) server_id_arg)
{
xid_recovery_member *member= (xid_recovery_member *)
alloc_root(ptr_mem_root, sizeof(xid_recovery_member));
......@@ -2428,7 +2428,7 @@ xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root,
if (full_xid_arg)
*xid_full= *full_xid_arg;
*member= xid_recovery_member(xid_arg, 1, false, xid_full);
*member= xid_recovery_member(xid_arg, 1, false, xid_full, server_id_arg);
return
my_hash_insert(hash_arg, (uchar*) member) ? NULL : member;
......@@ -2443,14 +2443,15 @@ xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root,
*/
static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg,
MEM_ROOT *ptr_mem_root,
XID *full_xid_arg)
XID *full_xid_arg,
decltype(::server_id) server_id_arg)
{
xid_recovery_member* member;
if ((member= (xid_recovery_member *)
my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg))))
member->in_engine_prepare++;
else
member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root, full_xid_arg);
member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root, full_xid_arg, server_id_arg);
return member == NULL;
}
......@@ -2502,7 +2503,8 @@ static void xarecover_do_commit_or_rollback(handlerton *hton,
Binlog_offset *ptr_commit_max= arg->binlog_coord;
if (!member->full_xid)
x.set(member->xid);
// Populate xid using the server_id from the original transaction
x.set(member->xid, member->server_id);
else
x= *member->full_xid;
......@@ -2658,9 +2660,12 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
*/
if (info->mem_root)
{
// remember "full" xid too when it's not in mysql format
// remember "full" xid too when it's not in mysql format.
// Also record the transaction's original server_id. It will be used for
// populating the input XID to be searched in hash.
if (xid_member_replace(info->commit_list, x, info->mem_root,
is_server_xid? NULL : &info->list[i]))
is_server_xid? NULL : &info->list[i],
is_server_xid? info->list[i].get_trx_server_id() : server_id))
{
info->error= true;
sql_print_error("Error in memory allocation at xarecover_handlerton");
......
......@@ -900,12 +900,13 @@ struct xid_t {
if ((bqual_length= bl))
memcpy(data+gl, b, bl);
}
void set(ulonglong xid)
// Populate server_id if it's specified, otherwise use the current server_id
void set(ulonglong xid, decltype(::server_id) trx_server_id= server_id)
{
my_xid tmp;
formatID= 1;
set(MYSQL_XID_PREFIX_LEN, 0, MYSQL_XID_PREFIX);
memcpy(data+MYSQL_XID_PREFIX_LEN, &server_id, sizeof(server_id));
memcpy(data+MYSQL_XID_PREFIX_LEN, &trx_server_id, sizeof(trx_server_id));
tmp= xid;
memcpy(data+MYSQL_XID_OFFSET, &tmp, sizeof(tmp));
gtrid_length=MYSQL_XID_GTRID_LEN;
......@@ -931,6 +932,12 @@ struct xid_t {
!memcmp(data, MYSQL_XID_PREFIX, MYSQL_XID_PREFIX_LEN) ?
quick_get_my_xid() : 0;
}
/*
  Extract the server id embedded in this XID.

  Copies sizeof(::server_id) bytes starting at data + MYSQL_XID_PREFIX_LEN
  into a local of the same type as the global ::server_id and returns it.
  No check is made here that the XID has the server-generated layout;
  NOTE(review): callers presumably only use this for server-format XIDs,
  confirm at each call site.
*/
decltype(::server_id) get_trx_server_id()
{
  decltype(::server_id) origin_server_id;
  // memcpy rather than a pointer cast: the source is a byte buffer
  memcpy(&origin_server_id, data + MYSQL_XID_PREFIX_LEN,
         sizeof(origin_server_id));
  return origin_server_id;
}
uint length()
{
return static_cast<uint>(sizeof(formatID)) + key_length();
......@@ -972,11 +979,12 @@ struct xid_recovery_member
bool decided_to_commit;
Binlog_offset binlog_coord; // semisync recovery binlog offset
XID *full_xid; // needed by wsrep or past it recovery
decltype(::server_id) server_id; // server id of original server
xid_recovery_member(my_xid xid_arg, uint prepare_arg, bool decided_arg,
XID *full_xid_arg)
XID *full_xid_arg, decltype(::server_id) server_id_arg)
: xid(xid_arg), in_engine_prepare(prepare_arg),
decided_to_commit(decided_arg), full_xid(full_xid_arg) {};
decided_to_commit(decided_arg), full_xid(full_xid_arg) , server_id(server_id_arg) {};
};
/* for recover() handlerton call */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment