Commit 69517b22 authored by guilhem@mysql.com's avatar guilhem@mysql.com

Fix for BUG#2983 "If statement was killed on master, slave errors despite replicate-wild-ignore-t"

We introduce a new function mysql_test_parse_for_slave().
If the slave sees that the query got a really bad error on master
(killed e.g.), then it calls this function to know if this query
can be ignored because of replicate-*-table rules (do not worry
about replicate-*-db rules: they are checked so early that they are
not affected by this bug). If the answer is yes, it skips the query and
continues. If the answer is no, then it stops and says "fix your slave
data manually" (like it did before this change).
parent b36b2567
......@@ -13,3 +13,28 @@ Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Lo
show tables like 't1';
Tables_in_test (t1)
drop table t1;
select get_lock('crash_lock%20C', 10);
get_lock('crash_lock%20C', 10)
1
create table t2 (a int primary key);
insert into t2 values(1);
create table t3 (id int);
insert into t3 values(connection_id());
update t2 set a = a + 1 + get_lock('crash_lock%20C', 10);
select (@id := id) - id from t3;
(@id := id) - id
0
kill @id;
drop table t2,t3;
Server shutdown in progress
show binlog events from 79;
Log_name Pos Event_type Server_id Orig_log_pos Info
master-bin.001 79 Query 1 79 use `test`; create table t1 (a int primary key)
master-bin.001 149 Query 1 149 use `test`; insert into t1 values (1),(1)
master-bin.001 213 Query 1 213 use `test`; drop table t1
master-bin.001 261 Query 1 261 use `test`; create table t2 (a int primary key)
master-bin.001 331 Query 1 331 use `test`; insert into t2 values(1)
master-bin.001 390 Query 1 390 use `test`; create table t3 (id int)
master-bin.001 449 Query 1 449 use `test`; insert into t3 values(connection_id())
master-bin.001 522 Query 1 522 use `test`; update t2 set a = a + 1 + get_lock('crash_lock%20C', 10)
master-bin.001 613 Query 1 613 use `test`; drop table t2,t3
--replicate-ignore-table=test.t1
--replicate-ignore-table=test.t1 --replicate-ignore-table=test.t2 --replicate-ignore-table=test.t3
......@@ -23,3 +23,33 @@ drop table t1;
save_master_pos;
connection slave;
sync_with_master;
# Now test that even critical errors (connection killed)
# are ignored if rules allow it.
# The "kill" idea was copied from rpl000001.test.
connection master1;
select get_lock('crash_lock%20C', 10);
connection master;
create table t2 (a int primary key);
insert into t2 values(1);
create table t3 (id int);
insert into t3 values(connection_id());
send update t2 set a = a + 1 + get_lock('crash_lock%20C', 10);
connection master1;
sleep 2;
select (@id := id) - id from t3;
kill @id;
drop table t2,t3;
connection master;
--error 1053;
reap;
connection master1;
show binlog events from 79;
save_master_pos;
connection slave;
# SQL slave thread should not have stopped (because table of the killed
# query is in the ignore list).
sync_with_master;
......@@ -53,6 +53,14 @@ static void pretty_print_str(FILE* file, char* str, int len)
#ifndef MYSQL_CLIENT
/*
  Reset the error state kept in both 'thd' and 'rli': the last slave error
  number and message stored in rli, and the query error flag / error status
  of thd.
*/
static void clear_all_errors(THD *thd, struct st_relay_log_info *rli)
{
  /* Relay log info: forget the last recorded error (number, then text) */
  rli->last_slave_errno= 0;
  rli->last_slave_error[0]= 0;

  /* Thread descriptor: drop the query error flag and clear its error */
  thd->query_error= 0;
  thd->clear_error();
}
inline int ignored_error_code(int err_code)
{
return ((err_code == ER_SLAVE_IGNORED_TABLE) ||
......@@ -1803,8 +1811,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
#else
rli->future_group_master_log_pos= log_pos;
#endif
thd->query_error= 0; // clear error
thd->clear_error();
clear_all_errors(thd, rli);
if (db_ok(thd->db, replicate_do_db, replicate_ignore_db))
{
......@@ -1817,84 +1824,93 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
VOID(pthread_mutex_unlock(&LOCK_thread_count));
thd->slave_proxy_id = thread_id; // for temp tables
/*
Sanity check to make sure the master did not get a really bad
error on the query.
*/
if (ignored_error_code((expected_error = error_code)) ||
!check_expected_error(thd,rli,expected_error))
{
mysql_log.write(thd,COM_QUERY,"%s",thd->query);
DBUG_PRINT("query",("%s",thd->query));
mysql_log.write(thd,COM_QUERY,"%s",thd->query);
DBUG_PRINT("query",("%s",thd->query));
if (ignored_error_code(expected_error = error_code) ||
!check_expected_error(thd,rli,expected_error))
mysql_parse(thd, thd->query, q_len);
/*
Set a flag if we are inside an transaction so that we can restart
the transaction from the start if we are killed
This will only be done if we are supporting transactional tables
in the slave.
*/
if (!strcmp(thd->query,"BEGIN"))
rli->inside_transaction= opt_using_transactions;
else if (!(strcmp(thd->query,"COMMIT") && strcmp(thd->query,"ROLLBACK")))
rli->inside_transaction=0;
else
{
/*
If we expected a non-zero error code, and we don't get the same error
code, and none of them should be ignored.
The query got a really bad error on the master (thread killed etc),
which could be inconsistent. Parse it to test the table names: if the
replicate-*-do|ignore-table rules say "this query must be ignored" then
we exit gracefully; otherwise we warn about the bad error and tell DBA
to check/fix it.
*/
if ((expected_error != (actual_error = thd->net.last_errno)) &&
expected_error &&
!ignored_error_code(actual_error) &&
!ignored_error_code(expected_error))
if (mysql_test_parse_for_slave(thd, thd->query, q_len))
/* Can ignore query */
clear_all_errors(thd, rli);
else
{
slave_print_error(rli, 0,
"\
slave_print_error(rli,expected_error,
"query '%s' partially completed on the master \
(error on master: %d) \
and was aborted. There is a chance that your master is inconsistent at this \
point. If you are sure that your master is ok, run this query manually on the\
slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\
START SLAVE; .", thd->query, expected_error);
thd->query_error= 1;
}
goto end;
}
/*
Set a flag if we are inside an transaction so that we can restart
the transaction from the start if we are killed
This will only be done if we are supporting transactional tables
in the slave.
*/
if (!strcmp(thd->query,"BEGIN"))
rli->inside_transaction= opt_using_transactions;
else if (!(strcmp(thd->query,"COMMIT") && strcmp(thd->query,"ROLLBACK")))
rli->inside_transaction=0;
/*
If we expected a non-zero error code, and we don't get the same error
code, and none of them should be ignored.
*/
if ((expected_error != (actual_error = thd->net.last_errno)) &&
expected_error &&
!ignored_error_code(actual_error) &&
!ignored_error_code(expected_error))
{
slave_print_error(rli, 0,
"\
Query '%s' caused different errors on master and slave. \
Error on master: '%s' (%d), Error on slave: '%s' (%d). \
Default database: '%s'",
query,
ER_SAFE(expected_error),
expected_error,
actual_error ? thd->net.last_error: "no error",
actual_error,
print_slave_db_safe(db));
thd->query_error= 1;
}
/*
If we get the same error code as expected, or they should be ignored.
*/
else if (expected_error == actual_error ||
ignored_error_code(actual_error))
{
thd->query_error = 0;
thd->clear_error();
*rli->last_slave_error = 0;
rli->last_slave_errno = 0;
}
/*
Other cases: mostly we expected no error and get one.
*/
else if (thd->query_error || thd->fatal_error)
{
slave_print_error(rli,actual_error,
"Error '%s' on query '%s'. Default database: '%s'",
(actual_error ? thd->net.last_error :
"unexpected success or fatal error"),
query,
print_slave_db_safe(db));
thd->query_error= 1;
}
}
/*
End of sanity check. If the test was wrong, the query got a really bad
error on the master, which could be inconsistent, abort and tell DBA to
check/fix it. check_expected_error() already printed the message to
stderr and rli, and set thd->query_error to 1.
query,
ER_SAFE(expected_error),
expected_error,
actual_error ? thd->net.last_error: "no error",
actual_error,
print_slave_db_safe(db));
thd->query_error= 1;
}
/*
If we get the same error code as expected, or they should be ignored.
*/
else if (expected_error == actual_error ||
ignored_error_code(actual_error))
clear_all_errors(thd, rli);
/*
Other cases: mostly we expected no error and get one.
*/
else if (thd->query_error || thd->fatal_error)
{
slave_print_error(rli,actual_error,
"Error '%s' on query '%s'. Default database: '%s'",
(actual_error ? thd->net.last_error :
"unexpected success or fatal error"),
query,
print_slave_db_safe(db));
thd->query_error= 1;
}
} /* End of if (db_ok(... */
end:
VOID(pthread_mutex_lock(&LOCK_thread_count));
thd->db= 0; // prevent db from being freed
thd->query= 0; // just to be sure
......@@ -1939,8 +1955,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
thd->db= (char*) rewrite_db(db);
DBUG_ASSERT(thd->query == 0);
thd->query = 0; // Should not be needed
thd->query_error = 0;
thd->clear_error();
clear_all_errors(thd, rli);
if (!use_rli_only_for_errors)
{
......
......@@ -353,6 +353,7 @@ int quick_rm_table(enum db_type base,const char *db,
bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list);
bool mysql_change_db(THD *thd,const char *name);
void mysql_parse(THD *thd,char *inBuf,uint length);
bool mysql_test_parse_for_slave(THD *thd,char *inBuf,uint length);
void mysql_init_select(LEX *lex);
bool mysql_new_select(LEX *lex);
void mysql_init_multi_delete(LEX *lex);
......
......@@ -2250,13 +2250,6 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
case ER_NET_ERROR_ON_WRITE:
case ER_SERVER_SHUTDOWN:
case ER_NEW_ABORTING_CONNECTION:
slave_print_error(rli,expected_error,
"query '%s' partially completed on the master \
and was aborted. There is a chance that your master is inconsistent at this \
point. If you are sure that your master is ok, run this query manually on the\
slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\
SLAVE START; .", thd->query);
thd->query_error= 1;
return 1;
default:
return 0;
......
......@@ -65,6 +65,7 @@ static bool create_total_list(THD *thd, LEX *lex,
TABLE_LIST **result, bool skip_first);
static bool check_one_table_access(THD *thd, ulong want_access,
TABLE_LIST *table, bool no_errors);
static inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables);
const char *any_db="*any*"; // Special symbol for check_access
......@@ -1332,9 +1333,7 @@ mysql_execute_command(void)
Skip if we are in the slave thread, some table rules have been
given and the table list says the query should not be replicated
*/
if (table_rules_on && tables && !tables_ok(thd,tables) &&
((lex->sql_command != SQLCOM_DELETE_MULTI) ||
!tables_ok(thd,(TABLE_LIST *)thd->lex.auxilliary_table_list.first)))
if (all_tables_not_ok(thd,tables))
{
/* we warn the slave SQL thread */
my_error(ER_SLAVE_IGNORED_TABLE, MYF(0));
......@@ -2968,9 +2967,18 @@ void mysql_init_multi_delete(LEX *lex)
lex->select->table_list.save_and_clear(&lex->auxilliary_table_list);
}
/*
  Check whether the table rules reject the table list of the current
  statement.

  RETURN VALUES
    0  table rules are off, 'tables' is empty, or tables_ok() accepted a list
    1  tables_ok() rejected 'tables' and, for a multi-table DELETE, also
       rejected the auxiliary table list
*/
static inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables)
{
  /* Nothing to filter on: no rules given, or the statement has no tables */
  if (!table_rules_on || !tables)
    return 0;

  /* The main table list passes the rules */
  if (tables_ok(thd, tables))
    return 0;

  /* Only a multi-table DELETE carries a second list that must be checked */
  if (thd->lex.sql_command != SQLCOM_DELETE_MULTI)
    return 1;

  return !tables_ok(thd, (TABLE_LIST *) thd->lex.auxilliary_table_list.first);
}
void
mysql_parse(THD *thd,char *inBuf,uint length)
/*
When you modify mysql_parse(), you may need to modify
mysql_test_parse_for_slave() in this same file.
*/
void mysql_parse(THD *thd, char *inBuf, uint length)
{
DBUG_ENTER("mysql_parse");
......@@ -3005,6 +3013,31 @@ mysql_parse(THD *thd,char *inBuf,uint length)
DBUG_VOID_RETURN;
}
/*
  Usable by the replication SQL thread only: parse a query (it is only
  parsed here, not run) to find out whether the replicate-*-table rules
  allow it to be ignored.

  RETURN VALUES
    0  cannot be ignored (this includes a parse failure or a fatal error)
    1  can be ignored
*/
bool mysql_test_parse_for_slave(THD *thd, char *inBuf, uint length)
{
  bool can_ignore= 0;
  LEX *parsed_lex;

  mysql_init_query(thd);
  parsed_lex= lex_start(thd, (uchar*) inBuf, length);

  /* The table list is only looked at after a clean parse */
  if (!yyparse() && !thd->fatal_error)
  {
    TABLE_LIST *first_table=
      (TABLE_LIST*) parsed_lex->select_lex.table_list.first;
    can_ignore= all_tables_not_ok(thd, first_table);
  }

  free_items(thd);                      /* Free strings used by items */
  lex_end(parsed_lex);

  return can_ignore;
}
/*****************************************************************************
** Store field definition for create
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment